├── scripts
    ├── keras_benchmarks
    │   ├── __init__.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── timehistory.py
    │   │   ├── lstm_benchmark.py
    │   │   ├── mnist_mlp_benchmark.py
    │   │   └── cifar10_cnn_benchmark.py
    │   ├── run_tf_backend.sh
    │   ├── run_cntk_backend.sh
    │   ├── run_theano_backend.sh
    │   ├── config.json
    │   ├── data_generator.py
    │   ├── gpu_mode.py
    │   ├── setup_cpu.sh
    │   ├── setup_gpu.sh
    │   ├── run_benchmark.py
    │   └── upload_benchmarks_bq.py
    └── tf_cnn_benchmarks
    │   ├── models
    │       ├── __init__.py
    │       ├── experimental
    │       │   ├── __init__.py
    │       │   └── official_ncf_model.py
    │       ├── lenet_model.py
    │       ├── overfeat_model.py
    │       ├── trivial_model.py
    │       ├── googlenet_model.py
    │       ├── vgg_model.py
    │       ├── resnet_model_test.py
    │       ├── alexnet_model.py
    │       ├── official_resnet_model.py
    │       ├── densenet_model.py
    │       ├── model_config.py
    │       ├── mobilenet_test.py
    │       ├── mobilenet_v2.py
    │       └── inception_model.py
    │   ├── platforms
    │       ├── __init__.py
    │       ├── default
    │       │   ├── __init__.py
    │       │   └── util.py
    │       └── util.py
    │   ├── test_data
    │       ├── __init__.py
    │       ├── images
    │       │   ├── black_image.jpg
    │       │   └── white_image.jpg
    │       ├── fake_tf_record_data
    │       │   ├── train-00000-of-00008
    │       │   ├── train-00001-of-00008
    │       │   ├── train-00002-of-00008
    │       │   ├── train-00003-of-00008
    │       │   ├── train-00004-of-00008
    │       │   ├── train-00005-of-00008
    │       │   ├── train-00006-of-00008
    │       │   ├── train-00007-of-00008
    │       │   ├── validation-00000-of-00002
    │       │   └── validation-00001-of-00002
    │       └── tfrecord_image_generator.py
    │   ├── constants.py
    │   ├── all_reduce_benchmark_test.py
    │   ├── tf_cnn_benchmarks.py
    │   ├── README.md
    │   ├── flags.py
    │   ├── run_tests.py
    │   ├── ssd_constants.py
    │   ├── cnn_util_test.py
    │   ├── benchmark_cnn_distributed_test_runner.py
    │   ├── variable_mgr_util_test.py
    │   ├── coco_metric.py
    │   ├── mlperf_test.py
    │   ├── datasets.py
    │   ├── cnn_util.py
    │   ├── mlperf.py
    │   └── all_reduce_benchmark.py
├── README.md
└── LICENSE


/scripts/keras_benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/platforms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/platforms/default/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/images/black_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/images/black_image.jpg


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/images/white_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/images/white_image.jpg


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00000-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00000-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00001-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00001-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00002-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00002-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00003-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00003-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00004-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00004-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00005-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00005-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00006-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00006-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00007-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00007-of-00008


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00000-of-00002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00000-of-00002


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00001-of-00002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00001-of-00002


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/models/timehistory.py:
--------------------------------------------------------------------------------
 1 | """ Utility class for accessing the first epoch time interval. """
 2 | import keras
 3 | import time
 4 | 
 5 | 
 6 | class TimeHistory(keras.callbacks.Callback):
 7 |   def on_train_begin(self, logs={}):
 8 |     self.times = []
 9 | 
10 |   def on_epoch_begin(self, batch, logs={}):
11 |     self.epoch_time_start = time.time()
12 | 
13 |   def on_epoch_end(self, batch, logs={}):
14 |     self.times.append(time.time() - self.epoch_time_start)
15 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/run_tf_backend.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Keras Tensorflow Backend
 4 | python -c "from keras import backend"
 5 | KERAS_BACKEND=tensorflow
 6 | sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
 7 | echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)"
 8 | 
 9 | # Use "cpu_config", "gpu_config" and "multi_gpu_config" as command line arguments to load the right
10 | # config file.
11 | python benchmarks/scripts/keras_benchmarks/run_benchmark.py  --mode="$1"
12 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow benchmarks
2 | This repository contains various TensorFlow benchmarks. Currently, it consists of two projects:
3 | 
4 | 1. [scripts/tf_cnn_benchmarks](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks): The TensorFlow CNN benchmarks contain benchmarks for several convolutional neural networks.
5 | 2. [scripts/keras_benchmarks](https://github.com/tensorflow/benchmarks/tree/master/scripts/keras_benchmarks): The Keras benchmarks contain benchmarks for several models using Keras. Note this project is deprecated and unmaintained.
6 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/run_cntk_backend.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Keras CNTK Backend
 4 | python -c "from keras import backend"
 5 | KERAS_BACKEND=cntk
 6 | sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
 7 | echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)"
 8 | 
 9 | # Use "cpu_config", "gpu_config" and "multi_gpu_config" as command line arguments to load the right
10 | # config file.
11 | if [ "$1" = "multi_gpu_config" ]; then
12 |   mpiexec -n 4 python benchmarks/scripts/keras_benchmarks/run_benchmark.py "$1"
13 | fi
14 | 
15 | python benchmarks/scripts/keras_benchmarks/run_benchmark.py "$1"
16 | 
17 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/run_theano_backend.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Keras Theano Backend
 4 | python -c "from keras import backend"
 5 | KERAS_BACKEND=cntk
 6 | sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
 7 | echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)"
 8 | 
 9 | # Use "cpu_config", "gpu_config" and "multi_gpu_config" as command line arguments to load the right
10 | # config file.
11 | if [ "$1" = "cpu_config" ]; then
12 |   python benchmarks/scripts/keras_benchmarks/run_benchmark.py "$1"
13 | else
14 |   echo "GPU mode for Theano backend is not supported currently by the keras benchmarks script."
15 | fi


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cpu_config": {
 3 |     "cpu_num_cores": 1,
 4 |     "cpu_memory": 3.75,
 5 |     "cpu_memory_info": "GB",
 6 |     "platform_type": "GCP",
 7 |     "platform_machine_type": "n1-standard-1",
 8 |     "gpus": 0,
 9 |     "gpu_platform": "None"
10 |   },
11 |   "gpu_config": {
12 |     "cpu_num_cores": 8,
13 |     "cpu_memory": 30,
14 |     "cpu_memory_info": "GB",
15 |     "platform_type": "GCP",
16 |     "platform_machine_type": "n1-standard-8",
17 |     "gpus": 1,
18 |     "gpu_platform": "NVIDIA Tesla K80"
19 |   },
20 |   "multi_gpu_config": {
21 |     "cpu_num_cores": 8,
22 |     "cpu_memory": 30,
23 |     "cpu_memory_info": "GB",
24 |     "platform_type": "GCP",
25 |     "platform_machine_type": "n1-standard-8",
26 |     "gpus": 4,
27 |     "gpu_platform": "NVIDIA Tesla K80"
28 |   }
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/platforms/util.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Utility code for a certain platform.
17 | 
18 | This file simply imports everything from the default platform. To switch to a
19 | different platform, the import statement can be changed to point to a new
20 | platform.
21 | 
22 | Creating a custom platform can be useful to, e.g., run some initialization code
23 | required by the platform or register a platform-specific model.
24 | """
25 | 
26 | from __future__ import absolute_import
27 | from __future__ import division
28 | from __future__ import print_function
29 | 
30 | from platforms.default.util import *  # pylint: disable=unused-import,wildcard-import
31 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/data_generator.py:
--------------------------------------------------------------------------------
 1 | """ Generates input and label data for training models. """
 2 | import numpy as np
 3 | 
 4 | 
 5 | def generate_img_input_data(input_shape, num_classes):
 6 |   """Generates training data and target labels.
 7 | 
 8 |   # Arguments
 9 |     input_shape: input shape in the following format
10 |                       `(num_samples, channels, x, y)`
11 |     num_classes: number of classes that we want to classify the input
12 | 
13 |   # Returns
14 |     numpy arrays: `x_train, y_train`
15 |   """
16 |   x_train = np.random.randint(0, 255, input_shape)
17 |   y_train = np.random.randint(0, num_classes, (input_shape[0],))
18 | 
19 |   return x_train, y_train
20 | 
21 | 
def generate_text_input_data(input_shape, p=0.05, return_as_bool=True):
  """Builds random 0/1 feature and label arrays for text-style models.

  Every entry is an independent Bernoulli draw with success probability `p`.
  When words are used as tokens, a set entry stands for the presence of the
  corresponding vocabulary word and an unset entry for its absence.

  # Arguments
    input_shape: input shape in the following format `(num_samples, x, y)`
    p: fraction of tokens that are present in the vocabulary
    return_as_bool: data and labels are returned as boolean arrays

  # Returns
    numpy arrays: `x_train, y_train`, with shapes `input_shape` and
    `(num_samples, y)` respectively
  """
  features = np.random.binomial(1, p, input_shape)
  label_shape = (input_shape[0], input_shape[2])
  labels = np.random.binomial(1, p, label_shape)

  if not return_as_bool:
    return features, labels

  return features.astype(bool), labels.astype(bool)
44 | 
45 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/lenet_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Lenet model configuration.
17 | 
18 | References:
19 |   LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
20 |   Gradient-based learning applied to document recognition
21 |   Proceedings of the IEEE (1998)
22 | """
23 | 
24 | from __future__ import absolute_import
25 | from __future__ import division
26 | from __future__ import print_function
27 | 
28 | from models import model
29 | 
30 | 
class Lenet5Model(model.CNNModel):
  """Lenet5 model definition, registered under the name 'lenet5'."""

  def __init__(self, params=None):
    super(Lenet5Model, self).__init__(
        'lenet5', 28, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the network layers to the `cnn` builder.

    Note: the layer sequence matches TF's MNIST tutorial model.
    """
    # Two conv + max-pool stages that differ only in the first conv argument.
    for first_conv_arg in (32, 64):
      cnn.conv(first_conv_arg, 5, 5)
      cnn.mpool(2, 2)
    # Flatten before the final affine layer.
    flat_size = 64 * 7 * 7
    cnn.reshape([-1, flat_size])
    cnn.affine(512)
45 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/constants.py:
--------------------------------------------------------------------------------
 1 | """Constants used in tf_cnn_benchmarks."""
 2 | 
 3 | from __future__ import absolute_import
 4 | from __future__ import division
 5 | from __future__ import print_function
 6 | 
 7 | from enum import Enum
 8 | 
 9 | # Results fetched with this prefix will not be reduced. Instead, they will be
10 | # passed as matrices to model's postprocess function.
11 | UNREDUCED_ACCURACY_OP_PREFIX = "tensor:"
12 | 
13 | # Eval result values with this name prefix will be included in summary.
14 | SIMPLE_VALUE_RESULT_PREFIX = "simple_value:"
15 | 
16 | 
class BenchmarkMode(object):
  """String constants naming the modes a benchmark can run in."""

  # Training only.
  TRAIN = 'training'
  # Evaluation only.
  EVAL = 'evaluation'
  # Training together with evaluation.
  TRAIN_AND_EVAL = 'training + evaluation'
  # Forward pass only.
  FORWARD_ONLY = 'forward only'
23 | 
24 | 
class NetworkTopology(str, Enum):
  """Describes how multiple GPUs are inter-connected.

  Members are also `str` instances, so each one compares equal to its
  plain string value.
  """

  # DGX-1: hybrid cube mesh topology. Device peer to peer matrix:
  # DMA: 0 1 2 3 4 5 6 7
  # 0:   Y Y Y Y Y N N N
  # 1:   Y Y Y Y N Y N N
  # 2:   Y Y Y Y N N Y N
  # 3:   Y Y Y Y N N N Y
  # 4:   Y N N N Y Y Y Y
  # 5:   N Y N N Y Y Y Y
  # 6:   N N Y N Y Y Y Y
  # 7:   N N N Y Y Y Y Y
  DGX1 = 'dgx1'

  # V100 GPUs in GCP. In this topology the bandwidth of a connection depends
  # on whether it uses an NVLink or a PCIe link. Device peer to peer matrix:
  # DMA: 0 1 2 3 4 5 6 7
  # 0:   Y Y Y Y N Y N N
  # 1:   Y Y Y Y N N N N
  # 2:   Y Y Y Y N N N Y
  # 3:   Y Y Y Y N N N N
  # 4:   N N N N Y Y Y Y
  # 5:   Y N N N Y Y Y Y
  # 6:   N N N N Y Y Y Y
  # 7:   N N Y N Y Y Y Y
  GCP_V100 = 'gcp_v100'
54 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/gpu_mode.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | """ CNTK gpu config required for running keras models in multi gpu mode."""
 3 | import cntk
 4 | 
 5 | def cntk_gpu_mode_config(model, num_samples):
 6 |     """Sets up a distributed trainer for keras models using CNTK backend
 7 |         in multi gpu mode.
 8 | 
 9 |     # Arguments
10 |         model: Keras model instance.
11 |         num_samples: Total number of input training samples that will be
12 |                             distributed across gpus for processing.
13 | 
14 |     # Returns
15 |         The start and end indices of the data that a given gpu will process.
16 | 
17 |     # Raises
18 |         ValueError: when there are no learners in the
19 |     """
20 |     model.model._make_train_function()
21 |     trainer = model.model.train_function.trainer
22 |     learner_no = len(trainer.parameter_learners)
23 |     if learner_no < 1:
24 |         raise ValueError("No learner in the trainer.")
25 |     if learner_no > 1:
26 |         warnings.warn("Unexpected multiple learners in a trainer.")
27 |     learner = trainer.parameter_learners[0]
28 |     dist_learner = cntk.train.distributed. \
29 |         data_parallel_distributed_learner(
30 |         learner, num_quantization_bits=32, distributed_after=0)
31 |     model.model.train_function.trainer = cntk.trainer.Trainer(
32 |         trainer.model, [trainer.loss_function,
33 |                         trainer.evaluation_function], [dist_learner])
34 | 
35 |     rank = cntk.Communicator.rank()
36 |     workers = cntk.Communicator.num_workers()
37 |     if workers == 1:
38 |         warnings.warn("Only one worker is found.")
39 |     total_items = num_samples
40 |     start = rank * total_items // workers
41 |     end = min((rank+1) * total_items // workers, total_items)
42 |     return start, end
43 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/overfeat_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Overfeat model configuration.
16 | 
17 | References:
18 |   OverFeat: Integrated Recognition, Localization and Detection using
19 |   Convolutional Networks
20 |   Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus,
21 |   Yann LeCun, 2014
22 |   http://arxiv.org/abs/1312.6229
23 | """
24 | 
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 | 
29 | from models import model
30 | 
31 | 
class OverfeatModel(model.CNNModel):
  """Overfeat model definition, registered under the name 'overfeat'."""

  def __init__(self, params=None):
    super(OverfeatModel, self).__init__('overfeat', 231, 32, 0.005,
                                        params=params)

  def add_inference(self, cnn):
    """Adds the network layers to the `cnn` builder."""
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(96, 11, 11, 4, 4, mode='VALID')
    cnn.mpool(2, 2)
    cnn.conv(256, 5, 5, 1, 1, mode='VALID')
    cnn.mpool(2, 2)
    # Three consecutive convs that differ only in their first argument.
    for first_conv_arg in (512, 1024, 1024):
      cnn.conv(first_conv_arg, 3, 3)
    cnn.mpool(2, 2)
    cnn.reshape([-1, 1024 * 6 * 6])
    # Two affine layers, each followed by dropout.
    for affine_size in (3072, 4096):
      cnn.affine(affine_size)
      cnn.dropout()
54 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/setup_cpu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # setup script for running benchmarks on CPU
 4 | 
 5 | sudo apt-get update
 6 | 
 7 | # Install pip package manager
 8 | echo "Installing pip"
 9 | wget https://bootstrap.pypa.io/get-pip.py
10 | sudo python get-pip.py
11 | 
12 | sudo apt-get install bzip2
13 | 
14 | # Install conda environment manager
15 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
16 | chmod 777 miniconda.sh
17 | ./miniconda.sh -b -p $HOME/miniconda
18 | export PATH=$HOME/miniconda/bin:$PATH
19 | hash -r
20 | conda config --set always_yes yes --set changeps1 no
21 | conda update -q conda
22 | # Useful for debugging any issues with conda
23 | conda info -a
24 | conda create -q -n benchmarks-environment python="2.7" numpy scipy
25 | source activate benchmarks-environment
26 | # set library path
27 | export LD_LIBRARY_PATH=$HOME/miniconda/envs/test-environmcondent/lib/:$LD_LIBRARY_PATH
28 | 
29 | # Install Pillow package
30 | conda install pil
31 | 
32 | # Install Theano
33 | echo "Installing Theano"
34 | pip install theano
35 | 
36 | # Install MKL library for Theano
37 | conda install mkl-service
38 | 
39 | # Install g++
40 | sudo apt-get install g++ -y
41 | 
42 | # Install tensorflow
43 | echo "Installing Tensorflow"
44 | pip install tensorflow
45 | 
46 | # Install CNTK
47 | echo "Installing CNTK"
48 | pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp27-cp27mu-linux_x86_64.whl
49 | 
50 | # Install OpenCV
51 | sudo apt-get install libopencv-dev python-opencv -y
52 | 
53 | # Install open mpi
54 | rm -rf ~/mpi
55 | mkdir ~/mpi
56 | pushd ~/mpi
57 | wget http://cntk.ai/PythonWheel/ForKeras/depends/openmpi_1.10-3.zip
58 | sudo apt-get install unzip -y
59 | unzip ./openmpi_1.10-3.zip
60 | sudo dpkg -i openmpi_1.10-3.deb
61 | popd
62 | 
63 | # Install Keras
64 | echo "Installing Keras"
65 | pip install keras
66 | 
67 | # Install git
68 | echo "Installing Git"
69 | sudo apt-get install git -y
70 | 
71 | # Install google-cloud tools
72 | echo "Installing Google Cloud tools"
73 | pip install google-cloud
74 | pip install google-cloud-bigquery
75 | 
76 | # Install h5py
77 | pip install h5py
78 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/all_reduce_benchmark_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for all_reduce_benchmark.py."""
16 | 
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | 
21 | import tensorflow as tf
22 | 
23 | import all_reduce_benchmark
24 | import benchmark_cnn
25 | import test_util
26 | 
27 | 
class AllReduceBenchmarkTest(tf.test.TestCase):
  """Checks that the all-reduce benchmark runs end to end."""

  def _test_run_benchmark(self, params):
    """Runs the benchmark with `params` and checks the last logged line."""
    captured_logs = []
    log_collector = test_util.print_and_add_to_list(captured_logs)
    with test_util.monkey_patch(all_reduce_benchmark, log_fn=log_collector):
      benchmark = benchmark_cnn.BenchmarkCNN(params)
      all_reduce_benchmark.run_benchmark(benchmark, num_iters=5)
      # The final log line must report the average step time.
      last_line = captured_logs[-1]
      self.assertRegexpMatches(last_line,
                               '^Average time per step: [0-9.]+$')

  def test_run_benchmark(self):
    """Runs the benchmark with 2 GPUs, then with 8 GPUs and extra options."""
    base_params = benchmark_cnn.make_params(
        num_batches=10, variable_update='replicated', num_gpus=2)
    self._test_run_benchmark(base_params)

    hierarchical_params = base_params._replace(
        hierarchical_copy=True, gradient_repacking=8, num_gpus=8)
    self._test_run_benchmark(hierarchical_params)


if __name__ == '__main__':
  tf.test.main()
52 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/models/lstm_benchmark.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Original Model from keras/examples/lstm_text_generation.py
 3 | 
 4 | Benchmark for a LSTM model.
 5 | '''
 6 | from __future__ import print_function
 7 | import keras
 8 | from keras.models import Sequential
 9 | from keras.layers import Dense
10 | from keras.layers import LSTM
11 | from keras.optimizers import RMSprop
12 | from keras.utils import multi_gpu_model
13 | 
14 | from models import timehistory
15 | from data_generator import generate_text_input_data
16 | 
17 | if keras.backend.backend() == 'cntk':
18 |   from gpu_mode import cntk_gpu_mode_config
19 | 
20 | 
class LstmBenchmark():
    """Benchmark that trains a single-LSTM-layer model on generated data.

    After `run_benchmark` returns, `total_time` holds the summed duration of
    every epoch except the first one.
    """

    def __init__(self):
        self.test_name = "lstm"
        self.sample_type = "text"
        self.total_time = 0
        self.batch_size = 128
        self.epochs = 2
        self.num_samples = 1000

    def run_benchmark(self, gpus=0):
        """Builds, compiles and fits the model, then records the timing.

        # Arguments
            gpus: number of GPUs to train on. Values greater than 1 enable
                multi-GPU training for the tensorflow and cntk backends.
        """
        input_dim_1 = 40
        input_dim_2 = 60

        # Use input_dim_2 here so the data always matches the model's shape.
        input_shape = (self.num_samples, input_dim_1, input_dim_2)
        x, y = generate_text_input_data(input_shape)

        # build the model: a single LSTM
        model = Sequential()
        model.add(LSTM(128, input_shape=(input_dim_1, input_dim_2)))
        # `activation` is an argument of the Dense layer, not of `model.add`.
        model.add(Dense(input_dim_2, activation='softmax'))

        optimizer = RMSprop(lr=0.01)

        # Backend names are compared with `==`: `is` tests object identity,
        # which is not guaranteed to hold for equal strings.
        backend_name = keras.backend.backend()

        if backend_name == "tensorflow" and gpus > 1:
            model = multi_gpu_model(model, gpus=gpus)

        model.compile(loss='categorical_crossentropy', optimizer=optimizer)

        # create a distributed trainer for cntk
        if backend_name == "cntk" and gpus > 1:
            start, end = cntk_gpu_mode_config(model, x.shape[0])
            x = x[start: end]
            y = y[start: end]

        time_callback = timehistory.TimeHistory()

        model.fit(x, y,
                  batch_size=self.batch_size,
                  epochs=self.epochs,
                  callbacks=[time_callback])

        # The first epoch (index 0) is left out of the reported total.
        self.total_time = sum(time_callback.times[1:self.epochs])
66 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/setup_gpu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # setup script for running benchmarks on GPU
 4 | 
 5 | # Install pip
 6 | wget https://bootstrap.pypa.io/get-pip.py
 7 | sudo python get-pip.py
 8 | 
 9 | # Install Nvidia drivers CUDA 8
10 | curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
11 | sudo dpkg -i ./cuda-repo-ubuntu1604_8.0.61-1_amd64.deb
12 | sudo apt-get update
13 | sudo apt-get install cuda-8-0 -y
14 | 
15 | # [Instructions from GCP docs:https://cloud.google.com/compute/docs/gpus/add-gpus#install-gpu-driver]
16 | # Use nvidia-smi to verify that the drivers have been installed
17 | 
18 | # set the CUDA paths
19 | export CUDA_HOME=/usr/local/cuda
20 | export PATH=$PATH:$CUDA_HOME/bin
21 | export LD_LIBRARY_PATH=$CUDA_HOME/lib64
22 | 
23 | #Install cudnn library
24 | # TODO(anjalisridhar): the cudann library was downloaded to the local machine. try using curl
25 | gsutil cp gs://keras-benchmarks/libcudnn6_6.0.21-1+cuda8.0_amd64.deb .
26 | sudo dpkg -i libcudnn6_6.0.21-1+cuda8.0_amd64.deb
27 | 
28 | # CUB for CNTK
29 | wget https://github.com/NVlabs/cub/archive/1.4.1.zip
30 | sudo apt-get install unzip -y
31 | unzip ./1.4.1.zip
32 | sudo cp -r cub-1.4.1 /usr/local
33 | 
34 | # CNTK requires cudnn installation to be in a specific directory
35 | wget http://developer.download.nvidia.com/compute/redist/cudnn/v6.0/cudnn-8.0-linux-x64-v6.0.tgz
36 | tar -xzvf ./cudnn-8.0-linux-x64-v6.0.tgz
37 | sudo mkdir /usr/local/cudnn-6.0
38 | sudo cp -r cuda /usr/local/cudnn-6.0
39 | export LD_LIBRARY_PATH=/usr/local/cudnn-6.0/cuda/lib64:$LD_LIBRARY_PATH
40 | 
41 | # MPI installation
42 | sudo apt-get install openmpi-bin -y
43 | 
44 | # Install CNTK GPU version
45 | pip install https://cntk.ai/PythonWheel/GPU/cntk-2.2-cp27-cp27mu-linux_x86_64.whl
46 | 
47 | # Install other pacakges required for CNTK
48 | sudo apt-get install libopencv-dev python-opencv -y
49 | 
50 | # Install keras
51 | sudo pip install keras
52 | 
53 | # Install required pacakges for TF-GPU
54 | sudo apt-get install python-dev python-pip libcupti-dev
55 | 
56 | # Install tensorflow GPU version
57 | sudo pip install tensorflow-gpu
58 | 
59 | sudo pip install git+git://github.com/fchollet/keras.git --upgrade
60 | 
61 | # Install google-cloud tools
62 | echo "Installing Google Cloud tools"
63 | sudo pip install google-cloud
64 | sudo pip install google-cloud-bigquery
65 | 
66 | # Install h5py
67 | sudo pip install h5py
68 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/trivial_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Trivial model configuration."""
16 | 
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | 
21 | import tensorflow as tf
22 | from models import model
23 | 
24 | 
class TrivialModel(model.CNNModel):
  """Trivial model: flattens the input and applies two affine layers."""

  def __init__(self, params=None):
    # The remaining positional arguments (32, 0.005) are forwarded unchanged;
    # presumably batch size and learning rate -- confirm in model.CNNModel.
    image_size = 224 + 3
    super(TrivialModel, self).__init__(
        'trivial', image_size, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the reshape and the two affine layers to `cnn`."""
    flattened_size = 227 * 227 * 3
    cnn.reshape([-1, flattened_size])
    for num_out_channels in (1, 4096):
      cnn.affine(num_out_channels)
36 | 
37 | 
class TrivialCifar10Model(model.CNNModel):
  """Trivial cifar10 model: flattens 32x32x3 inputs, then two affine layers."""

  def __init__(self, params=None):
    image_size = 32
    super(TrivialCifar10Model, self).__init__(
        'trivial', image_size, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the reshape and the two affine layers to `cnn`."""
    flattened_size = 32 * 32 * 3
    cnn.reshape([-1, flattened_size])
    for num_out_channels in (1, 4096):
      cnn.affine(num_out_channels)
49 | 
50 | 
class TrivialSSD300Model(model.CNNModel):
  """Trivial SSD300 model: flattens 300x300x3 inputs, then two affine layers."""

  def __init__(self, params=None):
    image_size = 300
    super(TrivialSSD300Model, self).__init__(
        'trivial', image_size, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the reshape and the two affine layers to `cnn`."""
    flattened_size = 300 * 300 * 3
    cnn.reshape([-1, flattened_size])
    for num_out_channels in (1, 4096):
      cnn.affine(num_out_channels)

  def get_input_shapes(self, subset):
    """Returns the four fixed input shapes; `subset` is not consulted."""
    image_shape = [32, 300, 300, 3]
    second_shape = [32, 8732, 4]
    third_shape = [32, 8732, 1]
    label_shape = [32]
    return [image_shape, second_shape, third_shape, label_shape]

  def loss_function(self, inputs, build_network_result):
    """Delegates to the parent loss using only the images and int32 labels."""
    # Of the four inputs only the first and the last are used; the two in
    # the middle are dropped.
    images = inputs[0]
    _, _, _, labels = inputs
    int_labels = tf.cast(labels, tf.int32)
    parent = super(TrivialSSD300Model, self)
    return parent.loss_function((images, int_labels), build_network_result)
71 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/models/mnist_mlp_benchmark.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Original Model from keras/examples/mnist_mlp.py
 3 | 
 4 | Benchmark a simple MLP model.
 5 | '''
 6 | 
 7 | from __future__ import print_function
 8 | 
 9 | import keras
10 | from keras.models import Sequential
11 | from keras.layers import Dense, Dropout
12 | from keras.optimizers import RMSprop
13 | from keras.utils import multi_gpu_model
14 | 
15 | from models import timehistory
16 | from data_generator import generate_img_input_data
17 | if keras.backend.backend() == 'cntk':
18 |     from gpu_mode import cntk_gpu_mode_config
19 | 
20 | 
class MnistMlpBenchmark():
    """Times the training of a simple MLP on generated MNIST-shaped data.

    After `run_benchmark` returns, `total_time` holds the summed per-epoch
    times reported by `timehistory.TimeHistory`, excluding the first epoch.
    """

    def __init__(self):
        self.test_name = "mnist_mlp"
        self.sample_type = "images"
        self.total_time = 0
        self.batch_size = 128
        self.epochs = 2
        self.num_samples = 1000

    def run_benchmark(self, gpus=0):
        """Builds, compiles and fits the model, then records the elapsed time.

        Args:
          gpus: Number of GPUs to use. Values greater than 1 enable
            `multi_gpu_model` on the tensorflow backend and the
            `cntk_gpu_mode_config` data split on the cntk backend.
        """
        num_classes = 10

        # Generate random input data
        input_shape = (self.num_samples, 28, 28)
        x_train, y_train = generate_img_input_data(input_shape)

        x_train = x_train.reshape(self.num_samples, 784)
        x_train = x_train.astype('float32')
        x_train /= 255

        # convert class vectors to binary class matrices
        y_train = keras.utils.to_categorical(y_train, num_classes)

        model = Sequential()
        model.add(Dense(512, activation='relu', input_shape=(784,)))
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(num_classes, activation='softmax'))

        # Compare backend names by value; `is` on strings tests identity and
        # is not guaranteed to match an equal string.
        backend = keras.backend.backend()

        if backend == "tensorflow" and gpus > 1:
            model = multi_gpu_model(model, gpus=gpus)

        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(),
                      metrics=['accuracy'])

        # create a distributed trainer for cntk
        if backend == "cntk" and gpus > 1:
            start, end = cntk_gpu_mode_config(model, x_train.shape[0])
            x_train = x_train[start: end]
            y_train = y_train[start: end]

        time_callback = timehistory.TimeHistory()
        model.fit(x_train, y_train, batch_size=self.batch_size,
                  epochs=self.epochs, verbose=1, callbacks=[time_callback])

        # Index 0 (the first epoch) is deliberately left out of the total.
        self.total_time = sum(
            time_callback.times[i] for i in range(1, self.epochs))
72 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/googlenet_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Googlenet model configuration.
16 | 
17 | References:
18 |   Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
19 |   Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
20 |   Going deeper with convolutions
21 |   arXiv preprint arXiv:1409.4842 (2014)
22 | """
23 | 
24 | from __future__ import absolute_import
25 | from __future__ import division
26 | from __future__ import print_function
27 | 
28 | from models import model
29 | 
30 | 
class GooglenetModel(model.CNNModel):
  """GoogLeNet."""

  def __init__(self, params=None):
    super(GooglenetModel, self).__init__(
        'googlenet', 224, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the GoogLeNet layers to `cnn` in network order."""

    def add_inception(widths):
      """Adds one four-column 'incept_v1' module with the given widths."""
      k, l, m, n, p, q = widths
      columns = [
          [('conv', k, 1, 1)],
          [('conv', l, 1, 1), ('conv', m, 3, 3)],
          [('conv', n, 1, 1), ('conv', p, 5, 5)],
          [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)],
      ]
      cnn.inception_module('incept_v1', columns)

    def add_strided_pool():
      cnn.mpool(3, 3, 2, 2, mode='SAME')

    # Stem.
    cnn.conv(64, 7, 7, 2, 2)
    add_strided_pool()
    cnn.conv(64, 1, 1)
    cnn.conv(192, 3, 3)
    add_strided_pool()

    # Inception modules grouped into stages; a strided max-pool sits between
    # consecutive stages (but not after the last one).
    stages = (
        ((64, 96, 128, 16, 32, 32),
         (128, 128, 192, 32, 96, 64)),
        ((192, 96, 208, 16, 48, 64),
         (160, 112, 224, 24, 64, 64),
         (128, 128, 256, 24, 64, 64),
         (112, 144, 288, 32, 64, 64),
         (256, 160, 320, 32, 128, 128)),
        ((256, 160, 320, 32, 128, 128),
         (384, 192, 384, 48, 128, 128)),
    )
    for stage_index, stage in enumerate(stages):
      if stage_index > 0:
        add_strided_pool()
      for widths in stage:
        add_inception(widths)

    cnn.apool(7, 7, 1, 1, mode='VALID')
    cnn.reshape([-1, 1024])
64 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/platforms/default/util.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Utility code for the default platform."""
17 | 
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | 
22 | import os
23 | import sys
24 | import tempfile
25 | 
26 | import cnn_util
27 | 
28 | 
# Directory containing the cnn_util module; the paths built by the helpers
# below are resolved relative to it.
_ROOT_PROJECT_DIR = os.path.dirname(cnn_util.__file__)
30 | 
31 | 
def define_platform_params():
  """Hook for declaring platform-specific parameters.

  The default platform has none, so this does nothing.
  """
  return None
38 | 
39 | 
def get_cluster_manager(params, config_proto):
  """Creates the cluster manager for this platform.

  Args:
    params: Passed through to `cnn_util.GrpcClusterManager`.
    config_proto: Passed through to `cnn_util.GrpcClusterManager`.

  Returns:
    A new `cnn_util.GrpcClusterManager`.
  """
  cluster_manager = cnn_util.GrpcClusterManager(params, config_proto)
  return cluster_manager
43 | 
44 | 
def get_command_to_run_python_module(module):
  """Builds the argv list that runs `module` with the current interpreter.

  Args:
    module: Module name without the '.py' suffix, located in the root
      project directory.

  Returns:
    A two-element list: the interpreter path and the module's script path.

  Raises:
    ValueError: If `sys.executable` is empty.
  """
  interpreter = sys.executable
  if interpreter:
    script_path = os.path.join(_ROOT_PROJECT_DIR, module + '.py')
    return [interpreter, script_path]
  raise ValueError('Could not find Python interpreter')
52 | 
53 | 
def get_test_output_dir():
  """Returns a directory where test outputs should be placed.

  The base directory comes from the TEST_OUTPUTS_DIR environment variable
  (default '/tmp/tf_cnn_benchmarks_test_outputs') and is created, together
  with any missing parents, if it does not exist yet. Every call returns a
  fresh temporary subdirectory of it.

  Raises:
    OSError: If the base directory cannot be created or is not a directory.
  """
  import errno  # Local import: only needed for the EEXIST check below.

  base_dir = os.environ.get('TEST_OUTPUTS_DIR',
                            '/tmp/tf_cnn_benchmarks_test_outputs')
  # Create-then-tolerate-EEXIST avoids the race of checking for existence
  # first when several test processes start together, and makedirs also
  # handles nested TEST_OUTPUTS_DIR values. (exist_ok is avoided to stay
  # compatible with Python 2.)
  try:
    os.makedirs(base_dir)
  except OSError as e:
    if e.errno != errno.EEXIST or not os.path.isdir(base_dir):
      raise
  return tempfile.mkdtemp(dir=base_dir)
61 | 
62 | 
def get_test_data_dir():
  """Returns the 'test_data' path inside the root project directory."""
  test_data_dir = os.path.join(_ROOT_PROJECT_DIR, 'test_data')
  return test_data_dir
66 | 
67 | 
def _initialize(params, config_proto):
  """Performs the platform initialization; the default platform needs none."""
  # The arguments are accepted for interface compatibility and discarded.
  del config_proto
  del params


# Set once the first call to initialize() has started.
_is_initalized = False


def initialize(params, config_proto):
  """Runs the platform initialization at most once per process.

  Args:
    params: Forwarded to `_initialize` on the first call.
    config_proto: Forwarded to `_initialize` on the first call.
  """
  global _is_initalized
  if not _is_initalized:
    # The flag is set before the work so that later calls are no-ops.
    _is_initalized = True
    _initialize(params, config_proto)
82 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Benchmark script for TensorFlow.
17 | 
18 | See the README for more information.
19 | """
20 | 
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 | 
25 | from absl import app
26 | from absl import flags as absl_flags
27 | import tensorflow as tf
28 | 
29 | import benchmark_cnn
30 | import cnn_util
31 | import flags
32 | import mlperf
33 | from cnn_util import log_fn
34 | 
35 | 
# Define all benchmark flags, then declare each one as a key flag of this
# module via absl.
flags.define_flags()
for name in flags.param_specs.keys():
  absl_flags.declare_key_flag(name)

# Opt-in switch for MLPerf compliance logging; main() passes its value to
# mlperf.mlperf_logger.
absl_flags.DEFINE_boolean(
    'ml_perf_compliance_logging', False,
    'Print logs required to be compliant with MLPerf. If set, must clone the '
    'MLPerf training repo https://github.com/mlperf/training and add '
    'https://github.com/mlperf/training/tree/master/compliance to the '
    'PYTHONPATH')
46 | 
47 | 
def main(positional_arguments):
  """Builds the benchmark from the parsed flags and runs it.

  Args:
    positional_arguments: Sequence of positional command-line arguments; the
      first element is expected to be the program name.

  Raises:
    ValueError: If any positional argument follows the program name.
  """
  # Command-line arguments like '--distortions False' are equivalent to
  # '--distortions=True False', where False is a positional argument. To prevent
  # this from silently running with distortions, we do not allow positional
  # arguments.
  assert len(positional_arguments) >= 1
  unexpected_arguments = positional_arguments[1:]
  if unexpected_arguments:
    # Wrap in a one-element tuple so that %-formatting never unpacks the
    # sequence itself (a bare tuple on the right of % would be unpacked and
    # raise TypeError when it holds more than one item).
    raise ValueError('Received unknown positional arguments: %s'
                     % (unexpected_arguments,))

  params = benchmark_cnn.make_params_from_flags()
  with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging,
                            params.model):
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))

    bench.print_info()
    bench.run()
69 | 
70 | 
# Script entry point: absl parses the command-line flags and then calls main().
if __name__ == '__main__':
  app.run(main)  # Raises error on invalid flags, unlike tf.app.run()
73 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/run_benchmark.py:
--------------------------------------------------------------------------------
 1 | """ Main entry point for running benchmarks with different Keras backends."""
 2 | 
 3 | from models import mnist_mlp_benchmark
 4 | from models import cifar10_cnn_benchmark
 5 | from models import lstm_benchmark
 6 | import upload_benchmarks_bq as bq
 7 | import argparse
 8 | import keras
 9 | import json
10 | 
11 | if keras.backend.backend() == "tensorflow":
12 |   import tensorflow as tf
13 | if keras.backend.backend() == "theano":
14 |   import theano
15 | if keras.backend.backend() == "cntk":
16 |   import cntk
17 | 
parser = argparse.ArgumentParser()
# --mode selects the section of config.json to use. Without it the lookup
# below can only fail with `KeyError: None`, so argparse is asked to report
# the missing flag with a proper usage message instead.
parser.add_argument('--mode', required=True,
                    help='The benchmark can be run on cpu, gpu and multiple gpus.')

args = parser.parse_args()

# Load the json config file for the requested mode. The `with` block closes
# the file handle as soon as its contents have been read.
with open("benchmarks/scripts/keras_benchmarks/config.json", 'r') as config_file:
    config_contents = config_file.read()
config = json.loads(config_contents)[args.mode]
28 | 
29 | 
def get_backend_version():
    """Returns the version string of the active Keras backend library.

    Returns "undefined" when the backend is not tensorflow, theano or cntk.
    """
    backend_name = keras.backend.backend()
    if backend_name == "tensorflow":
        version = tf.__version__
    elif backend_name == "theano":
        version = theano.__version__
    elif backend_name == "cntk":
        version = cntk.__version__
    else:
        version = "undefined"
    return version
38 | 
def _upload_metrics(current_model):
    """Uploads the results of one finished benchmark through the bq module.

    Args:
      current_model: Benchmark object providing `test_name`, `total_time`,
        `epochs`, `batch_size` and `sample_type`.
    """
    # Values measured by / configured on the benchmark itself.
    metrics = {
        'test_name': current_model.test_name,
        'total_time': current_model.total_time,
        'epochs': current_model.epochs,
        'batch_size': current_model.batch_size,
        'backend_type': keras.backend.backend(),
        'backend_version': get_backend_version(),
    }
    # Machine description taken from the loaded config section.
    metrics.update({
        'cpu_num_cores': config['cpu_num_cores'],
        'cpu_memory': config['cpu_memory'],
        'cpu_memory_info': config['cpu_memory_info'],
        'gpu_count': config['gpus'],
        'gpu_platform': config['gpu_platform'],
        'platform_type': config['platform_type'],
        'platform_machine_type': config['platform_machine_type'],
    })
    metrics['keras_version'] = keras.__version__
    metrics['sample_type'] = current_model.sample_type
    bq.upload_metrics_to_bq(**metrics)
55 | 
56 | 
# Run the benchmarks one after another (MNIST MLP, CIFAR10 CNN, LSTM) and
# upload the metrics of each as soon as it finishes.
for benchmark_cls in (mnist_mlp_benchmark.MnistMlpBenchmark,
                      cifar10_cnn_benchmark.Cifar10CnnBenchmark,
                      lstm_benchmark.LstmBenchmark):
    model = benchmark_cls()
    model.run_benchmark(gpus=config['gpus'])
    _upload_metrics(model)
71 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/vgg_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Vgg model configuration.
16 | 
17 | Includes multiple models: vgg11, vgg16, vgg19, corresponding to
18 |   model A, D, and E in Table 1 of [1].
19 | 
20 | References:
21 | [1]  Simonyan, Karen, Andrew Zisserman
22 |      Very Deep Convolutional Networks for Large-Scale Image Recognition
23 |      arXiv:1409.1556 (2014)
24 | """
25 | 
26 | from __future__ import absolute_import
27 | from __future__ import division
28 | from __future__ import print_function
29 | 
30 | from six.moves import xrange  # pylint: disable=redefined-builtin
31 | from models import model
32 | 
33 | 
def _construct_vgg(cnn, num_conv_layers):
  """Adds a VGG network to `cnn`.

  Args:
    cnn: Network builder the layers are added to.
    num_conv_layers: Five counts, one per block, giving how many 3x3
      convolutions the block contains.
  """
  assert len(num_conv_layers) == 5
  # Output depth of the convolutions in each of the five blocks; every block
  # is closed by a 2x2 max-pool.
  block_depths = (64, 128, 256, 512, 512)
  for block_index, depth in enumerate(block_depths):
    for _ in xrange(num_conv_layers[block_index]):
      cnn.conv(depth, 3, 3)
    cnn.mpool(2, 2)
  cnn.reshape([-1, 512 * 7 * 7])
  # Two identical affine + dropout pairs.
  for _ in xrange(2):
    cnn.affine(4096)
    cnn.dropout()
57 | 
58 | 
class Vgg11Model(model.CNNModel):
  """VGG with [1, 1, 2, 2, 2] convolutions in its five blocks."""

  def __init__(self, params=None):
    parent = super(Vgg11Model, self)
    parent.__init__('vgg11', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    convs_per_block = [1, 1, 2, 2, 2]
    _construct_vgg(cnn, convs_per_block)
66 | 
67 | 
class Vgg16Model(model.CNNModel):
  """VGG with [2, 2, 3, 3, 3] convolutions in its five blocks."""

  def __init__(self, params=None):
    parent = super(Vgg16Model, self)
    parent.__init__('vgg16', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    convs_per_block = [2, 2, 3, 3, 3]
    _construct_vgg(cnn, convs_per_block)
75 | 
76 | 
class Vgg19Model(model.CNNModel):
  """VGG with [2, 2, 4, 4, 4] convolutions in its five blocks."""

  def __init__(self, params=None):
    parent = super(Vgg19Model, self)
    parent.__init__('vgg19', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    convs_per_block = [2, 2, 4, 4, 4]
    _construct_vgg(cnn, convs_per_block)
84 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/resnet_model_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for resnet_model."""
16 | 
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | 
21 | import mock
22 | import tensorflow as tf
23 | 
24 | from models import resnet_model
25 | 
26 | 
class ResNetModelTest(tf.test.TestCase):
  """Tests for ResnetModel.get_scaled_base_learning_rate."""

  # `assertEqual` is used throughout: `assertEquals` is a deprecated alias
  # that newer Python versions no longer provide.

  def testGetScaledBaseLearningRateOneGpuLrFromParams(self):
    """Verifies setting params.resnet_base_lr pipes through."""
    lr = self._get_scaled_base_learning_rate(1,
                                             'parameter_server',
                                             256,
                                             base_lr=.050)
    self.assertEqual(lr, .050)

  def testGetScaledBaseLearningRateOneGpu(self):
    lr = self._get_scaled_base_learning_rate(1, 'parameter_server', 128)
    self.assertEqual(lr, .064)

  def testGetScaledBaseLearningRateEightGpuReplicated(self):
    lr = self._get_scaled_base_learning_rate(8, 'replicated', 256 * 8)
    self.assertEqual(lr, .128)

  def testGetScaledBaseLearningRateTwoGpuParameter(self):
    lr = self._get_scaled_base_learning_rate(2, 'parameter_server', 256 * 2)
    self.assertEqual(lr, .256)

  def testGetScaledBaseLearningRateTwoGpuUneven(self):
    lr = self._get_scaled_base_learning_rate(2, 'replicated', 13)
    self.assertEqual(lr, 0.0032500000000000003)

  def _get_scaled_base_learning_rate(self,
                                     num_gpus,
                                     variable_update,
                                     batch_size,
                                     base_lr=None):
    """Simplifies testing different learning rate calculations.

    Args:
      num_gpus: Number of GPUs to be used.
      variable_update: Type of variable update used.
      batch_size: Total batch size.
      base_lr: Base learning rate before scaling.

    Returns:
      Base learning rate that would be used to create lr schedule.
    """
    params = mock.Mock()
    params.num_gpus = num_gpus
    params.variable_update = variable_update
    # Only override resnet_base_lr when the caller supplied a value.
    if base_lr:
      params.resnet_base_lr = base_lr
    resnet50_model = resnet_model.ResnetModel('resnet50', 50, params=params)
    return resnet50_model.get_scaled_base_learning_rate(batch_size)
76 | 
77 | 
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
80 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/models/cifar10_cnn_benchmark.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Original Model from keras/examples/cifar10_cnn.py
 3 | 
 4 | Benchmark CNN model
 5 | '''
 6 | 
 7 | from __future__ import print_function
 8 | import numpy as np
 9 | import keras
10 | from keras.models import Sequential
11 | from keras.layers import Dense, Dropout, Flatten
12 | from keras.layers import Conv2D, MaxPooling2D
13 | from keras.utils import multi_gpu_model
14 | 
15 | from models import timehistory
16 | from data_generator import generate_img_input_data
17 | if keras.backend.backend() == 'cntk':
18 |     from gpu_mode import cntk_gpu_mode_config
19 | 
20 | 
class Cifar10CnnBenchmark():
    """Times the training of a small CNN on generated CIFAR10-shaped data.

    After `run_benchmark` returns, `total_time` holds the summed per-epoch
    times reported by `timehistory.TimeHistory`, excluding the first epoch.
    """

    def __init__(self):
        self.test_name = "cifar10_cnn"
        self.sample_type = "images"
        self.total_time = 0
        self.batch_size = 32
        self.epochs = 2
        self.num_samples = 1000

    def run_benchmark(self, gpus=0):
        """Builds, compiles and fits the model, then records the elapsed time.

        Args:
          gpus: Number of GPUs to use. Values greater than 1 enable
            `multi_gpu_model` on the tensorflow backend and the
            `cntk_gpu_mode_config` data split on the cntk backend.
        """
        num_classes = 10

        # Generate random input data
        input_shape = (self.num_samples, 3, 32, 32)
        x_train, y_train = generate_img_input_data(input_shape)

        y_train = np.reshape(y_train, (len(y_train), 1))
        # Use num_classes so the one-hot width cannot drift from the width of
        # the output layer below.
        y_train = keras.utils.to_categorical(y_train, num_classes)

        # The data is generated channels-first; move the channel axis to the
        # end when Keras is configured for channels_last.
        if keras.backend.image_data_format() == 'channels_last':
            x_train = x_train.transpose(0, 2, 3, 1)

        model = Sequential()
        model.add(Conv2D(32, (3, 3), padding='same',
                         input_shape=x_train.shape[1:], activation='relu'))
        model.add(Conv2D(32, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))

        # Use the RMSprop class rather than the lowercase `rmsprop` alias.
        opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

        # Compare backend names by value; `is` on strings tests identity and
        # is not guaranteed to match an equal string.
        backend = keras.backend.backend()

        if backend == "tensorflow" and gpus > 1:
            model = multi_gpu_model(model, gpus=gpus)

        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])

        x_train = x_train.astype('float32')
        x_train /= 255

        # create a distributed trainer for cntk
        if backend == "cntk" and gpus > 1:
            start, end = cntk_gpu_mode_config(model, x_train.shape[0])
            x_train = x_train[start: end]
            y_train = y_train[start: end]

        time_callback = timehistory.TimeHistory()

        model.fit(x_train,
                  y_train,
                  batch_size=self.batch_size,
                  epochs=self.epochs,
                  shuffle=True,
                  callbacks=[time_callback])

        # Index 0 (the first epoch) is deliberately left out of the total.
        self.total_time = sum(
            time_callback.times[i] for i in range(1, self.epochs))
91 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/alexnet_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Alexnet model configuration.
16 | 
17 | References:
18 |   Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
19 |   ImageNet Classification with Deep Convolutional Neural Networks
20 |   Advances in Neural Information Processing Systems. 2012
21 | """
22 | 
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 | 
27 | import tensorflow as tf
28 | from models import model
29 | 
30 | 
class AlexnetModel(model.CNNModel):
  """AlexNet benchmark model, registered under the name 'alexnet'."""

  def __init__(self, params=None):
    """Configures the model defaults.

    Args:
      params: Optional params object forwarded unchanged to the base class.
    """
    # The input is 224 plus 3 extra pixels because the first convolution in
    # add_inference uses 'VALID' padding (see the note there).
    padded_image_size = 224 + 3
    super(AlexnetModel, self).__init__(
        'alexnet', padded_image_size, 512, 0.005, params=params)

  def add_inference(self, cnn):
    """Appends the AlexNet layers to `cnn` in network order.

    Args:
      cnn: Network builder providing conv, mpool, reshape, affine and dropout.
    """
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(64, 11, 11, 4, 4, 'VALID')
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(192, 5, 5)
    cnn.mpool(3, 3, 2, 2)
    # Three consecutive 3x3 convolutions that differ only in output depth.
    for depth in (384, 384, 256):
      cnn.conv(depth, 3, 3)
    cnn.mpool(3, 3, 2, 2)
    cnn.reshape([-1, 256 * 6 * 6])
    # Two identical fully connected stages, each followed by dropout.
    for _ in range(2):
      cnn.affine(4096)
      cnn.dropout()
53 | 
54 | 
class AlexnetCifar10Model(model.CNNModel):
  """AlexNet-style cnn model for the cifar datasets.

  The layer layout mirrors the model used in the tensorflow cifar10 tutorial.

  Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
  Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
  """

  def __init__(self, params=None):
    """Configures the model defaults.

    Args:
      params: Optional params object forwarded unchanged to the base class.
    """
    super(AlexnetCifar10Model, self).__init__(
        'alexnet', 32, 128, 0.1, params=params)

  def add_inference(self, cnn):
    """Appends the cifar10 AlexNet layers to `cnn` in network order.

    Args:
      cnn: Network builder providing conv, mpool, lrn, reshape and affine, and
        exposing the current output tensor as `top_layer`.
    """
    # Both local response normalization layers share the same settings.
    lrn_args = dict(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.lrn(**lrn_args)
    cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
    cnn.lrn(**lrn_args)
    cnn.mpool(3, 3, 2, 2, mode='SAME')

    # Flatten every dimension except the leading one before the dense layers.
    dims = cnn.top_layer.get_shape().as_list()
    cnn.reshape([-1, dims[1] * dims[2] * dims[3]])
    for width in (384, 192):
      cnn.affine(width, stddev=0.04, bias=0.1)

  def get_learning_rate(self, global_step, batch_size):
    """Returns a staircase exponential-decay learning rate tensor.

    The rate is multiplied by 0.1 every 100 epochs, where an epoch is taken to
    be 50000 examples.

    Args:
      global_step: Training step passed to the decay schedule.
      batch_size: Number of examples consumed per step.

    Returns:
      The result of tf.train.exponential_decay starting at self.learning_rate.
    """
    num_examples_per_epoch = 50000
    num_epochs_per_decay = 100
    examples_per_decay = num_epochs_per_decay * num_examples_per_epoch
    return tf.train.exponential_decay(
        self.learning_rate,
        global_step,
        examples_per_decay // batch_size,
        0.1,
        staircase=True)
94 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/official_resnet_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Import official resnet models."""
16 | 
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | 
21 | import tensorflow as tf
22 | import datasets
23 | from models import model as model_lib
24 | 
25 | 
class ImagenetResnetModel(model_lib.CNNModel):
  """Wrapper around the official resnet models from tensorflow/models."""

  def __init__(self, resnet_size, version=2, params=None):
    """These are the parameters that work for Imagenet data.

    Args:
      resnet_size: The number of convolutional layers needed in the model.
      version: 1 or 2 for v1 or v2, respectively.
      params: params passed by BenchmarkCNN.
    """
    # Per-size default batch sizes; any size not listed falls back to 32.
    batch_size_by_depth = {
        50: 128,
        101: 32,
        152: 32
    }
    batch_size = batch_size_by_depth.get(resnet_size, 32)
    # The default learning rate scales linearly with the default batch size.
    default_learning_rate = 0.0125 * batch_size / 32
    super(ImagenetResnetModel, self).__init__(
        'official_resnet_{}_v{}'.format(resnet_size, version),
        224, batch_size, default_learning_rate, params=params)
    self.resnet_size = resnet_size
    self.version = version

  def get_learning_rate(self, global_step, batch_size):
    """Returns a piecewise-constant learning rate tensor.

    The rate drops by a factor of 10 at epochs 30, 60, 80 and 90, and the base
    rate is rescaled by batch_size relative to the default batch size.

    Args:
      global_step: Training step passed to the schedule.
      batch_size: Batch size used to convert epochs to steps and to scale the
        learning rate.

    Returns:
      The result of tf.train.piecewise_constant.
    """
    steps_per_epoch = float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size
    boundaries = [int(steps_per_epoch * epoch) for epoch in [30, 60, 80, 90]]
    scaled_rate = self.learning_rate / self.default_batch_size * batch_size
    values = [factor * scaled_rate
              for factor in [1, 0.1, 0.01, 0.001, 0.0001]]
    return tf.train.piecewise_constant(global_step, boundaries, values)

  def build_network(self, images, phase_train=True, nclass=1001,
                    data_type=tf.float32):
    """Builds the official resnet model on `images`.

    Args:
      images: Input image tensor; it is cast to `data_type` before use.
      phase_train: Passed to the official model as its second call argument.
      nclass: Not used by this implementation.
      data_type: dtype the images are cast to and handed to the official model.

    Returns:
      A model_lib.BuildNetworkResult whose logits are cast to tf.float32 and
      whose extra_info is None.

    Raises:
      ImportError: If the official models package cannot be imported.
    """
    # pylint: disable=g-import-not-at-top
    try:
      from official.resnet.imagenet_main import ImagenetModel
    except ImportError:
      tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
      raise
    cast_images = tf.cast(images, data_type)
    # The official model dtype seems to be ignored, as the dtype it uses is the
    # dtype of the input images. Doesn't hurt to set it though.
    official_model = ImagenetModel(resnet_size=self.resnet_size,
                                   resnet_version=self.version,
                                   dtype=data_type)
    logits = tf.cast(official_model(cast_images, phase_train), tf.float32)
    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
78 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/README.md:
--------------------------------------------------------------------------------
 1 | # tf_cnn_benchmarks: High performance benchmarks
 2 | 
 3 | tf_cnn_benchmarks contains implementations of several popular convolutional
 4 | models, and is designed to be as fast as possible. tf_cnn_benchmarks supports
 5 | both running on a single machine or running in distributed mode across multiple
 6 | hosts. See the [High-Performance models
 7 | guide](https://www.tensorflow.org/performance/performance_models) for more
 8 | information.
 9 | 
10 | These models utilize many of the strategies in the [TensorFlow Performance
11 | Guide](https://www.tensorflow.org/performance/performance_guide). Benchmark
12 | results can be found [here](https://www.tensorflow.org/performance/benchmarks).
13 | 
14 | These models are designed for performance. For models that have clean and
15 | easy-to-read implementations, see the [TensorFlow Official
16 | Models](https://github.com/tensorflow/models/tree/master/official).
17 | 
18 | ## Getting Started
19 | 
20 | To run ResNet50 with synthetic data without distortions with a single GPU, run
21 | 
22 | ```
23 | python tf_cnn_benchmarks.py --num_gpus=1 --batch_size=32 --model=resnet50 --variable_update=parameter_server
24 | ```
25 | 
26 | Note that the master branch of tf_cnn_benchmarks requires the latest nightly
27 | version of TensorFlow. You can install the nightly version by running `pip
28 | install tf-nightly-gpu` in a clean environment, or by installing TensorFlow from
29 | source. We sometimes will create a branch of tf_cnn_benchmarks, in the form of
30 | cnn_tf_vX.Y_compatible, that is compatible with TensorFlow version X.Y. For
31 | example, branch
32 | [cnn_tf_v1.9_compatible](https://github.com/tensorflow/benchmarks/tree/cnn_tf_v1.9_compatible/scripts/tf_cnn_benchmarks)
33 | works with TensorFlow 1.9.
34 | 
35 | Some important flags are
36 | 
37 | *   model: Model to use, e.g. resnet50, inception3, vgg16, and alexnet.
38 | *   num_gpus: Number of GPUs to use.
39 | *   data_dir: Path to data to process. If not set, synthetic data is used. To
40 |     use Imagenet data use these
41 |     [instructions](https://github.com/tensorflow/models/tree/master/research/inception#getting-started)
42 |     as a starting point.
43 | *   batch_size: Batch size for each GPU.
44 | *   variable_update: The method for managing variables: parameter_server,
45 |     replicated, distributed_replicated, or independent.
46 | *   local_parameter_device: Device to use as parameter server: cpu or gpu.
47 | 
48 | To see the full list of flags, run `python tf_cnn_benchmarks.py --help`.
49 | 
50 | To run ResNet50 with real data with 8 GPUs, run:
51 | 
52 | ```
53 | python tf_cnn_benchmarks.py --data_format=NCHW --batch_size=256 \
54 | --model=resnet50 --optimizer=momentum --variable_update=replicated \
55 | --nodistortions --gradient_repacking=8 --num_gpus=8 \
56 | --num_epochs=90 --weight_decay=1e-4 --data_dir=${DATA_DIR} --use_fp16 \
57 | --train_dir=${CKPT_DIR}
58 | ```
59 | This will train a ResNet-50 model on ImageNet with a total batch size of 2048
60 | (256 per GPU) on 8 GPUs. The model should train to around 76% accuracy.
61 | 
62 | ## Running the tests
63 | 
64 | To run the tests, run
65 | 
66 | ```bash
67 | pip install portpicker
68 | python run_tests.py && python run_tests.py --run_distributed_tests
69 | ```
70 | 
71 | Note the tests require portpicker.
72 | 
73 | The command above runs a subset of tests that is both fast and fairly
74 | comprehensive. Alternatively, all the tests can be run, but this will take a
75 | long time:
76 | 
77 | ```bash
78 | python run_tests.py --full_tests && python run_tests.py --full_tests --run_distributed_tests
79 | ```
80 | 
81 | We will run all tests on every PR before merging them, so it is not necessary
82 | to pass `--full_tests` when running tests yourself.
83 | 
84 | To run an individual test, such as method `testParameterServer` of test class
85 | `TfCnnBenchmarksTest` of module `benchmark_cnn_test`, run
86 | 
87 | ```bash
88 | python -m unittest -v benchmark_cnn_test.TfCnnBenchmarksTest.testParameterServer
89 | ```
90 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/flags.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains functions to define flags and params.
16 | 
17 | Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_spec
18 | dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
19 | a command-line flag for every ParamSpec defined by a DEFINE_* function.
20 | 
21 | The reason we don't use absl flags directly is that we want to be able to use
22 | tf_cnn_benchmarks as a library. When using it as a library, we don't want to
23 | define any flags, but instead pass parameters to the BenchmarkCNN constructor.
24 | """
25 | 
26 | from __future__ import absolute_import
27 | from __future__ import division
28 | from __future__ import print_function
29 | 
30 | from collections import namedtuple
31 | 
32 | from absl import flags as absl_flags
33 | import six
34 | 
35 | 
# Shorthand for absl's global flag-values object.
FLAGS = absl_flags.FLAGS


# ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
#   flag_type: Name of the flag kind, e.g. 'string' or 'integer'.
#   default_value: Default value of the parameter.
#   description: Help text for the parameter.
#   kwargs: Dict of extra keyword arguments for the flag definition.
ParamSpec = namedtuple('_ParamSpec',
                       ['flag_type', 'default_value', 'description',
                        'kwargs'])


# Maps from parameter name to its ParamSpec.
param_specs = {}
47 | 
48 | 
def DEFINE_string(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Records a string parameter in `param_specs` under `name`.

  Args:
    name: Parameter name, used as the key in `param_specs`.
    default: Default value of the parameter.
    help: Description of the parameter.
  """
  spec = ParamSpec(flag_type='string', default_value=default,
                   description=help, kwargs={})
  param_specs[name] = spec
51 | 
52 | 
def DEFINE_boolean(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Records a boolean parameter in `param_specs` under `name`.

  Args:
    name: Parameter name, used as the key in `param_specs`.
    default: Default value of the parameter.
    help: Description of the parameter.
  """
  spec = ParamSpec(flag_type='boolean', default_value=default,
                   description=help, kwargs={})
  param_specs[name] = spec
55 | 
56 | 
def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None):  # pylint: disable=invalid-name,redefined-builtin
  """Records an integer parameter in `param_specs` under `name`.

  Args:
    name: Parameter name, used as the key in `param_specs`.
    default: Default value of the parameter.
    help: Description of the parameter.
    lower_bound: Optional bound stored in the spec's kwargs.
    upper_bound: Optional bound stored in the spec's kwargs.
  """
  bounds = dict(lower_bound=lower_bound, upper_bound=upper_bound)
  param_specs[name] = ParamSpec(flag_type='integer', default_value=default,
                                description=help, kwargs=bounds)
60 | 
61 | 
def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None):  # pylint: disable=invalid-name,redefined-builtin
  """Records a float parameter in `param_specs` under `name`.

  Args:
    name: Parameter name, used as the key in `param_specs`.
    default: Default value of the parameter.
    help: Description of the parameter.
    lower_bound: Optional bound stored in the spec's kwargs.
    upper_bound: Optional bound stored in the spec's kwargs.
  """
  bounds = dict(lower_bound=lower_bound, upper_bound=upper_bound)
  param_specs[name] = ParamSpec(flag_type='float', default_value=default,
                                description=help, kwargs=bounds)
65 | 
66 | 
def DEFINE_enum(name, default, enum_values, help):  # pylint: disable=invalid-name,redefined-builtin
  """Records an enum parameter in `param_specs` under `name`.

  Args:
    name: Parameter name, used as the key in `param_specs`.
    default: Default value of the parameter.
    enum_values: Allowed values, stored in the spec's kwargs.
    help: Description of the parameter.
  """
  extra = dict(enum_values=enum_values)
  param_specs[name] = ParamSpec(flag_type='enum', default_value=default,
                                description=help, kwargs=extra)
70 | 
71 | 
def DEFINE_list(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Records a list parameter in `param_specs` under `name`.

  Args:
    name: Parameter name, used as the key in `param_specs`.
    default: Default value of the parameter.
    help: Description of the parameter.
  """
  spec = ParamSpec(flag_type='list', default_value=default,
                   description=help, kwargs={})
  param_specs[name] = spec
74 | 
75 | 
def define_flags(specs=None):
  """Define a command line flag for each ParamSpec in flags.param_specs.

  Args:
    specs: Optional dict mapping flag name to ParamSpec. When it is None or
      empty, the module-level `param_specs` dict is used instead.

  Raises:
    ValueError: If a spec has a flag_type with no matching absl definer.
  """
  specs = specs or param_specs
  # Dispatch table from ParamSpec.flag_type to the absl function defining it.
  definers = {
      'boolean': absl_flags.DEFINE_boolean,
      'float': absl_flags.DEFINE_float,
      'integer': absl_flags.DEFINE_integer,
      'string': absl_flags.DEFINE_string,
      'enum': absl_flags.DEFINE_enum,
      'list': absl_flags.DEFINE_list
  }
  for name, param_spec in six.iteritems(specs):
    definer = definers.get(param_spec.flag_type)
    if definer is None:
      raise ValueError('Unknown flag_type %s' % param_spec.flag_type)
    definer(name, param_spec.default_value,
            help=param_spec.description,
            **param_spec.kwargs)
94 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/densenet_model.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Densenet model configuration.
 17 | 
 18 | References:
 19 |   "Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
 20 | """
 21 | 
 22 | from __future__ import absolute_import
 23 | from __future__ import division
 24 | from __future__ import print_function
 25 | 
 26 | import numpy as np
 27 | from six.moves import xrange  # pylint: disable=redefined-builtin
 28 | import tensorflow as tf
 29 | from models import model as model_lib
 30 | 
 31 | 
 32 | class DensenetCifar10Model(model_lib.CNNModel):
 33 |   """Densenet cnn network configuration."""
 34 | 
 35 |   def __init__(self, model, layer_counts, growth_rate, params=None):
 36 |     self.growth_rate = growth_rate
 37 |     super(DensenetCifar10Model, self).__init__(
 38 |         model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
 39 |     self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
 40 | 
 41 |   def dense_block(self, cnn, growth_rate):
 42 |     input_layer = cnn.top_layer
 43 |     c = cnn.batch_norm(input_layer, **self.batch_norm_config)
 44 |     c = tf.nn.relu(c)
 45 |     c = cnn.conv(growth_rate, 3, 3, 1, 1, stddev=np.sqrt(2.0/9/growth_rate),
 46 |                  activation=None, input_layer=c)
 47 |     channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
 48 |     cnn.top_layer = tf.concat([input_layer, c], channel_index)
 49 |     cnn.top_size += growth_rate
 50 | 
 51 |   def transition_layer(self, cnn):
 52 |     in_size = cnn.top_size
 53 |     cnn.batch_norm(**self.batch_norm_config)
 54 |     cnn.top_layer = tf.nn.relu(cnn.top_layer)
 55 |     cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0/9/in_size))
 56 |     cnn.apool(2, 2, 2, 2)
 57 | 
 58 |   def add_inference(self, cnn):
 59 |     if self.layer_counts is None:
 60 |       raise ValueError('Layer counts not specified for %s' % self.get_model())
 61 |     if self.growth_rate is None:
 62 |       raise ValueError('Growth rate not specified for %s' % self.get_model())
 63 | 
 64 |     cnn.conv(16, 3, 3, 1, 1, activation=None)
 65 |     # Block 1
 66 |     for _ in xrange(self.layer_counts[0]):
 67 |       self.dense_block(cnn, self.growth_rate)
 68 |     self.transition_layer(cnn)
 69 |     # Block 2
 70 |     for _ in xrange(self.layer_counts[1]):
 71 |       self.dense_block(cnn, self.growth_rate)
 72 |     self.transition_layer(cnn)
 73 |     # Block 3
 74 |     for _ in xrange(self.layer_counts[2]):
 75 |       self.dense_block(cnn, self.growth_rate)
 76 |     cnn.batch_norm(**self.batch_norm_config)
 77 |     cnn.top_layer = tf.nn.relu(cnn.top_layer)
 78 |     channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
 79 |     cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
 80 |     cnn.spatial_mean()
 81 | 
 82 |   def get_learning_rate(self, global_step, batch_size):
 83 |     num_batches_per_epoch = 50000 // batch_size
 84 |     boundaries = num_batches_per_epoch * np.array([150, 225, 300],
 85 |                                                   dtype=np.int64)
 86 |     boundaries = [x for x in boundaries]
 87 |     values = [0.1, 0.01, 0.001, 0.0001]
 88 |     return tf.train.piecewise_constant(global_step, boundaries, values)
 89 | 
 90 | 
 91 | def create_densenet40_k12_model():
 92 |   return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12)
 93 | 
 94 | 
 95 | def create_densenet100_k12_model():
 96 |   return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12)
 97 | 
 98 | 
 99 | def create_densenet100_k24_model():
100 |   return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24)
101 | 


--------------------------------------------------------------------------------
/scripts/keras_benchmarks/upload_benchmarks_bq.py:
--------------------------------------------------------------------------------
 1 | """ Uploads benchmark statistics along with platform used to run the benchmark
 2 | to BigQuery."""
 3 | from google.cloud import bigquery
 4 | import uuid
 5 | 
 6 | 
 7 | def upload_metrics_to_bq(test_name, total_time, epochs, batch_size,
 8 |     backend_type, backend_version, cpu_num_cores, cpu_memory, cpu_memory_info,
 9 |     gpu_count, gpu_platform, platform_type, platform_machine_type,
10 |     keras_version, sample_type=None):
11 |     """ Upload benchmark metrics of a model along with platform specs.
12 | 
13 |     # Arguments
14 |         test_name: Unique test name for each benchmark.
15 |         total_time: Time taken to run the given number of epochs.
16 |         epochs: Total number of epochs for which the given benchmark was run.
17 |                        We don't count the first epoch since some amount of time is
18 |                        spent creating the graph.
19 |         batch_size: Batch size of samples used in a given epoch.
20 |         backend_type: Backend type used by the Keras models. This is either
21 |                             "tensorflow", "cntk" or "theano".
22 |         backend_version: This is the version "tensorflow", "cntk" or "theano"
23 |                                 used by Keras.
24 |         cpu_num_cores: Number of CPU cores of the machine on which the Keras
25 |                               benchmark is run.
26 |         cpu_memory: RAM memory specs of the CPU.
27 |         cpu_memory_info: This is the memory unit of the CPU memory such as
28 |         gpu_count: Number of GPUs used to run the benchmarks.
29 |                                 'GB'.
30 |         gpu_platform: The type of GPU used, for e.g "Nvidia Tesla K80"
31 |         platform_type: This is the local or cloud platform used to run the
32 |                               benchmarks.
33 |         platform_machine_type: This can be details about the machine type
34 |                                       for e.g
35 |         keras_version: Version of Keras used to run the benchmark model.
36 |         sample_type: This is a user specified string used to calculate metrics such
37 |                     as "images per epoch" etc.
38 |     """
39 |     bigquery_client = bigquery.Client()
40 |     dataset = bigquery_client.dataset('keras_benchmarks')
41 |     table = dataset.table('benchmarks')
42 |     table.reload()
43 | 
44 |     query = """\
45 |     INSERT keras_benchmarks.benchmarks (test_id,test_name,recorded_time,\
46 |     metrics,keras_backend,cpu_info,platform_info,keras_version,gpu_info) \
47 |     VALUES(@testid,@testname,CURRENT_TIMESTAMP(),\
48 |     (@metrics_totaltime,@metrics_epochs,@metrics_batch_size,@metrics_sampletype),\
49 |     (@keras_backend_type, @keras_backend_version),\
50 |     (@cpu_info_numcores,@cpu_info_memory, @cpu_info_memory_units),\
51 |     (@platform_info_type,@platform_info_machine_type),\
52 |      @keras_version,\
53 |      (@gpu_info_count,@gpu_info_platform))
54 |     """
55 |     test_id = uuid.uuid4().int >> 80
56 |     query_job = bigquery_client.run_async_query(
57 |         str(uuid.uuid4()),
58 |         query,
59 |         query_parameters=(
60 |           bigquery.ScalarQueryParameter('testid', 'INTEGER', test_id),
61 |           bigquery.ScalarQueryParameter('testname', 'STRING', test_name),
62 |           bigquery.ScalarQueryParameter('metrics_totaltime', 'FLOAT', total_time),
63 |           bigquery.ScalarQueryParameter('metrics_epochs', 'INTEGER', epochs),
64 |           bigquery.ScalarQueryParameter('metrics_batch_size', 'INTEGER', batch_size),
65 |           bigquery.ScalarQueryParameter('metrics_sampletype', 'STRING', sample_type),
66 |           bigquery.ScalarQueryParameter('keras_backend_type', 'STRING', backend_type),
67 |           bigquery.ScalarQueryParameter('keras_backend_version', 'STRING', backend_version),
68 |           bigquery.ScalarQueryParameter('cpu_info_numcores', 'FLOAT', cpu_num_cores),
69 |           bigquery.ScalarQueryParameter('cpu_info_memory', 'FLOAT', cpu_memory),
70 |           bigquery.ScalarQueryParameter('cpu_info_memory_units', 'STRING', cpu_memory_info),
71 |           bigquery.ScalarQueryParameter('platform_info_type', 'STRING', platform_type),
72 |           bigquery.ScalarQueryParameter('platform_info_machine_type', 'STRING', platform_machine_type),
73 |           bigquery.ScalarQueryParameter('keras_version', 'STRING', keras_version),
74 |           bigquery.ScalarQueryParameter('gpu_info_count', 'FLOAT', gpu_count),
75 |           bigquery.ScalarQueryParameter('gpu_info_platform', 'STRING', gpu_platform)))
76 | 
77 |     query_job.use_legacy_sql = False
78 | 
79 |     query_job.begin()
80 |     query_job.result()
81 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/run_tests.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Runs the tf_cnn_benchmarks tests."""
 16 | 
 17 | from __future__ import absolute_import
 18 | from __future__ import division
 19 | from __future__ import print_function
 20 | 
 21 | import sys
 22 | import unittest
 23 | 
 24 | from absl import app
 25 | from absl import flags as absl_flags
 26 | 
 27 | import all_reduce_benchmark_test
 28 | import allreduce_test
 29 | import benchmark_cnn_distributed_test
 30 | import benchmark_cnn_test
 31 | import cnn_util_test
 32 | import variable_mgr_util_test
 33 | from models import nasnet_test
 34 | 
 35 | 
 36 | # Ideally, we wouldn't need this option, and run both distributed tests and non-
 37 | # distributed tests. But, TensorFlow allocates all the GPU memory by default, so
 38 | # the non-distributed tests allocate all the GPU memory. The distributed tests
 39 | # spawn processes that run TensorFlow, and cannot run if all the GPU memory is
 40 | # already allocated. If a non-distributed test is run, then a distributed test
 41 | # is run in the same process, the distributed test will fail because there is no
 42 | # more GPU memory for the spawned processes to allocate.
 43 | absl_flags.DEFINE_boolean('run_distributed_tests', False,
 44 |                           'If True, run the distributed tests. If False, the'
 45 |                           'non-distributed tests.')
 46 | 
 47 | absl_flags.DEFINE_boolean('full_tests', False,
 48 |                           'If True, all distributed or non-distributed tests '
 49 |                           'are run, which can take hours. If False, only a '
 50 |                           'subset of tests will be run. This subset runs much '
 51 |                           'faster and tests almost all the functionality as '
 52 |                           'the full set of tests, so it is recommended to keep '
 53 |                           'this option set to False.')
 54 | 
 55 | FLAGS = absl_flags.FLAGS
 56 | 
 57 | 
 58 | def main(_):
 59 |   loader = unittest.defaultTestLoader
 60 |   if FLAGS.full_tests:
 61 |     suite = unittest.TestSuite([
 62 |         loader.loadTestsFromModule(allreduce_test),
 63 |         loader.loadTestsFromModule(cnn_util_test),
 64 |         loader.loadTestsFromModule(variable_mgr_util_test),
 65 |         loader.loadTestsFromModule(benchmark_cnn_test),
 66 |         loader.loadTestsFromModule(all_reduce_benchmark_test),
 67 |         loader.loadTestsFromModule(nasnet_test),
 68 |     ])
 69 |     dist_suite = unittest.TestSuite([
 70 |         loader.loadTestsFromModule(benchmark_cnn_distributed_test),
 71 |     ])
 72 |   else:
 73 |     suite = unittest.TestSuite([
 74 |         loader.loadTestsFromModule(allreduce_test),
 75 |         loader.loadTestsFromModule(cnn_util_test),
 76 |         loader.loadTestsFromModule(all_reduce_benchmark_test),
 77 |         loader.loadTestsFromModule(variable_mgr_util_test),
 78 |         loader.loadTestsFromTestCase(benchmark_cnn_test.TestAlexnetModel),
 79 |         loader.loadTestsFromTestCase(benchmark_cnn_test.TfCnnBenchmarksTest),
 80 |         loader.loadTestsFromTestCase(benchmark_cnn_test.VariableUpdateTest),
 81 |         loader.loadTestsFromTestCase(
 82 |             benchmark_cnn_test.VariableMgrLocalReplicatedTest),
 83 |     ])
 84 |     dist_suite = unittest.TestSuite([
 85 |         loader.loadTestsFromNames([
 86 |             'benchmark_cnn_distributed_test.DistributedVariableUpdateTest'
 87 |             '.testVarUpdateDefault',
 88 | 
 89 |             'benchmark_cnn_distributed_test.TfCnnBenchmarksDistributedTest'
 90 |             '.testParameterServer',
 91 |         ]),
 92 |     ])
 93 | 
 94 |   if FLAGS.run_distributed_tests:
 95 |     print('Running distributed tests')
 96 |     result = unittest.TextTestRunner(verbosity=2).run(dist_suite)
 97 |   else:
 98 |     print('Running non-distributed tests')
 99 |     result = unittest.TextTestRunner(verbosity=2).run(suite)
100 |   sys.exit(not result.wasSuccessful())
101 | 
102 | 
103 | if __name__ == '__main__':
104 |   app.run(main)
105 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/ssd_constants.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 Google. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Central location for all constants related to MLPerf SSD."""
 16 | 
 17 | from __future__ import absolute_import
 18 | from __future__ import division
 19 | from __future__ import print_function
 20 | 
 21 | # ==============================================================================
 22 | # == Model =====================================================================
 23 | # ==============================================================================
 24 | IMAGE_SIZE = 300
 25 | 
 26 | # TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90)
 27 | # Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to
 28 | #                      be resolved.
 29 | NUM_CLASSES = 81  # Including "no class". Not all COCO classes are used.
 30 | 
 31 | # Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero.
 32 | CLASS_INV_MAP = (
 33 |     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
 34 |     22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
 35 |     44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 36 |     64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87,
 37 |     88, 89, 90)
 38 | _MAP = {j: i for i, j in enumerate(CLASS_INV_MAP)}
 39 | CLASS_MAP = tuple(_MAP.get(i, -1) for i in range(max(CLASS_INV_MAP) + 1))
 40 | 
# Equals sum(FEATURE_SIZES[i] ** 2 * NUM_DEFAULTS[i]) for the values below:
# 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8732.
NUM_SSD_BOXES = 8732

RESNET_DEPTH = 34

"""SSD specific"""
MIN_LEVEL = 3
MAX_LEVEL = 8

# Six entries each, one per level from MIN_LEVEL through MAX_LEVEL.
FEATURE_SIZES = (38, 19, 10, 5, 3, 1)
STEPS = (8, 16, 32, 64, 100, 300)

# https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
SCALES = (21, 45, 99, 153, 207, 261, 315)
ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))
NUM_DEFAULTS = (4, 6, 6, 6, 4, 4)
# Same values as NUM_DEFAULTS, keyed by level number instead of position.
NUM_DEFAULTS_BY_LEVEL = {3: 4, 4: 6, 5: 6, 6: 6, 7: 4, 8: 4}
SCALE_XY = 0.1
SCALE_HW = 0.2
# Reciprocals of the scales above, i.e. (10, 10, 5, 5) as floats.
BOX_CODER_SCALES = (1 / SCALE_XY, 1 / SCALE_XY, 1 / SCALE_HW, 1 / SCALE_HW)
MATCH_THRESHOLD = 0.5

# https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683
NORMALIZATION_MEAN = (0.485, 0.456, 0.406)
NORMALIZATION_STD = (0.229, 0.224, 0.225)

# SSD Cropping
NUM_CROP_PASSES = 50
CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9)
# "No crop" is weighted as one more equally likely choice next to the IoU
# choices above: 1 / 7 (true division, via the __future__ import).
P_NO_CROP_PER_PASS = 1 / (len(CROP_MIN_IOU_CHOICES) + 1)

# Hard example mining
NEGS_PER_POSITIVE = 3

# Batch normalization
BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-4
 77 | 
 78 | 
 79 | # ==============================================================================
 80 | # == Optimizer =================================================================
 81 | # ==============================================================================
# NOTE(review): entries look like (starting step, learning rate) pairs of a
# piecewise-constant schedule -- confirm against the code that consumes this.
LEARNING_RATE_SCHEDULE = (
    (0, 1e-3),
    (160000, 1e-4),
    (200000, 1e-5),
)
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4
 89 | 
 90 | 
 91 | # ==============================================================================
 92 | # == Keys ======================================================================
 93 | # ==============================================================================
# String keys shared across modules. coco_metric.decode_predictions indexes
# each example dict with SOURCE_ID, PRED_BOXES, PRED_SCORES and RAW_SHAPE.
BOXES = "boxes"
CLASSES = "classes"
NUM_MATCHED_BOXES = "num_matched_boxes"
IMAGE = "image"
SOURCE_ID = "source_id"
RAW_SHAPE = "raw_shape"
PRED_BOXES = "pred_boxes"
PRED_SCORES = "pred_scores"
102 | 
103 | 
104 | # ==============================================================================
105 | # == Evaluation ================================================================
106 | # ==============================================================================
107 | 
# Note: This is based on a batch size of 32
#   https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37
CHECKPOINT_FREQUENCY = 20000
MAX_NUM_EVAL_BOXES = 200
OVERLAP_CRITERIA = 0.5  # Used for non-max suppression
MIN_SCORE = 0.05  # Minimum score to be considered during evaluation.
DUMMY_SCORE = -1e5  # If no boxes are matched.

ANNOTATION_FILE = "annotations/instances_val2017.json"
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952
119 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/cnn_util_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Tests for tf_cnn_benchmarks.cnn_util."""
 17 | 
 18 | from __future__ import absolute_import
 19 | from __future__ import division
 20 | from __future__ import print_function
 21 | 
 22 | import threading
 23 | import time
 24 | 
 25 | import tensorflow as tf
 26 | 
 27 | import cnn_util
 28 | 
 29 | 
 30 | class CnnUtilBarrierTest(tf.test.TestCase):
 31 | 
 32 |   def testBarrier(self):
 33 |     num_tasks = 20
 34 |     num_waits = 4
 35 |     barrier = cnn_util.Barrier(num_tasks)
 36 |     threads = []
 37 |     sync_matrix = []
 38 |     for i in range(num_tasks):
 39 |       sync_times = [0] * num_waits
 40 |       thread = threading.Thread(
 41 |           target=self._run_task, args=(barrier, sync_times))
 42 |       thread.start()
 43 |       threads.append(thread)
 44 |       sync_matrix.append(sync_times)
 45 |     for thread in threads:
 46 |       thread.join()
 47 |     for wait_index in range(num_waits - 1):
 48 |       # Max of times at iteration i < min of times at iteration i + 1
 49 |       self.assertLessEqual(
 50 |           max([sync_matrix[i][wait_index] for i in range(num_tasks)]),
 51 |           min([sync_matrix[i][wait_index + 1] for i in range(num_tasks)]))
 52 | 
 53 |   def _run_task(self, barrier, sync_times):
 54 |     for wait_index in range(len(sync_times)):
 55 |       sync_times[wait_index] = time.time()
 56 |       barrier.wait()
 57 | 
 58 |   def testBarrierAbort(self):
 59 |     num_tasks = 2
 60 |     num_waits = 1
 61 |     sync_times = [0] * num_waits
 62 |     barrier = cnn_util.Barrier(num_tasks)
 63 |     thread = threading.Thread(
 64 |         target=self._run_task, args=(barrier, sync_times))
 65 |     thread.start()
 66 |     barrier.abort()
 67 |     # thread won't be blocked by done barrier.
 68 |     thread.join()
 69 | 
 70 | 
class ImageProducerTest(tf.test.TestCase):
  """Tests for cnn_util.ImageProducer using a counter as a fake staging area."""

  def _slow_tensorflow_op(self):
    """Returns a TensorFlow op that takes approximately 0.1s to complete."""
    def slow_func(v):
      time.sleep(0.1)
      return v
    return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op

  def _test_image_producer(self, batch_group_size, put_slower_than_get):
    """Runs an ImageProducer against simulated put/get ops and checks bounds.

    Args:
      batch_group_size: Amount added to the counter by each put; also passed
        to cnn_util.ImageProducer.
      put_slower_than_get: If True the put op depends on the slow (~0.1s) op
        and the get op on a no-op; if False the roles are swapped.
    """
    # We use the variable x to simulate a staging area of images. x represents
    # the number of batches in the staging area.
    x = tf.Variable(0, dtype=tf.int32)
    if put_slower_than_get:
      put_dep = self._slow_tensorflow_op()
      get_dep = tf.no_op()
    else:
      put_dep = tf.no_op()
      get_dep = self._slow_tensorflow_op()
    with tf.control_dependencies([put_dep]):
      put_op = x.assign_add(batch_group_size, use_locking=True)
    with tf.control_dependencies([get_dep]):
      get_op = x.assign_sub(1, use_locking=True)
    with self.test_session() as sess:
      sess.run(tf.variables_initializer([x]))
      image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
                                              use_python32_barrier=False)
      image_producer.start()
      for _ in range(5 * batch_group_size):
        sess.run(get_op)
        # We assert x is nonnegative, to ensure image_producer never causes
        # an unstage op to block. We assert x is at most 2 * batch_group_size,
        # to ensure it doesn't use too much memory by storing too many batches
        # in the staging area.
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
        image_producer.notify_image_consumption()
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)

      image_producer.done()
      # Re-check the same bounds shortly after done().
      time.sleep(0.1)
      self.assertGreaterEqual(sess.run(x), 0)
      self.assertLessEqual(sess.run(x), 2 * batch_group_size)

  def test_image_producer(self):
    """Covers group sizes 1, 2, 3 and 8 with either side being the slow one."""
    self._test_image_producer(1, False)
    self._test_image_producer(1, True)
    self._test_image_producer(2, False)
    self._test_image_producer(2, True)
    self._test_image_producer(3, False)
    self._test_image_producer(3, True)
    self._test_image_producer(8, False)
    self._test_image_producer(8, True)
125 | 
126 | 
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
129 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/benchmark_cnn_distributed_test_runner.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Used to run benchmark_cnn for distributed tests.
 17 | 
 18 | In distributed tests, we spawn processes to run tf_cnn_benchmark tasks. We could
 19 | directly spawn tf_cnn_benchmark processes, but we want some added functionality,
 20 | such as being able to inject custom images during training. So instead, this
 21 | file is spawned as a Python process, which supports the added functionality.
 22 | """
 23 | 
 24 | from __future__ import absolute_import
 25 | from __future__ import division
 26 | from __future__ import print_function
 27 | 
 28 | from absl import flags as absl_flags
 29 | import numpy as np
 30 | import tensorflow as tf
 31 | import benchmark_cnn
 32 | import flags
 33 | import preprocessing
 34 | import test_util
 35 | 
 36 | 
# Extra flag specific to this runner; it is read through FLAGS.fake_input in
# get_test_image_preprocessor.
absl_flags.DEFINE_string('fake_input', 'none',
                         """What fake input to inject into benchmark_cnn. This
                            is ignored if --model=test_model.
                            Options are:
                            none: Do not inject any fake input.
                            zeros_and_ones: Half the images will be all 0s with
                            a label of 0. Half the images will be all 1s with a
                            label of 1.""")

# NOTE(review): flags.FLAGS is presumably absl's global flag holder, which is
# why the flag defined above is visible through it -- confirm in flags.py.
flags.define_flags()
FLAGS = flags.FLAGS
 48 | 
 49 | 
 50 | def get_test_image_preprocessor(batch_size, params):
 51 |   """Returns the preprocessing.TestImagePreprocessor that should be injected.
 52 | 
 53 |   Returns None if no preprocessor should be injected.
 54 | 
 55 |   Args:
 56 |     batch_size: The batch size across all GPUs.
 57 |     params: BenchmarkCNN's parameters.
 58 |   Returns:
 59 |     Returns the preprocessing.TestImagePreprocessor that should be injected.
 60 |   Raises:
 61 |     ValueError: Flag --fake_input is an invalid value.
 62 |   """
 63 |   if FLAGS.fake_input == 'none':
 64 |     return None
 65 |   elif FLAGS.fake_input == 'zeros_and_ones':
 66 |     half_batch_size = batch_size // 2
 67 |     images = np.zeros((batch_size, 227, 227, 3), dtype=np.float32)
 68 |     images[half_batch_size:, :, :, :] = 1
 69 |     labels = np.array([0] * half_batch_size + [1] * half_batch_size,
 70 |                       dtype=np.int32)
 71 |     preprocessor = preprocessing.TestImagePreprocessor(
 72 |         batch_size, [227, 227, 3], params.num_gpus,
 73 |         benchmark_cnn.get_data_type(params))
 74 |     preprocessor.set_fake_data(images, labels)
 75 |     preprocessor.expected_subset = 'validation' if params.eval else 'train'
 76 |     return preprocessor
 77 |   else:
 78 |     raise ValueError('Invalid --fake_input: %s' % FLAGS.fake_input)
 79 | 
 80 | 
 81 | def run_with_real_model(params):
 82 |   """Runs tf_cnn_benchmarks with a real model."""
 83 |   bench = benchmark_cnn.BenchmarkCNN(params)
 84 |   bench.print_info()
 85 |   preprocessor = get_test_image_preprocessor(bench.batch_size, params)
 86 |   if preprocessor is not None:
 87 |     # The test image preprocessor requires queue runners. Since this file is
 88 |     # used for testing, it is OK to access protected members.
 89 |     # pylint: disable=protected-access
 90 |     bench.dataset._queue_runner_required = True
 91 |     # pylint: enable=protected-access
 92 |     bench.input_preprocessor = preprocessor
 93 |   bench.run()
 94 | 
 95 | 
 96 | def run_with_test_model(params):
 97 |   """Runs tf_cnn_benchmarks with a test model."""
 98 |   model = test_util.TestCNNModel()
 99 |   inputs = test_util.get_fake_var_update_inputs()
100 |   with test_util.monkey_patch(benchmark_cnn,
101 |                               LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
102 |     bench = benchmark_cnn.BenchmarkCNN(params, dataset=test_util.TestDataSet(),
103 |                                        model=model)
104 |     # The test model does not use labels when computing loss, so the label
105 |     # values do not matter as long as it's the right shape.
106 |     labels = np.array([1] * inputs.shape[0])
107 |     bench.input_preprocessor.set_fake_data(inputs, labels)
108 |     bench.run()
109 | 
110 | 
def main(_):
  """Builds params from flags and dispatches to the matching runner."""
  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  # --model=test_model selects the test model; any other name is a real model.
  runner = (run_with_test_model if params.model == 'test_model'
            else run_with_real_model)
  runner(params)
118 | 
119 | 
# Entry point when this file is spawned as a Python process.
if __name__ == '__main__':
  tf.app.run()
122 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/variable_mgr_util_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Tests for variable_mgr_util."""
 16 | 
 17 | from __future__ import absolute_import
 18 | from __future__ import division
 19 | from __future__ import print_function
 20 | 
 21 | import tensorflow as tf
 22 | import variable_mgr_util
 23 | 
 24 | 
class VariableMgrUtilTest(tf.test.TestCase):
  """Tests for the loss-scale helpers in variable_mgr_util."""

  def testGetLossScaleUpdateOpTruePath(self):
    """Step counter at the threshold: loss scale 4 -> 8, counter reset to 0."""
    loss_scale = tf.Variable(4)
    # loss_scale_normal_steps >= inc_loss_scale_every_n
    loss_scale_normal_steps = tf.Variable(10)
    inc_loss_scale_every_n = 10
    update_op = variable_mgr_util.get_loss_scale_update_op(
        loss_scale, loss_scale_normal_steps, inc_loss_scale_every_n)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(update_op)

      self.assertEqual(sess.run(loss_scale), 8)
      self.assertEqual(sess.run(loss_scale_normal_steps), 0)

  def testGetLossScaleUpdateOpFalsePath(self):
    """Step counter below the threshold: scale stays 4, counter 9 -> 10."""
    loss_scale = tf.Variable(4)
    # loss_scale_normal_steps < inc_loss_scale_every_n
    loss_scale_normal_steps = tf.Variable(9)
    inc_loss_scale_every_n = 10
    update_op = variable_mgr_util.get_loss_scale_update_op(
        loss_scale, loss_scale_normal_steps, inc_loss_scale_every_n)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(update_op)

      self.assertEqual(sess.run(loss_scale), 4)
      self.assertEqual(sess.run(loss_scale_normal_steps), 10)

  def testAppendGradientsWithLossScaleWithAutoScaleDisabled(self):
    """Auto loss scale off: gradients applied, loss-scale state unchanged."""
    v = tf.Variable(0)
    training_ops = []
    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=False,  # no auto loss scale.
        loss_scale=tf.Variable(4),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=True)
    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops,
        get_apply_gradients_ops_func,
        loss_scale_params,
        grad_has_inf_nan=True)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(training_ops)
      self.assertEqual(sess.run(v), 1)
      self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 10)

  def testAppendGradientsWithLossScaleForNonChiefWorker(self):
    """Non-chief worker: gradients applied, loss-scale state unchanged."""
    v = tf.Variable(0)
    training_ops = []
    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=True,
        loss_scale=tf.Variable(4),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=False)  # Non-chief
    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops,
        get_apply_gradients_ops_func,
        loss_scale_params,
        grad_has_inf_nan=False)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(training_ops)
      self.assertEqual(sess.run(v), 1)
      self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 10)

  def testAppendGradientsWithLossScaleWithoutNan(self):
    """Chief, finite grads: gradients applied, scale 4 -> 8, counter reset."""
    v = tf.Variable(0)
    training_ops = []
    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=True,
        loss_scale=tf.Variable(4, dtype=tf.float32),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=True)
    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops,
        get_apply_gradients_ops_func,
        loss_scale_params,
        grad_has_inf_nan=tf.constant(False))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(training_ops)
      self.assertEqual(sess.run(v), 1)
      self.assertEqual(sess.run(loss_scale_params.loss_scale), 8)
      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)

  def testAppendGradientsWithLossScaleWithtNan(self):
    """Chief, inf/nan grads: update skipped, scale 4 -> 2, counter reset."""
    v = tf.Variable(0)
    training_ops = []
    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=True,
        loss_scale=tf.Variable(4, dtype=tf.float32),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=True)
    variable_mgr_util.append_gradients_with_loss_scale(
        training_ops,
        get_apply_gradients_ops_func,
        loss_scale_params,
        grad_has_inf_nan=tf.constant(True))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(training_ops)
      self.assertEqual(sess.run(v), 0)  # Skip updating for v.
      # halve loss_scale and reset local_scale_normal_steps.
      self.assertEqual(sess.run(loss_scale_params.loss_scale), 2)
      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)
149 | 
150 | 
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
153 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/model_config.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Model configurations for CNN benchmarks.
 17 | """
 18 | 
 19 | from __future__ import absolute_import
 20 | from __future__ import division
 21 | from __future__ import print_function
 22 | 
 23 | from functools import partial
 24 | 
 25 | from models import alexnet_model
 26 | from models import densenet_model
 27 | from models import googlenet_model
 28 | from models import inception_model
 29 | from models import lenet_model
 30 | from models import mobilenet_v2
 31 | from models import nasnet_model
 32 | from models import official_resnet_model
 33 | from models import overfeat_model
 34 | from models import resnet_model
 35 | from models import ssd_model
 36 | from models import trivial_model
 37 | from models import vgg_model
 38 | from models.experimental import deepspeech
 39 | from models.experimental import official_ncf_model
 40 | 
 41 | 
# Registry for the 'imagenet' and 'synthetic' datasets. Each value is a
# callable that get_model_config invokes as `factory(params=params)`.
_model_name_to_imagenet_model = {
    'vgg11': vgg_model.Vgg11Model,
    'vgg16': vgg_model.Vgg16Model,
    'vgg19': vgg_model.Vgg19Model,
    'lenet': lenet_model.Lenet5Model,
    'googlenet': googlenet_model.GooglenetModel,
    'overfeat': overfeat_model.OverfeatModel,
    'alexnet': alexnet_model.AlexnetModel,
    'trivial': trivial_model.TrivialModel,
    'inception3': inception_model.Inceptionv3Model,
    'inception4': inception_model.Inceptionv4Model,
    # The "official" ResNets bind the depth positionally; the names without a
    # "_v2" suffix additionally pass version=1.
    'official_resnet18_v2':
        partial(official_resnet_model.ImagenetResnetModel, 18),
    'official_resnet34_v2':
        partial(official_resnet_model.ImagenetResnetModel, 34),
    'official_resnet50_v2':
        partial(official_resnet_model.ImagenetResnetModel, 50),
    'official_resnet101_v2':
        partial(official_resnet_model.ImagenetResnetModel, 101),
    'official_resnet152_v2':
        partial(official_resnet_model.ImagenetResnetModel, 152),
    'official_resnet200_v2':
        partial(official_resnet_model.ImagenetResnetModel, 200),
    'official_resnet18':
        partial(official_resnet_model.ImagenetResnetModel, 18, version=1),
    'official_resnet34':
        partial(official_resnet_model.ImagenetResnetModel, 34, version=1),
    'official_resnet50':
        partial(official_resnet_model.ImagenetResnetModel, 50, version=1),
    'official_resnet101':
        partial(official_resnet_model.ImagenetResnetModel, 101, version=1),
    'official_resnet152':
        partial(official_resnet_model.ImagenetResnetModel, 152, version=1),
    'official_resnet200':
        partial(official_resnet_model.ImagenetResnetModel, 200, version=1),
    'resnet50': resnet_model.create_resnet50_model,
    'resnet50_v1.5': resnet_model.create_resnet50_v1_5_model,
    'resnet50_v2': resnet_model.create_resnet50_v2_model,
    'resnet101': resnet_model.create_resnet101_model,
    'resnet101_v2': resnet_model.create_resnet101_v2_model,
    'resnet152': resnet_model.create_resnet152_model,
    'resnet152_v2': resnet_model.create_resnet152_v2_model,
    'nasnet': nasnet_model.NasnetModel,
    'nasnetlarge': nasnet_model.NasnetLargeModel,
    'mobilenet': mobilenet_v2.MobilenetModel,
    'ncf': official_ncf_model.NcfModel,
}


# Registry for the 'cifar10' dataset; values are used the same way as above.
_model_name_to_cifar_model = {
    'alexnet': alexnet_model.AlexnetCifar10Model,
    'resnet20': resnet_model.create_resnet20_cifar_model,
    'resnet20_v2': resnet_model.create_resnet20_v2_cifar_model,
    'resnet32': resnet_model.create_resnet32_cifar_model,
    'resnet32_v2': resnet_model.create_resnet32_v2_cifar_model,
    'resnet44': resnet_model.create_resnet44_cifar_model,
    'resnet44_v2': resnet_model.create_resnet44_v2_cifar_model,
    'resnet56': resnet_model.create_resnet56_cifar_model,
    'resnet56_v2': resnet_model.create_resnet56_v2_cifar_model,
    'resnet110': resnet_model.create_resnet110_cifar_model,
    'resnet110_v2': resnet_model.create_resnet110_v2_cifar_model,
    'trivial': trivial_model.TrivialCifar10Model,
    'densenet40_k12': densenet_model.create_densenet40_k12_model,
    'densenet100_k12': densenet_model.create_densenet100_k12_model,
    'densenet100_k24': densenet_model.create_densenet100_k24_model,
    'nasnet': nasnet_model.NasnetCifarModel,
}
109 | 
110 | 
# Registry for the 'coco' dataset.
_model_name_to_object_detection_model = {
    'ssd300': ssd_model.SSD300Model,
    'trivial': trivial_model.TrivialSSD300Model,
}


# Registry for the 'librispeech' dataset. It lives at module level, like the
# other registries, so that entries added through register_model() persist and
# its duplicate-name check works. Building this dict inside _get_model_map
# handed every caller a fresh throwaway copy.
_model_name_to_speech_model = {
    'deepspeech2': deepspeech.DeepSpeech2Model,
}


def _get_model_map(dataset_name):
  """Get name to model map for specified dataset.

  Args:
    dataset_name: One of 'cifar10', 'imagenet', 'synthetic', 'librispeech' or
      'coco'.
  Returns:
    The module-level dict mapping model names to model factories for that
    dataset. The same dict object is returned on every call, so mutations made
    by callers are visible to later lookups.
  Raises:
    ValueError: `dataset_name` is not one of the supported names.
  """
  if dataset_name == 'cifar10':
    return _model_name_to_cifar_model
  elif dataset_name in ('imagenet', 'synthetic'):
    return _model_name_to_imagenet_model
  elif dataset_name == 'librispeech':
    return _model_name_to_speech_model
  elif dataset_name == 'coco':
    return _model_name_to_object_detection_model
  else:
    raise ValueError('Invalid dataset name: %s' % dataset_name)
129 | 
130 | 
def get_model_config(model_name, dataset, params):
  """Instantiates the model registered under `model_name` for `dataset`.

  Args:
    model_name: Name of the model to look up.
    dataset: Object whose `name` attribute selects the model registry.
    params: Forwarded to the model factory as the `params` keyword argument.
  Returns:
    Whatever the registered factory returns for `params`.
  Raises:
    ValueError: The dataset name is unknown, or `model_name` is not
      registered for that dataset.
  """
  registry = _get_model_map(dataset.name)
  if model_name in registry:
    return registry[model_name](params=params)
  raise ValueError("Invalid model name '%s' for dataset '%s'" %
                   (model_name, dataset.name))
139 | 
140 | 
def register_model(model_name, dataset_name, model_func):
  """Adds `model_func` to a dataset's registry for use by `get_model_config`.

  Args:
    model_name: Name to register the model under.
    dataset_name: Dataset whose registry receives the new entry.
    model_func: Factory stored under `model_name`; `get_model_config` calls it
      as `model_func(params=params)`.
  Raises:
    ValueError: The dataset name is unknown, or `model_name` is already
      registered for that dataset.
  """
  registry = _get_model_map(dataset_name)
  if model_name not in registry:
    registry[model_name] = model_func
    return
  raise ValueError('Model "%s" is already registered for dataset "%s"' %
                   (model_name, dataset_name))
148 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/coco_metric.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 Google. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """COCO-style evaluation metrics.
 16 | 
 17 | Forked from reference model implementation.
 18 | 
 19 | COCO API: github.com/cocodataset/cocoapi/
 20 | """
 21 | 
 22 | from __future__ import absolute_import
 23 | from __future__ import division
 24 | from __future__ import print_function
 25 | 
 26 | import atexit
 27 | import tempfile
 28 | 
 29 | from absl import flags
 30 | 
 31 | import numpy as np
 32 | from pycocotools.coco import COCO
 33 | from pycocotools.cocoeval import COCOeval
 34 | import six
 35 | 
 36 | import tensorflow as tf
 37 | 
 38 | import mlperf
 39 | import ssd_constants
 40 | 
FLAGS = flags.FLAGS


# https://github.com/cocodataset/cocoapi/issues/49
# On Python 3, give the pycocotools.coco module a `unicode` name bound to
# `str`. NOTE(review): presumably pycocotools still refers to the Python 2
# `unicode` builtin internally -- see the linked issue.
if six.PY3:
  import pycocotools.coco
  pycocotools.coco.unicode = str
 48 | 
 49 | 
 50 | def async_eval_runner(queue_predictions, queue_results, val_json_file):
 51 |   """Load intermediate eval results and get COCO metrics."""
 52 |   while True:
 53 |     message = queue_predictions.get()
 54 |     if message == 'STOP':  # poison pill
 55 |       break
 56 |     step, predictions = message
 57 |     results = compute_map(predictions, val_json_file)
 58 |     queue_results.put((step, results))
 59 | 
 60 | 
 61 | def compute_map(predictions, val_json_file):
 62 |   """Use model predictions to compute mAP.
 63 | 
 64 |   Args:
 65 |     predictions: a list of tuples returned by decoded_predictions function,
 66 |       each containing the following elements:
 67 |       image source_id, box coordinates in XYWH order, probability score, label
 68 |     val_json_file: path to COCO annotation file
 69 |   Returns:
 70 |     A dictionary that maps all COCO metrics (keys) to their values
 71 |   """
 72 | 
 73 |   if val_json_file.startswith("gs://"):
 74 |     _, local_val_json = tempfile.mkstemp(suffix=".json")
 75 |     tf.gfile.Remove(local_val_json)
 76 | 
 77 |     tf.gfile.Copy(val_json_file, local_val_json)
 78 |     atexit.register(tf.gfile.Remove, local_val_json)
 79 |   else:
 80 |     local_val_json = val_json_file
 81 | 
 82 |   cocoGt = COCO(local_val_json)
 83 |   cocoDt = cocoGt.loadRes(np.array(predictions))
 84 |   E = COCOeval(cocoGt, cocoDt, iouType='bbox')
 85 |   E.evaluate()
 86 |   E.accumulate()
 87 |   E.summarize()
 88 |   print("Current AP: {:.5f}".format(E.stats[0]))
 89 |   metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
 90 |                   'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
 91 | 
 92 |   # Prefix with "COCO" to group in TensorBoard.
 93 |   return {"COCO/" + key: value for key, value in zip(metric_names, E.stats)}
 94 | 
 95 | 
 96 | def calc_iou(target, candidates):
 97 |   target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1))
 98 |   # Left Top & Right Bottom
 99 |   lt = np.maximum(target_tiled[:,:2], candidates[:,:2])
100 | 
101 |   rb = np.minimum(target_tiled[:,2:], candidates[:,2:])
102 | 
103 |   delta = np.maximum(rb - lt, 0)
104 | 
105 |   intersect = delta[:,0] * delta[:,1]
106 | 
107 |   delta1 = target_tiled[:,2:] - candidates[:,:2]
108 |   area1 = delta1[:,0] * delta1[:,1]
109 |   delta2 = target_tiled[:,2:] - candidates[:,:2]
110 |   area2 = delta2[:,0] * delta2[:,1]
111 | 
112 |   iou = intersect/(area1 + area2 - intersect)
113 |   return iou
114 | 
115 | 
116 | # TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow based
117 | # implementation under ssd_model.py accuracy_function.
def decode_predictions(labels_and_predictions):
  """Decode predictions and remove unused boxes and labels.

  Args:
    labels_and_predictions: iterable of per-image records, each indexable by
      ssd_constants.SOURCE_ID, PRED_BOXES, PRED_SCORES and RAW_SHAPE.
  Returns:
    A list with one row per kept detection:
    [source_id, x, y, w, h, prob, category], where the box is scaled by the
    image's raw width/height and the category is looked up in
    ssd_constants.CLASS_INV_MAP.
  """
  nms_threshold = ssd_constants.OVERLAP_CRITERIA
  max_boxes = ssd_constants.MAX_NUM_EVAL_BOXES

  decoded = []
  for example in labels_and_predictions:
    image_id = int(example[ssd_constants.SOURCE_ID])
    raw_boxes = example[ssd_constants.PRED_BOXES]
    raw_scores = example[ssd_constants.PRED_SCORES]

    boxes, classes, scores = decode_single(
        raw_boxes, raw_scores, nms_threshold, max_boxes, max_boxes)

    raw_height, raw_width, _ = example[ssd_constants.RAW_SHAPE]
    for box, cls, score in zip(boxes, classes, scores):
      # Ordering convention differs: box[0]/box[2] are scaled by the height
      # and box[1]/box[3] by the width, while the output row is x-first.
      x = box[1] * raw_width
      y = box[0] * raw_height
      w = (box[3] - box[1]) * raw_width
      h = (box[2] - box[0]) * raw_height
      category = ssd_constants.CLASS_INV_MAP[cls]
      decoded.append([image_id, x, y, w, h, score, category])

  mlperf.logger.log(key=mlperf.tags.NMS_THRESHOLD, value=nms_threshold)
  mlperf.logger.log(key=mlperf.tags.NMS_MAX_DETECTIONS, value=max_boxes)
  return decoded
142 | 
143 | 
def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200):
  """Runs per-class non-maximum suppression on one image's predictions.

  Reference to https://github.com/amdegroot/ssd.pytorch

  Args:
    bboxes_in: array of candidate boxes, one row per box.
    scores_in: array of scores with one row per box and one column per class;
      column 0 is treated as background and skipped.
    criteria: a box is suppressed when its IoU with an already selected box
      of the same class is not below this value.
    max_output: maximum number of detections returned over all classes.
    max_num: per class, only this many top-scoring boxes enter suppression.
  Returns:
    A (boxes, labels, scores) tuple for the kept detections, sorted by
    ascending score. If nothing exceeds ssd_constants.MIN_SCORE, a single
    dummy detection is returned instead.
  """
  kept_boxes = []
  kept_scores = []
  kept_labels = []

  per_class_scores = np.split(scores_in, scores_in.shape[1], 1)
  for class_id, class_scores in enumerate(per_class_scores):
    class_scores = np.squeeze(class_scores, 1)

    if class_id == 0:
      # skip background
      continue

    confident = class_scores > ssd_constants.MIN_SCORE
    if not np.any(confident):
      continue

    class_boxes = bboxes_in[confident, :]
    class_scores = class_scores[confident]

    # argsort is ascending, so the best remaining box is always the last one.
    remaining = np.argsort(class_scores)[-max_num:]
    selected = []

    # perform non-maximum suppression
    while len(remaining) > 0:
      best = remaining[-1]
      overlaps = calc_iou(class_boxes[best, :], class_boxes[remaining, :])

      # Keep only the boxes that overlap the chosen one less than `criteria`.
      remaining = remaining[overlaps < criteria]
      selected.append(best)

    kept_boxes.append(class_boxes[selected, :])
    kept_scores.append(class_scores[selected])
    kept_labels.extend([class_id] * len(selected))

  if not kept_scores:
    tf.logging.info("No objects detected. Returning dummy values.")
    dummy_boxes = np.zeros(shape=(1, 4), dtype=np.float32)
    dummy_labels = np.zeros(shape=(1,), dtype=np.int32)
    dummy_scores = (
        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE)
    return (dummy_boxes, dummy_labels, dummy_scores)

  all_boxes = np.concatenate(kept_boxes, axis=0)
  all_scores = np.concatenate(kept_scores, axis=0)
  all_labels = np.array(kept_labels)

  # Indices of the `max_output` highest scores, in ascending score order.
  top = np.argsort(all_scores)[-max_output:]

  return all_boxes[top, :], all_labels[top], all_scores[top]
199 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Wrap the official recommendation model in a tf_cnn_benchmarks Model.
 16 | 
 17 | This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
 18 | Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
 19 | intended to be used only with CNNs.
 20 | 
 21 | Only synthetic data with 1 GPU is currently supported.
 22 | """
 23 | 
 24 | from __future__ import absolute_import
 25 | from __future__ import division
 26 | from __future__ import print_function
 27 | 
 28 | import tensorflow as tf
 29 | 
 30 | from models import model
 31 | 
 32 | 
 33 | # Obtained by running the official NCF model with the following command:
 34 | #     python ncf_main.py  --dataset ml-20m
 35 | # and printing the number of users and items here:
 36 | # https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
 37 | _NUM_USERS_20M = 138493
 38 | _NUM_ITEMS_20M = 26744
 39 | 
 40 | 
 41 | # TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
 42 | # uses, ignore variable_scopes, which we rely on for multi-GPU support.
 43 | # TODO(reedwm): Support real data. This will require a significant refactor.
 44 | # TODO(reedwm): All-reduce IndexedSlices more effectively.
 45 | # TODO(reedwm): Support the 1M variant of this model.
 46 | 
 47 | 
 48 | class NcfModel(model.Model):
 49 |   r"""A model.Model wrapper around the official NCF recommendation model.
 50 | 
 51 |   To do an NCF run with synthetic data that roughly matches what the official
 52 |   model does, run:
 53 | 
 54 |   python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
 55 |       --weight_decay=0 --sparse_to_dense_grads
 56 |   """
 57 | 
 58 |   def __init__(self, params=None):
 59 |     super(NcfModel, self).__init__(
 60 |         'official_ncf', batch_size=2048, learning_rate=0.0005,
 61 |         fp16_loss_scale=128, params=params)
 62 |     if self.fp16_vars:
 63 |       raise ValueError('NCF model only supports float32 variables for now.')
 64 | 
 65 |   def build_network(self, inputs, phase_train=True, nclass=1001):
 66 |     try:
 67 |       from official.recommendation import neumf_model  # pylint: disable=g-import-not-at-top
 68 |     except ImportError as e:
 69 |       if 'neumf_model' not in e.message:
 70 |         raise
 71 |       raise ImportError('To use the experimental NCF model, you must clone the '
 72 |                         'repo https://github.com/tensorflow/models and add '
 73 |                         'tensorflow/models to the PYTHONPATH.')
 74 |     del nclass
 75 | 
 76 |     users, items, _ = inputs
 77 |     params = {
 78 |         'num_users': _NUM_USERS_20M,
 79 |         'num_items': _NUM_ITEMS_20M,
 80 |         'model_layers': (256, 256, 128, 64),
 81 |         'mf_dim': 64,
 82 |         'mf_regularization': 0,
 83 |         'mlp_reg_layers': (0, 0, 0, 0),
 84 |         'use_tpu': False
 85 |     }
 86 |     if self.data_type == tf.float32:
 87 |       keras_model = neumf_model.construct_model(users, items, params)
 88 |       logits = keras_model.output
 89 |     else:
 90 |       assert self.data_type == tf.float16
 91 |       old_floatx = tf.keras.backend.floatx()
 92 |       try:
 93 |         tf.keras.backend.set_floatx('float16')
 94 |         # We cannot rely on the variable_scope's fp16 custom getter here,
 95 |         # because the NCF model uses keras layers, which ignore variable scopes.
 96 |         # So we use a variable_creator_scope instead.
 97 |         with tf.variable_creator_scope(_fp16_variable_creator):
 98 |           keras_model = neumf_model.construct_model(users, items, params)
 99 |         logits = tf.cast(keras_model.output, tf.float32)
100 |       finally:
101 |         tf.keras.backend.set_floatx(old_floatx)
102 |     return model.BuildNetworkResult(logits=logits, extra_info=None)
103 | 
104 |   def loss_function(self, inputs, build_network_result):
105 |     logits = build_network_result.logits
106 | 
107 |     # Softmax with the first column of ones is equivalent to sigmoid.
108 |     # TODO(reedwm): Actually, the first column should be zeros to be equivalent
109 |     # to sigmoid. But, we keep it at ones to match the official models.
110 |     logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
111 |                        axis=1)
112 | 
113 |     return tf.losses.sparse_softmax_cross_entropy(
114 |         labels=inputs[2],
115 |         logits=logits
116 |     )
117 | 
118 |   def get_synthetic_inputs(self, input_name, nclass):
119 |     """Returns the ops to generate synthetic inputs and labels."""
120 |     def users_init_val():
121 |       return tf.random_uniform((self.batch_size,), minval=0,
122 |                                maxval=_NUM_USERS_20M, dtype=tf.int32)
123 |     users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
124 |                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
125 |                         name='synthetic_users')
126 |     def items_init_val():
127 |       return tf.random_uniform((self.batch_size,), minval=0,
128 |                                maxval=_NUM_ITEMS_20M, dtype=tf.int32)
129 |     items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
130 |                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
131 |                         name='synthetic_items')
132 | 
133 |     def labels_init_val():
134 |       return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
135 |                                dtype=tf.int32)
136 |     labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
137 |                          collections=[tf.GraphKeys.LOCAL_VARIABLES],
138 |                          name='synthetic_labels')
139 | 
140 |     return [users, items, labels]
141 | 
142 |   def get_input_shapes(self, subset):
143 |     del subset
144 |     return [[self.batch_size], [self.batch_size], [self.batch_size]]
145 | 
146 |   def get_input_data_types(self, subset):
147 |     del subset
148 |     return [self.int32, tf.int32, tf.int32]
149 | 
150 | 
def _fp16_variable_creator(next_creator, **kwargs):
  """Variable creator to create variables in fp32 and cast them to fp16.

  Args:
    next_creator: the next variable creator in the chain.
    **kwargs: variable creation arguments; 'dtype' and 'initial_value' are
      inspected and, for float16 variables, replaced by float32 equivalents.
  Returns:
    For float16 requests, the float32 variable cast to float16. Otherwise,
    whatever next_creator returns for the untouched arguments.
  """
  requested_dtype = kwargs.get('dtype', None)
  init = kwargs.get('initial_value', None)
  # Without an explicit dtype, fall back to the dtype of a concrete initial
  # value. A callable initializer cannot be inspected without calling it.
  if requested_dtype is None and init is not None and not callable(init):
    requested_dtype = init.dtype

  wants_fp16 = requested_dtype == tf.float16
  if not wants_fp16:
    return next_creator(**kwargs)

  if callable(init):
    def fp32_init():
      return tf.cast(init(), tf.float32)
  else:
    fp32_init = tf.cast(init, tf.float32)

  kwargs['dtype'] = tf.float32
  kwargs['initial_value'] = fp32_init
  fp32_var = next_creator(**kwargs)
  return tf.cast(fp32_var, dtype=tf.float16)
169 | 
170 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/mobilenet_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Tests for mobilenet_v2, branched from slim for fp16 performance study."""
 16 | 
 17 | from __future__ import absolute_import
 18 | from __future__ import division
 19 | from __future__ import print_function
 20 | 
 21 | import copy
 22 | 
 23 | import tensorflow as tf
 24 | 
 25 | from models import mobilenet
 26 | from models import mobilenet_conv_blocks as ops
 27 | from models import mobilenet_v2
 28 | 
 29 | 
 30 | slim = tf.contrib.slim
 31 | 
 32 | 
 33 | def find_ops(optype):
 34 |   """Find ops of a given type in graphdef or a graph.
 35 | 
 36 |   Args:
 37 |     optype: operation type (e.g. Conv2D)
 38 |   Returns:
 39 |      List of operations.
 40 |   """
 41 |   gd = tf.get_default_graph()
 42 |   return [var for var in gd.get_operations() if var.type == optype]
 43 | 
 44 | 
 45 | class MobilenetV2Test(tf.test.TestCase):
 46 | 
 47 |   def setUp(self):
 48 |     tf.reset_default_graph()
 49 | 
 50 |   def testCreation(self):
 51 |     spec = dict(mobilenet_v2.V2_DEF)
 52 |     _, ep = mobilenet.mobilenet(
 53 |         tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
 54 |     num_convs = len(find_ops('Conv2D'))
 55 | 
 56 |     # This is mostly a sanity test. No deep reason for these particular
 57 |     # constants.
 58 |     #
 59 |     # All but first 2 and last one have  two convolutions, and there is one
 60 |     # extra conv that is not in the spec. (logits)
 61 |     self.assertEqual(num_convs, len(spec['spec']) * 2 - 2)
 62 |     # Check that depthwise are exposed.
 63 |     for i in range(2, 17):
 64 |       self.assertIn('layer_%d/depthwise_output' % i, ep)
 65 | 
 66 |   def testCreationNoClasses(self):
 67 |     spec = copy.deepcopy(mobilenet_v2.V2_DEF)
 68 |     net, ep = mobilenet.mobilenet(
 69 |         tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec,
 70 |         num_classes=None)
 71 |     self.assertIs(net, ep['global_pool'])
 72 | 
 73 |   def testImageSizes(self):
 74 |     for input_size, output_size in [(224, 7), (192, 6), (160, 5),
 75 |                                     (128, 4), (96, 3)]:
 76 |       tf.reset_default_graph()
 77 |       _, ep = mobilenet_v2.mobilenet(
 78 |           tf.placeholder(tf.float32, (10, input_size, input_size, 3)))
 79 | 
 80 |       self.assertEqual(ep['layer_18/output'].get_shape().as_list()[1:3],
 81 |                        [output_size] * 2)
 82 | 
 83 |   def testWithSplits(self):
 84 |     spec = copy.deepcopy(mobilenet_v2.V2_DEF)
 85 |     spec['overrides'] = {
 86 |         (ops.expanded_conv,): dict(split_expansion=2),
 87 |     }
 88 |     _, _ = mobilenet.mobilenet(
 89 |         tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
 90 |     num_convs = len(find_ops('Conv2D'))
 91 |     # All but 3 op has 3 conv operatore, the remainign 3 have one
 92 |     # and there is one unaccounted.
 93 |     self.assertEqual(num_convs, len(spec['spec']) * 3 - 5)
 94 | 
 95 |   def testWithOutputStride8(self):
 96 |     out, _ = mobilenet.mobilenet_base(
 97 |         tf.placeholder(tf.float32, (10, 224, 224, 16)),
 98 |         conv_defs=mobilenet_v2.V2_DEF,
 99 |         output_stride=8,
100 |         scope='MobilenetV2')
101 |     self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
102 | 
103 |   def testDivisibleBy(self):
104 |     tf.reset_default_graph()
105 |     mobilenet_v2.mobilenet(
106 |         tf.placeholder(tf.float32, (10, 224, 224, 16)),
107 |         conv_defs=mobilenet_v2.V2_DEF,
108 |         divisible_by=16,
109 |         min_depth=32)
110 |     s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
111 |     s = set(s)
112 |     self.assertSameElements([32, 64, 96, 160, 192, 320, 384, 576, 960, 1280,
113 |                              1001], s)
114 | 
115 |   def testDivisibleByWithArgScope(self):
116 |     tf.reset_default_graph()
117 |     # Verifies that depth_multiplier arg scope actually works
118 |     # if no default min_depth is provided.
119 |     with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
120 |       mobilenet_v2.mobilenet(
121 |           tf.placeholder(tf.float32, (10, 224, 224, 2)),
122 |           conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
123 |       s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
124 |       s = set(s)
125 |       self.assertSameElements(s, [32, 192, 128, 1001])
126 | 
127 |   def testFineGrained(self):
128 |     tf.reset_default_graph()
129 |     # Verifies that depth_multiplier arg scope actually works
130 |     # if no default min_depth is provided.
131 | 
132 |     mobilenet_v2.mobilenet(
133 |         tf.placeholder(tf.float32, (10, 224, 224, 2)),
134 |         conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.01,
135 |         finegrain_classification_mode=True)
136 |     s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
137 |     s = set(s)
138 |     # All convolutions will be 8->48, except for the last one.
139 |     self.assertSameElements(s, [8, 48, 1001, 1280])
140 | 
141 |   def testMobilenetBase(self):
142 |     tf.reset_default_graph()
143 |     # Verifies that mobilenet_base returns pre-pooling layer.
144 |     with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
145 |       net, _ = mobilenet_v2.mobilenet_base(
146 |           tf.placeholder(tf.float32, (10, 224, 224, 16)),
147 |           conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
148 |       self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128])
149 | 
150 |   def testWithOutputStride16(self):
151 |     tf.reset_default_graph()
152 |     out, _ = mobilenet.mobilenet_base(
153 |         tf.placeholder(tf.float32, (10, 224, 224, 16)),
154 |         conv_defs=mobilenet_v2.V2_DEF,
155 |         output_stride=16)
156 |     self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
157 | 
158 |   def testWithOutputStride8AndExplicitPadding(self):
159 |     tf.reset_default_graph()
160 |     out, _ = mobilenet.mobilenet_base(
161 |         tf.placeholder(tf.float32, (10, 224, 224, 16)),
162 |         conv_defs=mobilenet_v2.V2_DEF,
163 |         output_stride=8,
164 |         use_explicit_padding=True,
165 |         scope='MobilenetV2')
166 |     self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
167 | 
168 |   def testWithOutputStride16AndExplicitPadding(self):
169 |     tf.reset_default_graph()
170 |     out, _ = mobilenet.mobilenet_base(
171 |         tf.placeholder(tf.float32, (10, 224, 224, 16)),
172 |         conv_defs=mobilenet_v2.V2_DEF,
173 |         output_stride=16,
174 |         use_explicit_padding=True)
175 |     self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
176 | 
177 |   def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self):
178 |     sc = mobilenet.training_scope(is_training=None)
179 |     self.assertNotIn('is_training', sc[slim.arg_scope_func_key(
180 |         slim.batch_norm)])
181 | 
182 |   def testBatchNormScopeDoesHasIsTrainingWhenItsNotNone(self):
183 |     sc = mobilenet.training_scope(is_training=False)
184 |     self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
185 |     sc = mobilenet.training_scope(is_training=True)
186 |     self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
187 |     sc = mobilenet.training_scope()
188 |     self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
189 | 
190 | 
191 | if __name__ == '__main__':
192 |   tf.test.main()
193 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/mobilenet_v2.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Mobilenet V2 model, branched from slim models for fp16 performance study.
 16 | 
 17 | Architecture: https://arxiv.org/abs/1801.04381
 18 | 
 19 | The base model gives 72.2% accuracy on ImageNet, with 300MMadds,
 20 | 3.4 M parameters.
 21 | """
 22 | 
 23 | from __future__ import absolute_import
 24 | from __future__ import division
 25 | from __future__ import print_function
 26 | 
 27 | import copy
 28 | 
 29 | import tensorflow as tf
 30 | 
 31 | from models import mobilenet as lib
 32 | from models import mobilenet_conv_blocks as ops
 33 | from models import model
 34 | 
 35 | slim = tf.contrib.slim
 36 | op = lib.op
 37 | 
 38 | expand_input = ops.expand_input_by_factor
 39 | 
 40 | # pyformat: disable
 41 | # Architecture: https://arxiv.org/abs/1801.04381
 42 | V2_DEF = dict(
 43 |     defaults={
 44 |         # Note: these parameters of batch norm affect the architecture
 45 |         # that's why they are here and not in training_scope.
 46 |         (slim.batch_norm,): {'center': True, 'scale': True},
 47 |         (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
 48 |             'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
 49 |         },
 50 |         (ops.expanded_conv,): {
 51 |             'expansion_size': expand_input(6),
 52 |             'split_expansion': 1,
 53 |             'normalizer_fn': slim.batch_norm,
 54 |             'residual': True
 55 |         },
 56 |         (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
 57 |     },
 58 |     spec=[
 59 |         op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
 60 |         op(ops.expanded_conv,
 61 |            expansion_size=expand_input(1, divisible_by=1),
 62 |            num_outputs=16),
 63 |         op(ops.expanded_conv, stride=2, num_outputs=24),
 64 |         op(ops.expanded_conv, stride=1, num_outputs=24),
 65 |         op(ops.expanded_conv, stride=2, num_outputs=32),
 66 |         op(ops.expanded_conv, stride=1, num_outputs=32),
 67 |         op(ops.expanded_conv, stride=1, num_outputs=32),
 68 |         op(ops.expanded_conv, stride=2, num_outputs=64),
 69 |         op(ops.expanded_conv, stride=1, num_outputs=64),
 70 |         op(ops.expanded_conv, stride=1, num_outputs=64),
 71 |         op(ops.expanded_conv, stride=1, num_outputs=64),
 72 |         op(ops.expanded_conv, stride=1, num_outputs=96),
 73 |         op(ops.expanded_conv, stride=1, num_outputs=96),
 74 |         op(ops.expanded_conv, stride=1, num_outputs=96),
 75 |         op(ops.expanded_conv, stride=2, num_outputs=160),
 76 |         op(ops.expanded_conv, stride=1, num_outputs=160),
 77 |         op(ops.expanded_conv, stride=1, num_outputs=160),
 78 |         op(ops.expanded_conv, stride=1, num_outputs=320),
 79 |         op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280)
 80 |     ],
 81 | )
 82 | # pyformat: enable
 83 | 
 84 | 
 85 | @slim.add_arg_scope
 86 | def mobilenet(input_tensor,
 87 |               num_classes=1001,
 88 |               depth_multiplier=1.0,
 89 |               scope='MobilenetV2',
 90 |               conv_defs=None,
 91 |               finegrain_classification_mode=False,
 92 |               min_depth=None,
 93 |               divisible_by=None,
 94 |               **kwargs):
 95 |   """Creates mobilenet V2 network.
 96 | 
 97 |   Inference mode is created by default. To create training use training_scope
 98 |   below.
 99 | 
100 |   with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
101 |      logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
102 | 
103 |   Args:
104 |     input_tensor: The input tensor
105 |     num_classes: number of classes
106 |     depth_multiplier: The multiplier applied to scale number of
107 |     channels in each layer. Note: this is called depth multiplier in the
108 |     paper but the name is kept for consistency with slim's model builder.
109 |     scope: Scope of the operator
110 |     conv_defs: Allows to override default conv def.
111 |     finegrain_classification_mode: When set to True, the model
112 |     will keep the last layer large even for small multipliers. Following
113 |     https://arxiv.org/abs/1801.04381
114 |     suggests that it improves performance for ImageNet-type of problems.
115 |       *Note* ignored if final_endpoint makes the builder exit earlier.
116 |     min_depth: If provided, will ensure that all layers will have that
117 |     many channels after application of depth multiplier.
118 |     divisible_by: If provided will ensure that all layers # channels
119 |     will be divisible by this number.
120 |     **kwargs: passed directly to mobilenet.mobilenet:
121 |       prediction_fn- what prediction function to use.
122 |       reuse-: whether to reuse variables (if reuse set to true, scope
123 |       must be given).
124 |   Returns:
125 |     logits/endpoints pair
126 | 
127 |   Raises:
128 |     ValueError: On invalid arguments
129 |   """
130 |   if conv_defs is None:
131 |     conv_defs = V2_DEF
132 |   if 'multiplier' in kwargs:
133 |     raise ValueError('mobilenetv2 doesn\'t support generic '
134 |                      'multiplier parameter use "depth_multiplier" instead.')
135 |   if finegrain_classification_mode:
136 |     conv_defs = copy.deepcopy(conv_defs)
137 |     if depth_multiplier < 1:
138 |       conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier
139 | 
140 |   depth_args = {}
141 |   # NB: do not set depth_args unless they are provided to avoid overriding
142 |   # whatever default depth_multiplier might have thanks to arg_scope.
143 |   if min_depth is not None:
144 |     depth_args['min_depth'] = min_depth
145 |   if divisible_by is not None:
146 |     depth_args['divisible_by'] = divisible_by
147 | 
148 |   with slim.arg_scope((lib.depth_multiplier,), **depth_args):
149 |     return lib.mobilenet(
150 |         input_tensor,
151 |         num_classes=num_classes,
152 |         conv_defs=conv_defs,
153 |         scope=scope,
154 |         multiplier=depth_multiplier,
155 |         **kwargs)
156 | 
157 | 
@slim.add_arg_scope
def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
  """Creates base of the mobilenet (no pooling and no logits).

  Args:
    input_tensor: The input tensor.
    depth_multiplier: forwarded unchanged to `mobilenet`.
    **kwargs: forwarded unchanged to `mobilenet`.
  Returns:
    Whatever `mobilenet` returns when called with base_only=True.
  """
  base_kwargs = dict(kwargs, depth_multiplier=depth_multiplier, base_only=True)
  return mobilenet(input_tensor, **base_kwargs)
163 | 
164 | 
def training_scope(**kwargs):
  """Defines MobilenetV2 training scope.

  This is a thin wrapper that forwards every argument to the generic
  mobilenet training_scope.

  Usage:
     with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    **kwargs: Passed to mobilenet.training_scope. The following parameters
    are supported:
      weight_decay- The weight decay to use for regularizing the model.
      stddev-  Standard deviation for initialization, if negative uses xavier.
      dropout_keep_prob- dropout keep probability
      bn_decay- decay for the batch norm moving averages.

  Returns:
    An `arg_scope` to use for the mobilenet v2 model.
  """
  scope = lib.training_scope(**kwargs)
  return scope
186 | 
187 | 
class MobilenetModel(model.CNNModel):
  """Mobilenet model configuration."""

  def __init__(self, params=None):
    """Registers the 'mobilenet' model with its default hyperparameters."""
    super(MobilenetModel, self).__init__(
        'mobilenet', image_size=224, batch_size=32, learning_rate=0.005,
        params=params)

  def add_inference(self, cnn):
    """Stacks the MobilenetV2 network on top of the builder's current layer.

    Args:
      cnn: the network builder; its top_layer and top_size are updated to the
        network output and that output's last dimension.
    """
    is_training = cnn.phase_train
    with tf.contrib.slim.arg_scope(training_scope(is_training=is_training)):
      net, _ = mobilenet(cnn.top_layer, is_training=is_training)
      cnn.top_layer = net
      cnn.top_size = net.shape[-1].value
199 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/mlperf_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Contains tests related to MLPerf.
 16 | 
 17 | Note this test only passes if the MLPerf compliance library is installed.
 18 | """
 19 | 
 20 | from __future__ import absolute_import
 21 | from __future__ import division
 22 | from __future__ import print_function
 23 | 
 24 | from collections import Counter
 25 | import logging
 26 | import re
 27 | 
 28 | import six
 29 | import tensorflow as tf
 30 | import benchmark_cnn
 31 | import datasets
 32 | import mlperf
 33 | import test_util
 34 | from models import model
 35 | from mlperf_compliance import mlperf_log
 36 | 
 37 | 
 38 | class _MlPerfTestModel(model.CNNModel):
 39 |   """A model to test the MLPerf compliance logging on."""
 40 | 
 41 |   def __init__(self):
 42 |     super(_MlPerfTestModel, self).__init__(
 43 |         'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)
 44 | 
 45 |   def add_inference(self, cnn):
 46 |     assert cnn.top_layer.shape[1:] == (3, 224, 224)
 47 |     cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
 48 |     cnn.mpool(1, 1, 1, 1, num_channels_in=1)
 49 |     cnn.reshape([-1, 224 * 224])
 50 |     cnn.affine(1, activation=None)
 51 | 
 52 |     # Assert that the batch norm variables are filtered out for L2 loss.
 53 |     variables = tf.global_variables() + tf.local_variables()
 54 |     assert len(variables) > len(self.filter_l2_loss_vars(variables))
 55 | 
 56 | 
 57 | class MlPerfComplianceTest(tf.test.TestCase):
 58 |   """Tests the MLPerf compliance logs.
 59 | 
 60 |   This serves as a quick check that we probably didn't break the compliance
 61 |   logging. It is not mean to be as comprehensive as the official MLPerf
 62 |   compliance checker will be.
 63 |   """
 64 | 
 65 |   def setUp(self):
 66 |     super(MlPerfComplianceTest, self).setUp()
 67 |     benchmark_cnn.setup(benchmark_cnn.make_params())
 68 | 
 69 |   # Map between regex and the number of times we expect to see that regex in the
 70 |   # logs. Entry commented out with the comment FIXME indicate that
 71 |   # tf_cnn_benchmarks currently fails compliance in that regard, and needs to be
 72 |   # fixed to be MLPerf compliant.
 73 |   EXPECTED_LOG_REGEXES = {
 74 |       # Preprocessing tags
 75 |       mlperf.tags.INPUT_ORDER: 2,  # 1 for training, 1 for eval
 76 |       # We pass --tf_random_seed=9876 in the test.
 77 |       r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
 78 |       # The Numpy random seed is hardcoded to 4321.
 79 |       r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
 80 |       r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
 81 |                    datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
 82 |       r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
 83 |                    datasets.IMAGENET_NUM_VAL_IMAGES): 1,
 84 |       mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
 85 |       mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
 86 |       mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
 87 |       mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
 88 |       mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
 89 |       mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
 90 |       r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,
 91 | 
 92 |       r'%s: \[123.68, 116.78, 103.94\].*' % mlperf.tags.INPUT_MEAN_SUBTRACTION:
 93 |           2,
 94 | 
 95 |       r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,
 96 | 
 97 |       # 1 for training, 1 for eval
 98 |       r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,
 99 | 
100 |       # Resnet model tags
101 |       mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
102 |       # 2 for training, 2 for eval. Although there's only 1 conv2d, each conv2d
103 |       # produces 2 logs.
104 |       mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
105 |       mlperf.tags.MODEL_HP_RELU + '.*': 2,
106 |       mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
107 |       mlperf.tags.MODEL_HP_DENSE + '.*': 4,
108 |       mlperf.tags.MODEL_HP_DENSE + '.*': 4,
109 | 
110 |       # Note that tags our test model does not emit, like MODEL_HP_SHORTCUT_ADD,
111 |       # are omitted here.
112 | 
113 |       r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,
114 | 
115 |       # 1 for training, 2 because the _MlPerfTestModel calls this when building
116 |       # the model for both training and eval
117 |       r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,
118 | 
119 |       r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,
120 | 
121 |       # Note we do not handle OPT_LR, since that is printed to stderr using
122 |       # tf.Print, which we cannot easily intercept.
123 | 
124 |       # Other tags
125 |       '%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
126 |       '%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
127 |       mlperf.tags.RUN_START: 1,
128 |       '%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
129 |       mlperf.tags.TRAIN_LOOP: 1,
130 |       mlperf.tags.TRAIN_EPOCH + '.*': 1,
131 |       '%s: 2' % mlperf.tags.INPUT_SIZE: 2,
132 |       mlperf.tags.EVAL_START: 2,
133 |       mlperf.tags.EVAL_STOP: 2,
134 |       '%s: 6' % mlperf.tags.EVAL_SIZE: 2,
135 |       mlperf.tags.EVAL_ACCURACY + '.*': 2,
136 |       '%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
137 |       mlperf.tags.RUN_STOP + '.*': 1,
138 |       mlperf.tags.RUN_FINAL: 1
139 |   }
140 |   EXPECTED_LOG_REGEXES = Counter({re.compile(k): v for
141 |                                   k, v in EXPECTED_LOG_REGEXES.items()})
142 | 
143 |   def testMlPerfCompliance(self):
144 |     string_io = six.StringIO()
145 |     handler = logging.StreamHandler(string_io)
146 |     data_dir = test_util.create_black_and_white_images()
147 |     try:
148 |       mlperf_log.LOGGER.addHandler(handler)
149 |       params = benchmark_cnn.make_params(data_dir=data_dir,
150 |                                          data_name='imagenet',
151 |                                          batch_size=2,
152 |                                          num_warmup_batches=0,
153 |                                          num_batches=2,
154 |                                          num_eval_batches=3,
155 |                                          eval_during_training_every_n_steps=1,
156 |                                          distortions=False,
157 |                                          weight_decay=0.5,
158 |                                          optimizer='momentum',
159 |                                          momentum=0.5,
160 |                                          stop_at_top_1_accuracy=2.0,
161 |                                          tf_random_seed=9876,
162 |                                          ml_perf=True)
163 |       with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
164 |         bench_cnn = benchmark_cnn.BenchmarkCNN(params, model=_MlPerfTestModel())
165 |         bench_cnn.run()
166 |       logs = string_io.getvalue().splitlines()
167 |       log_regexes = Counter()
168 |       for log in logs:
169 |         for regex in self.EXPECTED_LOG_REGEXES:
170 |           if regex.search(log):
171 |             log_regexes[regex] += 1
172 |       if log_regexes != self.EXPECTED_LOG_REGEXES:
173 |         diff_counter = Counter(log_regexes)
174 |         diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
175 |         differences = []
176 |         for regex in (k for k in diff_counter.keys() if diff_counter[k]):
177 |           found_count = log_regexes[regex]
178 |           expected_count = self.EXPECTED_LOG_REGEXES[regex]
179 |           differences.append('  For regex %s: Found %d lines matching but '
180 |                              'expected to find %d' %
181 |                              (regex.pattern, found_count, expected_count))
182 |         raise AssertionError('Logs did not match expected logs. Differences:\n'
183 |                              '%s' % '\n'.join(differences))
184 |     finally:
185 |       mlperf_log.LOGGER.removeHandler(handler)
186 | 
# Run the test cases through TensorFlow's test runner when executed directly.
if __name__ == '__main__':
  tf.test.main()
189 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/datasets.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Benchmark dataset utilities.
 16 | """
 17 | 
 18 | from __future__ import absolute_import
 19 | from __future__ import division
 20 | from __future__ import print_function
 21 | 
 22 | from abc import abstractmethod
 23 | import os
 24 | 
 25 | import numpy as np
 26 | import six
 27 | from six.moves import cPickle
 28 | from six.moves import xrange  # pylint: disable=redefined-builtin
 29 | import tensorflow as tf
 30 | 
 31 | from tensorflow.python.platform import gfile
 32 | import preprocessing
 33 | 
# Example counts returned by ImagenetDataset.num_examples_per_epoch for the
# 'train' and 'validation' subsets respectively.
IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000

# Example counts returned by COCODataset.num_examples_per_epoch for the
# 'train' and 'validation' subsets respectively.
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952
 39 | 
 40 | 
 41 | class Dataset(object):
 42 |   """Abstract class for cnn benchmarks dataset."""
 43 | 
 44 |   def __init__(self,
 45 |                name,
 46 |                data_dir=None,
 47 |                queue_runner_required=False,
 48 |                num_classes=None):
 49 |     self.name = name
 50 |     self.data_dir = data_dir
 51 |     self._queue_runner_required = queue_runner_required
 52 |     self._num_classes = num_classes
 53 | 
 54 |   def tf_record_pattern(self, subset):
 55 |     return os.path.join(self.data_dir, '%s-*-of-*' % subset)
 56 | 
 57 |   def reader(self):
 58 |     return tf.TFRecordReader()
 59 | 
 60 |   @property
 61 |   def num_classes(self):
 62 |     return self._num_classes
 63 | 
 64 |   @num_classes.setter
 65 |   def num_classes(self, val):
 66 |     self._num_classes = val
 67 | 
 68 |   @abstractmethod
 69 |   def num_examples_per_epoch(self, subset):
 70 |     pass
 71 | 
 72 |   def __str__(self):
 73 |     return self.name
 74 | 
 75 |   def get_input_preprocessor(self, input_preprocessor='default'):
 76 |     assert not self.use_synthetic_gpu_inputs()
 77 |     return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]
 78 | 
 79 |   def queue_runner_required(self):
 80 |     return self._queue_runner_required
 81 | 
 82 |   def use_synthetic_gpu_inputs(self):
 83 |     return not self.data_dir
 84 | 
 85 | 
 86 | class LibrispeechDataset(Dataset):
 87 |   """Configuration for LibriSpeech dataset."""
 88 | 
 89 |   def __init__(self, data_dir=None):
 90 |     super(LibrispeechDataset, self).__init__(
 91 |         'librispeech', data_dir, num_classes=29)
 92 | 
 93 |   def tf_record_pattern(self, subset):
 94 |     if subset == 'train':
 95 |       return os.path.join(self.data_dir, 'train-clean-*.tfrecords')
 96 |     elif subset == 'validation':
 97 |       return os.path.join(self.data_dir, 'test-clean.tfrecords')
 98 |     else:
 99 |       return ''
100 | 
101 |   def num_examples_per_epoch(self, subset='train'):
102 |     del subset
103 |     return 2  # TODO(laigd): currently this is an arbitrary number.
104 | 
105 | 
class ImageDataset(Dataset):
  """Base class for datasets of images with a height, width and depth."""

  def __init__(self,
               name,
               height,
               width,
               depth=None,
               data_dir=None,
               queue_runner_required=False,
               num_classes=1001):
    super(ImageDataset, self).__init__(
        name,
        data_dir=data_dir,
        queue_runner_required=queue_runner_required,
        num_classes=num_classes)
    self.height = height
    self.width = width
    # A missing (or otherwise falsy) depth falls back to 3.
    self.depth = depth if depth else 3
122 | 
123 | 
class ImagenetDataset(ImageDataset):
  """Dataset settings for Imagenet (300x300 images)."""

  def __init__(self, data_dir=None):
    super(ImagenetDataset, self).__init__(
        'imagenet', height=300, width=300, data_dir=data_dir)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the example count of `subset` ('train' or 'validation').

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    if subset == 'train':
      return IMAGENET_NUM_TRAIN_IMAGES
    if subset == 'validation':
      return IMAGENET_NUM_VAL_IMAGES
    raise ValueError('Invalid data subset "%s"' % subset)
138 | 
139 | 
class Cifar10Dataset(ImageDataset):
  """Dataset settings for cifar 10.

  `read_data_files` loads every image of a subset into memory at once.
  """

  def __init__(self, data_dir=None):
    super(Cifar10Dataset, self).__init__(
        name='cifar10',
        height=32,
        width=32,
        data_dir=data_dir,
        queue_runner_required=True,
        num_classes=11)

  def read_data_files(self, subset='train'):
    """Loads the pickled batches of `subset` as numpy arrays.

    Args:
      subset: 'train' (data_batch_1 .. data_batch_5) or 'validation'
        (test_batch).

    Returns:
      A tuple (images, labels): the concatenated b'data' entries cast to
      float32 and the concatenated b'labels' entries.

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      batch_names = ['data_batch_%d' % i for i in xrange(1, 6)]
    elif subset == 'validation':
      batch_names = ['test_batch']
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    # python2's pickle loader does not accept the encoding parameter.
    load_kwargs = {} if six.PY2 else {'encoding': 'bytes'}
    batches = []
    for batch_name in batch_names:
      with gfile.Open(os.path.join(self.data_dir, batch_name), 'rb') as f:
        batches.append(cPickle.load(f, **load_kwargs))
    # The input format is described at
    # http://www.cs.toronto.edu/~kriz/cifar.html.
    images = np.concatenate([batch[b'data'] for batch in batches])
    labels = np.concatenate([batch[b'labels'] for batch in batches])
    return images.astype(np.float32), labels

  def num_examples_per_epoch(self, subset='train'):
    """Returns 50000 for 'train' and 10000 for 'validation'.

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    if subset == 'train':
      return 50000
    if subset == 'validation':
      return 10000
    raise ValueError('Invalid data subset "%s"' % subset)
190 | 
191 | 
class COCODataset(ImageDataset):
  """Dataset settings for COCO (square images, 81 classes)."""

  def __init__(self, data_dir=None, image_size=300):
    super(COCODataset, self).__init__(
        'coco',
        height=image_size,
        width=image_size,
        data_dir=data_dir,
        num_classes=81)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the example count of `subset` ('train' or 'validation').

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    if subset == 'train':
      return COCO_NUM_TRAIN_IMAGES
    if subset == 'validation':
      return COCO_NUM_VAL_IMAGES
    raise ValueError('Invalid data subset "%s"' % subset)
206 | 
207 | 
# Maps a dataset name to the Dataset subclass that create_dataset instantiates.
_SUPPORTED_DATASETS = {
    'imagenet': ImagenetDataset,
    'cifar10': Cifar10Dataset,
    'librispeech': LibrispeechDataset,
    'coco': COCODataset,
}

# Maps a dataset name to its input preprocessor classes, keyed by preprocessor
# name. Dataset.get_input_preprocessor looks entries up here.
_SUPPORTED_INPUT_PREPROCESSORS = {
    'imagenet': {
        'default': preprocessing.RecordInputImagePreprocessor,
        'official_models_imagenet': preprocessing.ImagenetPreprocessor,
    },
    'cifar10': {
        'default': preprocessing.Cifar10ImagePreprocessor
    },
    'librispeech': {
        'default': preprocessing.LibrispeechPreprocessor
    },
    'coco': {
        'default': preprocessing.COCOPreprocessor
    },
}
230 | 
231 | 
def create_dataset(data_dir, data_name):
  """Builds the Dataset registered under `data_name`.

  Args:
    data_dir: directory holding the data, or a falsy value for synthetic data.
    data_name: name of a supported dataset, or None to infer it from
      `data_dir`.

  Returns:
    An instance of the matching Dataset subclass constructed with `data_dir`.

  Raises:
    ValueError: if the name cannot be inferred from `data_dir`, or if
      `data_name` is not a supported dataset.
  """
  if not (data_dir or data_name):
    # Synthetic data defaults to synthetic imagenet images.
    data_name = 'imagenet'

  if data_name is None:
    # Infer the name: take the first supported name contained in data_dir.
    candidates = (name for name in _SUPPORTED_DATASETS if name in data_dir)
    data_name = next(candidates, None)
    if data_name is None:
      raise ValueError('Could not identify name of dataset. '
                       'Please specify with --data_name option.')

  if data_name not in _SUPPORTED_DATASETS:
    supported_names = ', '.join(sorted(_SUPPORTED_DATASETS))
    raise ValueError('Unknown dataset. Must be one of %s' % supported_names)

  dataset_cls = _SUPPORTED_DATASETS[data_name]
  return dataset_cls(data_dir)
252 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/models/inception_model.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Inception model configuration.
 17 | 
 18 | Includes multiple models: inception3, inception4, inception-resnet2.
 19 | 
 20 | References:
 21 |   Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
 22 |   Inception-v4, Inception-ResNet and the Impact of Residual Connections on
 23 |   Learning
 24 | 
 25 |   Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
 26 |   Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
 27 |   Going Deeper with Convolutions
 28 |   http://arxiv.org/pdf/1409.4842v1.pdf
 29 | 
 30 |   Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
 31 |   Zbigniew Wojna
 32 |   Rethinking the Inception Architecture for Computer Vision
 33 |   arXiv preprint arXiv:1512.00567 (2015)
 34 | 
 35 |   Inception v3 model: http://arxiv.org/abs/1512.00567
 36 | 
 37 |   Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
 38 | """
 39 | 
 40 | from __future__ import absolute_import
 41 | from __future__ import division
 42 | from __future__ import print_function
 43 | 
 44 | from six.moves import xrange  # pylint: disable=redefined-builtin
 45 | from models import model
 46 | 
 47 | 
class Inceptionv3Model(model.CNNModel):
  """InceptionV3.

  When `auxiliary` is true, `add_inference` also builds an auxiliary head from
  the output of the last `inception_v3_c` module.
  """

  def __init__(self, auxiliary=False, params=None):
    self._auxiliary = auxiliary
    # Positional args are presumably image_size=299, batch_size=32 and
    # learning_rate=0.005 -- TODO confirm against model.CNNModel.
    super(Inceptionv3Model, self).__init__(
        'inception3', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the InceptionV3 layers to `cnn`.

    Each nested helper below describes one inception module as a list of
    columns (`cols`) and registers it through `cnn.inception_module`.
    """
    def inception_v3_a(cnn, n):
      """Adds an 'incept_v3_a' module; `n` parameterizes the 'apool' column."""
      cols = [[('conv', 64, 1, 1)], [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
      cnn.inception_module('incept_v3_a', cols)

    def inception_v3_b(cnn):
      """Adds an 'incept_v3_b' module."""
      cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
              [('conv', 64, 1, 1),
               ('conv', 96, 3, 3),
               ('conv', 96, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_b', cols)

    def inception_v3_c(cnn, n):
      """Adds an 'incept_v3_c' module; `n` sizes the intermediate convs."""
      cols = [[('conv', 192, 1, 1)],
              [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
              [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
               ('conv', n, 7, 1), ('conv', 192, 1, 7)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_c', cols)

    def inception_v3_d(cnn):
      """Adds an 'incept_v3_d' module."""
      cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
               ('conv', 192, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_d', cols)

    def inception_v3_e(cnn, pooltype):
      """Adds an 'incept_v3_e' module.

      `pooltype` equal to 'max' selects 'mpool' for the last column; any other
      value selects 'apool'. The ('share',) entries presumably reuse the layer
      at the same position in the previous column -- confirm against
      cnn.inception_module.
      """
      cols = [[('conv', 320, 1, 1)], [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
              [('share',), ('conv', 384, 3, 1)],
              [('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
              [('share',), ('share',), ('conv', 384, 3, 1)],
              [('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1, 'SAME'),
               ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_e', cols)

    def incept_v3_aux(cnn):
      """Builds the auxiliary head starting from the current top layer."""
      assert cnn.aux_top_layer is None
      cnn.aux_top_layer = cnn.top_layer
      cnn.aux_top_size = cnn.top_size
      # The layers below are added inside the aux-top-layer context.
      with cnn.switch_to_aux_top_layer():
        cnn.apool(5, 5, 3, 3, mode='VALID')
        cnn.conv(128, 1, 1, mode='SAME')
        cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
        cnn.reshape([-1, 768])

    # NOTE(review): the trailing shape comments below do not look consistently
    # like input shapes or output shapes of their layer -- verify before
    # relying on them.
    cnn.use_batch_norm = True
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')   # 299 x 299 x 3
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')   # 149 x 149 x 32
    cnn.conv(64, 3, 3, 1, 1, mode='SAME')    # 147 x 147 x 64
    cnn.mpool(3, 3, 2, 2, mode='VALID')      # 147 x 147 x 64
    cnn.conv(80, 1, 1, 1, 1, mode='VALID')   # 73 x 73 x 80
    cnn.conv(192, 3, 3, 1, 1, mode='VALID')  # 71 x 71 x 192
    cnn.mpool(3, 3, 2, 2, 'VALID')           # 35 x 35 x 192
    inception_v3_a(cnn, 32)                  # 35 x 35 x 256 mixed.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_1.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_2
    inception_v3_b(cnn)                      # 17 x 17 x 768 mixed_3
    inception_v3_c(cnn, 128)                 # 17 x 17 x 768 mixed_4
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_5
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_6
    inception_v3_c(cnn, 192)                 # 17 x 17 x 768 mixed_7
    if self._auxiliary:
      incept_v3_aux(cnn)                     # Auxiliary Head logits
    inception_v3_d(cnn)                      # 17 x 17 x 1280 mixed_8
    inception_v3_e(cnn, 'avg')               # 8 x 8 x 2048 mixed_9
    inception_v3_e(cnn, 'max')               # 8 x 8 x 2048 mixed_10
    cnn.apool(8, 8, 1, 1, 'VALID')           # 8 x 8 x 2048
    cnn.reshape([-1, 2048])                  # 1 x 1 x 2048
128 | 
129 | 
130 | # Stem functions
def inception_v4_sa(cnn):
  """Adds the 'incept_v4_sa' stem module: an 'mpool' and a 'conv' column."""
  pool_column = [('mpool', 3, 3, 2, 2, 'VALID')]
  conv_column = [('conv', 96, 3, 3, 2, 2, 'VALID')]
  cnn.inception_module('incept_v4_sa', [pool_column, conv_column])
134 | 
135 | 
def inception_v4_sb(cnn):
  """Adds the 'incept_v4_sb' stem module made of two 'conv' columns."""
  short_column = [('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')]
  long_column = [
      ('conv', 64, 1, 1),
      ('conv', 64, 7, 1),
      ('conv', 64, 1, 7),
      ('conv', 96, 3, 3, 1, 1, 'VALID'),
  ]
  cnn.inception_module('incept_v4_sb', [short_column, long_column])
141 | 
142 | 
def inception_v4_sc(cnn):
  """Adds the 'incept_v4_sc' stem module: a 'conv' and an 'mpool' column."""
  conv_column = [('conv', 192, 3, 3, 2, 2, 'VALID')]
  pool_column = [('mpool', 3, 3, 2, 2, 'VALID')]
  cnn.inception_module('incept_v4_sc', [conv_column, pool_column])
147 | 
148 | 
149 | # Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
  """Adds the 'incept_v4_ra' reduction module.

  `n` parameterizes the single-conv column; `k`, `l` and `m` parameterize the
  first, second and third conv of the three-conv column.
  """
  pool_column = [('mpool', 3, 3, 2, 2, 'VALID')]
  conv_column = [('conv', n, 3, 3, 2, 2, 'VALID')]
  stacked_column = [
      ('conv', k, 1, 1),
      ('conv', l, 3, 3),
      ('conv', m, 3, 3, 2, 2, 'VALID'),
  ]
  cnn.inception_module('incept_v4_ra',
                       [pool_column, conv_column, stacked_column])
156 | 
157 | 
def inception_v4_rb(cnn):
  """Adds the 'incept_v4_rb' reduction module (one 'mpool', two conv columns)."""
  pool_column = [('mpool', 3, 3, 2, 2, 'VALID')]
  short_column = [('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')]
  long_column = [
      ('conv', 256, 1, 1),
      ('conv', 256, 1, 7),
      ('conv', 320, 7, 1),
      ('conv', 320, 3, 3, 2, 2, 'VALID'),
  ]
  cnn.inception_module('incept_v4_rb',
                       [pool_column, short_column, long_column])
164 | 
165 | 
class Inceptionv4Model(model.CNNModel):
  """Inceptionv4."""

  def __init__(self, params=None):
    # Positional args are presumably image_size=299, batch_size=32 and
    # learning_rate=0.005 -- TODO confirm against model.CNNModel.
    super(Inceptionv4Model, self).__init__(
        'inception4', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the Inceptionv4 layers to `cnn`.

    The network is three initial convs, the sa/sb/sc stem modules, 4 'a'
    modules, the 'ra' reduction, 7 'b' modules, the 'rb' reduction and 3 'c'
    modules, followed by a spatial mean and dropout.
    """
    def inception_v4_a(cnn):
      """Adds an 'incept_v4_a' module."""
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
              [('conv', 96, 1, 1)], [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
      cnn.inception_module('incept_v4_a', cols)

    def inception_v4_b(cnn):
      """Adds an 'incept_v4_b' module."""
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
              [('conv', 384, 1, 1)],
              [('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
               ('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
      cnn.inception_module('incept_v4_b', cols)

    def inception_v4_c(cnn):
      """Adds an 'incept_v4_c' module.

      The ('share',) entries presumably reuse the layer at the same position
      in the previous column -- confirm against cnn.inception_module.
      """
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
              [('conv', 256, 1, 1)], [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
              [('share',), ('conv', 256, 3, 1)],
              [('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
               ('conv', 256, 3, 1)], [('share',), ('share',), ('share',),
                                      ('conv', 256, 1, 3)]]
      cnn.inception_module('incept_v4_c', cols)

    cnn.use_batch_norm = True
    # Initial convs followed by the three stem modules.
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')
    cnn.conv(64, 3, 3)
    inception_v4_sa(cnn)
    inception_v4_sb(cnn)
    inception_v4_sc(cnn)
    # Repeated a/b/c modules, separated by the ra and rb reduction modules.
    for _ in xrange(4):
      inception_v4_a(cnn)
    inception_v4_ra(cnn, 192, 224, 256, 384)
    for _ in xrange(7):
      inception_v4_b(cnn)
    inception_v4_rb(cnn)
    for _ in xrange(3):
      inception_v4_c(cnn)
    cnn.spatial_mean()
    cnn.dropout(0.8)
214 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/test_data/tfrecord_image_generator.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Generate black and white test TFRecords with Example protos.
 16 | 
 17 | Each record within the TFRecord file is a
 18 | serialized Example proto. The Example proto contains the following fields:
 19 | 
 20 |   image/encoded: string containing JPEG encoded image in RGB colorspace
 21 |   image/height: integer, image height in pixels
 22 |   image/width: integer, image width in pixels
 23 |   image/colorspace: string, specifying the colorspace, always 'RGB'
 24 |   image/channels: integer, specifying the number of channels, always 3
  25 |   image/format: string, specifying the format, always 'JPEG'
 26 | 
 27 |   image/filename: string containing the basename of the image file
 28 |             e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
 29 |   image/class/label: integer specifying the index in a classification layer.
 30 |     The label ranges from [1, 1000] where 0 is not used.
 31 |   image/class/synset: string specifying the unique ID of the label,
 32 |     e.g. 'n01440764'
 33 |   image/class/text: string specifying the human-readable version of the label
 34 |     e.g. 'red fox, Vulpes vulpes'
 35 | 
 36 |   image/object/bbox/xmin: list of integers specifying the 0+ human annotated
 37 |     bounding boxes
 38 |   image/object/bbox/xmax: list of integers specifying the 0+ human annotated
 39 |     bounding boxes
 40 |   image/object/bbox/ymin: list of integers specifying the 0+ human annotated
 41 |     bounding boxes
 42 |   image/object/bbox/ymax: list of integers specifying the 0+ human annotated
 43 |     bounding boxes
 44 |   image/object/bbox/label: integer specifying the index in a classification
 45 |     layer. The label ranges from [1, 1000] where 0 is not used. Note this is
 46 |     always identical to the image label.
 47 | """
 48 | from __future__ import absolute_import
 49 | from __future__ import division
 50 | from __future__ import print_function
 51 | 
 52 | import os
 53 | import random
 54 | 
 55 | import numpy as np
 56 | import tensorflow as tf
 57 | 
 58 | 
 59 | def _int64_feature(value):
 60 |   """Wrapper for inserting int64 features into Example proto."""
 61 |   if not isinstance(value, list):
 62 |     value = [value]
 63 |   return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
 64 | 
 65 | 
 66 | def _float_feature(value):
 67 |   """Wrapper for inserting float features into Example proto."""
 68 |   if not isinstance(value, list):
 69 |     value = [value]
 70 |   return tf.train.Feature(float_list=tf.train.FloatList(value=value))
 71 | 
 72 | 
 73 | def _bytes_feature(value):
 74 |   """Wrapper for inserting bytes features into Example proto."""
 75 |   return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
 76 | 
 77 | 
 78 | def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
 79 |                         height, width):
 80 |   """Build an Example proto for an example.
 81 | 
 82 |   Args:
 83 |     filename: string, path to an image file, e.g., '/path/to/example.JPG'
 84 |     image_buffer: string, JPEG encoding of RGB image
 85 |     label: integer, identifier for the ground truth for the network
 86 |     synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
 87 |     human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
 88 |     bbox: list of bounding boxes; each box is a list of integers
 89 |       specifying [xmin, ymin, xmax, ymax]. All boxes are assumed to belong to
 90 |       the same label as the image label.
 91 |     height: integer, image height in pixels
 92 |     width: integer, image width in pixels
 93 |   Returns:
 94 |     Example proto
 95 |   """
 96 |   xmin = []
 97 |   ymin = []
 98 |   xmax = []
 99 |   ymax = []
100 |   for b in bbox:
101 |     assert len(b) == 4
102 |     # pylint: disable=expression-not-assigned
103 |     [l.append(point) for l, point in zip([xmin, ymin, xmax, ymax], b)]
104 |     # pylint: enable=expression-not-assigned
105 | 
106 |   colorspace = 'RGB'
107 |   channels = 3
108 |   image_format = 'JPEG'
109 | 
110 |   example = tf.train.Example(features=tf.train.Features(feature={
111 |       'image/height': _int64_feature(height),
112 |       'image/width': _int64_feature(width),
113 |       'image/colorspace': _bytes_feature(colorspace),
114 |       'image/channels': _int64_feature(channels),
115 |       'image/class/label': _int64_feature(label),
116 |       'image/class/synset': _bytes_feature(synset),
117 |       'image/class/text': _bytes_feature(human),
118 |       'image/object/bbox/xmin': _float_feature(xmin),
119 |       'image/object/bbox/xmax': _float_feature(xmax),
120 |       'image/object/bbox/ymin': _float_feature(ymin),
121 |       'image/object/bbox/ymax': _float_feature(ymax),
122 |       'image/object/bbox/label': _int64_feature([label] * len(xmin)),
123 |       'image/format': _bytes_feature(image_format),
124 |       'image/filename': _bytes_feature(os.path.basename(filename)),
125 |       'image/encoded': _bytes_feature(image_buffer)}))
126 |   return example
127 | 
128 | 
class ImageCoder(object):
  """Encodes image arrays as JPEG through one shared TensorFlow session."""

  def __init__(self):
    # The same Session serves every encode_jpeg call.
    self._sess = tf.Session()

    # Op that JPEG-encodes whatever uint8 image is fed to the placeholder.
    image_input = tf.placeholder(dtype=tf.uint8)
    self._image = image_input
    self._encode_jpeg = tf.image.encode_jpeg(
        image_input, format='rgb', quality=100)

  def encode_jpeg(self, image):
    """Returns the JPEG encoding of `image`."""
    return self._sess.run(self._encode_jpeg, feed_dict={self._image: image})
145 | 
146 | 
def _process_image(coder, name):
  """Generate one solid-colour image and encode it with `coder`.

  No file is read: the image is synthesised. It is filled with 0 (black) when
  `name` is "train" and with 255 (white) for any other name. Height and width
  are each drawn uniformly from [30, 299], height first.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    name: string, unique identifier specifying the data set.
  Returns:
    image_buffer: the value returned by coder.encode_jpeg for the image.
    height: integer, image height in pixels.
    width: integer, image width in pixels.
  """
  height = random.randint(30, 299)
  width = random.randint(30, 299)
  fill = 255 if name != 'train' else 0
  pixels = np.full((height, width, 3), fill, np.uint8)
  return coder.encode_jpeg(pixels), height, width
170 | 
171 | 
def _process_dataset(output_directory, num_classes, coder, name, num_images,
                     num_shards):
  """Process a complete data set and save it as sharded TFRecord files.

  Exactly `num_images` examples are written. When `num_images` is not a
  multiple of `num_shards`, the first `num_images % num_shards` shards each
  receive one extra example, so no image is silently dropped. When it is a
  multiple, every shard holds `num_images // num_shards` examples.

  Shard files are named '<name>-<shard>-of-<num_shards>' (zero padded to five
  digits) inside `output_directory`.

  Args:
    output_directory: Where to put outputs.
    num_classes: number of classes.
    coder: Instance of an ImageCoder.
    name: string, unique identifier specifying the data set.
    num_images: number of images to generate.
    num_shards: integer number of shards to create.
  """
  files_per_shard, leftover = divmod(num_images, num_shards)
  # Running example index across all shards; drives the label and synset.
  index = 0
  for shard in range(num_shards):
    # The first `leftover` shards absorb the remainder, one image each.
    shard_size = files_per_shard + (1 if shard < leftover else 0)
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(output_directory, output_filename)
    with tf.python_io.TFRecordWriter(output_file) as writer:
      for i in range(shard_size):
        image_buffer, height, width = _process_image(coder, name)

        filename = '{}_{}_{}'.format(name, shard, i)
        # Labels cycle through the classes as the global index grows.
        label = index % num_classes
        synset = str(index)
        human = name
        bbox = [[0.1, 0.1, 0.9, 0.9]]
        example = _convert_to_example(filename, image_buffer, label,
                                      synset, human, bbox,
                                      height, width)
        writer.write(example.SerializeToString())
        index += 1
202 | 
203 | 
def write_black_and_white_tfrecord_data(
    output_directory, num_classes, num_train_images=512,
    num_validation_images=128, train_shards=8, validation_shards=2):
  """Writes black and white images in tfrecord format.

  The 'train' split contains black images and the 'validation' split contains
  white images. A single ImageCoder is shared by both splits, and the
  validation split is written first.

  Args:
    output_directory: Where to put outputs.
    num_classes: number of classes.
    num_train_images: number of training images to generate.
    num_validation_images: number of validation images to generate.
    train_shards: integer number of training shards to create.
    validation_shards: integer number of validation shards to create.
  """
  coder = ImageCoder()
  # (split name, image count, shard count), in the order they are written.
  splits = (
      ('validation', num_validation_images, validation_shards),
      ('train', num_train_images, train_shards),
  )
  for split_name, image_count, shard_count in splits:
    _process_dataset(output_directory, num_classes, coder, split_name,
                     image_count, shard_count)
225 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/cnn_util.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Utilities for CNN benchmarks."""
 17 | from __future__ import absolute_import
 18 | from __future__ import division
 19 | from __future__ import print_function
 20 | 
 21 | import sys
 22 | import threading
 23 | 
 24 | import numpy as np
 25 | import tensorflow as tf
 26 | 
 27 | 
 28 | def tensorflow_version_tuple():
 29 |   v = tf.__version__
 30 |   major, minor, patch = v.split('.')
 31 |   return (int(major), int(minor), patch)
 32 | 
 33 | 
 34 | def tensorflow_version():
 35 |   vt = tensorflow_version_tuple()
 36 |   return vt[0] * 1000 + vt[1]
 37 | 
 38 | 
def log_fn(log):
  """Prints `log` using the built-in print function."""
  print(log)
 41 | 
 42 | 
 43 | def roll_numpy_batches(array, batch_size, shift_ratio):
 44 |   """Moves a proportion of batches from start to the end of the array.
 45 | 
 46 |   This function moves a proportion of batches, specified by `shift_ratio`, from
 47 |   the starts of the array to the end. The number of batches moved is rounded
 48 |   down to the nearest integer. For example,
 49 | 
 50 |   ```
 51 |   roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
 52 |   ```
 53 | 
 54 |   Args:
 55 |     array: A Numpy array whose first dimension is the batch dimension.
 56 |     batch_size: The batch size.
 57 |     shift_ratio: Proportion of batches to move from the start of the array to
 58 |       the end of the array.
 59 |   Returns:
 60 |     A new Numpy array, with a proportion of the batches at the start of `array`
 61 |     moved to the end.
 62 |   """
 63 |   num_items = array.shape[0]
 64 |   assert num_items % batch_size == 0
 65 |   num_batches = num_items // batch_size
 66 |   starting_batch = int(num_batches * shift_ratio)
 67 |   starting_item = starting_batch * batch_size
 68 |   return np.roll(array, -starting_item, axis=0)
 69 | 
 70 | 
 71 | # For Python 2.7 compatibility, we do not use threading.Barrier.
 72 | class Barrier(object):
 73 |   """Implements a lightweight Barrier.
 74 | 
 75 |   Useful for synchronizing a fixed number of threads at known synchronization
 76 |   points.  Threads block on 'wait()' and simultaneously return once they have
 77 |   all made that call.
 78 | 
 79 |   # Implementation adopted from boost/thread/barrier.hpp
 80 |   """
 81 | 
 82 |   def __init__(self, parties):
 83 |     """Create a barrier, initialised to 'parties' threads."""
 84 |     self.cond = threading.Condition(threading.Lock())
 85 |     self.parties = parties
 86 |     # Indicates the number of waiting parties.
 87 |     self.waiting = 0
 88 |     # generation is needed to deal with spurious wakeups. If self.cond.wait()
 89 |     # wakes up for other reasons, generation will force it go back to wait().
 90 |     self.generation = 0
 91 |     self.broken = False
 92 | 
 93 |   def wait(self):
 94 |     """Wait for the barrier."""
 95 |     with self.cond:
 96 |       # Check if the barrier has been disabled or not.
 97 |       if self.broken:
 98 |         return
 99 |       gen = self.generation
100 |       self.waiting += 1
101 |       if self.waiting == self.parties:
102 |         self.waiting = 0
103 |         self.generation += 1
104 |         self.cond.notify_all()
105 |       # loop because of spurious wakeups
106 |       while gen == self.generation:
107 |         self.cond.wait()
108 | 
109 |   # TODO(huangyp): Remove this method once we find a way to know which step
110 |   # is the last barrier.
111 |   def abort(self):
112 |     """Clear existing barrier and disable this barrier."""
113 |     with self.cond:
114 |       if self.waiting > 0:
115 |         self.generation += 1
116 |         self.cond.notify_all()
117 |       self.broken = True
118 | 
119 | 
class ImageProducer(object):
  """An image producer that puts images into a staging area periodically.

  This class is useful for periodically running a set of ops, `put_ops` on a
  different thread every `batch_group_size` steps.

  The notify_image_consumption() method is used to increment an internal counter
  so that every `batch_group_size` times it is called, `put_ops` is executed. A
  barrier is placed so that notify_image_consumption() will block until
  the previous call to `put_ops` has been executed.

  The start() method is used to start the thread that runs `put_ops`.

  The done() method aborts the barrier and waits for the producer thread to
  finish. It is safe to call even if start() was never called.

  The purpose of this class is to fill an image input pipeline every
  `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size` images
  to the input pipeline when run, and that every step, 1 batch of images is
  consumed. Then, by calling notify_image_consumption() every step, images are
  supplied to the input pipeline at the same amount they are consumed.

  Example usage:
  ```
  put_ops = ... # Enqueues `batch_group_size` batches to a StagingArea
  get_op = ...  # Dequeues 1 batch, and does some operations on it
  batch_group_size = 4
  with tf.Session() as sess:
    image_producer = cnn_util.ImageProducer(
        sess, put_ops, batch_group_size, use_python32_barrier=False)
    image_producer.start()
    for _ in range(100):
      sess.run(get_op)
      image_producer.notify_image_consumption()
    image_producer.done()
  ```
  """

  def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
    """Creates the producer; no ops run and no thread starts until start().

    Args:
      sess: Session-like object whose run() executes `put_ops`.
      put_ops: The op(s) to run once per group of `batch_group_size` steps.
      batch_group_size: Number of notify_image_consumption() calls per run of
        `put_ops`.
      use_python32_barrier: If true and running on Python >= 3.2, use
        threading.Barrier; otherwise use the Barrier class from this module.
    """
    self.sess = sess
    self.num_gets = 0
    self.put_ops = put_ops
    self.batch_group_size = batch_group_size
    self.done_event = threading.Event()
    # Set by start(); stays None if the producer is never started.
    self.thread = None
    if use_python32_barrier and sys.version_info >= (3, 2):
      self.put_barrier = threading.Barrier(2)
    else:
      self.put_barrier = Barrier(2)

  def _should_put(self):
    """Returns True if the next consumption completes a batch group."""
    return (self.num_gets + 1) % self.batch_group_size == 0

  def done(self):
    """Stop the image producer."""
    self.done_event.set()
    self.put_barrier.abort()
    # Nothing to join if start() was never called.
    if self.thread is not None:
      self.thread.join()

  def start(self):
    """Start the image producer."""
    # Run the put ops once up front, then hand over to the background thread.
    self.sess.run([self.put_ops])
    self.thread = threading.Thread(target=self._loop_producer)
    # Set daemon to true to allow Ctrl + C to terminate all threads.
    self.thread.daemon = True
    self.thread.start()

  def notify_image_consumption(self):
    """Increment the counter of image_producer by 1.

    This should only be called by the main thread that consumes images and runs
    the model computation. One batch of images should be consumed between
    calling start() and the first call to this method. Then, one batch of images
    should be consumed between any two successive calls to this method.
    """
    if self._should_put():
      self.put_barrier.wait()
    self.num_gets += 1

  def _loop_producer(self):
    """Thread body: run `put_ops`, then rendezvous with the consumer."""
    # threading.Barrier reports abort() by raising BrokenBarrierError from
    # wait(). The name does not exist on Python 2, where the empty tuple makes
    # the except clause match nothing.
    broken_barrier_error = getattr(threading, 'BrokenBarrierError', ())
    while not self.done_event.is_set():
      self.sess.run([self.put_ops])
      try:
        self.put_barrier.wait()
      except broken_barrier_error:
        # done() aborted the barrier: exit quietly instead of letting the
        # thread die with an unhandled exception.
        break
201 | 
202 | 
class BaseClusterManager(object):
  """Base class describing the cluster of servers that runs the benchmark.

  The cluster spec always has a 'worker' job and gets a 'ps' job only when
  `params.ps_hosts` is non-empty. Subclasses supply the session target and the
  server join behaviour.
  """

  def __init__(self, params):
    """Builds the ClusterSpec from the comma-separated host lists in `params`."""
    cluster = {'worker': params.worker_hosts.split(',')}
    if params.ps_hosts:
      cluster['ps'] = params.ps_hosts.split(',')
    self._cluster_spec = tf.train.ClusterSpec(cluster)

  def get_target(self):
    """Returns a target to be passed to tf.Session()."""
    raise NotImplementedError('get_target must be implemented by subclass')

  def join_server(self):
    """Joins the underlying server; must be provided by subclasses."""
    raise NotImplementedError('join must be implemented by subclass')

  def get_cluster_spec(self):
    """Returns the ClusterSpec built in the constructor."""
    return self._cluster_spec

  def num_workers(self):
    """Returns the number of tasks in the 'worker' job."""
    return len(self._cluster_spec.job_tasks('worker'))

  def num_ps(self):
    """Returns the number of tasks in the 'ps' job, or 0 if there is none."""
    spec = self._cluster_spec
    if 'ps' not in spec.jobs:
      return 0
    return len(spec.job_tasks('ps'))
232 | 
233 | 
class GrpcClusterManager(BaseClusterManager):
  """A cluster manager for a cluster networked with gRPC.

  For the 'controller' job no server is created and the target is the first
  worker's gRPC address. Every other job starts a tf.train.Server and uses
  that server's target.
  """

  def __init__(self, params, config_proto):
    super(GrpcClusterManager, self).__init__(params)
    if params.job_name != 'controller':
      server = tf.train.Server(self._cluster_spec,
                               job_name=params.job_name,
                               task_index=params.task_index,
                               config=config_proto,
                               protocol=params.server_protocol)
      self._server = server
      self._target = server.target
      return
    # The controller only connects to the first worker; it owns no server.
    first_worker = self._cluster_spec.job_tasks('worker')[0]
    self._target = 'grpc://%s' % first_worker

  def get_target(self):
    """Returns the session target chosen in the constructor."""
    return self._target

  def join_server(self):
    """Joins the server created in the constructor and returns its result."""
    return self._server.join()
254 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/mlperf.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Contains functions related to MLPerf compliance.
 16 | 
 17 | MLPerf requires submissions to log what the benchmark does, in order to verify
 18 | that the benchmark meets the MLPerf requirements. This module contains a global
 19 | object `logger` that is used by other files to log what tf_cnn_benchmarks does
 20 | for compliance.
 21 | 
 22 | By default, `logger` does nothing, as the MLPerf compliance logs are verbose and
 23 | unnecessary if one is not concerned about MLPerf compliance. The logger can be
 24 | enabled by using the `mlperf_logger` context manager.
 25 | 
 26 | To enable the logger with `mlperf_logger`, the MLPerf compliance library at
 27 | https://github.com/mlperf/training/tree/master/compliance is required. If
 28 | the logger is not enabled, the library is not needed.
 29 | """
 30 | 
 31 | from __future__ import absolute_import
 32 | from __future__ import division
 33 | from __future__ import print_function
 34 | 
 35 | 
 36 | from collections import namedtuple
 37 | import contextlib
 38 | import os
 39 | import sys
 40 | 
 41 | import tensorflow as tf
 42 | 
 43 | # pylint: disable=g-import-not-at-top
try:
  # Not all users have the MLPerf compliance library, so we don't want to
  # unconditionally crash if these imports fail.
  from mlperf_compliance import mlperf_log
  from mlperf_compliance import resnet_log_helper
  from mlperf_compliance import tags
  import_successful = True
except ImportError:
  # The logger cannot be enabled in this case since the MLPerf library isn't
  # found. We return empty strings from the `tags` attribute so that
  # the benchmark can still run without crashing. These empty tags are passed
  # to an instance of `NullMlPerfLogger`, which does not log anything and
  # ignores the tag values.

  class _Tags(object):
    """Stand-in for the `tags` module: every attribute is the empty string."""

    def __getattr__(self, item):
      # Called for any attribute not found normally, i.e. every tag name.
      return ''
  tags = _Tags()
  import_successful = False
 64 | # pylint: enable=g-import-not-at-top
 65 | 
 66 | 
# Per-model logging configuration used by MlPerfLogger: the print function to
# log with, the set of tags accepted for the model, and the model name that is
# written into deferred log lines.
_ModelInfo = namedtuple('_ModelInfo', ['print_fn', 'tag_set',
                                       'mlperf_model_name'])


# First token of every line printed by MlPerfLogger.log_deferred_tensor_value.
_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'
 72 | 
 73 | 
 74 | class MlPerfLogger(object):
 75 |   """Logs various aspects about a benchmark run for MLPerf compliance."""
 76 | 
 77 |   def __init__(self, model):
 78 |     self._root_dir = os.path.split(os.path.abspath(__file__))[0]
 79 |     mlperf_log.ROOT_DIR_RESNET = self._root_dir
 80 |     mlperf_log.ROOT_DIR_SSD = self._root_dir
 81 |     self.model = model
 82 |     model_to_info = {
 83 |         'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
 84 |                                     mlperf_log.RESNET_TAG_SET, tags.RESNET),
 85 |         'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
 86 |                              tags.SSD)
 87 |     }
 88 | 
 89 |     try:
 90 |       self._log_fn, self.tag_set, self.mlperf_model_name = model_to_info[model]
 91 |     except KeyError:
 92 |       raise ValueError('--ml_perf_compliance_logging is only compatible when '
 93 |                        '--model is one of the following: ' +
 94 |                        ', '.join(model_to_info.keys()))
 95 | 
 96 |   def log(self, key, value=None, stack_offset=2):
 97 |     if key in self.tag_set:
 98 |       self._log_fn(key, value, stack_offset)
 99 |     else:
100 |       print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
101 |             (key, value, self.model))
102 | 
103 |   def log_deferred_tensor_value(self, key, tensor_value, global_step,
104 |                                 stack_offset=2, every_n=1):
105 |     """Logs the value of a tensor when the graph is run."""
106 |     caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)
107 |     def create_print_op():
108 |       return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
109 |                       tf.timestamp(), caller, key,
110 |                       ': { "deferred": true, "value":', tensor_value, '}',
111 |                       output_stream=sys.stdout)
112 |     maybe_print = tf.cond(tf.equal(global_step % every_n, 0), create_print_op,
113 |                           tf.no_op)
114 |     with tf.control_dependencies([maybe_print]):
115 |       return tf.identity(tensor_value)
116 | 
117 |   def log_max_pool(self, input_tensor, output_tensor):
118 |     if self.model == 'resnet50_v1.5':
119 |       resnet_log_helper.log_max_pool(input_tensor, output_tensor)
120 | 
121 |   def log_begin_block(self, input_tensor, block_type):
122 |     if self.model == 'resnet50_v1.5':
123 |       resnet_log_helper.log_begin_block(input_tensor, block_type)
124 | 
125 |   def log_end_block(self, output_tensor):
126 |     if self.model == 'resnet50_v1.5':
127 |       resnet_log_helper.log_end_block(output_tensor)
128 | 
129 |   def log_projection(self, input_tensor, output_tensor):
130 |     if self.model == 'resnet50_v1.5':
131 |       resnet_log_helper.log_projection(input_tensor, output_tensor)
132 | 
133 |   def log_conv2d(self, input_tensor, output_tensor, stride_height, stride_width,
134 |                  filters, initializer, use_bias):
135 |     """Log a conv2d call."""
136 |     if self.model == 'resnet50_v1.5':
137 |       assert stride_height == stride_width, (
138 |           '--ml_perf_compliance_logging does not support convolutions where '
139 |           'the stride height is not equal to the stride width. '
140 |           'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
141 |       if isinstance(initializer, tf.truncated_normal_initializer) or (
142 |           isinstance(initializer, tf.variance_scaling_initializer) and
143 |           initializer.distribution == 'truncated_normal'):
144 |         initializer = tags.TRUNCATED_NORMAL
145 |       elif (isinstance(initializer, tf.glorot_uniform_initializer) or
146 |             initializer is None):
147 |         initializer = 'glorot_uniform'
148 |       resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
149 |                                    filters, initializer, use_bias)
150 | 
151 |   def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
152 |                      center, scale, training):
153 |     if self.model == 'resnet50_v1.5':
154 |       resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
155 |                                        epsilon, center, scale, training)
156 | 
157 |   def log_train_epochs(self, num_epochs):
158 |     """Logs all the TRAIN_EPOCHs log lines."""
159 |     num_epochs_int = int(num_epochs)
160 |     for i in range(num_epochs_int):
161 |       # MLPerf allows us to print all the train epochs at once instead of
162 |       # printing them as we do them.
163 |       self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
164 |     if num_epochs_int != num_epochs:
165 |       value = (str(num_epochs_int) +
166 |                ', but this epoch only has {}% of the examples of a normal epoch'
167 |                .format(100 * (num_epochs - num_epochs_int)))
168 |       self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)
169 | 
170 |   def log_input_resize_aspect_preserving(self, height, width, scale_factor):
171 |     assert height == width, (
172 |         '--ml_perf_compliance_logging does not support models with nonsquare '
173 |         'images. Cannot process image with height=%d and width=%d' %
174 |         (height, width))
175 |     self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
176 |              value={'min': int(height * scale_factor)})
177 | 
178 |   def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2):
179 |     if self.model == 'resnet50_v1.5':
180 |       self.log(key=tag, stack_offset=stack_offset+1)
181 |     elif self.model == 'ssd300':
182 |       epoch = int(global_step * batch_size / 118287)
183 |       self.log(key=tag, value=epoch, stack_offset=stack_offset+1)
184 | 
185 |   def log_eval_accuracy(self, accuracy, global_step, batch_size,
186 |                         examples_per_epoch, stack_offset=2):
187 |     """Logs eval accuracy."""
188 |     epoch = int(global_step * batch_size / examples_per_epoch)
189 |     eval_accuracy = {'epoch': epoch, 'value': accuracy}
190 |     eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
191 |     self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
192 |              stack_offset=stack_offset+1)
193 |     self.log(key=tags.EVAL_ITERATION_ACCURACY,
194 |              value=eval_iteration_accuracy,
195 |              stack_offset=stack_offset+1)
196 | 
197 | 
def _empty_fn(*args, **kwargs):
  """Accepts any arguments, does nothing and returns None."""
  del args  # Unused.
  del kwargs  # Unused.
200 | 
201 | 
class NullMlPerfLogger(object):
  """A drop-in replacement for `MlPerfLogger` that logs nothing.

  Any attribute that is not defined on the class resolves to a function that
  accepts arbitrary arguments and does nothing. This is used when logging is
  disabled, which is the default behavior.
  """

  def __getattr__(self, item):
    # Only reached for attributes not found through normal lookup.
    return _empty_fn

  def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
    """Returns `tensor_value` unchanged; every other argument is ignored."""
    del key  # Unused.
    del args  # Unused.
    del kwargs  # Unused.
    return tensor_value
216 | 
217 | 
# A global singleton logger. By default, it's the null logger, which ignores
# every logging call, but it can be switched to an MlPerfLogger for the
# duration of a `mlperf_logger()` context.
logger = NullMlPerfLogger()
221 | 
222 | 
@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
  """Optionally enable the mlperf logger.

  When `use_mlperf_logger` is True, the module-level `logger` is replaced by an
  MlPerfLogger for `model` while the context is active, and the previous
  logger is restored on exit, even if the body raises. When it is False, the
  context manager leaves `logger` untouched.

  Args:
    use_mlperf_logger: If True, enables the mlperf logger. If False, this
      function does nothing.
    model: The model that will be logged. Required, because different models
      must log different things for MLPerf compliance.

  Yields:
    Nothing.

  Raises:
    ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
      library cannot be imported
  """
  global logger
  if not use_mlperf_logger:
    yield
    return

  if not import_successful:
    raise ImportError('Failed to import MLPerf compliance library, which is '
                      'required when --ml_perf_compliance_logging is '
                      'specified. Clone this repo and add this directory '
                      'https://github.com/mlperf/training/tree/master/'
                      'compliance to the PYTHONPATH environmental variable.')
  enabled_logger = MlPerfLogger(model)
  previous_logger = logger
  try:
    logger = enabled_logger
    yield
  finally:
    # Always put the earlier logger back, whatever happened in the body.
    logger = previous_logger
261 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/scripts/tf_cnn_benchmarks/all_reduce_benchmark.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Benchmarks the all-reduce algorithms of tf_cnn_benchmarks.
 16 | 
 17 | tf_cnn_benchmarks uses all-reduce to aggregate gradients. This benchmark is
 18 | useful for benchmarking the performance of just this gradient aggregation,
 19 | instead of the entire model. All the flags that tf_cnn_benchmarks accepts are
 20 | also accepted by this script, although many are silently ignored.
 21 | 
 22 | The number and shapes of the tensors all-reduced are those of the variables of
 23 | the model specified by the --model flag.
 24 | TODO(reedwm): Allow custom sizes to be specified.
 25 | """
 26 | 
 27 | from __future__ import absolute_import
 28 | from __future__ import division
 29 | from __future__ import print_function
 30 | 
 31 | 
 32 | import os
 33 | import time
 34 | 
 35 | from absl import app
 36 | from absl import flags as absl_flags
 37 | import tensorflow as tf
 38 | 
 39 | from tensorflow.python.ops import control_flow_ops
 40 | import benchmark_cnn
 41 | import cnn_util
 42 | import flags
 43 | from cnn_util import log_fn
 44 | 
 45 | 
 46 | absl_flags.DEFINE_integer('iters_per_step', 5,
 47 |                           'Number of iterations to run all-reduce for, per '
 48 |                           'step. Every step, a session will be run on a Graph '
 49 |                           'that contains this many copies of the all-reduce. '
 50 |                           'The copies are run sequentially. Setting this above '
 51 |                           '1 is useful to lower the overhead of starting the '
 52 |                           'session run, running the VariableV2 ops at the '
 53 |                           'start of the step, etc.')
 54 | 
 55 | 
 56 | flags.define_flags()
 57 | for name in flags.param_specs.keys():
 58 |   absl_flags.declare_key_flag(name)
 59 | 
 60 | 
 61 | def get_var_shapes(model):
 62 |   """Returns the list of variable shapes for a tf_cnn_benchmarks Model."""
 63 |   with tf.Graph().as_default():
 64 |     # The variable shapes do not depend on the batch size.
 65 |     images = tf.placeholder(tf.float32, model.get_input_shapes('train')[0])
 66 |     model.build_network([images])
 67 |     return [[int(d) for d in v.shape.dims] for v in tf.trainable_variables()]
 68 | 
 69 | 
 70 | def all_reduce(all_device_tensors, variable_mgr):
 71 |   """Performs a single batch all-reduce.
 72 | 
 73 |   Args:
 74 |     all_device_tensors: List of lists of tensors. all_device_tensors[t][i] is
 75 |       a tensor, where t is the tower the tensor is on and i is the index of
 76 |       the tensor.
 77 |     variable_mgr: The VariableMgr to perform the all-reduce.
 78 |   Returns:
 79 |     List of list of tensors in the same form as `all_device_tensors`, except the
 80 |     tensors are aggregated across towers.
 81 |   """
 82 |   tower_grads = [[(g, None) for g in device_tensors] for
 83 |                  device_tensors in all_device_tensors]
 84 |   _, aggregated_tower_grads = variable_mgr.preprocess_device_grads(tower_grads)
 85 |   return [
 86 |       [g for g, _ in agg_device_tensors]
 87 |       for agg_device_tensors in aggregated_tower_grads]
 88 | 
 89 | 
 90 | def build_all_reduce_iterations(all_device_tensors, tower_devices, variable_mgr,
 91 |                                 num_iters):
 92 |   """Builds the all-reduce ops for multiple iterations to aggregate tensors.
 93 | 
 94 |   The tensors in `all_device_tensors` are aggregated `num_iters` times. Each
 95 |   iteration aggregates the results from the previous iteration. The iterations
 96 |   are run sequentially, so the aggregations for an iteration do not start
 97 |   running until the previous iteration has completed. Each iteration after the
 98 |   first is aggregating already-aggregated values, but it does not matter because
 99 |   we are only aggregating for benchmarking purposes.
100 | 
101 |   Args:
102 |     all_device_tensors: List of lists of tensors. all_device_tensors[t][i] is
103 |       a tensor, where t is the tower the tensor is on and i is the index of
104 |       the tensor.
105 |     tower_devices: A list of device strings. tower_devices[t] is the device
106 |       of the tensors in all_device_tensors[t].
107 |     variable_mgr: The VariableMgr to perform the all-reduce.
108 |     num_iters: Number of iterations to aggregate tensors for.
109 |   Returns:
110 |     An op that when run, causes the all-reduce ops to run.
111 |   """
112 |   for i in range(num_iters):
113 |     with tf.name_scope('iteration_%d' % i):
114 |       # Step 1: Do the aggregation.
115 |       with tf.name_scope('tensor_aggregation'):
116 |         all_device_tensors = all_reduce(all_device_tensors, variable_mgr)
117 | 
118 |       # Step 2. Create identity ops, to bring the aggregated results back to
119 |       # each device.
120 |       new_all_device_tensors = []
121 |       for device, device_tensors in zip(tower_devices, all_device_tensors):
122 |         with tf.device(device):
123 |           new_all_device_tensors.append([
124 |               tf.identity(t, name='identity_after_allreduce')
125 |               for t in device_tensors
126 |           ])
127 |       all_device_tensors = new_all_device_tensors
128 | 
129 |       # Step 3. Add control dependencies to delay the next iteration until this
130 |       # iteration is complete. To avoid extra overhead, we do not have any
131 |       # cross-device control dependencies, which means it's possible for two
132 |       # iterations to slightly overlap.
133 |       new_all_device_tensors = []
134 |       for device_tensors in all_device_tensors:
135 |         new_all_device_tensors.append([
136 |             control_flow_ops.with_dependencies(
137 |                 device_tensors, t, name='identity_after_dependencies')
138 |             for t in device_tensors
139 |         ])
140 |       all_device_tensors = new_all_device_tensors
141 | 
142 |   # To prevent the dependency optimizer from removing every op we created,
143 |   # we store the results in variables.
144 |   ops_to_run = []
145 |   for device, device_tensors in zip(tower_devices, all_device_tensors):
146 |     with tf.device(device):
147 |       for t in device_tensors:
148 |         # The placeholder initial value is never run.
149 |         var = tf.Variable(tf.placeholder(tf.float32, t.shape), collections=[])
150 |         ops_to_run.append(var.assign(t))
151 |   return tf.group(*ops_to_run)
152 | 
153 | 
def build_graph(tower_devices, tensor_shapes, variable_mgr, num_iters):
  """Creates the input variables and all-reduce ops of the benchmark.

  Args:
    tower_devices: A list of device strings of the devices to run the all-reduce
      benchmark on.
    tensor_shapes: A list of shapes of the tensors that will be aggregated for
      the all-reduce.
    variable_mgr: The VariableMgr to perform the all-reduce.
    num_iters: Number of iterations to aggregate tensors for.
  Returns:
    An op that runs the benchmark.
  """
  # One randomly initialized float32 variable per (device, shape) pair.
  all_device_tensors = []
  for device_index, tower_device in enumerate(tower_devices):
    with tf.device(tower_device):
      device_tensors = [
          tf.Variable(
              tf.random_normal(shape, dtype=tf.float32),
              name='tensor_%d_on_device_%d' % (tensor_index, device_index))
          for tensor_index, shape in enumerate(tensor_shapes)
      ]
    all_device_tensors.append(device_tensors)

  log_fn('Building all-reduce ops')
  benchmark_op = build_all_reduce_iterations(
      all_device_tensors, tower_devices, variable_mgr, num_iters)
  log_fn('Done building all-reduce ops')
  return benchmark_op
182 | 
183 | 
def run_graph(benchmark_op, bench_cnn, init_ops, dummy_loss_op):
  """Runs the graph for the benchmark.

  Steps with a negative index are warmup steps and are excluded from the
  reported timing; the clock is (re)started at step 0.

  Args:
    benchmark_op: An op that runs the benchmark.
    bench_cnn: The BenchmarkCNN where params and other attributes are obtained.
    init_ops: A list of ops that are run before `benchmark_op` for
      initialization.
    dummy_loss_op: Any op. We must pass a loss op to
      `benchmark_cnn.benchmark_one_step`, but the result of the op is never
      actually used.
  """
  config = benchmark_cnn.create_config_proto(bench_cnn.params)
  with tf.Session(config=config) as sess:
    for op in init_ops:
      sess.run(op)
    step_train_times = []
    fetches = {'average_loss': dummy_loss_op, 'benchmark_op': benchmark_op}
    log_fn('Running warmup')
    # Bind `start` before the loop so it is always defined. It is reset at
    # step 0 so that warmup steps are not included in the measured time.
    start = time.time()
    for i in range(-bench_cnn.num_warmup_batches, bench_cnn.num_batches):
      if i == 0:
        log_fn('Running all-reduce ops')
        start = time.time()
      if i > 0 and i % bench_cnn.params.display_every == 0:
        log_fn('Iteration: %d. Average time per step so far: %s' %
               (i, (time.time() - start) / i))
      # Call benchmark_one_step instead of directly calling sess.run(...), to
      # potentially get a trace file, partitioned graphs, etc.
      benchmark_cnn.benchmark_one_step(
          sess=sess,
          fetches=fetches,
          step=i,
          # The batch size is only used for the images/sec calculation, which is
          # not actually calculated because we pass show_images_per_sec=False.
          batch_size=None,
          step_train_times=step_train_times,
          trace_filename=bench_cnn.trace_filename,
          partitioned_graph_file_prefix=(
              bench_cnn.params.partitioned_graph_file_prefix),
          profiler=None,
          image_producer=None,
          params=bench_cnn.params,
          show_images_per_sec=False)
    # Without any timed step there is nothing to average (and dividing by
    # num_batches would fail), so the summary is only logged when steps ran.
    if bench_cnn.num_batches > 0:
      log_fn('Average time per step: %s' %
             ((time.time() - start) / bench_cnn.num_batches))
229 | 
230 | 
def run_benchmark(bench_cnn, num_iters):
  """Runs the all-reduce benchmark.

  Validates the params, builds the benchmark graph, optionally writes the
  GraphDef to `bench_cnn.graph_file`, and then runs the graph.

  Args:
    bench_cnn: The BenchmarkCNN where params, the variable manager, and other
      attributes are obtained.
    num_iters: Number of iterations to do all-reduce for.

  Raises:
    ValueError: Invalid params of bench_cnn.
  """
  if bench_cnn.params.variable_update != 'replicated':
    # Note the trailing space: the two literals are concatenated.
    raise ValueError('--variable_update=replicated must be specified to use '
                     'the all-reduce benchmark')
  if bench_cnn.params.variable_consistency == 'relaxed':
    raise ValueError('--variable_consistency=relaxed is not supported')

  benchmark_op = build_graph(bench_cnn.raw_devices,
                             get_var_shapes(bench_cnn.model),
                             bench_cnn.variable_mgr, num_iters)
  init_ops = [
      tf.global_variables_initializer(),
      bench_cnn.variable_mgr.get_post_init_ops()
  ]
  # run_graph requires a loss op, but its value is never used.
  loss_op = tf.no_op()

  if bench_cnn.graph_file:
    path, filename = os.path.split(bench_cnn.graph_file)
    # A file name ending in 'txt' selects the text format; anything else is
    # written as a binary GraphDef.
    as_text = filename.endswith('txt')
    log_fn('Writing GraphDef as %s to %s' % (
        'text' if as_text else 'binary', bench_cnn.graph_file))
    tf.train.write_graph(tf.get_default_graph().as_graph_def(add_shapes=True),
                         path, filename, as_text)

  run_graph(benchmark_op, bench_cnn, init_ops, loss_op)
266 | 
267 | 
268 | # TODO(reedwm): Reduce redundancy with tf_cnn_benchmarks
def main(positional_arguments):
  """Entry point: builds a BenchmarkCNN from the flags and runs the benchmark.

  Args:
    positional_arguments: The positional command-line arguments. The first
      element must be present; any further element is rejected.

  Raises:
    ValueError: If more than one positional argument was received.
  """
  # A command line such as '--distortions False' is parsed as
  # '--distortions=True False', with False being a positional argument. To
  # keep such a run from silently using distortions, extra positional
  # arguments are rejected.
  assert len(positional_arguments) >= 1
  unexpected_arguments = positional_arguments[1:]
  if len(unexpected_arguments) > 0:
    raise ValueError('Received unknown positional arguments: %s'
                     % unexpected_arguments)

  bench = benchmark_cnn.BenchmarkCNN(
      benchmark_cnn.setup(benchmark_cnn.make_params_from_flags()))

  major, minor = cnn_util.tensorflow_version_tuple()[:2]
  log_fn('TensorFlow:  %i.%i' % (major, minor))

  run_benchmark(bench, absl_flags.FLAGS.iters_per_step)


if __name__ == '__main__':
  # Unlike tf.app.run(), app.run() raises an error on invalid flags.
  app.run(main)
290 | 


--------------------------------------------------------------------------------