├── scripts ├── keras_benchmarks │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── timehistory.py │ │ ├── lstm_benchmark.py │ │ ├── mnist_mlp_benchmark.py │ │ └── cifar10_cnn_benchmark.py │ ├── run_tf_backend.sh │ ├── run_cntk_backend.sh │ ├── run_theano_backend.sh │ ├── config.json │ ├── data_generator.py │ ├── gpu_mode.py │ ├── setup_cpu.sh │ ├── setup_gpu.sh │ ├── run_benchmark.py │ └── upload_benchmarks_bq.py └── tf_cnn_benchmarks │ ├── models │ ├── __init__.py │ ├── experimental │ │ ├── __init__.py │ │ └── official_ncf_model.py │ ├── lenet_model.py │ ├── overfeat_model.py │ ├── trivial_model.py │ ├── googlenet_model.py │ ├── vgg_model.py │ ├── resnet_model_test.py │ ├── alexnet_model.py │ ├── official_resnet_model.py │ ├── densenet_model.py │ ├── model_config.py │ ├── mobilenet_test.py │ ├── mobilenet_v2.py │ └── inception_model.py │ ├── platforms │ ├── __init__.py │ ├── default │ │ ├── __init__.py │ │ └── util.py │ └── util.py │ ├── test_data │ ├── __init__.py │ ├── images │ │ ├── black_image.jpg │ │ └── white_image.jpg │ ├── fake_tf_record_data │ │ ├── train-00000-of-00008 │ │ ├── train-00001-of-00008 │ │ ├── train-00002-of-00008 │ │ ├── train-00003-of-00008 │ │ ├── train-00004-of-00008 │ │ ├── train-00005-of-00008 │ │ ├── train-00006-of-00008 │ │ ├── train-00007-of-00008 │ │ ├── validation-00000-of-00002 │ │ └── validation-00001-of-00002 │ └── tfrecord_image_generator.py │ ├── constants.py │ ├── all_reduce_benchmark_test.py │ ├── tf_cnn_benchmarks.py │ ├── README.md │ ├── flags.py │ ├── run_tests.py │ ├── ssd_constants.py │ ├── cnn_util_test.py │ ├── benchmark_cnn_distributed_test_runner.py │ ├── variable_mgr_util_test.py │ ├── coco_metric.py │ ├── mlperf_test.py │ ├── datasets.py │ ├── cnn_util.py │ ├── mlperf.py │ └── all_reduce_benchmark.py ├── README.md └── LICENSE /scripts/keras_benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /scripts/keras_benchmarks/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/platforms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/models/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/platforms/default/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/images/black_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/images/black_image.jpg -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/images/white_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/images/white_image.jpg 
-------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00000-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00000-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00001-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00001-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00002-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00002-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00003-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00003-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00004-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00004-of-00008 -------------------------------------------------------------------------------- 
/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00005-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00005-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00006-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00006-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00007-of-00008: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/train-00007-of-00008 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00000-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00000-of-00002 -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00001-of-00002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/benchmarks/master/scripts/tf_cnn_benchmarks/test_data/fake_tf_record_data/validation-00001-of-00002 -------------------------------------------------------------------------------- /scripts/keras_benchmarks/models/timehistory.py: 
-------------------------------------------------------------------------------- 1 | """ Utility class for accessing the first epoch time interval. """ 2 | import keras 3 | import time 4 | 5 | 6 | class TimeHistory(keras.callbacks.Callback): 7 | def on_train_begin(self, logs={}): 8 | self.times = [] 9 | 10 | def on_epoch_begin(self, batch, logs={}): 11 | self.epoch_time_start = time.time() 12 | 13 | def on_epoch_end(self, batch, logs={}): 14 | self.times.append(time.time() - self.epoch_time_start) 15 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/run_tf_backend.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Keras Tensorflow Backend 4 | python -c "from keras import backend" 5 | KERAS_BACKEND=tensorflow 6 | sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 7 | echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 8 | 9 | # Use "cpu_config", "gpu_config" and "multi_gpu_config" as command line arguments to load the right 10 | # config file. 11 | python benchmarks/scripts/keras_benchmarks/run_benchmark.py --mode="$1" 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow benchmarks 2 | This repository contains various TensorFlow benchmarks. Currently, it consists of two projects: 3 | 4 | 1. [scripts/tf_cnn_benchmarks](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks): The TensorFlow CNN benchmarks contain benchmarks for several convolutional neural networks. 5 | 2. [scripts/keras_benchmarks](https://github.com/tensorflow/benchmarks/tree/master/scripts/keras_benchmarks): The Keras benchmarks contain benchmarks for several models using Keras. Note this project is deprecated and unmaintained. 
6 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/run_cntk_backend.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Keras CNTK Backend 4 | python -c "from keras import backend" 5 | KERAS_BACKEND=cntk 6 | sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 7 | echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 8 | 9 | # Use "cpu_config", "gpu_config" and "multi_gpu_config" as command line arguments to load the right 10 | # config file. 11 | if [ "$1" = "multi_gpu_config" ]; then 12 | mpiexec -n 4 python benchmarks/scripts/keras_benchmarks/run_benchmark.py "$1" 13 | fi 14 | 15 | python benchmarks/scripts/keras_benchmarks/run_benchmark.py "$1" 16 | 17 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/run_theano_backend.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Keras Theano Backend: rewrite the "backend" entry of ~/.keras/keras.json to theano before running 4 | python -c "from keras import backend" 5 | KERAS_BACKEND=theano 6 | sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 7 | echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 8 | 9 | # Use "cpu_config", "gpu_config" and "multi_gpu_config" as command line arguments to load the right 10 | # config file. 11 | if [ "$1" = "cpu_config" ]; then 12 | python benchmarks/scripts/keras_benchmarks/run_benchmark.py "$1" 13 | else 14 | echo "GPU mode for Theano backend is not supported currently by the keras benchmarks script."
15 | fi -------------------------------------------------------------------------------- /scripts/keras_benchmarks/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "cpu_config": { 3 | "cpu_num_cores": 1, 4 | "cpu_memory": 3.75, 5 | "cpu_memory_info": "GB", 6 | "platform_type": "GCP", 7 | "platform_machine_type": "n1-standard-1", 8 | "gpus": 0, 9 | "gpu_platform": "None" 10 | }, 11 | "gpu_config": { 12 | "cpu_num_cores": 8, 13 | "cpu_memory": 30, 14 | "cpu_memory_info": "GB", 15 | "platform_type": "GCP", 16 | "platform_machine_type": "n1-standard-8", 17 | "gpus": 1, 18 | "gpu_platform": "NVIDIA Tesla K80" 19 | }, 20 | "multi_gpu_config": { 21 | "cpu_num_cores": 8, 22 | "cpu_memory": 30, 23 | "cpu_memory_info": "GB", 24 | "platform_type": "GCP", 25 | "platform_machine_type": "n1-standard-8", 26 | "gpus": 4, 27 | "gpu_platform": "NVIDIA Tesla K80" 28 | } 29 | } 30 | 31 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/platforms/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utility code for a certain platform. 
17 | 18 | This file simply imports everything from the default platform. To switch to a 19 | different platform, the import statement can be changed to point to a new 20 | platform. 21 | 22 | Creating a custom platform can be useful to, e.g., run some initialization code 23 | required by the platform or register a platform-specific model. 24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from platforms.default.util import * # pylint: disable=unused-import,wildcard-import 31 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/data_generator.py: -------------------------------------------------------------------------------- 1 | """ Generates input and label data for training models. """ 2 | import numpy as np 3 | 4 | 5 | def generate_img_input_data(input_shape, num_classes): 6 | """Generates training data and target labels. 7 | 8 | # Arguments 9 | input_shape: input shape in the following format 10 | `(num_samples, channels, x, y)` 11 | num_classes: number of classes that we want to classify the input 12 | 13 | # Returns 14 | numpy arrays: `x_train, y_train` 15 | """ 16 | x_train = np.random.randint(0, 255, input_shape) 17 | y_train = np.random.randint(0, num_classes, (input_shape[0],)) 18 | 19 | return x_train, y_train 20 | 21 | 22 | def generate_text_input_data(input_shape, p=0.05, return_as_bool=True): 23 | """Generates training data and target labels . 24 | 25 | Given an input shape the function generates one hot encoded vectors. For 26 | example when we use words as our tokens, the presence/absence of the given 27 | word in the vocabulary is represented by True/False. 
28 | 29 | # Arguments 30 | input_shape: input shape in the following format `(num_samples, x, y)` 31 | p: fraction of tokens that are present in the vocabulary 32 | return_as_bool: data and labels are returned as boolean arrays 33 | 34 | # Returns 35 | numpy arrays: `x_train, y_train` 36 | """ 37 | x_train = np.random.binomial(1, p, input_shape) 38 | y_train = np.random.binomial(1, p, (input_shape[0], input_shape[2])) 39 | 40 | if return_as_bool: 41 | return x_train.astype(bool), y_train.astype(bool) 42 | 43 | return x_train, y_train 44 | 45 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/models/lenet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Lenet model configuration. 
17 | 18 | References: 19 | LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner 20 | Gradient-based learning applied to document recognition 21 | Proceedings of the IEEE (1998) 22 | """ 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from models import model 29 | 30 | 31 | class Lenet5Model(model.CNNModel): 32 | """Lenet5.""" 33 | 34 | def __init__(self, params=None): 35 | super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params) 36 | 37 | def add_inference(self, cnn): 38 | # Note: This matches TF's MNIST tutorial model 39 | cnn.conv(32, 5, 5) 40 | cnn.mpool(2, 2) 41 | cnn.conv(64, 5, 5) 42 | cnn.mpool(2, 2) 43 | cnn.reshape([-1, 64 * 7 * 7]) 44 | cnn.affine(512) 45 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/constants.py: -------------------------------------------------------------------------------- 1 | """Constants used in tf_cnn_benchmarks.""" 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from enum import Enum 8 | 9 | # Results fetched with this prefix will not be reduced. Instead, they will be 10 | # passed as matrices to model's postprocess function. 11 | UNREDUCED_ACCURACY_OP_PREFIX = "tensor:" 12 | 13 | # Eval result values with this name prefix will be included in summary. 14 | SIMPLE_VALUE_RESULT_PREFIX = "simple_value:" 15 | 16 | 17 | class BenchmarkMode(object): 18 | """Benchmark running mode.""" 19 | TRAIN = "training" 20 | EVAL = "evaluation" 21 | TRAIN_AND_EVAL = "training + evaluation" 22 | FORWARD_ONLY = "forward only" 23 | 24 | 25 | class NetworkTopology(str, Enum): 26 | """Network topology describes how multiple GPUs are inter-connected. 
27 | """ 28 | # DGX-1 uses hybrid cube mesh topology with the following device peer to peer 29 | # matrix: 30 | # DMA: 0 1 2 3 4 5 6 7 31 | # 0: Y Y Y Y Y N N N 32 | # 1: Y Y Y Y N Y N N 33 | # 2: Y Y Y Y N N Y N 34 | # 3: Y Y Y Y N N N Y 35 | # 4: Y N N N Y Y Y Y 36 | # 5: N Y N N Y Y Y Y 37 | # 6: N N Y N Y Y Y Y 38 | # 7: N N N Y Y Y Y Y 39 | DGX1 = "dgx1" 40 | 41 | # V100 in GCP are connected with the following device peer to peer matrix. 42 | # In this topology, bandwidth of the connection depends on if it uses NVLink 43 | # or PCIe link. 44 | # DMA: 0 1 2 3 4 5 6 7 45 | # 0: Y Y Y Y N Y N N 46 | # 1: Y Y Y Y N N N N 47 | # 2: Y Y Y Y N N N Y 48 | # 3: Y Y Y Y N N N N 49 | # 4: N N N N Y Y Y Y 50 | # 5: Y N N N Y Y Y Y 51 | # 6: N N N N Y Y Y Y 52 | # 7: N N Y N Y Y Y Y 53 | GCP_V100 = "gcp_v100" 54 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/gpu_mode.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | """ CNTK gpu config required for running keras models in multi gpu mode.""" 3 | import cntk 4 | 5 | def cntk_gpu_mode_config(model, num_samples): 6 | """Sets up a distributed trainer for keras models using CNTK backend 7 | in multi gpu mode. 8 | 9 | # Arguments 10 | model: Keras model instance. 11 | num_samples: Total number of input training samples that will be 12 | distributed across gpus for processing. 13 | 14 | # Returns 15 | The start and end indices of the data that a given gpu will process. 
16 | 17 | # Raises 18 | ValueError: when there are no learners in the trainer. 19 | """ 20 | model.model._make_train_function() 21 | trainer = model.model.train_function.trainer 22 | learner_no = len(trainer.parameter_learners) 23 | if learner_no < 1: 24 | raise ValueError("No learner in the trainer.") 25 | if learner_no > 1: 26 | warnings.warn("Unexpected multiple learners in a trainer.") 27 | learner = trainer.parameter_learners[0] 28 | dist_learner = cntk.train.distributed. \ 29 | data_parallel_distributed_learner( 30 | learner, num_quantization_bits=32, distributed_after=0) 31 | model.model.train_function.trainer = cntk.trainer.Trainer( 32 | trainer.model, [trainer.loss_function, 33 | trainer.evaluation_function], [dist_learner]) 34 | 35 | rank = cntk.Communicator.rank() 36 | workers = cntk.Communicator.num_workers() 37 | if workers == 1: 38 | warnings.warn("Only one worker is found.") 39 | total_items = num_samples 40 | start = rank * total_items // workers 41 | end = min((rank+1) * total_items // workers, total_items) 42 | return start, end 43 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/models/overfeat_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Overfeat model configuration. 16 | 17 | References: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus, 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | """ 24 | 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | from models import model 30 | 31 | 32 | class OverfeatModel(model.CNNModel): 33 | """OverfeatModel.""" 34 | 35 | def __init__(self, params=None): 36 | super(OverfeatModel, self).__init__( 37 | 'overfeat', 231, 32, 0.005, params=params) 38 | 39 | def add_inference(self, cnn): 40 | # Note: VALID requires padding the images by 3 in width and height 41 | cnn.conv(96, 11, 11, 4, 4, mode='VALID') 42 | cnn.mpool(2, 2) 43 | cnn.conv(256, 5, 5, 1, 1, mode='VALID') 44 | cnn.mpool(2, 2) 45 | cnn.conv(512, 3, 3) 46 | cnn.conv(1024, 3, 3) 47 | cnn.conv(1024, 3, 3) 48 | cnn.mpool(2, 2) 49 | cnn.reshape([-1, 1024 * 6 * 6]) 50 | cnn.affine(3072) 51 | cnn.dropout() 52 | cnn.affine(4096) 53 | cnn.dropout() 54 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/setup_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # setup script for running benchmarks on CPU 4 | 5 | sudo apt-get update 6 | 7 | # Install pip package manager 8 | echo "Installing pip" 9 | wget https://bootstrap.pypa.io/get-pip.py 10 | sudo python get-pip.py 11 | 12 | sudo apt-get install bzip2 13 | 14 | # Install conda environment manager 15 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 16 | chmod 777 miniconda.sh 17 | ./miniconda.sh -b -p $HOME/miniconda 18 | export PATH=$HOME/miniconda/bin:$PATH 19 | hash -r 20 | conda config --set 
always_yes yes --set changeps1 no 21 | conda update -q conda 22 | # Useful for debugging any issues with conda 23 | conda info -a 24 | conda create -q -n benchmarks-environment python="2.7" numpy scipy 25 | source activate benchmarks-environment 26 | # set library path to the lib directory of the conda environment created above 27 | export LD_LIBRARY_PATH=$HOME/miniconda/envs/benchmarks-environment/lib/:$LD_LIBRARY_PATH 28 | 29 | # Install Pillow package 30 | conda install pil 31 | 32 | # Install Theano 33 | echo "Installing Theano" 34 | pip install theano 35 | 36 | # Install MKL library for Theano 37 | conda install mkl-service 38 | 39 | # Install g++ 40 | sudo apt-get install g++ -y 41 | 42 | # Install tensorflow 43 | echo "Installing Tensorflow" 44 | pip install tensorflow 45 | 46 | # Install CNTK 47 | echo "Installing CNTK" 48 | pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp27-cp27mu-linux_x86_64.whl 49 | 50 | # Install OpenCV 51 | sudo apt-get install libopencv-dev python-opencv -y 52 | 53 | # Install open mpi 54 | rm -rf ~/mpi 55 | mkdir ~/mpi 56 | pushd ~/mpi 57 | wget http://cntk.ai/PythonWheel/ForKeras/depends/openmpi_1.10-3.zip 58 | sudo apt-get install unzip -y 59 | unzip ./openmpi_1.10-3.zip 60 | sudo dpkg -i openmpi_1.10-3.deb 61 | popd 62 | 63 | # Install Keras 64 | echo "Installing Keras" 65 | pip install keras 66 | 67 | # Install git 68 | echo "Installing Git" 69 | sudo apt-get install git -y 70 | 71 | # Install google-cloud tools 72 | echo "Installing Google Cloud tools" 73 | pip install google-cloud 74 | pip install google-cloud-bigquery 75 | 76 | # Install h5py 77 | pip install h5py 78 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/all_reduce_benchmark_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for all_reduce_benchmark.py.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | import all_reduce_benchmark 24 | import benchmark_cnn 25 | import test_util 26 | 27 | 28 | class AllReduceBenchmarkTest(tf.test.TestCase): 29 | """Tests the all-reduce benchmark.""" 30 | 31 | def _test_run_benchmark(self, params): 32 | """Tests that run_benchmark() runs successfully with the params.""" 33 | logs = [] 34 | with test_util.monkey_patch(all_reduce_benchmark, 35 | log_fn=test_util.print_and_add_to_list(logs)): 36 | bench_cnn = benchmark_cnn.BenchmarkCNN(params) 37 | all_reduce_benchmark.run_benchmark(bench_cnn, num_iters=5) 38 | self.assertRegexpMatches(logs[-1], '^Average time per step: [0-9.]+$') 39 | 40 | def test_run_benchmark(self): 41 | """Tests that run_benchmark() runs successfully.""" 42 | params = benchmark_cnn.make_params(num_batches=10, 43 | variable_update='replicated', 44 | num_gpus=2) 45 | self._test_run_benchmark(params) 46 | params = params._replace(hierarchical_copy=True, gradient_repacking=8, 47 | num_gpus=8) 48 | self._test_run_benchmark(params) 49 | 50 | if __name__ == '__main__': 51 | tf.test.main() 52 | 
-------------------------------------------------------------------------------- /scripts/keras_benchmarks/models/lstm_benchmark.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Original Model from keras/examples/lstm_text_generation.py 3 | 4 | Benchmark for a LSTM model. 5 | ''' 6 | from __future__ import print_function 7 | import keras 8 | from keras.models import Sequential 9 | from keras.layers import Dense 10 | from keras.layers import LSTM 11 | from keras.optimizers import RMSprop 12 | from keras.utils import multi_gpu_model 13 | 14 | from models import timehistory 15 | from data_generator import generate_text_input_data 16 | 17 | if keras.backend.backend() == 'cntk': 18 | from gpu_mode import cntk_gpu_mode_config 19 | 20 | 21 | class LstmBenchmark(): 22 | 23 | def __init__(self): 24 | self.test_name = "lstm" 25 | self.sample_type = "text" 26 | self.total_time = 0 27 | self.batch_size = 128 28 | self.epochs = 2 29 | self.num_samples = 1000 30 | 31 | def run_benchmark(self, gpus=0): 32 | input_dim_1 = 40 33 | input_dim_2 = 60 34 | 35 | input_shape = (self.num_samples, input_dim_1, 60) 36 | x, y = generate_text_input_data(input_shape) 37 | 38 | # build the model: a single LSTM 39 | model = Sequential() 40 | model.add(LSTM(128, input_shape=(input_dim_1, input_dim_2))) 41 | model.add(Dense(input_dim_2), activation='softmax') 42 | 43 | optimizer = RMSprop(lr=0.01) 44 | 45 | if keras.backend.backend() is "tensorflow" and gpus > 1: 46 | model = multi_gpu_model(model, gpus=gpus) 47 | 48 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 49 | 50 | # create a distributed trainer for cntk 51 | if keras.backend.backend() is "cntk" and gpus > 1: 52 | start, end = cntk_gpu_mode_config(model, x.shape[0]) 53 | x = x[start: end] 54 | y = y[start: end] 55 | 56 | time_callback = timehistory.TimeHistory() 57 | 58 | model.fit(x, y, 59 | batch_size=self.batch_size, 60 | epochs=self.epochs, 61 | 
callbacks=[time_callback]) 62 | 63 | self.total_time = 0 64 | for i in range(1, self.epochs): 65 | self.total_time += time_callback.times[i] 66 | -------------------------------------------------------------------------------- /scripts/keras_benchmarks/setup_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # setup script for running benchmarks on GPU 4 | 5 | # Install pip 6 | wget https://bootstrap.pypa.io/get-pip.py 7 | sudo python get-pip.py 8 | 9 | # Install Nvidia drivers CUDA 8 10 | curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb 11 | sudo dpkg -i ./cuda-repo-ubuntu1604_8.0.61-1_amd64.deb 12 | sudo apt-get update 13 | sudo apt-get install cuda-8-0 -y 14 | 15 | # [Instructions from GCP docs:https://cloud.google.com/compute/docs/gpus/add-gpus#install-gpu-driver] 16 | # Use nvidia-smi to verify that the drivers have been installed 17 | 18 | # set the CUDA paths 19 | export CUDA_HOME=/usr/local/cuda 20 | export PATH=$PATH:$CUDA_HOME/bin 21 | export LD_LIBRARY_PATH=$CUDA_HOME/lib64 22 | 23 | #Install cudnn library 24 | # TODO(anjalisridhar): the cudann library was downloaded to the local machine. try using curl 25 | gsutil cp gs://keras-benchmarks/libcudnn6_6.0.21-1+cuda8.0_amd64.deb . 
26 | sudo dpkg -i libcudnn6_6.0.21-1+cuda8.0_amd64.deb 27 | 28 | # CUB for CNTK 29 | wget https://github.com/NVlabs/cub/archive/1.4.1.zip 30 | sudo apt-get install unzip -y 31 | unzip ./1.4.1.zip 32 | sudo cp -r cub-1.4.1 /usr/local 33 | 34 | # CNTK requires cudnn installation to be in a specific directory 35 | wget http://developer.download.nvidia.com/compute/redist/cudnn/v6.0/cudnn-8.0-linux-x64-v6.0.tgz 36 | tar -xzvf ./cudnn-8.0-linux-x64-v6.0.tgz 37 | sudo mkdir /usr/local/cudnn-6.0 38 | sudo cp -r cuda /usr/local/cudnn-6.0 39 | export LD_LIBRARY_PATH=/usr/local/cudnn-6.0/cuda/lib64:$LD_LIBRARY_PATH 40 | 41 | # MPI installation 42 | sudo apt-get install openmpi-bin -y 43 | 44 | # Install CNTK GPU version 45 | pip install https://cntk.ai/PythonWheel/GPU/cntk-2.2-cp27-cp27mu-linux_x86_64.whl 46 | 47 | # Install other packages required for CNTK 48 | sudo apt-get install libopencv-dev python-opencv -y 49 | 50 | # Install keras 51 | sudo pip install keras 52 | 53 | # Install required packages for TF-GPU (-y keeps the setup non-interactive) 54 | sudo apt-get install python-dev python-pip libcupti-dev -y 55 | 56 | # Install tensorflow GPU version 57 | sudo pip install tensorflow-gpu 58 | # Upgrade keras from the GitHub repository (https, since GitHub no longer serves git://) 59 | sudo pip install git+https://github.com/fchollet/keras.git --upgrade 60 | 61 | # Install google-cloud tools 62 | echo "Installing Google Cloud tools" 63 | sudo pip install google-cloud 64 | sudo pip install google-cloud-bigquery 65 | 66 | # Install h5py 67 | sudo pip install h5py 68 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/models/trivial_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
class TrivialModel(model.CNNModel):
  """Minimal model: flatten the input and apply two affine layers."""

  def __init__(self, params=None):
    # NOTE(review): positional arguments are presumably
    # (name, image size, batch size, learning rate) -- confirm against
    # model.CNNModel.
    image_size = 224 + 3
    super(TrivialModel, self).__init__(
        'trivial', image_size, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds reshape -> affine(1) -> affine(4096) to `cnn`."""
    flat_size = 227 * 227 * 3
    cnn.reshape([-1, flat_size])
    cnn.affine(1)
    cnn.affine(4096)


class TrivialCifar10Model(model.CNNModel):
  """Minimal model for 32x32x3 inputs (cifar10)."""

  def __init__(self, params=None):
    image_size = 32
    super(TrivialCifar10Model, self).__init__(
        'trivial', image_size, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds reshape -> affine(1) -> affine(4096) to `cnn`."""
    flat_size = 32 * 32 * 3
    cnn.reshape([-1, flat_size])
    cnn.affine(1)
    cnn.affine(4096)


class TrivialSSD300Model(model.CNNModel):
  """Minimal model that consumes SSD300-style four-element inputs."""

  def __init__(self, params=None):
    image_size = 300
    super(TrivialSSD300Model, self).__init__(
        'trivial', image_size, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds reshape -> affine(1) -> affine(4096) to `cnn`."""
    flat_size = 300 * 300 * 3
    cnn.reshape([-1, flat_size])
    cnn.affine(1)
    cnn.affine(4096)

  def get_input_shapes(self, subset):
    """Returns the four fixed input shapes; `subset` is not used."""
    image_shape = [32, 300, 300, 3]
    second_shape = [32, 8732, 4]
    third_shape = [32, 8732, 1]
    label_shape = [32]
    return [image_shape, second_shape, third_shape, label_shape]

  def loss_function(self, inputs, build_network_result):
    """Computes the base-class loss from the first and last input only.

    The two middle elements of `inputs` are discarded and the last element is
    cast to int32 before being handed to the parent loss function.
    """
    images, _, _, raw_labels = inputs
    int_labels = tf.cast(raw_labels, tf.int32)
    parent = super(TrivialSSD300Model, self)
    return parent.loss_function((images, int_labels), build_network_result)
class MnistMlpBenchmark():
  """Benchmarks a simple MLP on randomly generated MNIST-shaped data."""

  def __init__(self):
    # Identification and configuration values; total_time is filled in by
    # run_benchmark().
    self.test_name = "mnist_mlp"
    self.sample_type = "images"
    self.total_time = 0
    self.batch_size = 128
    self.epochs = 2
    self.num_samples = 1000

  def run_benchmark(self, gpus=0):
    """Builds, compiles and fits the model, recording epoch timings.

    Args:
      gpus: Number of GPUs to use. Values greater than 1 enable the
        multi-GPU path for the tensorflow and cntk backends.

    Side effects:
      Sets self.total_time to the sum of the recorded epoch times, excluding
      the first epoch.
    """
    num_classes = 10
    backend = keras.backend.backend()

    # Generate random input data
    input_shape = (self.num_samples, 28, 28)
    x_train, y_train = generate_img_input_data(input_shape)

    x_train = x_train.reshape(self.num_samples, 784)
    x_train = x_train.astype('float32')
    x_train /= 255

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)

    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    # Compare by value: `is` tests object identity, which is not guaranteed
    # for equal strings and could silently skip the multi-GPU path.
    if backend == "tensorflow" and gpus > 1:
      model = multi_gpu_model(model, gpus=gpus)

    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(),
                  metrics=['accuracy'])

    # create a distributed trainer for cntk
    if backend == "cntk" and gpus > 1:
      start, end = cntk_gpu_mode_config(model, x_train.shape[0])
      x_train = x_train[start: end]
      y_train = y_train[start: end]

    time_callback = timehistory.TimeHistory()
    model.fit(x_train, y_train, batch_size=self.batch_size,
              epochs=self.epochs, verbose=1, callbacks=[time_callback])

    # The first epoch (index 0) is left out of the total.
    self.total_time = sum(
        time_callback.times[i] for i in range(1, self.epochs))
class GooglenetModel(model.CNNModel):
  """GoogLeNet model configuration."""

  def __init__(self, params=None):
    super(GooglenetModel, self).__init__(
        'googlenet', 224, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the GoogLeNet layers to `cnn` in order."""

    def inception_v1(cnn, k, l, m, n, p, q):
      # Four parallel columns: 1x1, 1x1->3x3, 1x1->5x5 and pool->1x1.
      col_1x1 = [('conv', k, 1, 1)]
      col_3x3 = [('conv', l, 1, 1), ('conv', m, 3, 3)]
      col_5x5 = [('conv', n, 1, 1), ('conv', p, 5, 5)]
      col_pool = [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]
      cnn.inception_module('incept_v1', [col_1x1, col_3x3, col_5x5, col_pool])

    # Stem.
    cnn.conv(64, 7, 7, 2, 2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.conv(64, 1, 1)
    cnn.conv(192, 3, 3)

    # Each stage is a strided max pool followed by its inception modules.
    stages = (
        ((64, 96, 128, 16, 32, 32),
         (128, 128, 192, 32, 96, 64)),
        ((192, 96, 208, 16, 48, 64),
         (160, 112, 224, 24, 64, 64),
         (128, 128, 256, 24, 64, 64),
         (112, 144, 288, 32, 64, 64),
         (256, 160, 320, 32, 128, 128)),
        ((256, 160, 320, 32, 128, 128),
         (384, 192, 384, 48, 128, 128)),
    )
    for stage in stages:
      cnn.mpool(3, 3, 2, 2, mode='SAME')
      for widths in stage:
        inception_v1(cnn, *widths)

    cnn.apool(7, 7, 1, 1, mode='VALID')
    cnn.reshape([-1, 1024])
def define_platform_params():
  """Defines platform-specific parameters.

  Currently there are no platform-specific parameters to be defined.
  """
  pass


def get_cluster_manager(params, config_proto):
  """Returns the cluster manager to be used."""
  return cnn_util.GrpcClusterManager(params, config_proto)


def get_command_to_run_python_module(module):
  """Returns a command to run a Python module.

  Args:
    module: Module name without the '.py' suffix, resolved relative to the
      project root directory.

  Returns:
    A two-element list: the current interpreter path and the module's path.

  Raises:
    ValueError: If sys.executable is empty.
  """
  python_interpreter = sys.executable
  if not python_interpreter:
    raise ValueError('Could not find Python interpreter')
  return [python_interpreter,
          os.path.join(_ROOT_PROJECT_DIR, module + '.py')]


def get_test_output_dir():
  """Returns a directory where test outputs should be placed.

  A fresh temporary directory is created under the directory named by the
  TEST_OUTPUTS_DIR environment variable, or under
  /tmp/tf_cnn_benchmarks_test_outputs when the variable is unset.

  Raises:
    OSError: If the base directory cannot be created.
  """
  import errno  # Local import: only needed here.

  base_dir = os.environ.get('TEST_OUTPUTS_DIR',
                            '/tmp/tf_cnn_benchmarks_test_outputs')
  # Create-then-tolerate instead of check-then-create: concurrent test
  # processes may race to create the directory, and makedirs also handles a
  # base directory whose parents do not exist yet.
  try:
    os.makedirs(base_dir)
  except OSError as e:
    if e.errno != errno.EEXIST or not os.path.isdir(base_dir):
      raise
  return tempfile.mkdtemp(dir=base_dir)


def get_test_data_dir():
  """Returns the path to the test_data directory."""
  return os.path.join(_ROOT_PROJECT_DIR, 'test_data')


def _initialize(params, config_proto):
  # Currently, no platform initialization needs to be done.
  del params, config_proto


# Set to True by the first call to initialize().
_is_initalized = False


def initialize(params, config_proto):
  """Runs platform initialization once; later calls return immediately."""
  global _is_initalized
  if _is_initalized:
    return
  _is_initalized = True
  _initialize(params, config_proto)
def main(positional_arguments):
  """Builds benchmark params from flags and runs the benchmark.

  Args:
    positional_arguments: Positional command-line arguments. The first
      element is always expected to be present; any further element is
      rejected.

  Raises:
    ValueError: If more than one positional argument is received.
  """
  # Command-line arguments like '--distortions False' are equivalent to
  # '--distortions=True False', where False is a positional argument. To prevent
  # this from silently running with distortions, we do not allow positional
  # arguments.
  assert len(positional_arguments) >= 1
  if len(positional_arguments) > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.make_params_from_flags()
  # NOTE(review): the source this was recovered from had lost its
  # indentation; the extent of this `with` body should be confirmed.
  with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging,
                            params.model):
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Log the major.minor TensorFlow version before running.
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))

    bench.print_info()
    bench.run()
# Read the configuration for the requested mode. The context manager closes
# the file handle, which was previously left open for the life of the process.
with open("benchmarks/scripts/keras_benchmarks/config.json", 'r') as config_file:
  config = json.load(config_file)[args.mode]


def get_backend_version():
  """Returns the version string of the active Keras backend.

  Returns "undefined" when the backend is not tensorflow, theano or cntk.
  """
  backend = keras.backend.backend()
  if backend == "tensorflow":
    return tf.__version__
  if backend == "theano":
    return theano.__version__
  if backend == "cntk":
    return cntk.__version__
  return "undefined"


def _upload_metrics(current_model):
  """Uploads the results of a finished benchmark together with the config.

  Args:
    current_model: Benchmark object providing test_name, total_time, epochs,
      batch_size and sample_type.
  """
  bq.upload_metrics_to_bq(test_name=current_model.test_name,
                          total_time=current_model.total_time,
                          epochs=current_model.epochs,
                          batch_size=current_model.batch_size,
                          backend_type=keras.backend.backend(),
                          backend_version=get_backend_version(),
                          cpu_num_cores=config['cpu_num_cores'],
                          cpu_memory=config['cpu_memory'],
                          cpu_memory_info=config['cpu_memory_info'],
                          gpu_count=config['gpus'],
                          gpu_platform=config['gpu_platform'],
                          platform_type=config['platform_type'],
                          platform_machine_type=config['platform_machine_type'],
                          keras_version=keras.__version__,
                          sample_type=current_model.sample_type)


# Run each benchmark in turn (MNIST MLP, CIFAR10 CNN, LSTM) and upload its
# metrics before starting the next one.
for benchmark_class in (mnist_mlp_benchmark.MnistMlpBenchmark,
                        cifar10_cnn_benchmark.Cifar10CnnBenchmark,
                        lstm_benchmark.LstmBenchmark):
  model = benchmark_class()
  model.run_benchmark(gpus=config['gpus'])
  _upload_metrics(model)
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Vgg model configuration. 16 | 17 | Includes multiple models: vgg11, vgg16, vgg19, corresponding to 18 | model A, D, and E in Table 1 of [1]. 19 | 20 | References: 21 | [1] Simonyan, Karen, Andrew Zisserman 22 | Very Deep Convolutional Networks for Large-Scale Image Recognition 23 | arXiv:1409.1556 (2014) 24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from six.moves import xrange # pylint: disable=redefined-builtin 31 | from models import model 32 | 33 | 34 | def _construct_vgg(cnn, num_conv_layers): 35 | """Build vgg architecture from blocks.""" 36 | assert len(num_conv_layers) == 5 37 | for _ in xrange(num_conv_layers[0]): 38 | cnn.conv(64, 3, 3) 39 | cnn.mpool(2, 2) 40 | for _ in xrange(num_conv_layers[1]): 41 | cnn.conv(128, 3, 3) 42 | cnn.mpool(2, 2) 43 | for _ in xrange(num_conv_layers[2]): 44 | cnn.conv(256, 3, 3) 45 | cnn.mpool(2, 2) 46 | for _ in xrange(num_conv_layers[3]): 47 | cnn.conv(512, 3, 3) 48 | cnn.mpool(2, 2) 49 | for _ in xrange(num_conv_layers[4]): 50 | cnn.conv(512, 3, 3) 51 | cnn.mpool(2, 2) 52 | cnn.reshape([-1, 512 * 7 * 7]) 53 | cnn.affine(4096) 54 | cnn.dropout() 55 | cnn.affine(4096) 56 | cnn.dropout() 57 | 58 | 59 | class Vgg11Model(model.CNNModel): 60 | 61 | def 
class Vgg11Model(model.CNNModel):
  """VGG variant named 'vgg11': per-stage conv counts [1, 1, 2, 2, 2]."""

  def __init__(self, params=None):
    super(Vgg11Model, self).__init__('vgg11', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the vgg11 layers to `cnn`."""
    _construct_vgg(cnn, [1, 1, 2, 2, 2])


class Vgg16Model(model.CNNModel):
  """VGG variant named 'vgg16': per-stage conv counts [2, 2, 3, 3, 3]."""

  def __init__(self, params=None):
    super(Vgg16Model, self).__init__('vgg16', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the vgg16 layers to `cnn`."""
    _construct_vgg(cnn, [2, 2, 3, 3, 3])


class Vgg19Model(model.CNNModel):
  """VGG variant named 'vgg19': per-stage conv counts [2, 2, 4, 4, 4]."""

  def __init__(self, params=None):
    super(Vgg19Model, self).__init__('vgg19', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the vgg19 layers to `cnn`."""
    _construct_vgg(cnn, [2, 2, 4, 4, 4])
class ResNetModelTest(tf.test.TestCase):
  """Tests for ResnetModel.get_scaled_base_learning_rate."""

  # assertEqual is used throughout: assertEquals is a deprecated unittest
  # alias that newer Python versions no longer provide.

  def testGetScaledBaseLearningRateOneGpuLrFromParams(self):
    """Verifies setting params.resnet_base_lr pipes through."""
    lr = self._get_scaled_base_learning_rate(1,
                                             'parameter_server',
                                             256,
                                             base_lr=.050)
    self.assertEqual(lr, .050)

  def testGetScaledBaseLearningRateOneGpu(self):
    lr = self._get_scaled_base_learning_rate(1, 'parameter_server', 128)
    self.assertEqual(lr, .064)

  def testGetScaledBaseLearningRateEightGpuReplicated(self):
    lr = self._get_scaled_base_learning_rate(8, 'replicated', 256 * 8)
    self.assertEqual(lr, .128)

  def testGetScaledBaseLearningRateTwoGpuParameter(self):
    lr = self._get_scaled_base_learning_rate(2, 'parameter_server', 256 * 2)
    self.assertEqual(lr, .256)

  def testGetScaledBaseLearningRateTwoGpuUneven(self):
    lr = self._get_scaled_base_learning_rate(2, 'replicated', 13)
    self.assertEqual(lr, 0.0032500000000000003)

  def _get_scaled_base_learning_rate(self,
                                     num_gpus,
                                     variable_update,
                                     batch_size,
                                     base_lr=None):
    """Simplifies testing different learning rate calculations.

    Args:
      num_gpus: Number of GPUs to be used.
      variable_update: Type of variable update used.
      batch_size: Total batch size.
      base_lr: Base learning rate before scaling.

    Returns:
      Base learning rate that would be used to create lr schedule.
    """
    params = mock.Mock()
    params.num_gpus = num_gpus
    params.variable_update = variable_update
    # resnet_base_lr is only set explicitly when a base rate was requested.
    if base_lr:
      params.resnet_base_lr = base_lr
    resnet50_model = resnet_model.ResnetModel('resnet50', 50, params=params)
    return resnet50_model.get_scaled_base_learning_rate(batch_size)
class Cifar10CnnBenchmark():
  """Benchmarks a small CNN on randomly generated CIFAR10-shaped data."""

  def __init__(self):
    # Identification and configuration values; total_time is filled in by
    # run_benchmark().
    self.test_name = "cifar10_cnn"
    self.sample_type = "images"
    self.total_time = 0
    self.batch_size = 32
    self.epochs = 2
    self.num_samples = 1000

  def run_benchmark(self, gpus=0):
    """Builds, compiles and fits the model, recording epoch timings.

    Args:
      gpus: Number of GPUs to use. Values greater than 1 enable the
        multi-GPU path for the tensorflow and cntk backends.

    Side effects:
      Sets self.total_time to the sum of the recorded epoch times, excluding
      the first epoch.
    """
    num_classes = 10
    backend = keras.backend.backend()

    # Generate random input data
    input_shape = (self.num_samples, 3, 32, 32)
    x_train, y_train = generate_img_input_data(input_shape)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_train = keras.utils.to_categorical(y_train, num_classes)

    # Data is generated channels-first; move channels last when required.
    if keras.backend.image_data_format() == 'channels_last':
      x_train = x_train.transpose(0, 2, 3, 1)

    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=x_train.shape[1:], activation='relu'))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    # Compare by value: `is` tests object identity, which is not guaranteed
    # for equal strings and could silently skip the multi-GPU path.
    if backend == "tensorflow" and gpus > 1:
      model = multi_gpu_model(model, gpus=gpus)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    x_train = x_train.astype('float32')
    x_train /= 255

    # create a distributed trainer for cntk
    if backend == "cntk" and gpus > 1:
      start, end = cntk_gpu_mode_config(model, x_train.shape[0])
      x_train = x_train[start: end]
      y_train = y_train[start: end]

    time_callback = timehistory.TimeHistory()

    model.fit(x_train,
              y_train,
              batch_size=self.batch_size,
              epochs=self.epochs,
              shuffle=True,
              callbacks=[time_callback])

    # The first epoch (index 0) is left out of the total.
    self.total_time = sum(
        time_callback.times[i] for i in range(1, self.epochs))
class AlexnetModel(model.CNNModel):
  """Alexnet cnn model."""

  def __init__(self, params=None):
    # The extra 3 accommodates the 'VALID' first convolution below.
    image_size = 224 + 3
    super(AlexnetModel, self).__init__(
        'alexnet', image_size, 512, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the Alexnet layers to `cnn` in order."""
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(64, 11, 11, 4, 4, 'VALID')
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(192, 5, 5)
    cnn.mpool(3, 3, 2, 2)
    for depth in (384, 384, 256):
      cnn.conv(depth, 3, 3)
    cnn.mpool(3, 3, 2, 2)
    cnn.reshape([-1, 256 * 6 * 6])
    # Two affine layers of width 4096, each followed by dropout.
    for _ in range(2):
      cnn.affine(4096)
      cnn.dropout()


class AlexnetCifar10Model(model.CNNModel):
  """Alexnet cnn model for cifar datasets.

  The model architecture follows the one defined in the tensorflow tutorial
  model.

  Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
  Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
  """

  def __init__(self, params=None):
    super(AlexnetCifar10Model, self).__init__(
        'alexnet', 32, 128, 0.1, params=params)

  def add_inference(self, cnn):
    """Adds the cifar10 Alexnet layers to `cnn` in order."""
    cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Flatten every dimension after the first into one.
    dims = cnn.top_layer.get_shape().as_list()
    flat_dim = dims[1] * dims[2] * dims[3]
    cnn.reshape([-1, flat_dim])
    for width in (384, 192):
      cnn.affine(width, stddev=0.04, bias=0.1)

  def get_learning_rate(self, global_step, batch_size):
    """Returns a staircase exponential decay of self.learning_rate.

    The rate is multiplied by 0.1 every 100 epochs, where an epoch is taken
    to be 50000 examples.
    """
    num_examples_per_epoch = 50000
    num_epochs_per_decay = 100
    examples_per_decay = num_epochs_per_decay * num_examples_per_epoch
    decay_steps = examples_per_decay // batch_size
    return tf.train.exponential_decay(
        self.learning_rate, global_step, decay_steps, 0.1, staircase=True)
class ImagenetResnetModel(model_lib.CNNModel):
  """Wrapper around the official resnet ImageNet models."""

  def __init__(self, resnet_size, version=2, params=None):
    """These are the parameters that work for Imagenet data.

    Args:
      resnet_size: The number of convolutional layers needed in the model.
      version: 1 or 2 for v1 or v2, respectively.
      params: params passed by BenchmarkCNN.
    """
    # Default batch size per resnet size; any other size falls back to 32.
    batch_size = {50: 128, 101: 32, 152: 32}.get(resnet_size, 32)
    default_learning_rate = 0.0125 * batch_size / 32
    super(ImagenetResnetModel, self).__init__(
        'official_resnet_{}_v{}'.format(resnet_size, version),
        224, batch_size, default_learning_rate, params=params)
    self.resnet_size = resnet_size
    self.version = version

  def get_learning_rate(self, global_step, batch_size):
    """Returns a piecewise-constant learning rate schedule.

    The rate is scaled by batch_size / default_batch_size and drops by a
    factor of 10 at epochs 30, 60, 80 and 90.
    """
    num_batches_per_epoch = (
        float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
    boundaries = [
        int(num_batches_per_epoch * epoch) for epoch in [30, 60, 80, 90]]
    adjusted_learning_rate = (
        self.learning_rate / self.default_batch_size * batch_size)
    values = [
        factor * adjusted_learning_rate
        for factor in [1, 0.1, 0.01, 0.001, 0.0001]]
    return tf.train.piecewise_constant(global_step, boundaries, values)

  def build_network(self, images, phase_train=True, nclass=1001,
                    data_type=tf.float32):
    """Builds the official ImagenetModel and returns float32 logits.

    Raises:
      ImportError: If the official resnet package cannot be imported.
    """
    # pylint: disable=g-import-not-at-top
    try:
      from official.resnet.imagenet_main import ImagenetModel
    except ImportError:
      tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
      raise
    cast_images = tf.cast(images, data_type)
    # The official model dtype seems to be ignored, as the dtype it uses is
    # the dtype of the input images. Doesn't hurt to set it though.
    official_model = ImagenetModel(resnet_size=self.resnet_size,
                                   resnet_version=self.version,
                                   dtype=data_type)
    logits = tf.cast(official_model(cast_images, phase_train), tf.float32)
    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
74 | dtype=data_type) 75 | logits = model_class(images, phase_train) 76 | logits = tf.cast(logits, tf.float32) 77 | return model_lib.BuildNetworkResult(logits=logits, extra_info=None) 78 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # tf_cnn_benchmarks: High performance benchmarks 2 | 3 | tf_cnn_benchmarks contains implementations of several popular convolutional 4 | models, and is designed to be as fast as possible. tf_cnn_benchmarks supports 5 | both running on a single machine or running in distributed mode across multiple 6 | hosts. See the [High-Performance models 7 | guide](https://www.tensorflow.org/performance/performance_models) for more 8 | information. 9 | 10 | These models utilize many of the strategies in the [TensorFlow Performance 11 | Guide](https://www.tensorflow.org/performance/performance_guide). Benchmark 12 | results can be found [here](https://www.tensorflow.org/performance/benchmarks). 13 | 14 | These models are designed for performance. For models that have clean and 15 | easy-to-read implementations, see the [TensorFlow Official 16 | Models](https://github.com/tensorflow/models/tree/master/official). 17 | 18 | ## Getting Started 19 | 20 | To run ResNet50 with synthetic data without distortions with a single GPU, run 21 | 22 | ``` 23 | python tf_cnn_benchmarks.py --num_gpus=1 --batch_size=32 --model=resnet50 --variable_update=parameter_server 24 | ``` 25 | 26 | Note that the master branch of tf_cnn_benchmarks requires the latest nightly 27 | version of TensorFlow. You can install the nightly version by running `pip 28 | install tf-nightly-gpu` in a clean environment, or by installing TensorFlow from 29 | source. 
We sometimes create a branch of tf_cnn_benchmarks, in the form of 30 | cnn_tf_vX.Y_compatible, that is compatible with TensorFlow version X.Y. For 31 | example, branch 32 | [cnn_tf_v1.9_compatible](https://github.com/tensorflow/benchmarks/tree/cnn_tf_v1.9_compatible/scripts/tf_cnn_benchmarks) 33 | works with TensorFlow 1.9. 34 | 35 | Some important flags are: 36 | 37 | * model: Model to use, e.g. resnet50, inception3, vgg16, and alexnet. 38 | * num_gpus: Number of GPUs to use. 39 | * data_dir: Path to data to process. If not set, synthetic data is used. To 40 | use ImageNet data, use these 41 | [instructions](https://github.com/tensorflow/models/tree/master/research/inception#getting-started) 42 | as a starting point. 43 | * batch_size: Batch size for each GPU. 44 | * variable_update: The method for managing variables: parameter_server, 45 | replicated, distributed_replicated, or independent. 46 | * local_parameter_device: Device to use as parameter server: cpu or gpu. 47 | 48 | To see the full list of flags, run `python tf_cnn_benchmarks.py --help`. 49 | 50 | To run ResNet50 with real data with 8 GPUs, run: 51 | 52 | ``` 53 | python tf_cnn_benchmarks.py --data_format=NCHW --batch_size=256 \ 54 | --model=resnet50 --optimizer=momentum --variable_update=replicated \ 55 | --nodistortions --gradient_repacking=8 --num_gpus=8 \ 56 | --num_epochs=90 --weight_decay=1e-4 --data_dir=${DATA_DIR} --use_fp16 \ 57 | --train_dir=${CKPT_DIR} 58 | ``` 59 | This will train a ResNet-50 model on ImageNet with a total batch size of 2048 60 | (256 per GPU) on 8 GPUs. The model should train to around 76% accuracy. 61 | 62 | ## Running the tests 63 | 64 | To run the tests, run 65 | 66 | ```bash 67 | pip install portpicker 68 | python run_tests.py && python run_tests.py --run_distributed_tests 69 | ``` 70 | 71 | Note the tests require portpicker. 72 | 73 | The command above runs a subset of tests that is both fast and fairly 74 | comprehensive. 
Alternatively, all the tests can be run, but this will take a 75 | long time: 76 | 77 | ```bash 78 | python run_tests.py --full_tests && python run_tests.py --full_tests --run_distributed_tests 79 | ``` 80 | 81 | We will run all tests on every PR before merging them, so it is not necessary 82 | to pass `--full_tests` when running tests yourself. 83 | 84 | To run an individual test, such as method `testParameterServer` of test class 85 | `TfCnnBenchmarksTest` of module `benchmark_cnn_test`, run 86 | 87 | ```bash 88 | python -m unittest -v benchmark_cnn_test.TfCnnBenchmarksTest.testParameterServer 89 | ``` 90 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/flags.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains functions to define flags and params. 16 | 17 | Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_specs 18 | dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates 19 | a command-line flag for every ParamSpec defined by a DEFINE_* function. 20 | 21 | The reason we don't use absl flags directly is that we want to be able to use 22 | tf_cnn_benchmarks as a library. 
def define_flags(specs=None):
  """Define a command line flag for each ParamSpec in flags.param_specs.

  Args:
    specs: Optional dict mapping a flag name to its ParamSpec. When None, the
      module-level param_specs dict is used. An explicitly passed empty dict
      is honored and defines no flags.

  Raises:
    ValueError: A ParamSpec has a flag_type that has no corresponding absl
      DEFINE_* function.
  """
  # Compare against None rather than relying on truthiness, so that a caller
  # passing an empty dict does not silently get every global flag defined.
  if specs is None:
    specs = param_specs
  define_flag = {
      'boolean': absl_flags.DEFINE_boolean,
      'float': absl_flags.DEFINE_float,
      'integer': absl_flags.DEFINE_integer,
      'string': absl_flags.DEFINE_string,
      'enum': absl_flags.DEFINE_enum,
      'list': absl_flags.DEFINE_list
  }
  for name, param_spec in six.iteritems(specs):
    if param_spec.flag_type not in define_flag:
      raise ValueError('Unknown flag_type %s' % param_spec.flag_type)
    # kwargs carries the type-specific extras recorded by the DEFINE_*
    # wrappers (bounds for numeric flags, enum_values for enum flags).
    define_flag[param_spec.flag_type](name, param_spec.default_value,
                                      help=param_spec.description,
                                      **param_spec.kwargs)
class DensenetCifar10Model(model_lib.CNNModel):
  """DenseNet model configuration built from three groups of dense blocks."""

  def __init__(self, model, layer_counts, growth_rate, params=None):
    """Stores the growth rate and forwards the fixed settings to CNNModel.

    Args:
      model: Model name passed to the base class.
      layer_counts: Sequence whose first three entries give the number of
        dense blocks in each of the three groups.
      growth_rate: Number of channels every dense block adds.
      params: Optional benchmark parameters passed to the base class.
    """
    self.growth_rate = growth_rate
    super(DensenetCifar10Model, self).__init__(
        model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
    self.batch_norm_config = dict(decay=0.9, epsilon=1e-5, scale=True)

  @staticmethod
  def _channel_index(cnn):
    """Returns the index of the channel axis for the builder's data layout."""
    if cnn.channel_pos == 'channels_last':
      return 3
    return 1

  def dense_block(self, cnn, growth_rate):
    """Appends batch norm -> ReLU -> conv and concatenates with the input.

    Args:
      cnn: Network builder whose top_layer/top_size are updated in place.
      growth_rate: Number of channels produced by the convolution and added
        to cnn.top_size.
    """
    block_input = cnn.top_layer
    normalized = cnn.batch_norm(block_input, **self.batch_norm_config)
    activated = tf.nn.relu(normalized)
    new_features = cnn.conv(
        growth_rate, 3, 3, 1, 1, stddev=np.sqrt(2.0 / 9 / growth_rate),
        activation=None, input_layer=activated)
    # The block output is its input with the new features stacked on the
    # channel axis.
    cnn.top_layer = tf.concat([block_input, new_features],
                              self._channel_index(cnn))
    cnn.top_size += growth_rate

  def transition_layer(self, cnn):
    """Appends batch norm -> ReLU -> conv -> cnn.apool between groups."""
    num_channels = cnn.top_size
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    # The convolution keeps the channel count unchanged.
    cnn.conv(num_channels, 1, 1, 1, 1,
             stddev=np.sqrt(2.0 / 9 / num_channels))
    cnn.apool(2, 2, 2, 2)

  def add_inference(self, cnn):
    """Builds the network on `cnn`.

    Raises:
      ValueError: layer_counts or growth_rate is None.
    """
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' % self.get_model())
    if self.growth_rate is None:
      raise ValueError('Growth rate not specified for %s' % self.get_model())

    cnn.conv(16, 3, 3, 1, 1, activation=None)
    final_group = 2
    for group in xrange(final_group + 1):
      for _ in xrange(self.layer_counts[group]):
        self.dense_block(cnn, self.growth_rate)
      # A transition layer separates groups; none follows the last group.
      if group != final_group:
        self.transition_layer(cnn)
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    # Re-read the channel count from the tensor's static shape.
    top_shape = cnn.top_layer.get_shape().as_list()
    cnn.top_size = top_shape[self._channel_index(cnn)]
    cnn.spatial_mean()

  def get_learning_rate(self, global_step, batch_size):
    """Returns a learning rate that drops 10x at epochs 150, 225 and 300.

    Epochs are converted to steps assuming 50000 examples per epoch.
    """
    steps_per_epoch = 50000 // batch_size
    drop_epochs = np.array([150, 225, 300], dtype=np.int64)
    boundaries = list(steps_per_epoch * drop_epochs)
    values = [0.1, 0.01, 0.001, 0.0001]
    return tf.train.piecewise_constant(global_step, boundaries, values)
15 | total_time: Time taken to run the given number of epochs. 16 | epochs: Total number of epochs for which the given benchmark was run. 17 | We don't count the first epoch since some amount of time is 18 | spent creating the graph. 19 | batch_size: Batch size of samples used in a given epoch. 20 | backend_type: Backend type used by the Keras models. This is either 21 | "tensorflow", "cntk" or "theano". 22 | backend_version: This is the version of "tensorflow", "cntk" or "theano" 23 | used by Keras. 24 | cpu_num_cores: Number of CPU cores of the machine on which the Keras 25 | benchmark is run. 26 | cpu_memory: RAM memory specs of the CPU. 27 | cpu_memory_info: This is the memory unit of the CPU memory such as 28 | 'GB'. 29 | gpu_count: Number of GPUs used to run the benchmarks. 30 | gpu_platform: The type of GPU used, e.g. "Nvidia Tesla K80". 31 | platform_type: This is the local or cloud platform used to run the 32 | benchmarks. 33 | platform_machine_type: This can be details about the machine 34 | type. 35 | keras_version: Version of Keras used to run the benchmark model. 36 | sample_type: This is a user specified string used to calculate metrics such 37 | as "images per epoch" etc. 
38 | """ 39 | bigquery_client = bigquery.Client() 40 | dataset = bigquery_client.dataset('keras_benchmarks') 41 | table = dataset.table('benchmarks') 42 | table.reload() 43 | 44 | query = """\ 45 | INSERT keras_benchmarks.benchmarks (test_id,test_name,recorded_time,\ 46 | metrics,keras_backend,cpu_info,platform_info,keras_version,gpu_info) \ 47 | VALUES(@testid,@testname,CURRENT_TIMESTAMP(),\ 48 | (@metrics_totaltime,@metrics_epochs,@metrics_batch_size,@metrics_sampletype),\ 49 | (@keras_backend_type, @keras_backend_version),\ 50 | (@cpu_info_numcores,@cpu_info_memory, @cpu_info_memory_units),\ 51 | (@platform_info_type,@platform_info_machine_type),\ 52 | @keras_version,\ 53 | (@gpu_info_count,@gpu_info_platform)) 54 | """ 55 | test_id = uuid.uuid4().int >> 80 56 | query_job = bigquery_client.run_async_query( 57 | str(uuid.uuid4()), 58 | query, 59 | query_parameters=( 60 | bigquery.ScalarQueryParameter('testid', 'INTEGER', test_id), 61 | bigquery.ScalarQueryParameter('testname', 'STRING', test_name), 62 | bigquery.ScalarQueryParameter('metrics_totaltime', 'FLOAT', total_time), 63 | bigquery.ScalarQueryParameter('metrics_epochs', 'INTEGER', epochs), 64 | bigquery.ScalarQueryParameter('metrics_batch_size', 'INTEGER', batch_size), 65 | bigquery.ScalarQueryParameter('metrics_sampletype', 'STRING', sample_type), 66 | bigquery.ScalarQueryParameter('keras_backend_type', 'STRING', backend_type), 67 | bigquery.ScalarQueryParameter('keras_backend_version', 'STRING', backend_version), 68 | bigquery.ScalarQueryParameter('cpu_info_numcores', 'FLOAT', cpu_num_cores), 69 | bigquery.ScalarQueryParameter('cpu_info_memory', 'FLOAT', cpu_memory), 70 | bigquery.ScalarQueryParameter('cpu_info_memory_units', 'STRING', cpu_memory_info), 71 | bigquery.ScalarQueryParameter('platform_info_type', 'STRING', platform_type), 72 | bigquery.ScalarQueryParameter('platform_info_machine_type', 'STRING', platform_machine_type), 73 | bigquery.ScalarQueryParameter('keras_version', 'STRING', 
keras_version), 74 | bigquery.ScalarQueryParameter('gpu_info_count', 'FLOAT', gpu_count), 75 | bigquery.ScalarQueryParameter('gpu_info_platform', 'STRING', gpu_platform))) 76 | 77 | query_job.use_legacy_sql = False 78 | 79 | query_job.begin() 80 | query_job.result() 81 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/run_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Runs the tf_cnn_benchmarks tests.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import sys 22 | import unittest 23 | 24 | from absl import app 25 | from absl import flags as absl_flags 26 | 27 | import all_reduce_benchmark_test 28 | import allreduce_test 29 | import benchmark_cnn_distributed_test 30 | import benchmark_cnn_test 31 | import cnn_util_test 32 | import variable_mgr_util_test 33 | from models import nasnet_test 34 | 35 | 36 | # Ideally, we wouldn't need this option, and run both distributed tests and non- 37 | # distributed tests. But, TensorFlow allocates all the GPU memory by default, so 38 | # the non-distributed tests allocate all the GPU memory. 
# Distributed and non-distributed tests are selected by a flag instead of
# being run together. TensorFlow allocates all the GPU memory by default, so
# once a non-distributed test has run in this process, the processes spawned
# by the distributed tests have no GPU memory left to allocate and fail.
absl_flags.DEFINE_boolean('run_distributed_tests', False,
                          'If True, run the distributed tests. If False, the '
                          'non-distributed tests.')

absl_flags.DEFINE_boolean('full_tests', False,
                          'If True, all distributed or non-distributed tests '
                          'are run, which can take hours. If False, only a '
                          'subset of tests will be run. This subset runs much '
                          'faster and tests almost all the functionality as '
                          'the full set of tests, so it is recommended to keep '
                          'this option set to False.')

FLAGS = absl_flags.FLAGS


def main(_):
  """Builds the test suites selected by the flags, runs one, and exits.

  --full_tests chooses between the complete suites and a faster subset.
  --run_distributed_tests chooses whether the distributed or the
  non-distributed suite is executed. The process exit status is 0 when the
  executed suite succeeds and 1 otherwise.
  """
  loader = unittest.defaultTestLoader
  if FLAGS.full_tests:
    suite = unittest.TestSuite([
        loader.loadTestsFromModule(allreduce_test),
        loader.loadTestsFromModule(cnn_util_test),
        loader.loadTestsFromModule(variable_mgr_util_test),
        loader.loadTestsFromModule(benchmark_cnn_test),
        loader.loadTestsFromModule(all_reduce_benchmark_test),
        loader.loadTestsFromModule(nasnet_test),
    ])
    dist_suite = unittest.TestSuite([
        loader.loadTestsFromModule(benchmark_cnn_distributed_test),
    ])
  else:
    suite = unittest.TestSuite([
        loader.loadTestsFromModule(allreduce_test),
        loader.loadTestsFromModule(cnn_util_test),
        loader.loadTestsFromModule(all_reduce_benchmark_test),
        loader.loadTestsFromModule(variable_mgr_util_test),
        loader.loadTestsFromTestCase(benchmark_cnn_test.TestAlexnetModel),
        loader.loadTestsFromTestCase(benchmark_cnn_test.TfCnnBenchmarksTest),
        loader.loadTestsFromTestCase(benchmark_cnn_test.VariableUpdateTest),
        loader.loadTestsFromTestCase(
            benchmark_cnn_test.VariableMgrLocalReplicatedTest),
    ])
    dist_suite = unittest.TestSuite([
        loader.loadTestsFromNames([
            'benchmark_cnn_distributed_test.DistributedVariableUpdateTest'
            '.testVarUpdateDefault',

            'benchmark_cnn_distributed_test.TfCnnBenchmarksDistributedTest'
            '.testParameterServer',
        ]),
    ])

  if FLAGS.run_distributed_tests:
    print('Running distributed tests')
    selected_suite = dist_suite
  else:
    print('Running non-distributed tests')
    selected_suite = suite
  result = unittest.TextTestRunner(verbosity=2).run(selected_suite)
  # sys.exit(False) exits with status 0 and sys.exit(True) with status 1.
  sys.exit(not result.wasSuccessful())


if __name__ == '__main__':
  app.run(main)
# ==============================================================================
# == Model =====================================================================
# ==============================================================================
IMAGE_SIZE = 300

# TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90)
# Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to
#                      be resolved.
NUM_CLASSES = 81  # Including "no class". Not all COCO classes are used.

# CLASS_INV_MAP[i] is the original label id stored at contiguous index i.
# Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero.
CLASS_INV_MAP = (
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87,
    88, 89, 90)
# Inverse lookup: original label id -> contiguous index.
_MAP = dict(zip(CLASS_INV_MAP, range(len(CLASS_INV_MAP))))
# CLASS_MAP[label_id] is the contiguous index, or -1 for ids that are unused.
CLASS_MAP = tuple(
    _MAP.get(label_id, -1) for label_id in range(max(CLASS_INV_MAP) + 1))

NUM_SSD_BOXES = 8732

RESNET_DEPTH = 34

# SSD specific
MIN_LEVEL = 3
MAX_LEVEL = 8

FEATURE_SIZES = (38, 19, 10, 5, 3, 1)
STEPS = (8, 16, 32, 64, 100, 300)

# https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
SCALES = (21, 45, 99, 153, 207, 261, 315)
ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))
NUM_DEFAULTS = (4, 6, 6, 6, 4, 4)
# Same counts as NUM_DEFAULTS, keyed by level MIN_LEVEL..MAX_LEVEL.
NUM_DEFAULTS_BY_LEVEL = dict(zip(range(MIN_LEVEL, MAX_LEVEL + 1), NUM_DEFAULTS))
SCALE_XY = 0.1
SCALE_HW = 0.2
BOX_CODER_SCALES = (1 / SCALE_XY,) * 2 + (1 / SCALE_HW,) * 2
MATCH_THRESHOLD = 0.5

# https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683
NORMALIZATION_MEAN = (0.485, 0.456, 0.406)
NORMALIZATION_STD = (0.229, 0.224, 0.225)

# SSD Cropping
NUM_CROP_PASSES = 50
CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9)
# One extra outcome ("no crop") is weighted equally with each IOU choice.
P_NO_CROP_PER_PASS = 1.0 / (len(CROP_MIN_IOU_CHOICES) + 1)

# Hard example mining
NEGS_PER_POSITIVE = 3

# Batch normalization
BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-4


# ==============================================================================
# == Optimizer =================================================================
# ==============================================================================
# Pairs of (step, learning rate).
LEARNING_RATE_SCHEDULE = (
    (0, 1e-3),
    (160000, 1e-4),
    (200000, 1e-5),
)
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4


# ==============================================================================
# == Keys ======================================================================
# ==============================================================================
BOXES = "boxes"
CLASSES = "classes"
NUM_MATCHED_BOXES = "num_matched_boxes"
IMAGE = "image"
SOURCE_ID = "source_id"
RAW_SHAPE = "raw_shape"
PRED_BOXES = "pred_boxes"
PRED_SCORES = "pred_scores"


# ==============================================================================
# == Evaluation ================================================================
# ==============================================================================

# Note: This is based on a batch size of 32
#   https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37
CHECKPOINT_FREQUENCY = 20000
MAX_NUM_EVAL_BOXES = 200
OVERLAP_CRITERIA = 0.5  # Used for nonmax suppression
MIN_SCORE = 0.05  # Minimum score to be considered during evaluation.
DUMMY_SCORE = -1e5  # If no boxes are matched.

ANNOTATION_FILE = "annotations/instances_val2017.json"
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952
14 | # ============================================================================== 15 | 16 | """Tests for tf_cnn_benchmarks.cnn_util.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import threading 23 | import time 24 | 25 | import tensorflow as tf 26 | 27 | import cnn_util 28 | 29 | 30 | class CnnUtilBarrierTest(tf.test.TestCase): 31 | 32 | def testBarrier(self): 33 | num_tasks = 20 34 | num_waits = 4 35 | barrier = cnn_util.Barrier(num_tasks) 36 | threads = [] 37 | sync_matrix = [] 38 | for i in range(num_tasks): 39 | sync_times = [0] * num_waits 40 | thread = threading.Thread( 41 | target=self._run_task, args=(barrier, sync_times)) 42 | thread.start() 43 | threads.append(thread) 44 | sync_matrix.append(sync_times) 45 | for thread in threads: 46 | thread.join() 47 | for wait_index in range(num_waits - 1): 48 | # Max of times at iteration i < min of times at iteration i + 1 49 | self.assertLessEqual( 50 | max([sync_matrix[i][wait_index] for i in range(num_tasks)]), 51 | min([sync_matrix[i][wait_index + 1] for i in range(num_tasks)])) 52 | 53 | def _run_task(self, barrier, sync_times): 54 | for wait_index in range(len(sync_times)): 55 | sync_times[wait_index] = time.time() 56 | barrier.wait() 57 | 58 | def testBarrierAbort(self): 59 | num_tasks = 2 60 | num_waits = 1 61 | sync_times = [0] * num_waits 62 | barrier = cnn_util.Barrier(num_tasks) 63 | thread = threading.Thread( 64 | target=self._run_task, args=(barrier, sync_times)) 65 | thread.start() 66 | barrier.abort() 67 | # thread won't be blocked by done barrier. 
class ImageProducerTest(tf.test.TestCase):
  """Checks the number of batches cnn_util.ImageProducer keeps staged."""

  def _slow_tensorflow_op(self):
    """Returns a TensorFlow op that takes approximately 0.1s to complete."""
    def sleep_then_return(value):
      time.sleep(0.1)
      return value
    delayed = tf.py_func(sleep_then_return, [tf.constant(0.)], tf.float32)
    return delayed.op

  def _test_image_producer(self, batch_group_size, put_slower_than_get):
    """Runs an ImageProducer against a counter standing in for a staging area.

    Args:
      batch_group_size: Number of batches added by every put; also passed to
        the ImageProducer.
      put_slower_than_get: If True the put op is delayed by the slow op,
        otherwise the get op is.
    """
    # `staged` simulates a staging area of images: its value is the number of
    # batches currently in the staging area.
    staged = tf.Variable(0, dtype=tf.int32)
    if put_slower_than_get:
      put_dep = self._slow_tensorflow_op()
      get_dep = tf.no_op()
    else:
      put_dep = tf.no_op()
      get_dep = self._slow_tensorflow_op()
    with tf.control_dependencies([put_dep]):
      put_op = staged.assign_add(batch_group_size, use_locking=True)
    with tf.control_dependencies([get_dep]):
      get_op = staged.assign_sub(1, use_locking=True)

    def assert_staged_within_bounds(sess):
      # The count must be nonnegative, to ensure image_producer never causes
      # an unstage op to block, and at most 2 * batch_group_size, to ensure it
      # doesn't use too much memory by storing too many batches.
      self.assertGreaterEqual(sess.run(staged), 0)
      self.assertLessEqual(sess.run(staged), 2 * batch_group_size)

    with self.test_session() as sess:
      sess.run(tf.variables_initializer([staged]))
      image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
                                              use_python32_barrier=False)
      image_producer.start()
      for _ in range(5 * batch_group_size):
        sess.run(get_op)
        assert_staged_within_bounds(sess)
        image_producer.notify_image_consumption()
        assert_staged_within_bounds(sess)

      image_producer.done()
      time.sleep(0.1)
      assert_staged_within_bounds(sess)

  def test_image_producer(self):
    """Covers group sizes 1, 2, 3 and 8 with either side being the slow one."""
    for batch_group_size in (1, 2, 3, 8):
      for put_slower_than_get in (False, True):
        self._test_image_producer(batch_group_size, put_slower_than_get)
def get_test_image_preprocessor(batch_size, params):
  """Returns the preprocessing.TestImagePreprocessor that should be injected.

  Returns None if no preprocessor should be injected.

  With --fake_input=zeros_and_ones, the first batch_size // 2 images are all
  0s with a label of 0 and the remaining images are all 1s with a label of 1,
  so the number of labels always equals the number of images, including when
  batch_size is odd.

  Args:
    batch_size: The batch size across all GPUs.
    params: BenchmarkCNN's parameters.
  Returns:
    Returns the preprocessing.TestImagePreprocessor that should be injected.
  Raises:
    ValueError: Flag --fake_input is an invalid value.
  """
  if FLAGS.fake_input == 'none':
    return None
  if FLAGS.fake_input != 'zeros_and_ones':
    raise ValueError('Invalid --fake_input: %s' % FLAGS.fake_input)

  num_zero_images = batch_size // 2
  # Derive the count of 1-images from the remainder rather than reusing the
  # half size, otherwise an odd batch size yields one label fewer than images.
  num_one_images = batch_size - num_zero_images
  images = np.zeros((batch_size, 227, 227, 3), dtype=np.float32)
  images[num_zero_images:, :, :, :] = 1
  labels = np.array([0] * num_zero_images + [1] * num_one_images,
                    dtype=np.int32)
  preprocessor = preprocessing.TestImagePreprocessor(
      batch_size, [227, 227, 3], params.num_gpus,
      benchmark_cnn.get_data_type(params))
  preprocessor.set_fake_data(images, labels)
  preprocessor.expected_subset = 'validation' if params.eval else 'train'
  return preprocessor
def main(_):
  """Builds the benchmark params from flags and dispatches to a runner.

  The special model name 'test_model' selects the test-model runner; every
  other model name goes through the real-model runner.
  """
  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  if params.model != 'test_model':
    run_with_real_model(params)
    return
  run_with_test_model(params)
class VariableMgrUtilTest(tf.test.TestCase):
  """Tests for the loss-scale helpers in variable_mgr_util."""

  def _run_loss_scale_update(self, normal_steps):
    """Builds and runs get_loss_scale_update_op once.

    The loss scale starts at 4 and inc_loss_scale_every_n is 10.

    Args:
      normal_steps: Initial value of the loss_scale_normal_steps variable.
    Returns:
      A (loss_scale, loss_scale_normal_steps) tuple read after the update.
    """
    scale_var = tf.Variable(4)
    steps_var = tf.Variable(normal_steps)
    update_op = variable_mgr_util.get_loss_scale_update_op(
        scale_var, steps_var, 10)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(update_op)
      return sess.run(scale_var), sess.run(steps_var)

  def _run_append_gradients(self, enable_auto_loss_scale, is_chief,
                            make_grad_has_inf_nan, loss_scale_dtype=None):
    """Builds and runs append_gradients_with_loss_scale once.

    The "apply gradients" function simply increments a counter variable, the
    loss scale starts at 4, loss_scale_normal_steps starts at 10 and
    inc_loss_scale_every_n is 10.

    Args:
      enable_auto_loss_scale: Forwarded to AutoLossScaleParams.
      is_chief: Forwarded to AutoLossScaleParams.
      make_grad_has_inf_nan: Zero-argument callable producing the
        grad_has_inf_nan argument. It is a callable so that any tensor it
        builds is created after the variables.
      loss_scale_dtype: Optional dtype of the loss scale variable.
    Returns:
      A (counter, loss_scale, loss_scale_normal_steps) tuple read after the
      training ops ran.
    """
    counter = tf.Variable(0)
    ops_to_run = []
    apply_gradients = lambda: [tf.assign(counter, counter + 1)]
    scale_params = variable_mgr_util.AutoLossScaleParams(
        enable_auto_loss_scale=enable_auto_loss_scale,
        loss_scale=tf.Variable(4, dtype=loss_scale_dtype),
        loss_scale_normal_steps=tf.Variable(10),
        inc_loss_scale_every_n=10,
        is_chief=is_chief)
    variable_mgr_util.append_gradients_with_loss_scale(
        ops_to_run,
        apply_gradients,
        scale_params,
        grad_has_inf_nan=make_grad_has_inf_nan())

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(ops_to_run)
      return (sess.run(counter),
              sess.run(scale_params.loss_scale),
              sess.run(scale_params.loss_scale_normal_steps))

  def testGetLossScaleUpdateOpTruePath(self):
    # loss_scale_normal_steps >= inc_loss_scale_every_n
    scale, steps = self._run_loss_scale_update(normal_steps=10)
    self.assertEqual(scale, 8)
    self.assertEqual(steps, 0)

  def testGetLossScaleUpdateOpFalsePath(self):
    # loss_scale_normal_steps < inc_loss_scale_every_n
    scale, steps = self._run_loss_scale_update(normal_steps=9)
    self.assertEqual(scale, 4)
    self.assertEqual(steps, 10)

  def testAppendGradientsWithLossScaleWithAutoScaleDisabled(self):
    counter, scale, steps = self._run_append_gradients(
        enable_auto_loss_scale=False,  # no auto loss scale.
        is_chief=True,
        make_grad_has_inf_nan=lambda: True)
    self.assertEqual(counter, 1)
    self.assertEqual(scale, 4)
    self.assertEqual(steps, 10)

  def testAppendGradientsWithLossScaleForNonChiefWorker(self):
    counter, scale, steps = self._run_append_gradients(
        enable_auto_loss_scale=True,
        is_chief=False,  # Non-chief
        make_grad_has_inf_nan=lambda: False)
    self.assertEqual(counter, 1)
    self.assertEqual(scale, 4)
    self.assertEqual(steps, 10)

  def testAppendGradientsWithLossScaleWithoutNan(self):
    counter, scale, steps = self._run_append_gradients(
        enable_auto_loss_scale=True,
        is_chief=True,
        make_grad_has_inf_nan=lambda: tf.constant(False),
        loss_scale_dtype=tf.float32)
    self.assertEqual(counter, 1)
    self.assertEqual(scale, 8)
    self.assertEqual(steps, 0)

  def testAppendGradientsWithLossScaleWithtNan(self):
    counter, scale, steps = self._run_append_gradients(
        enable_auto_loss_scale=True,
        is_chief=True,
        make_grad_has_inf_nan=lambda: tf.constant(True),
        loss_scale_dtype=tf.float32)
    self.assertEqual(counter, 0)  # Skip updating for v.
    # halve loss_scale and reset local_scale_normal_steps.
    self.assertEqual(scale, 2)
    self.assertEqual(steps, 0)
# Maps a model name to a callable that builds the model. This is the table
# used for the 'imagenet' and 'synthetic' datasets.
_model_name_to_imagenet_model = {
    'vgg11': vgg_model.Vgg11Model,
    'vgg16': vgg_model.Vgg16Model,
    'vgg19': vgg_model.Vgg19Model,
    'lenet': lenet_model.Lenet5Model,
    'googlenet': googlenet_model.GooglenetModel,
    'overfeat': overfeat_model.OverfeatModel,
    'alexnet': alexnet_model.AlexnetModel,
    'trivial': trivial_model.TrivialModel,
    'inception3': inception_model.Inceptionv3Model,
    'inception4': inception_model.Inceptionv4Model,
}
# official_resnet<depth>_v2: the official ResNet with its default version.
_model_name_to_imagenet_model.update({
    'official_resnet%d_v2' % depth:
        partial(official_resnet_model.ImagenetResnetModel, depth)
    for depth in (18, 34, 50, 101, 152, 200)
})
# official_resnet<depth>: the same depths with version=1.
_model_name_to_imagenet_model.update({
    'official_resnet%d' % depth:
        partial(official_resnet_model.ImagenetResnetModel, depth, version=1)
    for depth in (18, 34, 50, 101, 152, 200)
})
_model_name_to_imagenet_model.update({
    'resnet50': resnet_model.create_resnet50_model,
    'resnet50_v1.5': resnet_model.create_resnet50_v1_5_model,
    'resnet50_v2': resnet_model.create_resnet50_v2_model,
    'resnet101': resnet_model.create_resnet101_model,
    'resnet101_v2': resnet_model.create_resnet101_v2_model,
    'resnet152': resnet_model.create_resnet152_model,
    'resnet152_v2': resnet_model.create_resnet152_v2_model,
    'nasnet': nasnet_model.NasnetModel,
    'nasnetlarge': nasnet_model.NasnetLargeModel,
    'mobilenet': mobilenet_v2.MobilenetModel,
    'ncf': official_ncf_model.NcfModel,
})
# Kept at module level, like the other name-to-model maps, so that entries
# added through register_model() for the 'librispeech' dataset persist. A dict
# built inside _get_model_map() would be discarded after every call.
_model_name_to_speech_model = {
    'deepspeech2': deepspeech.DeepSpeech2Model,
}


def _get_model_map(dataset_name):
  """Get name to model map for specified dataset.

  The returned dict is the module-level registry itself, not a copy, so
  callers that mutate it (e.g. to register a model) change what later lookups
  see.

  Args:
    dataset_name: One of 'cifar10', 'imagenet', 'synthetic', 'librispeech' or
      'coco'.
  Returns:
    The dict mapping model names to model constructors for that dataset.
  Raises:
    ValueError: `dataset_name` is not one of the supported names.
  """
  if dataset_name == 'cifar10':
    return _model_name_to_cifar_model
  elif dataset_name in ('imagenet', 'synthetic'):
    return _model_name_to_imagenet_model
  elif dataset_name == 'librispeech':
    return _model_name_to_speech_model
  elif dataset_name == 'coco':
    return _model_name_to_object_detection_model
  else:
    raise ValueError('Invalid dataset name: %s' % dataset_name)
145 | raise ValueError('Model "%s" is already registered for dataset "%s"' % 146 | (model_name, dataset_name)) 147 | model_map[model_name] = model_func 148 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/coco_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """COCO-style evaluation metrics. 16 | 17 | Forked from reference model implementation. 
def async_eval_runner(queue_predictions, queue_results, val_json_file):
  """Consumes prediction messages and publishes their COCO metrics.

  Blocks on `queue_predictions` until the string 'STOP' (the poison pill) is
  received. Every other message is a `(step, predictions)` pair; its metrics
  are computed with `compute_map` and pushed to `queue_results` as
  `(step, results)`.

  Args:
    queue_predictions: queue yielding `(step, predictions)` pairs or 'STOP'.
    queue_results: queue receiving `(step, results)` pairs.
    val_json_file: path to the COCO annotation file, forwarded to
      `compute_map`.
  """
  # iter(callable, sentinel) keeps calling get() until it returns 'STOP'.
  for step, predictions in iter(queue_predictions.get, 'STOP'):
    metrics = compute_map(predictions, val_json_file)
    queue_results.put((step, metrics))
63 | 64 | Args: 65 | predictions: a list of tuples returned by decoded_predictions function, 66 | each containing the following elements: 67 | image source_id, box coordinates in XYWH order, probability score, label 68 | val_json_file: path to COCO annotation file 69 | Returns: 70 | A dictionary that maps all COCO metrics (keys) to their values 71 | """ 72 | 73 | if val_json_file.startswith("gs://"): 74 | _, local_val_json = tempfile.mkstemp(suffix=".json") 75 | tf.gfile.Remove(local_val_json) 76 | 77 | tf.gfile.Copy(val_json_file, local_val_json) 78 | atexit.register(tf.gfile.Remove, local_val_json) 79 | else: 80 | local_val_json = val_json_file 81 | 82 | cocoGt = COCO(local_val_json) 83 | cocoDt = cocoGt.loadRes(np.array(predictions)) 84 | E = COCOeval(cocoGt, cocoDt, iouType='bbox') 85 | E.evaluate() 86 | E.accumulate() 87 | E.summarize() 88 | print("Current AP: {:.5f}".format(E.stats[0])) 89 | metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 90 | 'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl'] 91 | 92 | # Prefix with "COCO" to group in TensorBoard. 93 | return {"COCO/" + key: value for key, value in zip(metric_names, E.stats)} 94 | 95 | 96 | def calc_iou(target, candidates): 97 | target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1)) 98 | # Left Top & Right Bottom 99 | lt = np.maximum(target_tiled[:,:2], candidates[:,:2]) 100 | 101 | rb = np.minimum(target_tiled[:,2:], candidates[:,2:]) 102 | 103 | delta = np.maximum(rb - lt, 0) 104 | 105 | intersect = delta[:,0] * delta[:,1] 106 | 107 | delta1 = target_tiled[:,2:] - candidates[:,:2] 108 | area1 = delta1[:,0] * delta1[:,1] 109 | delta2 = target_tiled[:,2:] - candidates[:,:2] 110 | area2 = delta2[:,0] * delta2[:,1] 111 | 112 | iou = intersect/(area1 + area2 - intersect) 113 | return iou 114 | 115 | 116 | # TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow based 117 | # implementation under ssd_model.py accuracy_function. 
def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200):
  """Selects boxes for a single example with per-class NMS.

  Every score column except column 0 (treated as background) is processed
  independently: boxes scoring at most ssd_constants.MIN_SCORE are dropped,
  the `max_num` best remaining boxes are kept, and non-maximum suppression
  removes boxes whose IoU with an already selected box is >= `criteria`.

  Args:
    bboxes_in: 2-D array of boxes, one row per box.
    scores_in: 2-D array of scores, one row per box and one column per class.
    criteria: IoU threshold at or above which a box is suppressed.
    max_output: Maximum number of boxes returned across all classes.
    max_num: Maximum number of boxes considered per class before NMS.
  Returns:
    A (boxes, labels, scores) tuple of arrays holding the `max_output`
    highest-scoring survivors in ascending score order. If nothing is
    detected, a single dummy box with label 0 and score
    ssd_constants.DUMMY_SCORE is returned instead.
  """
  # Reference to https://github.com/amdegroot/ssd.pytorch

  bboxes_out = []
  scores_out = []
  labels_out = []

  for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
    # skip background
    if i == 0:
      continue

    score = np.squeeze(score, 1)
    mask = score > ssd_constants.MIN_SCORE
    if not np.any(mask):
      continue

    bboxes, score = bboxes_in[mask, :], score[mask]

    # argsort is ascending, so the best `max_num` indices are at the tail.
    score_idx_sorted = np.argsort(score)[-max_num:]
    candidates = []

    # perform non-maximum suppression
    while score_idx_sorted.size:
      # Take the best remaining box, then drop it and every box that overlaps
      # it too much.
      idx = score_idx_sorted[-1]
      iou = calc_iou(bboxes[idx, :], bboxes[score_idx_sorted, :])
      score_idx_sorted = score_idx_sorted[iou < criteria]
      candidates.append(idx)

    bboxes_out.append(bboxes[candidates, :])
    scores_out.append(score[candidates])
    labels_out.extend([i] * len(candidates))

  if not scores_out:
    tf.logging.info("No objects detected. Returning dummy values.")
    return (
        np.zeros(shape=(1, 4), dtype=np.float32),
        np.zeros(shape=(1,), dtype=np.int32),
        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
    )

  bboxes_out = np.concatenate(bboxes_out, axis=0)
  scores_out = np.concatenate(scores_out, axis=0)
  labels_out = np.array(labels_out)

  max_ids = np.argsort(scores_out)[-max_output:]

  return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
class NcfModel(model.Model):
  r"""A model.Model wrapper around the official NCF recommendation model.

  To do an NCF run with synthetic data that roughly matches what the official
  model does, run:

    python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
        --weight_decay=0 --sparse_to_dense_grads
  """

  def __init__(self, params=None):
    super(NcfModel, self).__init__(
        'official_ncf', batch_size=2048, learning_rate=0.0005,
        fp16_loss_scale=128, params=params)
    if self.fp16_vars:
      raise ValueError('NCF model only supports float32 variables for now.')

  def build_network(self, inputs, phase_train=True, nclass=1001):
    """Builds the NeuMF network from the official models repository.

    Args:
      inputs: A (users, items, labels) triple; the labels are not used here.
      phase_train: Unused.
      nclass: Unused.
    Returns:
      A model.BuildNetworkResult whose logits are float32.
    Raises:
      ImportError: The official models repository is not importable.
    """
    try:
      from official.recommendation import neumf_model  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      # Exceptions have no `.message` attribute on Python 3; str(e) works on
      # both Python 2 and Python 3.
      if 'neumf_model' not in str(e):
        raise
      raise ImportError('To use the experimental NCF model, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models to the PYTHONPATH.')
    del nclass

    users, items, _ = inputs
    params = {
        'num_users': _NUM_USERS_20M,
        'num_items': _NUM_ITEMS_20M,
        'model_layers': (256, 256, 128, 64),
        'mf_dim': 64,
        'mf_regularization': 0,
        'mlp_reg_layers': (0, 0, 0, 0),
        'use_tpu': False
    }
    if self.data_type == tf.float32:
      keras_model = neumf_model.construct_model(users, items, params)
      logits = keras_model.output
    else:
      assert self.data_type == tf.float16
      old_floatx = tf.keras.backend.floatx()
      try:
        tf.keras.backend.set_floatx('float16')
        # We cannot rely on the variable_scope's fp16 custom getter here,
        # because the NCF model uses keras layers, which ignore variable scopes.
        # So we use a variable_creator_scope instead.
        with tf.variable_creator_scope(_fp16_variable_creator):
          keras_model = neumf_model.construct_model(users, items, params)
        logits = tf.cast(keras_model.output, tf.float32)
      finally:
        # Restore the global keras float type even if model construction fails.
        tf.keras.backend.set_floatx(old_floatx)
    return model.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Returns the sparse softmax cross entropy of the logits vs. inputs[2]."""
    logits = build_network_result.logits

    # Softmax with the first column of ones is equivalent to sigmoid.
    # TODO(reedwm): Actually, the first column should be zeros to be equivalent
    # to sigmoid. But, we keep it at ones to match the official models.
    logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
                       axis=1)

    return tf.losses.sparse_softmax_cross_entropy(
        labels=inputs[2],
        logits=logits
    )

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs and labels."""

    def synthetic_variable(name, maxval):
      """Creates a local int32 variable of random values in [0, maxval)."""
      def init_val():
        return tf.random_uniform((self.batch_size,), minval=0,
                                 maxval=maxval, dtype=tf.int32)
      return tf.Variable(init_val, dtype=tf.int32, trainable=False,
                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
                         name=name)

    users = synthetic_variable('synthetic_users', _NUM_USERS_20M)
    items = synthetic_variable('synthetic_items', _NUM_ITEMS_20M)
    # Labels are binary, hence the exclusive upper bound of 2.
    labels = synthetic_variable('synthetic_labels', 2)

    return [users, items, labels]

  def get_input_shapes(self, subset):
    """Returns the shapes of the users, items and labels inputs."""
    del subset
    return [[self.batch_size], [self.batch_size], [self.batch_size]]

  def get_input_data_types(self, subset):
    """Returns the dtypes of the users, items and labels inputs."""
    del subset
    # All three inputs are int32, matching get_synthetic_inputs.
    return [tf.int32, tf.int32, tf.int32]
def find_ops(optype):
  """Collects the operations of one type from the default graph.

  Args:
    optype: operation type to match (e.g. Conv2D)
  Returns:
    List of the default graph's operations whose `type` equals `optype`.
  """
  all_ops = tf.get_default_graph().get_operations()
  return [op for op in all_ops if op.type == optype]
def testImageSizes(self):
  """Checks the spatial size of 'layer_18/output' for several input sizes."""
  expected_sizes = ((224, 7), (192, 6), (160, 5), (128, 4), (96, 3))
  for input_size, output_size in expected_sizes:
    # Every input size gets its own fresh graph.
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, (10, input_size, input_size, 3))
    _, ep = mobilenet_v2.mobilenet(images)

    spatial_dims = ep['layer_18/output'].get_shape().as_list()[1:3]
    self.assertEqual(spatial_dims, [output_size] * 2)
93 | self.assertEqual(num_convs, len(spec['spec']) * 3 - 5) 94 | 95 | def testWithOutputStride8(self): 96 | out, _ = mobilenet.mobilenet_base( 97 | tf.placeholder(tf.float32, (10, 224, 224, 16)), 98 | conv_defs=mobilenet_v2.V2_DEF, 99 | output_stride=8, 100 | scope='MobilenetV2') 101 | self.assertEqual(out.get_shape().as_list()[1:3], [28, 28]) 102 | 103 | def testDivisibleBy(self): 104 | tf.reset_default_graph() 105 | mobilenet_v2.mobilenet( 106 | tf.placeholder(tf.float32, (10, 224, 224, 16)), 107 | conv_defs=mobilenet_v2.V2_DEF, 108 | divisible_by=16, 109 | min_depth=32) 110 | s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')] 111 | s = set(s) 112 | self.assertSameElements([32, 64, 96, 160, 192, 320, 384, 576, 960, 1280, 113 | 1001], s) 114 | 115 | def testDivisibleByWithArgScope(self): 116 | tf.reset_default_graph() 117 | # Verifies that depth_multiplier arg scope actually works 118 | # if no default min_depth is provided. 119 | with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32): 120 | mobilenet_v2.mobilenet( 121 | tf.placeholder(tf.float32, (10, 224, 224, 2)), 122 | conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1) 123 | s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')] 124 | s = set(s) 125 | self.assertSameElements(s, [32, 192, 128, 1001]) 126 | 127 | def testFineGrained(self): 128 | tf.reset_default_graph() 129 | # Verifies that depth_multiplier arg scope actually works 130 | # if no default min_depth is provided. 131 | 132 | mobilenet_v2.mobilenet( 133 | tf.placeholder(tf.float32, (10, 224, 224, 2)), 134 | conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.01, 135 | finegrain_classification_mode=True) 136 | s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')] 137 | s = set(s) 138 | # All convolutions will be 8->48, except for the last one. 
139 | self.assertSameElements(s, [8, 48, 1001, 1280]) 140 | 141 | def testMobilenetBase(self): 142 | tf.reset_default_graph() 143 | # Verifies that mobilenet_base returns pre-pooling layer. 144 | with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32): 145 | net, _ = mobilenet_v2.mobilenet_base( 146 | tf.placeholder(tf.float32, (10, 224, 224, 16)), 147 | conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1) 148 | self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128]) 149 | 150 | def testWithOutputStride16(self): 151 | tf.reset_default_graph() 152 | out, _ = mobilenet.mobilenet_base( 153 | tf.placeholder(tf.float32, (10, 224, 224, 16)), 154 | conv_defs=mobilenet_v2.V2_DEF, 155 | output_stride=16) 156 | self.assertEqual(out.get_shape().as_list()[1:3], [14, 14]) 157 | 158 | def testWithOutputStride8AndExplicitPadding(self): 159 | tf.reset_default_graph() 160 | out, _ = mobilenet.mobilenet_base( 161 | tf.placeholder(tf.float32, (10, 224, 224, 16)), 162 | conv_defs=mobilenet_v2.V2_DEF, 163 | output_stride=8, 164 | use_explicit_padding=True, 165 | scope='MobilenetV2') 166 | self.assertEqual(out.get_shape().as_list()[1:3], [28, 28]) 167 | 168 | def testWithOutputStride16AndExplicitPadding(self): 169 | tf.reset_default_graph() 170 | out, _ = mobilenet.mobilenet_base( 171 | tf.placeholder(tf.float32, (10, 224, 224, 16)), 172 | conv_defs=mobilenet_v2.V2_DEF, 173 | output_stride=16, 174 | use_explicit_padding=True) 175 | self.assertEqual(out.get_shape().as_list()[1:3], [14, 14]) 176 | 177 | def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self): 178 | sc = mobilenet.training_scope(is_training=None) 179 | self.assertNotIn('is_training', sc[slim.arg_scope_func_key( 180 | slim.batch_norm)]) 181 | 182 | def testBatchNormScopeDoesHasIsTrainingWhenItsNotNone(self): 183 | sc = mobilenet.training_scope(is_training=False) 184 | self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)]) 185 | sc = mobilenet.training_scope(is_training=True) 186 | 
self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)]) 187 | sc = mobilenet.training_scope() 188 | self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)]) 189 | 190 | 191 | if __name__ == '__main__': 192 | tf.test.main() 193 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/models/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Mobilenet V2 model, branched from slim models for fp16 performance study. 16 | 17 | Architecture: https://arxiv.org/abs/1801.04381 18 | 19 | The base model gives 72.2% accuracy on ImageNet, with 300MMadds, 20 | 3.4 M parameters. 
# File: /scripts/tf_cnn_benchmarks/models/mobilenet_v2.py
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mobilenet V2 model, branched from slim models for fp16 performance study.

Architecture: https://arxiv.org/abs/1801.04381

The base model gives 72.2% accuracy on ImageNet, with 300MMadds,
3.4 M parameters.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy

import tensorflow as tf

from models import mobilenet as lib
from models import mobilenet_conv_blocks as ops
from models import model

slim = tf.contrib.slim
op = lib.op

expand_input = ops.expand_input_by_factor


def _build_v2_spec():
  """Returns the ordered list of layer ops that make up MobilenetV2."""
  # (stride, num_outputs) of every expanded_conv block that follows the
  # first, non-expanding one, in network order.
  bottlenecks = (
      (2, 24), (1, 24),
      (2, 32), (1, 32), (1, 32),
      (2, 64), (1, 64), (1, 64), (1, 64),
      (1, 96), (1, 96), (1, 96),
      (2, 160), (1, 160), (1, 160),
      (1, 320),
  )
  layers = [
      op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
      op(ops.expanded_conv,
         expansion_size=expand_input(1, divisible_by=1),
         num_outputs=16),
  ]
  layers.extend(
      op(ops.expanded_conv, stride=stride, num_outputs=depth)
      for stride, depth in bottlenecks)
  layers.append(
      op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280))
  return layers


# pyformat: disable
# Architecture: https://arxiv.org/abs/1801.04381
V2_DEF = {
    'defaults': {
        # Note: these parameters of batch norm affect the architecture
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    'spec': _build_v2_spec(),
}
# pyformat: enable


@slim.add_arg_scope
def mobilenet(input_tensor,
              num_classes=1001,
              depth_multiplier=1.0,
              scope='MobilenetV2',
              conv_defs=None,
              finegrain_classification_mode=False,
              min_depth=None,
              divisible_by=None,
              **kwargs):
  """Creates mobilenet V2 network.

  Inference mode is created by default. To create training use training_scope
  below.

  with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
     logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    input_tensor: The input tensor
    num_classes: number of classes
    depth_multiplier: The multiplier applied to scale number of
      channels in each layer. It is forwarded to mobilenet.mobilenet as
      `multiplier`; the `depth_multiplier` name is kept for consistency with
      slim's model builder.
    scope: Scope of the operator
    conv_defs: Allows to override default conv def. Defaults to V2_DEF.
    finegrain_classification_mode: When set to True, the model
      will keep the last layer large even for small multipliers. Following
      https://arxiv.org/abs/1801.04381
      suggests that it improves performance for ImageNet-type of problems.
      *Note* ignored if final_endpoint makes the builder exit earlier.
    min_depth: If provided, will ensure that all layers will have that
      many channels after application of depth multiplier.
    divisible_by: If provided will ensure that all layers # channels
      will be divisible by this number.
    **kwargs: passed directly to mobilenet.mobilenet:
      prediction_fn- what prediction function to use.
      reuse-: whether to reuse variables (if reuse set to true, scope
      must be given).
  Returns:
    logits/endpoints pair

  Raises:
    ValueError: On invalid arguments
  """
  if 'multiplier' in kwargs:
    raise ValueError('mobilenetv2 doesn\'t support generic '
                     'multiplier parameter use "depth_multiplier" instead.')
  if conv_defs is None:
    conv_defs = V2_DEF
  if finegrain_classification_mode:
    # Work on a private copy so the caller's definition is never modified.
    conv_defs = copy.deepcopy(conv_defs)
    if depth_multiplier < 1:
      # Pre-divide the last layer's width so that it ends up at its original
      # size once the multiplier is applied. True division is in effect, so
      # the stored value becomes a float.
      last_layer_params = conv_defs['spec'][-1].params
      last_layer_params['num_outputs'] = (
          last_layer_params['num_outputs'] / depth_multiplier)

  # NB: do not set depth_args unless they are provided to avoid overriding
  # whatever default depth_multiplier might have thanks to arg_scope.
  depth_args = {
      name: value
      for name, value in (('min_depth', min_depth),
                          ('divisible_by', divisible_by))
      if value is not None
  }

  with slim.arg_scope((lib.depth_multiplier,), **depth_args):
    return lib.mobilenet(
        input_tensor,
        num_classes=num_classes,
        conv_defs=conv_defs,
        scope=scope,
        multiplier=depth_multiplier,
        **kwargs)


@slim.add_arg_scope
def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
  """Creates base of the mobilenet (no pooling and no logits).

  Thin wrapper that calls `mobilenet` with `base_only=True`; every other
  keyword argument is forwarded unchanged.
  """
  return mobilenet(
      input_tensor, depth_multiplier=depth_multiplier, base_only=True, **kwargs)


def training_scope(**kwargs):
  """Defines MobilenetV2 training scope.

  Usage:
     with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    **kwargs: Passed to mobilenet.training_scope. The following parameters
    are supported:
      weight_decay- The weight decay to use for regularizing the model.
      stddev-  Standard deviation for initialization, if negative uses xavier.
      dropout_keep_prob- dropout keep probability
      bn_decay- decay for the batch norm moving averages.

  Returns:
    An `arg_scope` to use for the mobilenet v2 model.
  """
  return lib.training_scope(**kwargs)


class MobilenetModel(model.CNNModel):
  """Mobilenet model configuration."""

  def __init__(self, params=None):
    # Positional values are presumably image size, batch size and learning
    # rate, going by the keyword names other CNNModel subclasses use.
    # TODO: confirm against model.CNNModel.
    super(MobilenetModel, self).__init__(
        'mobilenet', 224, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Replaces the builder's top layer with the MobilenetV2 logits."""
    is_training = cnn.phase_train
    with slim.arg_scope(training_scope(is_training=is_training)):
      logits, _ = mobilenet(cnn.top_layer, is_training=is_training)
      cnn.top_layer = logits
      cnn.top_size = logits.shape[-1].value
# File: /scripts/tf_cnn_benchmarks/mlperf_test.py
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains tests related to MLPerf.

Note this test only passes if the MLPerf compliance library is installed.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import Counter
import logging
import re

import six
import tensorflow as tf
import benchmark_cnn
import datasets
import mlperf
import test_util
from models import model
from mlperf_compliance import mlperf_log


class _MlPerfTestModel(model.CNNModel):
  """A model to test the MLPerf compliance logging on."""

  def __init__(self):
    super(_MlPerfTestModel, self).__init__(
        'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)

  def add_inference(self, cnn):
    """Builds a minimal conv -> max pool -> reshape -> affine network."""
    assert cnn.top_layer.shape[1:] == (3, 224, 224)
    cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
    cnn.mpool(1, 1, 1, 1, num_channels_in=1)
    cnn.reshape([-1, 224 * 224])
    cnn.affine(1, activation=None)

    # Assert that the batch norm variables are filtered out for L2 loss.
    variables = tf.global_variables() + tf.local_variables()
    assert len(variables) > len(self.filter_l2_loss_vars(variables))


class MlPerfComplianceTest(tf.test.TestCase):
  """Tests the MLPerf compliance logs.

  This serves as a quick check that we probably didn't break the compliance
  logging. It is not meant to be as comprehensive as the official MLPerf
  compliance checker will be.
  """

  def setUp(self):
    super(MlPerfComplianceTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  # Map between regex and the number of times we expect to see that regex in the
  # logs. Entries commented out with the comment FIXME indicate that
  # tf_cnn_benchmarks currently fails compliance in that regard, and needs to be
  # fixed to be MLPerf compliant.
  EXPECTED_LOG_REGEXES = {
      # Preprocessing tags
      mlperf.tags.INPUT_ORDER: 2,  # 1 for training, 1 for eval
      # We pass --tf_random_seed=9876 in the test.
      r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      # The Numpy random seed is hardcoded to 4321.
      r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
                   datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
                   datasets.IMAGENET_NUM_VAL_IMAGES): 1,
      mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
      mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,

      r'%s: \[123.68, 116.78, 103.94\].*' % mlperf.tags.INPUT_MEAN_SUBTRACTION:
          2,

      r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,

      # 1 for training, 1 for eval
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,

      # Resnet model tags
      mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
      # 2 for training, 2 for eval. Although there's only 1 conv2d, each conv2d
      # produces 2 logs.
      mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
      mlperf.tags.MODEL_HP_RELU + '.*': 2,
      mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
      # Listed once: a repeated key in a dict literal silently overwrites the
      # earlier entry, so a second identical line adds nothing.
      mlperf.tags.MODEL_HP_DENSE + '.*': 4,

      # Note that tags our test model does not emit, like MODEL_HP_SHORTCUT_ADD,
      # are omitted here.

      r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,

      # 1 for training, 2 because the _MlPerfTestModel calls this when building
      # the model for both training and eval
      r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,

      r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,

      # Note we do not handle OPT_LR, since that is printed to stderr using
      # tf.Print, which we cannot easily intercept.

      # Other tags
      '%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
      '%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
      mlperf.tags.RUN_START: 1,
      '%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
      mlperf.tags.TRAIN_LOOP: 1,
      mlperf.tags.TRAIN_EPOCH + '.*': 1,
      '%s: 2' % mlperf.tags.INPUT_SIZE: 2,
      mlperf.tags.EVAL_START: 2,
      mlperf.tags.EVAL_STOP: 2,
      '%s: 6' % mlperf.tags.EVAL_SIZE: 2,
      mlperf.tags.EVAL_ACCURACY + '.*': 2,
      '%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
      mlperf.tags.RUN_STOP + '.*': 1,
      mlperf.tags.RUN_FINAL: 1
  }
  # Compile once at class-creation time; keys become compiled patterns.
  EXPECTED_LOG_REGEXES = Counter({re.compile(k): v for
                                  k, v in EXPECTED_LOG_REGEXES.items()})

  def testMlPerfCompliance(self):
    """Runs a tiny benchmark and compares emitted log lines to expectations.

    Raises:
      AssertionError: if, for any expected regex, the number of matching log
        lines differs from the expected count.
    """
    string_io = six.StringIO()
    handler = logging.StreamHandler(string_io)
    data_dir = test_util.create_black_and_white_images()
    try:
      # Capture everything the MLPerf logger emits during the run.
      mlperf_log.LOGGER.addHandler(handler)
      params = benchmark_cnn.make_params(data_dir=data_dir,
                                         data_name='imagenet',
                                         batch_size=2,
                                         num_warmup_batches=0,
                                         num_batches=2,
                                         num_eval_batches=3,
                                         eval_during_training_every_n_steps=1,
                                         distortions=False,
                                         weight_decay=0.5,
                                         optimizer='momentum',
                                         momentum=0.5,
                                         stop_at_top_1_accuracy=2.0,
                                         tf_random_seed=9876,
                                         ml_perf=True)
      with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
        bench_cnn = benchmark_cnn.BenchmarkCNN(params, model=_MlPerfTestModel())
        bench_cnn.run()
      logs = string_io.getvalue().splitlines()
      # Count, per expected regex, how many log lines it matches.
      log_regexes = Counter()
      for log in logs:
        for regex in self.EXPECTED_LOG_REGEXES:
          if regex.search(log):
            log_regexes[regex] += 1
      if log_regexes != self.EXPECTED_LOG_REGEXES:
        # found - expected; only non-zero entries are mismatches.
        diff_counter = Counter(log_regexes)
        diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
        differences = []
        for regex, delta in diff_counter.items():
          if not delta:
            continue
          found_count = log_regexes[regex]
          expected_count = self.EXPECTED_LOG_REGEXES[regex]
          differences.append('  For regex %s: Found %d lines matching but '
                             'expected to find %d' %
                             (regex.pattern, found_count, expected_count))
        raise AssertionError('Logs did not match expected logs. Differences:\n'
                             '%s' % '\n'.join(differences))
    finally:
      # Always detach the handler so later tests are not affected.
      mlperf_log.LOGGER.removeHandler(handler)

if __name__ == '__main__':
  tf.test.main()
# File: /scripts/tf_cnn_benchmarks/datasets.py
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark dataset utilities.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from abc import abstractmethod
import os

import numpy as np
import six
from six.moves import cPickle
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

from tensorflow.python.platform import gfile
import preprocessing

IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000

COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952


class Dataset(object):
  """Abstract class for cnn benchmarks dataset."""

  def __init__(self,
               name,
               data_dir=None,
               queue_runner_required=False,
               num_classes=None):
    """Stores the dataset configuration.

    Args:
      name: dataset name; also the key used to look up input preprocessors.
      data_dir: directory holding the data. A falsy value means synthetic
        inputs are used (see `use_synthetic_gpu_inputs`).
      queue_runner_required: value returned by `queue_runner_required()`.
      num_classes: number of classes, exposed through `num_classes`.
    """
    self.name = name
    self.data_dir = data_dir
    self._queue_runner_required = queue_runner_required
    self._num_classes = num_classes

  def tf_record_pattern(self, subset):
    """Returns the glob '<data_dir>/<subset>-*-of-*' for the subset's files."""
    return os.path.join(self.data_dir, '%s-*-of-*' % subset)

  def reader(self):
    """Returns a new `tf.TFRecordReader`."""
    return tf.TFRecordReader()

  @property
  def num_classes(self):
    return self._num_classes

  @num_classes.setter
  def num_classes(self, val):
    self._num_classes = val

  # NOTE: this class does not use an ABCMeta metaclass, so `abstractmethod`
  # only documents intent here; Python does not enforce the override.
  @abstractmethod
  def num_examples_per_epoch(self, subset):
    """Returns the number of examples in `subset`; subclasses override."""
    pass

  def __str__(self):
    return self.name

  def get_input_preprocessor(self, input_preprocessor='default'):
    """Returns the preprocessor class registered for this dataset.

    Args:
      input_preprocessor: key into this dataset's entry of
        `_SUPPORTED_INPUT_PREPROCESSORS`.

    Returns:
      The registered preprocessor class (not an instance).
    """
    assert not self.use_synthetic_gpu_inputs()
    return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]

  def queue_runner_required(self):
    """Returns the `queue_runner_required` flag given at construction."""
    return self._queue_runner_required

  def use_synthetic_gpu_inputs(self):
    """Returns True when no data directory was given."""
    return not self.data_dir


class LibrispeechDataset(Dataset):
  """Configuration for LibriSpeech dataset."""

  def __init__(self, data_dir=None):
    super(LibrispeechDataset, self).__init__(
        'librispeech', data_dir, num_classes=29)

  def tf_record_pattern(self, subset):
    """Returns the TFRecord glob for `subset`, or '' for an unknown subset."""
    if subset == 'train':
      return os.path.join(self.data_dir, 'train-clean-*.tfrecords')
    elif subset == 'validation':
      return os.path.join(self.data_dir, 'test-clean.tfrecords')
    else:
      # Unlike num_examples_per_epoch of the image datasets, an unknown
      # subset is not an error here.
      return ''

  def num_examples_per_epoch(self, subset='train'):
    del subset
    return 2  # TODO(laigd): currently this is an arbitrary number.


class ImageDataset(Dataset):
  """Abstract class for image datasets."""

  def __init__(self,
               name,
               height,
               width,
               depth=None,
               data_dir=None,
               queue_runner_required=False,
               num_classes=1001):
    """Stores image dimensions on top of the base dataset configuration.

    Args:
      name: dataset name.
      height: image height.
      width: image width.
      depth: number of image channels; any falsy value is replaced by 3.
      data_dir: see `Dataset`.
      queue_runner_required: see `Dataset`.
      num_classes: see `Dataset`.
    """
    super(ImageDataset, self).__init__(name, data_dir, queue_runner_required,
                                       num_classes)
    self.height = height
    self.width = width
    self.depth = depth or 3


class ImagenetDataset(ImageDataset):
  """Configuration for Imagenet dataset."""

  def __init__(self, data_dir=None):
    super(ImagenetDataset, self).__init__(
        'imagenet', 300, 300, data_dir=data_dir)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the example count of `subset` ('train' or 'validation').

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    if subset == 'train':
      return IMAGENET_NUM_TRAIN_IMAGES
    elif subset == 'validation':
      return IMAGENET_NUM_VAL_IMAGES
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


class Cifar10Dataset(ImageDataset):
  """Configuration for cifar 10 dataset.

  It will mount all the input images to memory.
  """

  def __init__(self, data_dir=None):
    super(Cifar10Dataset, self).__init__(
        'cifar10',
        32,
        32,
        data_dir=data_dir,
        queue_runner_required=True,
        num_classes=11)

  def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array.

    Args:
      subset: 'train' (files data_batch_1..data_batch_5) or 'validation'
        (file test_batch).

    Returns:
      An (images, labels) pair; images are cast to float32.

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      filenames = [
          os.path.join(self.data_dir, 'data_batch_%d' % i)
          for i in xrange(1, 6)
      ]
    elif subset == 'validation':
      filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    # python2 does not have the encoding parameter. The value does not depend
    # on the file, so it is computed once rather than per file.
    encoding = {} if six.PY2 else {'encoding': 'bytes'}
    inputs = []
    for filename in filenames:
      with gfile.Open(filename, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; `data_dir` must only
        # contain trusted CIFAR-10 batch files.
        inputs.append(cPickle.load(f, **encoding))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input[b'data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input[b'labels'] for each_input in inputs])
    return all_images, all_labels

  def num_examples_per_epoch(self, subset='train'):
    """Returns 50000 for 'train' and 10000 for 'validation'.

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    if subset == 'train':
      return 50000
    elif subset == 'validation':
      return 10000
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


class COCODataset(ImageDataset):
  """Configuration for COCO dataset."""

  def __init__(self, data_dir=None, image_size=300):
    super(COCODataset, self).__init__(
        'coco', image_size, image_size, data_dir=data_dir, num_classes=81)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the example count of `subset` ('train' or 'validation').

    Raises:
      ValueError: if `subset` is neither 'train' nor 'validation'.
    """
    if subset == 'train':
      return COCO_NUM_TRAIN_IMAGES
    elif subset == 'validation':
      return COCO_NUM_VAL_IMAGES
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


# Dataset name -> Dataset subclass; each is constructed as cls(data_dir).
_SUPPORTED_DATASETS = {
    'imagenet': ImagenetDataset,
    'cifar10': Cifar10Dataset,
    'librispeech': LibrispeechDataset,
    'coco': COCODataset,
}

# Dataset name -> {preprocessor name -> preprocessor class}.
_SUPPORTED_INPUT_PREPROCESSORS = {
    'imagenet': {
        'default': preprocessing.RecordInputImagePreprocessor,
        'official_models_imagenet': preprocessing.ImagenetPreprocessor,
    },
    'cifar10': {
        'default': preprocessing.Cifar10ImagePreprocessor
    },
    'librispeech': {
        'default': preprocessing.LibrispeechPreprocessor
    },
    'coco': {
        'default': preprocessing.COCOPreprocessor
    },
}


def create_dataset(data_dir, data_name):
  """Create a Dataset instance based on data_dir and data_name.

  Args:
    data_dir: directory holding the data, or a falsy value for synthetic data.
    data_name: name of the dataset. If None, it is inferred from `data_dir`
      by looking for a supported dataset name as a substring of the path.

  Returns:
    An instance of the Dataset subclass registered under the resolved name.

  Raises:
    ValueError: if the name cannot be inferred from `data_dir`, or if it is
      not a supported dataset name.
  """
  if not data_dir and not data_name:
    # When using synthetic data, use synthetic imagenet images by default.
    data_name = 'imagenet'

  # Infer dataset name from data_dir if data_name is not provided.
  if data_name is None:
    for supported_name in _SUPPORTED_DATASETS:
      if supported_name in data_dir:
        data_name = supported_name
        break
    else:  # Failed to identify dataset name from data dir.
      raise ValueError('Could not identify name of dataset. '
                       'Please specify with --data_name option.')
  if data_name not in _SUPPORTED_DATASETS:
    raise ValueError('Unknown dataset. Must be one of %s' % ', '.join(
        sorted(_SUPPORTED_DATASETS)))

  return _SUPPORTED_DATASETS[data_name](data_dir)
# File: /scripts/tf_cnn_benchmarks/models/inception_model.py
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Inception model configuration.

Includes multiple models: inception3, inception4, inception-resnet2.

References:
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
  Inception-v4, Inception-ResNet and the Impact of Residual Connections on
  Learning

  Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
  Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
  Going Deeper with Convolutions
  http://arxiv.org/pdf/1409.4842v1.pdf

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna
  Rethinking the Inception Architecture for Computer Vision
  arXiv preprint arXiv:1512.00567 (2015)

  Inception v3 model: http://arxiv.org/abs/1512.00567

  Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import xrange  # pylint: disable=redefined-builtin
from models import model


class Inceptionv3Model(model.CNNModel):
  """InceptionV3."""

  def __init__(self, auxiliary=False, params=None):
    # Recorded before the base-class constructor runs, as in the original.
    self._auxiliary = auxiliary
    super(Inceptionv3Model, self).__init__(
        'inception3', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the InceptionV3 layers to the `cnn` builder.

    Each nested helper describes one inception module as a list of columns
    (lists of layer tuples) and hands it to `cnn.inception_module`.
    """

    def inception_v3_a(cnn, n):
      cnn.inception_module('incept_v3_a', [
          [('conv', 64, 1, 1)],
          [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
          [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
          [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)],
      ])

    def inception_v3_b(cnn):
      cnn.inception_module('incept_v3_b', [
          [('conv', 384, 3, 3, 2, 2, 'VALID')],
          [('conv', 64, 1, 1),
           ('conv', 96, 3, 3),
           ('conv', 96, 3, 3, 2, 2, 'VALID')],
          [('mpool', 3, 3, 2, 2, 'VALID')],
      ])

    def inception_v3_c(cnn, n):
      cnn.inception_module('incept_v3_c', [
          [('conv', 192, 1, 1)],
          [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
          [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
           ('conv', n, 7, 1), ('conv', 192, 1, 7)],
          [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)],
      ])

    def inception_v3_d(cnn):
      cnn.inception_module('incept_v3_d', [
          [('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
          [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
           ('conv', 192, 3, 3, 2, 2, 'VALID')],
          [('mpool', 3, 3, 2, 2, 'VALID')],
      ])

    def inception_v3_e(cnn, pooltype):
      # Any pooltype other than 'max' selects average pooling.
      pool_op = 'mpool' if pooltype == 'max' else 'apool'
      cnn.inception_module('incept_v3_e', [
          [('conv', 320, 1, 1)],
          [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
          [('share',), ('conv', 384, 3, 1)],
          [('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
          [('share',), ('share',), ('conv', 384, 3, 1)],
          [(pool_op, 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)],
      ])

    def incept_v3_aux(cnn):
      # The auxiliary head branches off the current top layer; it may only be
      # attached once.
      assert cnn.aux_top_layer is None
      cnn.aux_top_layer = cnn.top_layer
      cnn.aux_top_size = cnn.top_size
      with cnn.switch_to_aux_top_layer():
        cnn.apool(5, 5, 3, 3, mode='VALID')
        cnn.conv(128, 1, 1, mode='SAME')
        cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
        cnn.reshape([-1, 768])

    # Shape comments give the nominal output of each call for a
    # 299 x 299 x 3 input, batch dimension omitted. They are derived from the
    # original annotations and layer arguments, not checked against the
    # builder.
    cnn.use_batch_norm = True
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')   # 149 x 149 x 32
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')   # 147 x 147 x 32
    cnn.conv(64, 3, 3, 1, 1, mode='SAME')    # 147 x 147 x 64
    cnn.mpool(3, 3, 2, 2, mode='VALID')      # 73 x 73 x 64
    cnn.conv(80, 1, 1, 1, 1, mode='VALID')   # 73 x 73 x 80
    cnn.conv(192, 3, 3, 1, 1, mode='VALID')  # 71 x 71 x 192
    cnn.mpool(3, 3, 2, 2, 'VALID')           # 35 x 35 x 192
    inception_v3_a(cnn, 32)                  # 35 x 35 x 256 mixed.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_1.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_2
    inception_v3_b(cnn)                      # 17 x 17 x 768 mixed_3
    inception_v3_c(cnn, 128)                 # 17 x 17 x 768 mixed_4
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_5
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_6
    inception_v3_c(cnn, 192)                 # 17 x 17 x 768 mixed_7
    if self._auxiliary:
      incept_v3_aux(cnn)                     # Auxiliary Head logits
    inception_v3_d(cnn)                      # 8 x 8 x 1280 mixed_8
    inception_v3_e(cnn, 'avg')               # 8 x 8 x 2048 mixed_9
    inception_v3_e(cnn, 'max')               # 8 x 8 x 2048 mixed_10
    cnn.apool(8, 8, 1, 1, 'VALID')           # 1 x 1 x 2048
    cnn.reshape([-1, 2048])                  # 2048


# Stem functions
def inception_v4_sa(cnn):
  """Adds Inception-v4 stem module 'incept_v4_sa' to `cnn`."""
  cnn.inception_module('incept_v4_sa', [
      [('mpool', 3, 3, 2, 2, 'VALID')],
      [('conv', 96, 3, 3, 2, 2, 'VALID')],
  ])


def inception_v4_sb(cnn):
  """Adds Inception-v4 stem module 'incept_v4_sb' to `cnn`."""
  cnn.inception_module('incept_v4_sb', [
      [('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')],
      [('conv', 64, 1, 1), ('conv', 64, 7, 1), ('conv', 64, 1, 7),
       ('conv', 96, 3, 3, 1, 1, 'VALID')],
  ])


def inception_v4_sc(cnn):
  """Adds Inception-v4 stem module 'incept_v4_sc' to `cnn`."""
  cnn.inception_module('incept_v4_sc', [
      [('conv', 192, 3, 3, 2, 2, 'VALID')],
      [('mpool', 3, 3, 2, 2, 'VALID')],
  ])


# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
  """Adds reduction module 'incept_v4_ra' to `cnn`.

  Args:
    cnn: the network builder.
    k: filters of the 1x1 conv in the three-conv column.
    l: filters of the first 3x3 conv in that column.
    m: filters of the strided 3x3 conv that ends that column.
    n: filters of the single strided 3x3 conv column.
  """
  cnn.inception_module('incept_v4_ra', [
      [('mpool', 3, 3, 2, 2, 'VALID')],
      [('conv', n, 3, 3, 2, 2, 'VALID')],
      [('conv', k, 1, 1), ('conv', l, 3, 3), ('conv', m, 3, 3, 2, 2, 'VALID')],
  ])


def inception_v4_rb(cnn):
  """Adds reduction module 'incept_v4_rb' to `cnn`."""
  cnn.inception_module('incept_v4_rb', [
      [('mpool', 3, 3, 2, 2, 'VALID')],
      [('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')],
      [('conv', 256, 1, 1), ('conv', 256, 1, 7), ('conv', 320, 7, 1),
       ('conv', 320, 3, 3, 2, 2, 'VALID')],
  ])


class Inceptionv4Model(model.CNNModel):
  """Inceptionv4."""

  def __init__(self, params=None):
    super(Inceptionv4Model, self).__init__(
        'inception4', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Adds the Inception-v4 layers to the `cnn` builder."""

    def inception_v4_a(cnn):
      cnn.inception_module('incept_v4_a', [
          [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
          [('conv', 96, 1, 1)],
          [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
          [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
      ])

    def inception_v4_b(cnn):
      cnn.inception_module('incept_v4_b', [
          [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
          [('conv', 384, 1, 1)],
          [('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
          [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
           ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
      ])

    def inception_v4_c(cnn):
      cnn.inception_module('incept_v4_c', [
          [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
          [('conv', 256, 1, 1)],
          [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
          [('share',), ('conv', 256, 3, 1)],
          [('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
           ('conv', 256, 3, 1)],
          [('share',), ('share',), ('share',), ('conv', 256, 1, 3)],
      ])

    cnn.use_batch_norm = True
    # Stem.
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')
    cnn.conv(64, 3, 3)
    inception_v4_sa(cnn)
    inception_v4_sb(cnn)
    inception_v4_sc(cnn)
    # 4 x module A, reduction A, 7 x module B, reduction B, 3 x module C.
    for _ in xrange(4):
      inception_v4_a(cnn)
    inception_v4_ra(cnn, 192, 224, 256, 384)
    for _ in xrange(7):
      inception_v4_b(cnn)
    inception_v4_rb(cnn)
    for _ in xrange(3):
      inception_v4_c(cnn)
    cnn.spatial_mean()
    cnn.dropout(0.8)
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Generate black and white test TFRecords with Example protos. 16 | 17 | Each record within the TFRecord file is a 18 | serialized Example proto. The Example proto contains the following fields: 19 | 20 | image/encoded: string containing JPEG encoded image in RGB colorspace 21 | image/height: integer, image height in pixels 22 | image/width: integer, image width in pixels 23 | image/colorspace: string, specifying the colorspace, always 'RGB' 24 | image/channels: integer, specifying the number of channels, always 3 25 | image/format: string, specifying the format, always'JPEG' 26 | 27 | image/filename: string containing the basename of the image file 28 | e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' 29 | image/class/label: integer specifying the index in a classification layer. 30 | The label ranges from [1, 1000] where 0 is not used. 31 | image/class/synset: string specifying the unique ID of the label, 32 | e.g. 'n01440764' 33 | image/class/text: string specifying the human-readable version of the label 34 | e.g. 
def _int64_feature(value):
  """Wraps an int, or a list of ints, in an int64 `tf.train.Feature`."""
  if not isinstance(value, list):
    value = [value]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def _float_feature(value):
  """Wraps a float, or a list of floats, in a float `tf.train.Feature`."""
  if not isinstance(value, list):
    value = [value]
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _as_bytes(value):
  """Returns `value` as bytes, UTF-8 encoding it if it is a text string.

  On Python 2 plain `str` literals are already bytes and pass through
  unchanged; on Python 3 `str` is text and has to be encoded before it can be
  stored in a BytesList.
  """
  if isinstance(value, type(u'')):
    return value.encode('utf-8')
  return value


def _bytes_feature(value):
  """Wraps a single bytes or text value in a bytes `tf.train.Feature`."""
  return tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[_as_bytes(value)]))


def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
                        height, width):
  """Build an Example proto for an example.

  Args:
    filename: string, path to an image file, e.g., '/path/to/example.JPG'.
      Only its basename is stored.
    image_buffer: string, JPEG encoding of RGB image
    label: integer, identifier for the ground truth for the network
    synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
    human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
    bbox: list of bounding boxes; each box is a list of four numbers
      specifying [xmin, ymin, xmax, ymax] (stored as float features). All
      boxes are assumed to belong to the same label as the image label.
    height: integer, image height in pixels
    width: integer, image width in pixels
  Returns:
    Example proto
  """
  # Split the per-box [xmin, ymin, xmax, ymax] lists into per-coordinate
  # lists, which is the layout of the bbox features below.
  xmin, ymin, xmax, ymax = [], [], [], []
  for box in bbox:
    assert len(box) == 4
    xmin.append(box[0])
    ymin.append(box[1])
    xmax.append(box[2])
    ymax.append(box[3])

  colorspace = 'RGB'
  channels = 3
  image_format = 'JPEG'

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': _int64_feature(height),
      'image/width': _int64_feature(width),
      'image/colorspace': _bytes_feature(colorspace),
      'image/channels': _int64_feature(channels),
      'image/class/label': _int64_feature(label),
      'image/class/synset': _bytes_feature(synset),
      'image/class/text': _bytes_feature(human),
      'image/object/bbox/xmin': _float_feature(xmin),
      'image/object/bbox/xmax': _float_feature(xmax),
      'image/object/bbox/ymin': _float_feature(ymin),
      'image/object/bbox/ymax': _float_feature(ymax),
      # Every box carries the image-level label.
      'image/object/bbox/label': _int64_feature([label] * len(xmin)),
      'image/format': _bytes_feature(image_format),
      'image/filename': _bytes_feature(os.path.basename(filename)),
      'image/encoded': _bytes_feature(image_buffer)}))
  return example


class ImageCoder(object):
  """Helper class that provides TensorFlow image coding utilities."""

  def __init__(self):
    # Create a single Session to run all image coding calls.
    self._sess = tf.Session()

    # Build the JPEG-encoding op once; encode_jpeg() feeds images into the
    # uint8 placeholder.
    self._image = tf.placeholder(dtype=tf.uint8)
    self._encode_jpeg = tf.image.encode_jpeg(
        self._image, format='rgb', quality=100)

  def encode_jpeg(self, image):
    """Runs the JPEG-encoding op on `image` and returns the encoded result."""
    jpeg_image = self._sess.run(self._encode_jpeg,
                                feed_dict={self._image: image})
    return jpeg_image


def _process_image(coder, name):
  """Generates one synthetic, single-colour JPEG image.

  If name is "train", a black image is returned. Otherwise, a white image is
  returned. Height and width are each drawn at random from [30, 299].

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    name: string, unique identifier specifying the data set.
  Returns:
    image_buffer: string, JPEG encoding of RGB image.
    height: integer, image height in pixels.
    width: integer, image width in pixels.
  """
  # No file is read: the image is a constant-valued uint8 array.
  value = 0 if name == 'train' else 255
  height = random.randint(30, 299)
  width = random.randint(30, 299)
  image = np.full((height, width, 3), value, np.uint8)

  jpeg_data = coder.encode_jpeg(image)

  return jpeg_data, height, width


def _process_dataset(output_directory, num_classes, coder, name, num_images,
                     num_shards):
  """Process a complete data set and save it as a TFRecord.

  Each shard is written to '<name>-<shard>-of-<num_shards>' (zero padded to
  five digits) inside `output_directory`.

  NOTE: every shard receives `num_images // num_shards` images, so when
  `num_images` is not a multiple of `num_shards` the remainder is not
  generated.

  Args:
    output_directory: Where to put outputs.
    num_classes: number of classes.
    coder: Instance of an ImageCoder.
    name: string, unique identifier specifying the data set.
    num_images: number of images to generate.
    num_shards: integer number of shards to create.
  """
  files_per_shard = num_images // num_shards
  for shard in range(num_shards):
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(output_directory, output_filename)
    with tf.python_io.TFRecordWriter(output_file) as writer:
      for i in range(files_per_shard):
        # Global index of the image across all shards; it drives the label
        # and the synset string.
        index = shard * files_per_shard + i
        image_buffer, height, width = _process_image(coder, name)

        filename = '{}_{}_{}'.format(name, shard, i)
        label = index % num_classes
        synset = str(index)
        human = name
        bbox = [[0.1, 0.1, 0.9, 0.9]]
        example = _convert_to_example(filename, image_buffer, label,
                                      synset, human, bbox,
                                      height, width)
        writer.write(example.SerializeToString())


def write_black_and_white_tfrecord_data(
    output_directory, num_classes, num_train_images=512,
    num_validation_images=128, train_shards=8, validation_shards=2):
  """Writes black and white images in tfrecord format.

  Training images are black and validation images are white. The validation
  set is written first, then the training set.

  Args:
    output_directory: Where to put outputs.
    num_classes: number of classes.
    num_train_images: number of training images to generate.
    num_validation_images: number of validation images to generate.
    train_shards: integer number of training shards to create.
    validation_shards: integer number of validation shards to create.
  """

  coder = ImageCoder()
  _process_dataset(output_directory, num_classes, coder, 'validation',
                   num_validation_images, validation_shards)
  _process_dataset(output_directory, num_classes, coder, 'train',
                   num_train_images, train_shards)
def tensorflow_version_tuple():
  """Returns the TensorFlow version as a (major, minor, patch) tuple.

  `major` and `minor` are ints. `patch` is everything after the second dot,
  returned as an unparsed string.
  """
  v = tf.__version__
  # Split at most twice so version strings with extra dot-separated parts
  # still unpack into exactly three fields.
  major, minor, patch = v.split('.', 2)
  return (int(major), int(minor), patch)


def tensorflow_version():
  """Returns the TensorFlow version as the integer major * 1000 + minor."""
  vt = tensorflow_version_tuple()
  return vt[0] * 1000 + vt[1]


def log_fn(log):
  """Prints `log` to standard output."""
  print(log)


def roll_numpy_batches(array, batch_size, shift_ratio):
  """Moves a proportion of batches from start to the end of the array.

  This function moves a proportion of batches, specified by `shift_ratio`, from
  the starts of the array to the end. The number of batches moved is rounded
  down to the nearest integer. For example,

  ```
  roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
  ```

  Args:
    array: A Numpy array whose first dimension is the batch dimension. Its
      length must be a multiple of `batch_size`.
    batch_size: The batch size.
    shift_ratio: Proportion of batches to move from the start of the array to
      the end of the array.
  Returns:
    A new Numpy array, with a proportion of the batches at the start of `array`
    moved to the end.
  """
  num_items = array.shape[0]
  assert num_items % batch_size == 0
  num_batches = num_items // batch_size
  starting_batch = int(num_batches * shift_ratio)
  starting_item = starting_batch * batch_size
  # A negative shift moves the leading items to the end.
  return np.roll(array, -starting_item, axis=0)


# For Python 2.7 compatibility, we do not use threading.Barrier.
class Barrier(object):
  """Implements a lightweight Barrier.

  Useful for synchronizing a fixed number of threads at known synchronization
  points. Threads block on 'wait()' and simultaneously return once they have
  all made that call.

  # Implementation adopted from boost/thread/barrier.hpp
  """

  def __init__(self, parties):
    """Create a barrier, initialised to 'parties' threads."""
    self.cond = threading.Condition(threading.Lock())
    self.parties = parties
    # Indicates the number of waiting parties.
    self.waiting = 0
    # generation is needed to deal with spurious wakeups. If self.cond.wait()
    # wakes up for other reasons, generation will force it go back to wait().
    self.generation = 0
    self.broken = False

  def wait(self):
    """Wait for the barrier.

    Returns immediately, without blocking, once abort() has been called.
    """
    with self.cond:
      # Check if the barrier has been disabled or not.
      if self.broken:
        return
      gen = self.generation
      self.waiting += 1
      if self.waiting == self.parties:
        # Last party to arrive: start a new generation and release everyone.
        self.waiting = 0
        self.generation += 1
        self.cond.notify_all()
      # loop because of spurious wakeups
      while gen == self.generation:
        self.cond.wait()

  # TODO(huangyp): Remove this method once we find a way to know which step
  # is the last barrier.
  def abort(self):
    """Clear existing barrier and disable this barrier."""
    with self.cond:
      if self.waiting > 0:
        # Bumping the generation releases the threads blocked in wait().
        self.generation += 1
        self.cond.notify_all()
      self.broken = True


class ImageProducer(object):
  """An image producer that puts images into a staging area periodically.

  This class is useful for periodically running a set of ops, `put_ops` on a
  different thread every `batch_group_size` steps.

  The notify_image_consumption() method is used to increment an internal counter
  so that every `batch_group_size` times it is called, `put_ops` is executed. A
  barrier is placed so that notify_image_consumption() will block until
  the previous call to `put_ops` has been executed.

  The start() method is used to start the thread that runs `put_ops`.

  The done() method waits until the last put_ops is executed and stops the
  thread.

  The purpose of this class is to fill an image input pipeline every
  `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size` images
  to the input pipeline when run, and that every step, 1 batch of images is
  consumed. Then, by calling notify_image_consumption() every step, images are
  supplied to the input pipeline at the same amount they are consumed.

  Example usage:
  ```
  put_ops = ... # Enqueues `batch_group_size` batches to a StagingArea
  get_op = ... # Dequeues 1 batch, and does some operations on it
  batch_group_size = 4
  with tf.Session() as sess:
    image_producer = cnn_util.ImageProducer(
        sess, put_ops, batch_group_size, use_python32_barrier=False)
    image_producer.start()
    for _ in range(100):
      sess.run(get_op)
      image_producer.notify_image_consumption()
    image_producer.done()
  ```
  """

  def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
    """Creates the producer; no thread is started until start() is called.

    Args:
      sess: Object whose run() method executes `put_ops`.
      put_ops: The ops passed to `sess.run` by the producer.
      batch_group_size: Number of notify_image_consumption() calls per
        execution of `put_ops`.
      use_python32_barrier: If True and running on Python >= 3.2, use
        threading.Barrier; otherwise use the Barrier class of this module.
    """
    self.sess = sess
    self.num_gets = 0
    self.put_ops = put_ops
    self.batch_group_size = batch_group_size
    self.done_event = threading.Event()
    if (use_python32_barrier and
        sys.version_info[0] == 3 and sys.version_info[1] >= 2):
      self.put_barrier = threading.Barrier(2)
    else:
      self.put_barrier = Barrier(2)

  def _should_put(self):
    """Returns True if the next consumption completes a batch group."""
    return (self.num_gets + 1) % self.batch_group_size == 0

  def done(self):
    """Stop the image producer."""
    # Set the event before aborting so the producer thread can tell a
    # deliberate shutdown from an unexpected barrier failure.
    self.done_event.set()
    self.put_barrier.abort()
    self.thread.join()

  def start(self):
    """Start the image producer."""
    # Run put_ops once synchronously before the background thread takes over.
    self.sess.run([self.put_ops])
    self.thread = threading.Thread(target=self._loop_producer)
    # Set daemon to true to allow Ctrl + C to terminate all threads.
    self.thread.daemon = True
    self.thread.start()

  def notify_image_consumption(self):
    """Increment the counter of image_producer by 1.

    This should only be called by the main thread that consumes images and runs
    the model computation. One batch of images should be consumed between
    calling start() and the first call to this method. Then, one batch of images
    should be consumed between any two successive calls to this method.
    """
    if self._should_put():
      self.put_barrier.wait()
    self.num_gets += 1

  def _loop_producer(self):
    """Producer thread body: run put_ops, then wait at the barrier, repeat."""
    # threading.Barrier.abort() makes wait() raise BrokenBarrierError. The
    # name does not exist before Python 3.2; an empty tuple catches nothing.
    broken_barrier_error = getattr(threading, 'BrokenBarrierError', ())
    while not self.done_event.is_set():
      self.sess.run([self.put_ops])
      try:
        self.put_barrier.wait()
      except broken_barrier_error:
        # A barrier broken by done() is the normal shutdown path; anything
        # else is a real error and is re-raised.
        if not self.done_event.is_set():
          raise
        break


class BaseClusterManager(object):
  """The manager for the cluster of servers running the benchmark."""

  def __init__(self, params):
    """Builds the cluster spec from comma-separated host lists.

    Args:
      params: Object with `worker_hosts` and `ps_hosts` string attributes.
        An empty `ps_hosts` means the cluster has no 'ps' job.
    """
    worker_hosts = params.worker_hosts.split(',')
    ps_hosts = params.ps_hosts.split(',') if params.ps_hosts else []
    cluster = {'worker': worker_hosts}
    if ps_hosts:
      cluster['ps'] = ps_hosts
    self._cluster_spec = tf.train.ClusterSpec(cluster)

  def get_target(self):
    """Returns a target to be passed to tf.Session()."""
    raise NotImplementedError('get_target must be implemented by subclass')

  def join_server(self):
    """Joins the server; must be overridden by subclasses."""
    raise NotImplementedError('join_server must be implemented by subclass')

  def get_cluster_spec(self):
    """Returns the tf.train.ClusterSpec built in the constructor."""
    return self._cluster_spec

  def num_workers(self):
    """Returns the number of tasks in the 'worker' job."""
    return len(self._cluster_spec.job_tasks('worker'))

  def num_ps(self):
    """Returns the number of tasks in the 'ps' job, or 0 if there is none."""
    if 'ps' in self._cluster_spec.jobs:
      return len(self._cluster_spec.job_tasks('ps'))
    else:
      return 0


class GrpcClusterManager(BaseClusterManager):
  """A cluster manager for a cluster networked with gRPC."""

  def __init__(self, params, config_proto):
    """Creates the manager and, except for the controller, a server.

    Args:
      params: Object with the attributes read by BaseClusterManager plus
        `job_name`, `task_index` and `server_protocol`.
      config_proto: Passed as `config` to tf.train.Server.
    """
    super(GrpcClusterManager, self).__init__(params)
    if params.job_name == 'controller':
      # The controller starts no server; it targets the first worker.
      self._target = 'grpc://%s' % self._cluster_spec.job_tasks('worker')[0]
    else:
      self._server = tf.train.Server(self._cluster_spec,
                                     job_name=params.job_name,
                                     task_index=params.task_index,
                                     config=config_proto,
                                     protocol=params.server_protocol)
      self._target = self._server.target

  def get_target(self):
    """Returns the session target chosen in the constructor."""
    return self._target

  def join_server(self):
    """Joins the server created in the constructor.

    NOTE(review): no server exists when job_name is 'controller', so calling
    this on a controller raises AttributeError.
    """
    return self._server.join()
254 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/mlperf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains functions related to MLPerf compliance. 16 | 17 | MLPerf requires submissions to log what the benchmark does, in order to verify 18 | that the benchmark meets the MLPerf requirements. This module contains a global 19 | object `logger` that is used by other files to log what tf_cnn_benchmarks does 20 | for compliance. 21 | 22 | By default, `logger` does nothing, as the MLPerf compliance logs are verbose and 23 | unnecessary if one is not concerned about MLPerf compliance. The logger can be 24 | enabled by using the `mlperf_logger` context manager. 25 | 26 | To enable the logger with `mlperf_logger`, the MLPerf compliance library at 27 | https://github.com/mlperf/training/tree/master/compliance is required. If 28 | the logger is not enabled, the library is not needed. 
# pylint: disable=g-import-not-at-top
try:
  # Not all users have the MLPerf compliance library, so we don't want to
  # unconditionally crash if these imports fail.
  from mlperf_compliance import mlperf_log
  from mlperf_compliance import resnet_log_helper
  from mlperf_compliance import tags
  import_successful = True
except ImportError:
  # The logger cannot be enabled in this case since the MLPerf library isn't
  # found. We return empty strings from the `tags` attribute so that
  # the benchmark can still run without crashing. This empty tags are passed
  # to an instance of `NullMlPerfLogger`, which does not log anything and
  # ignores the tag values.

  class _Tags(object):
    """Stand-in for `mlperf_compliance.tags`: every attribute is ''."""

    def __getattr__(self, item):
      return ''
  tags = _Tags()
  import_successful = False
# pylint: enable=g-import-not-at-top


# Per-model logging configuration: the print function, the set of tags that
# may be logged, and the model name used in deferred log lines.
_ModelInfo = namedtuple('_ModelInfo', ['print_fn', 'tag_set',
                                       'mlperf_model_name'])


_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'


class MlPerfLogger(object):
  """Logs various aspects about a benchmark run for MLPerf compliance."""

  def __init__(self, model):
    """Creates a logger for `model`.

    Args:
      model: Name of the model being benchmarked; must be 'resnet50_v1.5' or
        'ssd300'.

    Raises:
      ValueError: If `model` is not one of the supported models.
    """
    # Directory containing this file; handed to the compliance library as
    # its root directory.
    self._root_dir = os.path.split(os.path.abspath(__file__))[0]
    mlperf_log.ROOT_DIR_RESNET = self._root_dir
    mlperf_log.ROOT_DIR_SSD = self._root_dir
    self.model = model
    model_to_info = {
        'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
                                    mlperf_log.RESNET_TAG_SET, tags.RESNET),
        'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
                             tags.SSD)
    }

    try:
      self._log_fn, self.tag_set, self.mlperf_model_name = model_to_info[model]
    except KeyError:
      raise ValueError('--ml_perf_compliance_logging is only compatible when '
                       '--model is one of the following: ' +
                       ', '.join(model_to_info.keys()))

  def log(self, key, value=None, stack_offset=2):
    """Logs `key`/`value` if `key` is a tag of this model, else prints a note.

    Args:
      key: The tag to log.
      value: Optional value logged with the tag.
      stack_offset: Forwarded to the model's print function.
    """
    if key in self.tag_set:
      self._log_fn(key, value, stack_offset)
    else:
      print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
            (key, value, self.model))

  def log_deferred_tensor_value(self, key, tensor_value, global_step,
                                stack_offset=2, every_n=1):
    """Logs the value of a tensor when the graph is run.

    Args:
      key: The tag printed in the log line.
      tensor_value: The tensor whose value is printed.
      global_step: Step tensor; a line is printed when it is a multiple of
        `every_n`.
      stack_offset: Forwarded to `mlperf_log.get_caller`.
      every_n: Print once every this many steps.

    Returns:
      An identity of `tensor_value` that depends on the conditional print op,
      so the print happens whenever the returned tensor is evaluated.
    """
    caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)
    def create_print_op():
      return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
                      tf.timestamp(), caller, key,
                      ': { "deferred": true, "value":', tensor_value, '}',
                      output_stream=sys.stdout)
    maybe_print = tf.cond(tf.equal(global_step % every_n, 0), create_print_op,
                          tf.no_op)
    with tf.control_dependencies([maybe_print]):
      return tf.identity(tensor_value)

  # The layer-level helpers below only log for 'resnet50_v1.5'; for any other
  # model they do nothing.

  def log_max_pool(self, input_tensor, output_tensor):
    """Logs a max-pool layer (resnet50_v1.5 only)."""
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_max_pool(input_tensor, output_tensor)

  def log_begin_block(self, input_tensor, block_type):
    """Logs the beginning of a block (resnet50_v1.5 only)."""
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_begin_block(input_tensor, block_type)

  def log_end_block(self, output_tensor):
    """Logs the end of a block (resnet50_v1.5 only)."""
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_end_block(output_tensor)

  def log_projection(self, input_tensor, output_tensor):
    """Logs a projection (resnet50_v1.5 only)."""
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_projection(input_tensor, output_tensor)

  def log_conv2d(self, input_tensor, output_tensor, stride_height, stride_width,
                 filters, initializer, use_bias):
    """Log a conv2d call.

    Only logs for 'resnet50_v1.5'. The stride height and width must be equal.
    Truncated-normal and glorot-uniform (or None) initializers are replaced
    by their tag / name before logging; any other initializer is passed
    through unchanged.
    """
    if self.model == 'resnet50_v1.5':
      assert stride_height == stride_width, (
          '--ml_perf_compliance_logging does not support convolutions where '
          'the stride height is not equal to the stride width. '
          'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
      if isinstance(initializer, tf.truncated_normal_initializer) or (
          isinstance(initializer, tf.variance_scaling_initializer) and
          initializer.distribution == 'truncated_normal'):
        initializer = tags.TRUNCATED_NORMAL
      elif (isinstance(initializer, tf.glorot_uniform_initializer) or
            initializer is None):
        initializer = 'glorot_uniform'
      resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
                                   filters, initializer, use_bias)

  def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
                     center, scale, training):
    """Logs a batch-norm layer (resnet50_v1.5 only)."""
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
                                       epsilon, center, scale, training)

  def log_train_epochs(self, num_epochs):
    """Logs all the TRAIN_EPOCHs log lines.

    Args:
      num_epochs: Number of training epochs; may be fractional, in which case
        one extra line describes the partial final epoch.
    """
    num_epochs_int = int(num_epochs)
    for i in range(num_epochs_int):
      # MLPerf allows us to print all the train epochs at once instead of
      # printing them as we do them.
      self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
    if num_epochs_int != num_epochs:
      value = (str(num_epochs_int) +
               ', but this epoch only has {}% of the examples of a normal epoch'
               .format(100 * (num_epochs - num_epochs_int)))
      self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)

  def log_input_resize_aspect_preserving(self, height, width, scale_factor):
    """Logs the aspect-preserving resize; `height` must equal `width`."""
    assert height == width, (
        '--ml_perf_compliance_logging does not support models with nonsquare '
        'images. Cannot process image with height=%d and width=%d' %
        (height, width))
    self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
             value={'min': int(height * scale_factor)})

  def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2,
                     examples_per_epoch=118287):
    """Logs an eval-epoch tag.

    For 'resnet50_v1.5' only the tag is logged. For 'ssd300' the tag is logged
    with the epoch computed from the step count. Other models log nothing.

    Args:
      tag: The tag to log.
      global_step: Number of training steps run so far.
      batch_size: Number of examples per step.
      stack_offset: Stack offset of the caller; one is added for this frame.
      examples_per_epoch: Number of examples in one epoch, used only for
        'ssd300'. The default keeps the value that used to be hard-coded
        here (NOTE(review): presumably the SSD training-set size -- confirm).
    """
    if self.model == 'resnet50_v1.5':
      self.log(key=tag, stack_offset=stack_offset+1)
    elif self.model == 'ssd300':
      epoch = int(global_step * batch_size / examples_per_epoch)
      self.log(key=tag, value=epoch, stack_offset=stack_offset+1)

  def log_eval_accuracy(self, accuracy, global_step, batch_size,
                        examples_per_epoch, stack_offset=2):
    """Logs eval accuracy, once keyed by epoch and once keyed by iteration."""
    epoch = int(global_step * batch_size / examples_per_epoch)
    eval_accuracy = {'epoch': epoch, 'value': accuracy}
    eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
    self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
             stack_offset=stack_offset+1)
    self.log(key=tags.EVAL_ITERATION_ACCURACY,
             value=eval_iteration_accuracy,
             stack_offset=stack_offset+1)


def _empty_fn(*args, **kwargs):
  """Accepts any arguments, does nothing and returns None."""
  del args, kwargs


class NullMlPerfLogger(object):
  """A version of `MlPerfLogger` that does not log anything.

  This class has the same interface as `MlPerfLogger`, but does not actually do
  anything. This is used when logging is disabled, which is the default
  behavior.
  """

  def __getattr__(self, item):
    # Any attribute that is not defined explicitly resolves to a no-op.
    return _empty_fn

  def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
    """Returns `tensor_value` unchanged, so callers can keep using it."""
    del key, args, kwargs
    return tensor_value


# A global singleton logger. By default, it's the null logger but can be
# switched to an MlPerfLogger with `mlperf_logger()`.
logger = NullMlPerfLogger()


@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
  """Optionally enable the mlperf logger.

  If `use_mlperf_logger` is True, sets the `logger` global variable to an
  instance of MlPerfLogger that will print logs for MLPerf compliance. If
  `use_mlperf_logger` is False, does nothing. The previous logger is restored
  when the context exits.

  Args:
    use_mlperf_logger: If True, enables the mlperf logger. If False, this
      function does nothing.
    model: The model that will be logged. Required, because different models
      must log different things for MLPerf compliance.

  Yields:
    Nothing.

  Raises:
    ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
      library cannot be imported
  """
  global logger
  if use_mlperf_logger:
    if not import_successful:
      raise ImportError('Failed to import MLPerf compliance library, which is '
                        'required when --ml_perf_compliance_logging is '
                        'specified. Clone this repo and add this directory '
                        'https://github.com/mlperf/training/tree/master/'
                        'compliance to the PYTHONPATH environmental variable.')
    logger_ = MlPerfLogger(model)
    old_logger = logger
    try:
      logger = logger_
      yield
    finally:
      logger = old_logger
  else:
    yield
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /scripts/tf_cnn_benchmarks/all_reduce_benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Benchmarks the all-reduce algorithms of tf_cnn_benchmarks. 16 | 17 | tf_cnn_benchmarks uses all-reduce to aggregate gradients. 
def get_var_shapes(model):
  """Returns the shapes of the trainable variables of a tf_cnn_benchmarks Model.

  The model's network is built once inside a temporary graph, and the shape of
  every trainable variable created there is collected.

  Args:
    model: The model whose network is built. It must provide
      `get_input_shapes` and `build_network`.

  Returns:
    A list with one entry per trainable variable; each entry is that
    variable's shape as a list of ints.
  """
  with tf.Graph().as_default():
    # The variable shapes do not depend on the batch size.
    image_shape = model.get_input_shapes('train')[0]
    image_input = tf.placeholder(tf.float32, image_shape)
    model.build_network([image_input])
    shapes = []
    for variable in tf.trainable_variables():
      shapes.append([int(dim) for dim in variable.shape.dims])
    return shapes
def all_reduce(all_device_tensors, variable_mgr):
  """Runs one batch all-reduce over the tensors of every tower.

  Each tensor is wrapped as a `(tensor, None)` pair before being handed to
  `variable_mgr.preprocess_device_grads`, and the tensors are unwrapped again
  from the aggregated pairs it returns.

  Args:
    all_device_tensors: List of lists of tensors. all_device_tensors[t][i] is
      a tensor, where t is the tower the tensor is on and i is the index of
      the tensor.
    variable_mgr: The VariableMgr to perform the all-reduce.
  Returns:
    List of list of tensors in the same form as `all_device_tensors`, except the
    tensors are aggregated across towers.
  """
  tower_grads = []
  for device_tensors in all_device_tensors:
    tower_grads.append([(tensor, None) for tensor in device_tensors])

  # Only the second element of the returned pair is used here.
  unused_first, aggregated_tower_grads = variable_mgr.preprocess_device_grads(
      tower_grads)

  aggregated_tensors = []
  for device_grads in aggregated_tower_grads:
    aggregated_tensors.append([grad for grad, unused_var in device_grads])
  return aggregated_tensors
def build_all_reduce_iterations(all_device_tensors, tower_devices, variable_mgr,
                                num_iters):
  """Builds the all-reduce ops for multiple iterations to aggregate tensors.

  The tensors in `all_device_tensors` are aggregated `num_iters` times. Each
  iteration aggregates the results from the previous iteration. The iterations
  are run sequentially, so the aggregations for an iteration do not start
  running until the previous iteration has completed. Each iteration after the
  first is aggregating already-aggregated values, but it does not matter because
  we are only aggregating for benchmarking purposes.

  Args:
    all_device_tensors: List of lists of tensors. all_device_tensors[t][i] is
      a tensor, where t is the tower the tensor is on and i is the index of
      the tensor.
    tower_devices: A list of device strings. tower_devices[t] is the device
      of the tensors in all_device_tensors[t].
    variable_mgr: The VariableMgr to perform the all-reduce.
    num_iters: Number of iterations to aggregate tensors for.
  Returns:
    An op that when run, causes the all-reduce ops to run.
  """
  # `all_device_tensors` is rebound on every pass, so each iteration consumes
  # the output of the previous one.
  for i in range(num_iters):
    with tf.name_scope('iteration_%d' % i):
      # Step 1: Do the aggregation.
      with tf.name_scope('tensor_aggregation'):
        all_device_tensors = all_reduce(all_device_tensors, variable_mgr)

      # Step 2. Create identity ops, to bring the aggregated results back to
      # each device.
      new_all_device_tensors = []
      for device, device_tensors in zip(tower_devices, all_device_tensors):
        with tf.device(device):
          new_all_device_tensors.append([
              tf.identity(t, name='identity_after_allreduce')
              for t in device_tensors
          ])
      all_device_tensors = new_all_device_tensors

      # Step 3. Add control dependencies to delay the next iteration until this
      # iteration is complete. To avoid extra overhead, we do not have any
      # cross-device control dependencies, which means it's possible for two
      # iterations to slightly overlap.
      new_all_device_tensors = []
      for device_tensors in all_device_tensors:
        # Every tensor of a tower is given all tensors of that same tower as
        # its dependencies.
        new_all_device_tensors.append([
            control_flow_ops.with_dependencies(
                device_tensors, t, name='identity_after_dependencies')
            for t in device_tensors
        ])
      all_device_tensors = new_all_device_tensors

  # To prevent the dependency optimizer from removing every op we created,
  # we store the results in variables.
  ops_to_run = []
  for device, device_tensors in zip(tower_devices, all_device_tensors):
    with tf.device(device):
      for t in device_tensors:
        # The placeholder initial value is never run.
        # NOTE(review): `collections=[]` presumably keeps these scratch
        # variables out of the global initializer -- confirm.
        var = tf.Variable(tf.placeholder(tf.float32, t.shape), collections=[])
        ops_to_run.append(var.assign(t))
  # A single op that groups every assignment created above.
  return tf.group(*ops_to_run)
def build_graph(tower_devices, tensor_shapes, variable_mgr, num_iters):
  """Builds the graph for the benchmark.

  One float32 variable, initialized from `tf.random_normal`, is created per
  shape on every tower device; the all-reduce iterations are then built on top
  of those variables.

  Args:
    tower_devices: A list of device strings of the devices to run the all-reduce
      benchmark on.
    tensor_shapes: A list of shapes of the tensors that will be aggregated for
      the all-reduce.
    variable_mgr: The VariableMgr to perform the all-reduce.
    num_iters: Number of iterations to aggregate tensors for.
  Returns:
    An op that runs the benchmark.
  """
  all_device_tensors = []
  for device_index, device_name in enumerate(tower_devices):
    with tf.device(device_name):
      all_device_tensors.append([
          tf.Variable(
              tf.random_normal(shape, dtype=tf.float32),
              name='tensor_%d_on_device_%d' % (tensor_index, device_index))
          for tensor_index, shape in enumerate(tensor_shapes)
      ])

  log_fn('Building all-reduce ops')
  benchmark_op = build_all_reduce_iterations(
      all_device_tensors, tower_devices, variable_mgr, num_iters)
  log_fn('Done building all-reduce ops')
  return benchmark_op
def run_graph(benchmark_op, bench_cnn, init_ops, dummy_loss_op):
  """Runs the graph for the benchmark.

  Steps with a negative index are warmup steps. Timing starts when step 0 is
  reached, and the average time per step is logged after the last step.

  Args:
    benchmark_op: An op that runs the benchmark.
    bench_cnn: The BenchmarkCNN where params and other attributes are obtained.
    init_ops: A list of ops that are run before `benchmark_op` for
      initialization.
    dummy_loss_op: Any op. We must pass a loss op to
      `benchmark_cnn.benchmark_one_step`, but the result of the op is never
      actually used.
  """
  config = benchmark_cnn.create_config_proto(bench_cnn.params)
  with tf.Session(config=config) as sess:
    for op in init_ops:
      sess.run(op)
    step_train_times = []
    fetches = {'average_loss': dummy_loss_op, 'benchmark_op': benchmark_op}
    log_fn('Running warmup')
    # Stays None until step 0 is reached. If no timed step ever runs (for
    # example when there are zero batches), there is nothing to average, and
    # the final log below must not touch an unset timer.
    start = None
    for i in range(-bench_cnn.num_warmup_batches, bench_cnn.num_batches):
      if i == 0:
        log_fn('Running all-reduce ops')
        start = time.time()
      if (start is not None and i > 0 and
          i % bench_cnn.params.display_every == 0):
        log_fn('Iteration: %d. Average time per step so far: %s' %
               (i, (time.time() - start) / i))
      # Call benchmark_one_step instead of directly calling sess.run(...), to
      # potentially get a trace file, partitioned graphs, etc.
      benchmark_cnn.benchmark_one_step(
          sess=sess,
          fetches=fetches,
          step=i,
          # The batch size is only used for the images/sec calculation, which is
          # not actually calculated because we pass show_images_per_sec=False.
          batch_size=None,
          step_train_times=step_train_times,
          trace_filename=bench_cnn.trace_filename,
          partitioned_graph_file_prefix=(
              bench_cnn.params.partitioned_graph_file_prefix),
          profiler=None,
          image_producer=None,
          params=bench_cnn.params,
          show_images_per_sec=False)
    if start is None:
      log_fn('No timed steps were run; skipping average time per step')
    else:
      # Step 0 was reached, so num_batches is at least 1 here.
      log_fn('Average time per step: %s' %
             ((time.time() - start) / bench_cnn.num_batches))
def run_benchmark(bench_cnn, num_iters):
  """Runs the all-reduce benchmark.

  Validates the relevant params, builds the benchmark graph from the variable
  shapes of `bench_cnn.model`, optionally writes the GraphDef to
  `bench_cnn.graph_file`, and then runs the graph.

  Args:
    bench_cnn: The BenchmarkCNN where params, the variable manager, and other
      attributes are obtained.
    num_iters: Number of iterations to do all-reduce for.

  Raises:
    ValueError: Invalid params of bench_cnn.
  """
  if bench_cnn.params.variable_update != 'replicated':
    # The trailing space matters: adjacent string literals are concatenated
    # with no separator.
    raise ValueError('--variable_update=replicated must be specified to use '
                     'the all-reduce benchmark')
  if bench_cnn.params.variable_consistency == 'relaxed':
    raise ValueError('--variable_consistency=relaxed is not supported')

  benchmark_op = build_graph(bench_cnn.raw_devices,
                             get_var_shapes(bench_cnn.model),
                             bench_cnn.variable_mgr, num_iters)
  init_ops = [
      tf.global_variables_initializer(),
      bench_cnn.variable_mgr.get_post_init_ops()
  ]
  # run_graph requires a loss op, but its result is never used.
  loss_op = tf.no_op()

  if bench_cnn.graph_file:
    path, filename = os.path.split(bench_cnn.graph_file)
    # A file name ending in 'txt' selects the text format; anything else is
    # written as binary.
    as_text = filename.endswith('txt')
    log_fn('Writing GraphDef as %s to %s' % (
        'text' if as_text else 'binary', bench_cnn.graph_file))
    tf.train.write_graph(tf.get_default_graph().as_graph_def(add_shapes=True),
                         path, filename, as_text)

  run_graph(benchmark_op, bench_cnn, init_ops, loss_op)
# TODO(reedwm): Reduce redundancy with tf_cnn_benchmarks
def main(positional_arguments):
  """Entry point: builds a BenchmarkCNN from the flags and runs the benchmark.

  Args:
    positional_arguments: The positional command-line arguments. Only the
      first element is allowed; anything after it is rejected.

  Raises:
    ValueError: If more than one positional argument is given.
  """
  # Command-line arguments like '--distortions False' are equivalent to
  # '--distortions=True False', where False is a positional argument. To prevent
  # this from silently running with distortions, we do not allow positional
  # arguments.
  assert len(positional_arguments) >= 1
  unexpected_arguments = positional_arguments[1:]
  if unexpected_arguments:
    raise ValueError('Received unknown positional arguments: %s'
                     % unexpected_arguments)

  flag_params = benchmark_cnn.make_params_from_flags()
  bench = benchmark_cnn.BenchmarkCNN(benchmark_cnn.setup(flag_params))

  version = cnn_util.tensorflow_version_tuple()
  major, minor = version[0], version[1]
  log_fn('TensorFlow: %i.%i' % (major, minor))

  run_benchmark(bench, absl_flags.FLAGS.iters_per_step)


if __name__ == '__main__':
  app.run(main)  # Raises error on invalid flags, unlike tf.app.run()