├── .gitignore ├── LICENSE ├── README.md ├── cntk └── retrain.py ├── image_set_preparation.ipynb ├── img ├── data_overview │ ├── mediumnaip_white.png │ ├── mediumnlcd.png │ └── middlesex_ma.png ├── extraction │ ├── common_naip_tiled.png │ ├── common_points.png │ ├── common_tiled_only.png │ └── sample_tile.png ├── middlesex │ ├── 20655.png │ ├── 20655_small.png │ ├── 33308.png │ ├── 33308_small.png │ ├── 36083.png │ ├── 37002.png │ ├── 47331.png │ ├── true_and_predicted_labels.png │ └── true_and_predicted_labels_smoothened.png ├── scoring │ ├── balanced_cm.pdf │ ├── balanced_cm.png │ ├── balanced_cm_small.png │ └── scaling.png └── spark_adls_provisioning │ ├── ambari_configs_tab.GIF │ ├── ambari_custom_spark2_defaults.GIF │ ├── ambari_spark2.GIF │ ├── cluster_type_settings.GIF │ ├── create.GIF │ ├── new_resource_button.GIF │ ├── resource_search_box.GIF │ ├── resource_search_box_adls.GIF │ ├── resource_search_result.GIF │ ├── resource_search_result_adls.GIF │ ├── spark_basics_screenshot.GIF │ └── spark_basics_screenshot2.GIF ├── land_use_prediction.md ├── model_training.ipynb ├── scoring └── script_action.sh ├── scoring_on_spark.ipynb └── tf ├── deployment ├── __init__.py ├── model_deploy.py └── model_deploy_test.py ├── nets ├── __init__.py ├── nets_factory.py ├── nets_factory_test.py ├── resnet_utils.py ├── resnet_v1.py ├── resnet_v1_test.py ├── resnet_v2.py ├── resnet_v2_test.py ├── vgg.py └── vgg_test.py └── retrain.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | 23 | ========================================================================== 24 | 25 | Copyright 2017 Microsoft Corporation. All Rights Reserved. 26 | 27 | Licensed under the Apache License, Version 2.0 (the "License"); 28 | you may not use this file except in compliance with the License. 29 | You may obtain a copy of the License at 30 | 31 | http://www.apache.org/licenses/LICENSE-2.0 32 | 33 | Unless required by applicable law or agreed to in writing, software 34 | distributed under the License is distributed on an "AS IS" BASIS, 35 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | See the License for the specific language governing permissions and 37 | limitations under the License. 38 | ========================================================================== -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Embarrassingly Parallel Image Classification: Inferring Land Use from Aerial Imagery 2 | 3 | ## Introduction 4 | 5 | Deep neural networks (DNNs) are extraordinarily versatile artificial intelligence models that have achieved widespread use over the last five years. These neural networks excel at automated feature creation and processing of complex data types like images, audio, and free-form text. Common business use cases for DNNs include: 6 | 7 | - Determining whether an uploaded video, audio, or text file contains inappropriate content 8 | - Inferring a user's intent from their spoken or typed input 9 | - Identifying objects or persons in a still image 10 | - Translating speech or text between languages or modalities 11 | 12 | Unfortunately, DNNs are also among the most time- and resource-intensive machine learning models. Whereas a trained linear regression model results can typically score input in negligible time, applying a DNN to a single file of interest may take hundreds or thousands of milliseconds -- a processing rate insufficient for some business needs. Fortunately, DNNs can be applied in parallel and scalable fashion when evaluation is performed on Spark clusters. 
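The scoring pattern used throughout this guide is sketched below. This is a minimal illustration rather than the repository's exact code: it assumes a PySpark cluster whose worker nodes have CNTK, NumPy, and Pillow installed, that a retrained CNTK model has been copied to a local path on each worker (the `MODEL_PATH` and image folder values are illustrative), and that the images are already 224 x 224 pixel tiles.

```python
import io
import numpy as np
from PIL import Image
from pyspark import SparkContext

MODEL_PATH = '/tmp/models/retrained.model'    # illustrative local path on each worker

def score_partition(records):
    import cntk                               # import on the worker, not the driver
    model = cntk.load_model(MODEL_PATH)       # load the DNN once per partition
    for path, payload in records:
        img = Image.open(io.BytesIO(payload)).convert('RGB')
        arr = np.transpose(np.asarray(img, dtype=np.float32), (2, 0, 1))  # HWC -> CHW
        probs = np.squeeze(model.eval({model.arguments[0]: [arr]}))
        yield path, int(np.argmax(probs))     # index of the predicted land use class

sc = SparkContext.getOrCreate()
image_bytes = sc.binaryFiles('adl:///aerial_images')   # (filename, bytes) pairs
predictions = image_bytes.mapPartitions(score_partition).collect()
```

Because each image is scored independently, throughput scales roughly with the number of Spark worker nodes available.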
13 | 14 | This repository demonstrates how trained DNNs produced with two common deep learning frameworks, Microsoft's [Cognitive Toolkit (CNTK)](https://github.com/Microsoft/CNTK/wiki) and Google's [TensorFlow](https://github.com/tensorflow/tensorflow), can be operationalized on Spark to score a large image set. Files stored on [Azure Data Lake Store](https://azure.microsoft.com/en-us/services/data-lake-store/), Microsoft's HDFS-based cloud storage resource, are processed in parallel by workers on the Spark cluster. The guide follows a specific example use case: land use classification from aerial imagery. 15 | 16 | ## Fast start 17 | 18 | To get started right away, 19 | * Follow the instructions in the [Image Set Preparation](./image_set_preparation.ipynb) notebook to generate the training and validation datasets. 20 | * If you will use our provided image sets, you only need to complete the "Prepare an Azure Data Science Virtual Machine for image extraction" and "Dataset preparation for deep learning" sections. 21 | * If you seek a CNTK Spark operationalization example that doesn't require image set preparation or VM deployment, you may prefer [this walkthrough](https://github.com/Azure-Samples/hdinsight-pyspark-cntk-integration) instead. A brief description of the technique is included in [this blog post](https://blogs.technet.microsoft.com/machinelearning/2017/04/25/using-microsofts-deep-learning-toolkit-with-spark-on-azure-hdinsight-clusters/). 22 | * If you want to retrain an image classification DNN using transfer learning, complete the [Model Training](./model_training.ipynb) notebook. 23 | * You can skip this step if you choose to use our example DNNs. 24 | * If you want to operationalize trained DNNs on Spark, complete the [Scoring on Spark](./scoring_on_spark.ipynb) notebook. 25 | * If you want to learn how the retrained DNN can be used to study urban development trends, see the [Middlesex County Land Use Prediction](./land_use_prediction.md) page. 26 | * For the motivation and summary of our work, see below. 27 | 28 | ## Land use classification from aerial imagery 29 | 30 | In this guide, we develop a classifier that can predict how a parcel of land has been used -- e.g., whether it is developed, cultivated, forested, etc. -- from an aerial image. We apply the classifier to track recent land development in Middlesex County, MA: the home of Microsoft's New England Research and Development (NERD) Center. Aerial image classification has many important applications in industry and government, including: 31 | - Enforcing tax codes (cf. [identification of home pools in Greece](http://www.nytimes.com/2010/05/02/world/europe/02evasion.html)) 32 | - Monitoring agricultural crop performance 33 | - Quantifying the impact of climate change on natural resources 34 | - Property value estimation and feature tracking for marketing purposes 35 | - Geopolitical surveillance 36 | 37 | This use case was chosen because sample images and ground-truth labels are available in abundance. We use aerial imagery provided by the U.S. [National Agriculture Imagery Program](https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/), and land use labels from the [National Land Cover Database](https://www.mrlc.gov/). NLCD labels are published roughly every five years, while NAIP data are collected more frequently: we were able to apply our land use classification DNN to images collected five years after the most recent training data available.
For more information on dataset creation, please see the [Image Set Preparation](./image_set_preparation.ipynb) Jupyter notebook. 38 | 39 | ## Model training and validation 40 | 41 | We applied transfer learning to retrain the final layers of existing TensorFlow ([ResNet](https://github.com/tensorflow/models/tree/master/slim)) and CNTK ([AlexNet](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN)) models for classification of 1-meter resolution NAIP aerial images of 224 meter x 224 meter regions selected from across the United States. Retraining was performed on [Azure N-Series GPU VMs](http://gpu.azure.com/) with the [Deep Learning Toolkit](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/microsoft-ads.dsvm-deep-learning) pre-installed. We created balanced training and validation sets containing aerial images in six major land use categories (Developed, Cultivated, Forest, Shrub, Barren, and Herbaceous) from non-neighboring counties and collection years. For more information on model creation, please see the [Model Training](./model_training.ipynb) Jupyter notebook. 42 | 43 | We used Spark to apply the trained CNTK and TensorFlow models to the 11,760 images in the validation set. Spreading the scoring task across multiple worker nodes allowed us to decrease the total time required to under one minute: 44 | 45 | 46 | 47 | Our retrained models achieved an overall classification accuracy of ~80% on these six categories, with the majority of errors occurring between different types of undeveloped land (see the confusion matrix for the CNTK model's predictions, below): 48 | 49 | 50 | 51 | For a subsequent application -- identifying and quantifying recently-developed land -- we further grouped these land use labels into "Developed," "Cultivated," and "Undeveloped" classes. Our model's overall accuracy at predicting these higher-level labels was roughly 95% in our validation set. For more information on model validation on Spark, see the [Scoring on Spark](./scoring_on_spark.ipynb) Jupyter notebook. 52 | 53 | ## Inferring recent land development 54 | 55 | The trained land use models were applied to 2016 aerial images tiling Middlesex County. The predicted 2016 labels were then compared to the ground-truth 2011 labels to identify putative regions of recent development: such an application may be useful for regulatory bodies seeking to automatically identify new structures or cultivated land in remote locations. Example results (with surrounding tiles for context) are included below: 56 | 57 | 58 | 59 | 60 | Development could also be visualized and quantified at the county level. In the figure below, regions classified as developed land are represented by red pixels, cultivated land by white pixels, and undeveloped land by green pixels. 61 | 62 | 63 | 64 | The predicted land classes largely matched the true 2011 labels. Unfortunately, noisy year-to-year variation (likely reflecting differences in coloration and vegetation) were too large in magnitude to quantify general trends in development. 65 | 66 | For more information on inferring recent land development with our trained DNNs, please see the [Middlesex County Land Use Prediction](./land_use_prediction.md) page. 67 | 68 | ## Contributing and Adapting 69 | 70 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 71 | 72 | The code in this repository is shared under the [MIT and Apache licenses](./LICENSE) included in this directory. Some TensorFlow scripts have been adapted from the [TensorFlow Models repository's slim](https://github.com/tensorflow/models/tree/master/slim) subdirectory (indicated where applicable). Cognitive Toolkit (CNTK) scripts for network definition and training have been adapted from the [CIFAR-10 Image Classification](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/ResNet/Python) example. 73 | -------------------------------------------------------------------------------- /cntk/retrain.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | # 3 | # Modified by Mary Wahl from work by Patrick Buehler, cf. 4 | # https://github.com/Microsoft/CNTK/blob/master/Examples/Image/Detection/FastRCNN/A2_RunCntk_py3.py 5 | # 6 | # Licensed under the MIT license. See LICENSE.md file in the project root 7 | # for full license information. 8 | # ============================================================================== 9 | 10 | import cntk.io.transforms as xforms 11 | from cntk.train.training_session import CheckpointConfig, training_session 12 | import numpy as np 13 | import os, sys, argparse, cntk 14 | from PIL import Image 15 | 16 | def create_reader(map_filename, image_height, image_width, num_channels, 17 | num_classes): 18 | transforms = [xforms.crop(crop_type='randomside', 19 | side_ratio=0.85, 20 | jitter_type='uniratio'), 21 | xforms.scale(width=image_width, 22 | height=image_height, 23 | channels=num_channels, 24 | interpolations='linear'), 25 | xforms.color(brightness_radius=0.2, 26 | contrast_radius=0.2, 27 | saturation_radius=0.2)] 28 | return(cntk.io.MinibatchSource( 29 | cntk.io.ImageDeserializer(map_filename, cntk.io.StreamDefs( 30 | features=cntk.io.StreamDef( 31 | field='image', transforms=transforms, is_sparse=False), 32 | labels=cntk.io.StreamDef( 33 | field='label', shape=num_classes, is_sparse=False))))) 34 | 35 | def modify_model(pretrained_model_filename, features, num_classes): 36 | loaded_model = cntk.load_model(pretrained_model_filename) 37 | feature_node = cntk.logging.graph.find_by_name(loaded_model, 'features') 38 | last_node = cntk.logging.graph.find_by_name(loaded_model, 'h2_d') 39 | all_layers = cntk.ops.combine([last_node.owner]).clone( 40 | cntk.ops.functions.CloneMethod.freeze, 41 | {feature_node: cntk.ops.placeholder()}) 42 | 43 | feat_norm = features - cntk.layers.Constant(114) 44 | fc_out = all_layers(feat_norm) 45 | z = cntk.layers.Dense(num_classes)(fc_out) 46 | 47 | return(z) 48 | 49 | def main(map_filename, output_dir, pretrained_model_filename): 50 | ''' Retrain and save the existing AlexNet model ''' 51 | num_epochs = 50 52 | mb_size = 16 53 | 54 | # Find the number of classes and the number of samples per epoch 55 | labels = set([]) 56 | epoch_size = 0 57 | with open(map_filename, 'r') as f: 58 | for line in f: 59 | labels.add(line.strip().split('\t')[1]) 60 | epoch_size += 1 61 | sample_image_filename = line.strip().split('\t')[0] 62 | num_classes = len(labels) 63 | num_minibatches = int(epoch_size // mb_size) 64 | 65 | # find the typical image dimensions 66 | image_height, image_width, num_channels = np.asarray( 67 | 
Image.open(sample_image_filename)).shape 68 | assert num_channels == 3, 'Expected to find images with 3 color channels' 69 | assert (image_height == 224) and (image_width == 224), \ 70 | 'Expected to find images of size 224 pixels x 224 pixels' 71 | 72 | # Create the minibatch source 73 | minibatch_source = create_reader(map_filename, image_height, image_width, 74 | num_channels, num_classes) 75 | 76 | # Input variables denoting features, rois and label data 77 | image_input = cntk.ops.input_variable( 78 | (num_channels, image_height, image_width)) 79 | label_input = cntk.ops.input_variable((num_classes)) 80 | 81 | # define mapping from reader streams to network inputs 82 | input_map = {image_input: minibatch_source.streams.features, 83 | label_input: minibatch_source.streams.labels} 84 | 85 | # Instantiate the Fast R-CNN prediction model and loss function 86 | model = modify_model(pretrained_model_filename, image_input, num_classes) 87 | ce = cntk.losses.cross_entropy_with_softmax(model, label_input) 88 | pe = cntk.metrics.classification_error(model, label_input) 89 | 90 | # Set learning parameters 91 | l2_reg_weight = 0.0005 92 | lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001] 93 | momentum_time_constant = 10 94 | lr_schedule = cntk.learners.learning_rate_schedule(lr_per_sample, 95 | unit=cntk.UnitType.sample) 96 | mm_schedule = cntk.learners.momentum_as_time_constant_schedule( 97 | momentum_time_constant) 98 | 99 | # Instantiate the trainer object 100 | progress_writers = [cntk.logging.progress_print.ProgressPrinter( 101 | tag='Training', 102 | num_epochs=num_epochs, 103 | freq=num_minibatches)] 104 | learner = cntk.learners.momentum_sgd(model.parameters, 105 | lr_schedule, 106 | mm_schedule, 107 | l2_regularization_weight=l2_reg_weight) 108 | trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers) 109 | 110 | # Perform retraining and save the resulting model 111 | cntk.logging.progress_print.log_number_of_parameters(model) 112 | training_session( 113 | trainer=trainer, 114 | max_samples=num_epochs*epoch_size, 115 | mb_source=minibatch_source, 116 | mb_size=mb_size, 117 | model_inputs_to_streams=input_map, 118 | checkpoint_config=CheckpointConfig( 119 | frequency=epoch_size, 120 | filename=os.path.join(output_dir, 121 | 'retrained_checkpoint.model')), 122 | progress_frequency=epoch_size 123 | ).train() 124 | model.save(os.path.join(output_dir, 'retrained.model')) 125 | return 126 | 127 | if __name__ == '__main__': 128 | parser = argparse.ArgumentParser(description=''' 129 | Retrains a pretrained Alexnet model to label aerial images according to land 130 | use. 
131 | ''') 132 | parser.add_argument('-i', '--input_map_file', type=str, required=True, 133 | help='MAP file listing training images and labels.') 134 | parser.add_argument('-o', '--output_dir', 135 | type=str, required=True, 136 | help='Output directory where model will be saved.') 137 | parser.add_argument('-p', '--pretrained_model_filename', 138 | type=str, required=True, 139 | help='Filepath of the pretrained AlexNet model.') 140 | args = parser.parse_args() 141 | 142 | # Ensure argument values are acceptable before proceeding 143 | assert os.path.exists(args.input_map_file), \ 144 | 'Input MAP file {} does not exist'.format(args.input_map_file) 145 | os.makedirs(args.output_dir, exist_ok=True) 146 | assert os.path.exists(args.pretrained_model_filename), \ 147 | 'Model file {} does not exist'.format(args.pretrained_model_filename) 148 | 149 | main(args.input_map_file, args.output_dir, args.pretrained_model_filename) -------------------------------------------------------------------------------- /img/data_overview/mediumnaip_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/data_overview/mediumnaip_white.png -------------------------------------------------------------------------------- /img/data_overview/mediumnlcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/data_overview/mediumnlcd.png -------------------------------------------------------------------------------- /img/data_overview/middlesex_ma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/data_overview/middlesex_ma.png -------------------------------------------------------------------------------- /img/extraction/common_naip_tiled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/common_naip_tiled.png -------------------------------------------------------------------------------- /img/extraction/common_points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/common_points.png -------------------------------------------------------------------------------- /img/extraction/common_tiled_only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/common_tiled_only.png -------------------------------------------------------------------------------- /img/extraction/sample_tile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/sample_tile.png -------------------------------------------------------------------------------- 
/img/middlesex/20655.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/20655.png -------------------------------------------------------------------------------- /img/middlesex/20655_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/20655_small.png -------------------------------------------------------------------------------- /img/middlesex/33308.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/33308.png -------------------------------------------------------------------------------- /img/middlesex/33308_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/33308_small.png -------------------------------------------------------------------------------- /img/middlesex/36083.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/36083.png -------------------------------------------------------------------------------- /img/middlesex/37002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/37002.png -------------------------------------------------------------------------------- /img/middlesex/47331.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/47331.png -------------------------------------------------------------------------------- /img/middlesex/true_and_predicted_labels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/true_and_predicted_labels.png -------------------------------------------------------------------------------- /img/middlesex/true_and_predicted_labels_smoothened.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/true_and_predicted_labels_smoothened.png -------------------------------------------------------------------------------- /img/scoring/balanced_cm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/balanced_cm.pdf -------------------------------------------------------------------------------- /img/scoring/balanced_cm.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/balanced_cm.png -------------------------------------------------------------------------------- /img/scoring/balanced_cm_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/balanced_cm_small.png -------------------------------------------------------------------------------- /img/scoring/scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/scaling.png -------------------------------------------------------------------------------- /img/spark_adls_provisioning/ambari_configs_tab.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/ambari_configs_tab.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/ambari_custom_spark2_defaults.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/ambari_custom_spark2_defaults.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/ambari_spark2.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/ambari_spark2.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/cluster_type_settings.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/cluster_type_settings.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/create.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/create.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/new_resource_button.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/new_resource_button.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/resource_search_box.GIF: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_box.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/resource_search_box_adls.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_box_adls.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/resource_search_result.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_result.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/resource_search_result_adls.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_result_adls.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/spark_basics_screenshot.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/spark_basics_screenshot.GIF -------------------------------------------------------------------------------- /img/spark_adls_provisioning/spark_basics_screenshot2.GIF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/spark_basics_screenshot2.GIF -------------------------------------------------------------------------------- /land_use_prediction.md: -------------------------------------------------------------------------------- 1 | # Middlesex County Land Use Prediction 2 | 3 | This notebook illustrates how trained Cognitive Toolkit (CNTK) and TensorFlow models can be applied to predict current land usage from recent aerial imagery. For more detail on image set creation, model training, and Spark cluster deployment, please see the rest of the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository. 4 | 5 | 6 | 7 | ## Image preparation and labeling 8 | 9 | We have used National Land Cover Database (NLCD) data for our ground truth labels during model training and evaluation. The most recent NLCD dataset was published in 2011, but aerial images from the National Agriculture Imagery Program (NAIP) are available for 2016. Our trained models therefore allow us to bridge a five-year data gap by predicting land use in 2016. 
10 | 11 | To demonstrate this approach, we extracted a set of 65,563 images tiling Middlesex County, MA (home to Microsoft's New England Research and Development Center) at one-meter resolution from 2010 and 2016 NAIP data as [described previously](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/image_set_preparation.ipynb). Note that unlike the image set used in training and evaluation, some of these images have ambiguous land use types: for example, they may depict the boundary between a forest and developed land. These images were then scored with [trained CNTK and TensorFlow land use classification models](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/model_training.ipynb) applied in [parallel fashion using Spark](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb). Both models performed similarly; results for the CNTK model are shown. 12 | 13 | For those unfamiliar with the region, we include below an aerial view of an 80 km x 70 km region covering the county. The Greater Boston Area is centered along the ESE border of the county and extends through all but the northernmost regions. 14 | 15 | 16 | ## Visualizing land use 17 | 18 | To visualize the results, we represent the labels of each 224 m x 224 m tile with a single color-coded pixel: 19 | - Red represents developed regions (NLCD codes 21-24; see [legend](https://www.mrlc.gov/nlcd11_leg.php)) 20 | - White represents cultivated regions (NLCD codes 81-82) 21 | - Green represents undeveloped and uncultivated regions (all other NLCD codes) 22 | 23 | Below left, the plurality NLCD 2011 label is shown for each tile. (NLCD data is provided at 30-meter resolution, so any tile may contain multiple land use labels.) The predicted labels for each tile in 2010 (most directly comparable to the NLCD labels) and 2016 (most recent available) are shown at center and right, respectively. 24 | 25 | 26 | 27 | We found a striking correspondence between true and predicted labels at both timepoints. The classification error for 2010 predictions (the most contemporary image set for the ground-truth 2011 labels) was ~4%. An uptick in the fraction of developed land was observed between 2010 and 2016 (see table below), but we believe this change is attributable in large part to the impact of image coloration and vegetation differences (e.g., browning in drought conditions) on labeling. Some systematic errors are noticeable in the predictions, including the apparent mislabeling of some highways as cultivated land (white lines in the 2016 image). 28 | 29 | | |No. developed tiles (%) |No. cultivated tiles (%) |No. undeveloped tiles (%) | 30 | |--- |--- |--- |--- | 31 | |2010 predicted labels |27,584 (42.1%) |941 (1.4%) |37,038 (56.4%) | 32 | |NLCD 2011 labels |28,537 (43.5%) |2,337 (3.6%) |34,689 (52.9%) | 33 | |2016 predicted labels |28,911 (44.1%) |4,011 (6.1%) |32,641 (49.8%) | 34 | 35 | For the purposes of mapping and quantifying land use, it may be preferable to discount isolated patches of differing land use. For example, an urban park may not be considered undeveloped land for the purposes of habitat conservation, and construction of a rural homestead may not indicate substantial development in an otherwise cultivated region. We note that isolated tiles of land use can be removed by applying a 3x3 plurality-voting filter (with added weight for the center tile's own predicted label) to the raw predictions.
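One possible implementation of this smoothing step is sketched below; it is an illustration under assumptions rather than the code used for the figures here. It assumes the raw tile-level predictions have already been arranged in a 2D integer array (e.g. 0 = developed, 1 = cultivated, 2 = undeveloped), and the extra weight given to the center tile is an arbitrary illustrative choice.

```python
import numpy as np
from scipy.ndimage import generic_filter

CENTER_WEIGHT = 2.0   # illustrative; ties are broken in favor of the tile's own label

def plurality_vote(window):
    # generic_filter passes the 3x3 neighborhood as a flat 9-element array;
    # index 4 is the center tile.
    weights = np.ones(9)
    weights[4] = CENTER_WEIGHT
    counts = np.bincount(window.astype(int), weights=weights, minlength=3)
    return np.argmax(counts)

def smooth_labels(label_grid):
    """Replace each tile's label with the weighted plurality vote of its 3x3 neighborhood."""
    return generic_filter(label_grid, plurality_vote, size=3, mode='nearest')
```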
The results of such a smoothing operation are shown below: 36 | 37 | 38 | 39 | After smoothing, the classification error for predictions on 2010 images was reduced from 20% to 17%. 40 | 41 | ## Identifying newly developed regions 42 | 43 | The ability to programmatically identify new development and cultivation in remote areas may be useful to government agencies that regulate housing and commerce, e.g. to identify tax evasion or enforce land use regulations. Roughly 400 regions of putative new development were identified in Middlesex County based on a change in their label from "Undeveloped" in 2011 to "Developed" in our 2016 predictions. A few examples (including bordering tiles for context) are shown below: 44 | 45 | 46 | 47 | 48 | 49 | In some cases, our land use classifier was sensitive enough to identify the development of single properties within a tile: 50 | 51 | 52 | 53 | 54 | A visual comparison of the ~400 candidate tiles in 2010 vs. 2016 NAIP images reveals that roughly one-third have truly been developed; the false positives may reflect differences in lighting and drought conditions between the 2016 images and the training data. -------------------------------------------------------------------------------- /model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Producing CNTK and TensorFlow models for image classification\n", 8 | "\n", 9 | "In this notebook, we illustrate how one can produce residual networks (ResNets) to classify aerial images based on land use type (developed, forested, cultivated, etc.). We apply transfer learning with Microsoft Cognitive Toolkit (CNTK) and TensorFlow (TF) to adapt pretrained models for our classification use case. The CNTK and TF sections of this notebook can be completed in either order, or even concurrently.\n", 10 | "\n", 11 | "This notebook is part of the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) git repository. It assumes that a dataset and Azure N-series GPU VM have already been created for model training as described in the previous [Image Set Preparation](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/image_set_preparation.ipynb) notebook. Note that an abbreviated instruction set is mentioned in that notebook for users who would like to employ our sample image set rather than generating their own.\n", 12 | "\n", 13 | "For instructions on applying the trained models to large image sets using Spark, see the [Scoring on Spark](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb) notebook. It is not necessary to complete this notebook before proceeding to [Scoring on Spark](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb), as we have provided sample retrained DNNs for your use." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## Outline\n", 21 | "- [Prepare the VM and training data](#input)\n", 22 | "- [Clone or download this repository](#repo)\n", 23 | "- [Retrain an AlexNet with Microsoft Cognitive Toolkit (CNTK)](#cntk)\n", 24 | " - [Download the pretrained model](#alexnet)\n", 25 | " - [Update and run the training script](#cntkrun)\n", 26 | "- [Retrain a pretrained ResNet with TensorFlow](#tensorflow)\n", 27 | " - [Download a pretrained model](#tfmodel)\n", 28 | " - [Run the training script](#tfrun)\n", 29 | "- [Next Steps](#nextsteps)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "\n", 37 | "## Prepare the VM and training data\n", 38 | "\n", 39 | "If you have not done so already, please complete the instructions in the [Image Set Preparation](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/image_set_preparation.ipynb) notebook to prepare an Azure Data Science VM with the Deep Learning Toolkit and the necessary training data for this tutorial. Note that if you will use our provided training and validation images, it is sufficient to complete the \"Prepare an Azure Data Science Virtual Machine for image extraction\" and \"Dataset preparation for deep learning\" sections." 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "\n", 47 | "## Clone or download this repository\n", 48 | "\n", 49 | "This repository ([Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification)) contains Python scripts that will be referenced by the code cells below. Clone or download/decompress the repository's contents to a directory on your Azure GPU VM and make note of the path." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "\n", 57 | "## Retrain an AlexNet with Microsoft Cognitive Toolkit (CNTK)\n", 58 | "\n", 59 | "At the time of this writing, the Windows 2016 DSVM comes pre-installed with CNTK 2.0. The CNTK code in this repo is therefore designed for version 2.0, and has not been tested with more recent CNTK versions. You can use the code cell below to check when CNTK version has been installed on your DSVM:" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "import cntk\n", 71 | "print(cntk.__version__)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "\n", 79 | "### Download the pretrained model\n", 80 | "You will need to download [the pretrained AlexNet model](https://mawahstorage.blob.core.windows.net/aerialimageclassification/models/AlexNet_cntk2beta15.model) and save the file to a new directory on your temporary storage drive, `D:\\models`." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "\n", 88 | "### Update and run the training script\n", 89 | "The `retrain.py` script in the `cntk` subfolder of this repo can be used to retrain an AlexNet for aerial image classification. The script is adapted from the [Object Detection using Fast-R-CNN](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN) example in the [CNTK](https://github.com/Microsoft/CNTK) repository. 
This code has been written for single-GPU training: if using a multi-GPU VM, see the [CNTK ResNet/CIFAR10 image classification](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/ResNet/Python) use case for example code illustrating distributed training.\n", 90 | "\n", 91 | "Run the `retrain.py` script in the `cntk` subfolder from an Anaconda prompt as follows:" 92 | ] 93 | }, 94 | { 95 | "cell_type": "raw", 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "source": [ 100 | "activate py35\n", 101 | "python \\retrain.py --input_map_file D:\\balanced_training_set\\map.txt --output_dir D:\\retrained_models --pretrained_model_file D:\\models\\AlexNet_cntk2beta15.model" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "The training script will load the pretrained AlexNet model, removing the final layer and freezing the weights in all retained layer. A transfer learning model is then created by subtracting an approximate mean value from the RGB channels of the input image, applying the frozen retained layers of AlexNet, and finally applying a dense, trainable last layer. The transfer learning model's output label is given by the index of the maximally-activated node in the final layer, which can be converted to a descriptive string using the mapping in `D:\\balanced_training_set\\labels.txt` (created previously by the image set preparation notebook).\n", 109 | "\n", 110 | "The training script applies several transforms when each minibatch's images are loaded, including a random crop/rescaling and random colorization. These transforms generate variety in the input set, limiting the degree of overfitting.\n", 111 | "\n", 112 | "For details of the model evaluation process, please see the scoring notebook in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "\n", 120 | "## Retrain a pretrained ResNet with TensorFlow\n", 121 | "\n", 122 | "We made use of the [`tf-slim` API](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim) for TensorFlow, which provides pre-trained ResNet models and helpful scripts for retraining and scoring. During training set preparation, we created the [TFRecords](https://www.tensorflow.org/how_tos/reading_data/#file_formats) that the training script will use as input. For more details on the training data, please see the image preparation notebook in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository. \n", 123 | "\n", 124 | "Our retraining script, `retrain.py` in the `tf` folder of [this repository](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification), is a modified version of `train_image_classifier.py` from the [TensorFlow models repo's slim subdirectory](https://github.com/tensorflow/models/tree/master/slim)." 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "\n", 132 | "### Download a pretrained model\n", 133 | "\n", 134 | "We obtained a 50-layer ResNet pretrained on ImageNet from a link in the [TensorFlow models repo's slim subdirectory](https://github.com/tensorflow/models/tree/master/slim). The pretrained model can be obtained and unpacked with the code snippet below. 
Note that if you have not already done so, you will first need to [download or clone this repo](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification), then update the variable name `repo_dir` below to point to the repo's root folder." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 1, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "import urllib.request\n", 146 | "import tarfile\n", 147 | "import os\n", 148 | "\n", 149 | "# Change this directory to point to the location where you downloaded or cloned this git repo\n", 150 | "repo_dir = 'C:\\\\dsvm\\\\notebooks'\n", 151 | "\n", 152 | "os.makedirs(os.path.join(repo_dir, 'tf'), exist_ok=True)\n", 153 | "urllib.request.urlretrieve('http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz',\n", 154 | " os.path.join(repo_dir, 'tf', 'resnet_v1_50_2016_08_28.tar.gz'))\n", 155 | "with tarfile.open(os.path.join(repo_dir, 'tf', 'resnet_v1_50_2016_08_28.tar.gz'), 'r:gz') as f:\n", 156 | " f.extractall(path=os.path.join(repo_dir, 'tf'))\n", 157 | "os.remove(os.path.join(repo_dir, 'tf', 'resnet_v1_50_2016_08_28.tar.gz'))" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "\n", 165 | "### Run the training script\n", 166 | "\n", 167 | "We recommend that you run the training script from an Anaconda prompt. The code cell below will help you generate the appropriate command based on your file locations." 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "activate py35\n", 182 | "python D:\\repo\\tf\\retrain.py --train_dir=D:\\repo\\tf\\models --dataset_name=aerial --dataset_split_name=train --dataset_dir=D:\\balanced_training_set --checkpoint_path=D:\\repo\\tf\\resnet_v1_50.ckpt\n", 183 | "\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "# path where retrained model and logs will be saved during training\n", 189 | "train_dir = os.path.join(repo_dir, 'tf', 'models')\n", 190 | "if not os.path.exists(train_dir):\n", 191 | " os.makedirs(train_dir)\n", 192 | " \n", 193 | "# location of the unpacked pretrained model\n", 194 | "checkpoint_path = os.path.join(repo_dir, 'tf', 'resnet_v1_50.ckpt')\n", 195 | "\n", 196 | "# Location of the TFRecords and other files generated during image set preparation\n", 197 | "training_image_dir = 'D:\\\\balanced_training_set'\n", 198 | "\n", 199 | "command = '''activate py35\n", 200 | "python {0} --train_dir={1} --dataset_name=aerial --dataset_split_name=train --dataset_dir={2} --checkpoint_path={3}\n", 201 | "'''.format(os.path.join(repo_dir, 'tf', 'retrain.py'),\n", 202 | " train_dir,\n", 203 | " training_image_dir,\n", 204 | " checkpoint_path)\n", 205 | "\n", 206 | "print(command)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "The training script will load the pretrained ResNet model, freezing the weights for all but the final logits layer. The transfer learning model's output label is taken to be the index of the maximally-activated node in the final layer.\n", 214 | "\n", 215 | "The training script applies several transforms when each minibatch's images are loaded, including subtracting an approximation of the mean values for each channel (red, blue, and green) and randomly cropping/colorizing the image. 
These transforms generate variety in the input set, limiting the degree of overfitting.\n", 216 | "\n", 217 | "For details of the model evaluation process, please see the scoring notebook in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository." 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "\n", 225 | "## Next Steps\n", 226 | "\n", 227 | "Each training step above should take under one hour when performed alone. Please note that the apparent performance of your retrained models on the training set may be significantly better than the models' performance on the independent validation set of images. (We saw ~6% and ~20% classification error on the training set and validation sets, respectively.)\n", 228 | "\n", 229 | "For details on evaluating the trained models, please see the [Scoring on Spark notebook](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb) in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository. Note that you can proceed using our provided sample retrained DNNs if you prefer not to wait for model training to complete." 230 | ] 231 | } 232 | ], 233 | "metadata": { 234 | "anaconda-cloud": {}, 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.6.0" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 1 255 | } 256 | -------------------------------------------------------------------------------- /scoring/script_action.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This install script generously shared by Miruna Oprescu 4 | # (then lightly modified by Mary Wahl), Microsoft Corporation, 2017 5 | 6 | cntk_home="/usr/hdp/current" 7 | cd $cntk_home 8 | curl "https://cntk.ai/BinaryDrop/CNTK-2-1-Linux-64bit-CPU-Only.tar.gz" | tar xzf - 9 | cd ./cntk/Scripts/install/linux 10 | sed -i "s#"ANACONDA_PREFIX=\"\$HOME/anaconda3\""#"ANACONDA_PREFIX=\"\/usr/bin/anaconda\""#g" install-cntk.sh 11 | sed -i "s#"\$HOME/anaconda3"#"\$ANACONDA_PREFIX"#g" install-cntk.sh 12 | ./install-cntk.sh --py-version 35 13 | 14 | sudo /usr/bin/anaconda/envs/cntk-py35/bin/pip install pillow 15 | sudo /usr/bin/anaconda/envs/cntk-py35/bin/pip install tensorflow 16 | 17 | sudo mkdir /tmp/models 18 | cd /tmp/models 19 | wget https://mawahstorage.blob.core.windows.net/models/20170906/tf.zip -P /tmp/models 20 | unzip /tmp/models/tf.zip 21 | wget https://mawahstorage.blob.core.windows.net/models/20170906/retrained.model -P /tmp/models 22 | sudo chmod -R 777 /tmp/models -------------------------------------------------------------------------------- /tf/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tf/deployment/model_deploy_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for model_deploy.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | from deployment import model_deploy 25 | 26 | slim = tf.contrib.slim 27 | 28 | 29 | class DeploymentConfigTest(tf.test.TestCase): 30 | 31 | def testDefaults(self): 32 | deploy_config = model_deploy.DeploymentConfig() 33 | 34 | self.assertEqual(slim.get_variables(), []) 35 | self.assertEqual(deploy_config.caching_device(), None) 36 | self.assertDeviceEqual(deploy_config.clone_device(0), '') 37 | self.assertEqual(deploy_config.clone_scope(0), '') 38 | self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0') 39 | self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0') 40 | self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0') 41 | 42 | def testCPUonly(self): 43 | deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True) 44 | 45 | self.assertEqual(deploy_config.caching_device(), None) 46 | self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0') 47 | self.assertEqual(deploy_config.clone_scope(0), '') 48 | self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0') 49 | self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0') 50 | self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0') 51 | 52 | def testMultiGPU(self): 53 | deploy_config = model_deploy.DeploymentConfig(num_clones=2) 54 | 55 | self.assertEqual(deploy_config.caching_device(), None) 56 | self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0') 57 | self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1') 58 | self.assertEqual(deploy_config.clone_scope(0), 'clone_0') 59 | self.assertEqual(deploy_config.clone_scope(1), 'clone_1') 60 | self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0') 61 | self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0') 62 | self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0') 63 | 64 | def testPS(self): 65 | deploy_config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1) 66 | 67 | self.assertDeviceEqual(deploy_config.clone_device(0), 68 | '/job:worker') 69 | self.assertEqual(deploy_config.clone_scope(0), '') 70 | self.assertDeviceEqual(deploy_config.optimizer_device(), 71 | '/job:worker/device:CPU:0') 72 | self.assertDeviceEqual(deploy_config.inputs_device(), 73 | '/job:worker/device:CPU:0') 74 | with tf.device(deploy_config.variables_device()): 75 | a = tf.Variable(0) 76 | b = tf.Variable(0) 77 | c = tf.no_op() 78 | d = slim.variable('a', [], 79 | caching_device=deploy_config.caching_device()) 80 | self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0') 81 | self.assertDeviceEqual(a.device, a.value().device) 82 | self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0') 83 | 
self.assertDeviceEqual(b.device, b.value().device) 84 | self.assertDeviceEqual(c.device, '') 85 | self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0') 86 | self.assertDeviceEqual(d.value().device, '') 87 | 88 | def testMultiGPUPS(self): 89 | deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1) 90 | 91 | self.assertEqual(deploy_config.caching_device()(tf.no_op()), '') 92 | self.assertDeviceEqual(deploy_config.clone_device(0), 93 | '/job:worker/device:GPU:0') 94 | self.assertDeviceEqual(deploy_config.clone_device(1), 95 | '/job:worker/device:GPU:1') 96 | self.assertEqual(deploy_config.clone_scope(0), 'clone_0') 97 | self.assertEqual(deploy_config.clone_scope(1), 'clone_1') 98 | self.assertDeviceEqual(deploy_config.optimizer_device(), 99 | '/job:worker/device:CPU:0') 100 | self.assertDeviceEqual(deploy_config.inputs_device(), 101 | '/job:worker/device:CPU:0') 102 | 103 | def testReplicasPS(self): 104 | deploy_config = model_deploy.DeploymentConfig(num_replicas=2, 105 | num_ps_tasks=2) 106 | 107 | self.assertDeviceEqual(deploy_config.clone_device(0), 108 | '/job:worker') 109 | self.assertEqual(deploy_config.clone_scope(0), '') 110 | self.assertDeviceEqual(deploy_config.optimizer_device(), 111 | '/job:worker/device:CPU:0') 112 | self.assertDeviceEqual(deploy_config.inputs_device(), 113 | '/job:worker/device:CPU:0') 114 | 115 | def testReplicasMultiGPUPS(self): 116 | deploy_config = model_deploy.DeploymentConfig(num_replicas=2, 117 | num_clones=2, 118 | num_ps_tasks=2) 119 | self.assertDeviceEqual(deploy_config.clone_device(0), 120 | '/job:worker/device:GPU:0') 121 | self.assertDeviceEqual(deploy_config.clone_device(1), 122 | '/job:worker/device:GPU:1') 123 | self.assertEqual(deploy_config.clone_scope(0), 'clone_0') 124 | self.assertEqual(deploy_config.clone_scope(1), 'clone_1') 125 | self.assertDeviceEqual(deploy_config.optimizer_device(), 126 | '/job:worker/device:CPU:0') 127 | self.assertDeviceEqual(deploy_config.inputs_device(), 128 | '/job:worker/device:CPU:0') 129 | 130 | def testVariablesPS(self): 131 | deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2) 132 | 133 | with tf.device(deploy_config.variables_device()): 134 | a = tf.Variable(0) 135 | b = tf.Variable(0) 136 | c = tf.no_op() 137 | d = slim.variable('a', [], 138 | caching_device=deploy_config.caching_device()) 139 | 140 | self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0') 141 | self.assertDeviceEqual(a.device, a.value().device) 142 | self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0') 143 | self.assertDeviceEqual(b.device, b.value().device) 144 | self.assertDeviceEqual(c.device, '') 145 | self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0') 146 | self.assertDeviceEqual(d.value().device, '') 147 | 148 | 149 | def LogisticClassifier(inputs, labels, scope=None, reuse=None): 150 | with tf.variable_scope(scope, 'LogisticClassifier', [inputs, labels], 151 | reuse=reuse): 152 | predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid, 153 | scope='fully_connected') 154 | slim.losses.log_loss(predictions, labels) 155 | return predictions 156 | 157 | 158 | def BatchNormClassifier(inputs, labels, scope=None, reuse=None): 159 | with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels], 160 | reuse=reuse): 161 | inputs = slim.batch_norm(inputs, decay=0.1) 162 | predictions = slim.fully_connected(inputs, 1, 163 | activation_fn=tf.sigmoid, 164 | scope='fully_connected') 165 | slim.losses.log_loss(predictions, labels) 166 | return 
predictions 167 | 168 | 169 | class CreatecloneTest(tf.test.TestCase): 170 | 171 | def setUp(self): 172 | # Create an easy training set: 173 | np.random.seed(0) 174 | 175 | self._inputs = np.zeros((16, 4)) 176 | self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32) 177 | self._logdir = self.get_temp_dir() 178 | 179 | for i in range(16): 180 | j = int(2 * self._labels[i] + np.random.randint(0, 2)) 181 | self._inputs[i, j] = 1 182 | 183 | def testCreateLogisticClassifier(self): 184 | g = tf.Graph() 185 | with g.as_default(): 186 | tf.set_random_seed(0) 187 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 188 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 189 | 190 | model_fn = LogisticClassifier 191 | clone_args = (tf_inputs, tf_labels) 192 | deploy_config = model_deploy.DeploymentConfig(num_clones=1) 193 | 194 | self.assertEqual(slim.get_variables(), []) 195 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 196 | clone = clones[0] 197 | self.assertEqual(len(slim.get_variables()), 2) 198 | for v in slim.get_variables(): 199 | self.assertDeviceEqual(v.device, 'CPU:0') 200 | self.assertDeviceEqual(v.value().device, 'CPU:0') 201 | self.assertEqual(clone.outputs.op.name, 202 | 'LogisticClassifier/fully_connected/Sigmoid') 203 | self.assertEqual(clone.scope, '') 204 | self.assertDeviceEqual(clone.device, '') 205 | self.assertEqual(len(slim.losses.get_losses()), 1) 206 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 207 | self.assertEqual(update_ops, []) 208 | 209 | def testCreateSingleclone(self): 210 | g = tf.Graph() 211 | with g.as_default(): 212 | tf.set_random_seed(0) 213 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 214 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 215 | 216 | model_fn = BatchNormClassifier 217 | clone_args = (tf_inputs, tf_labels) 218 | deploy_config = model_deploy.DeploymentConfig(num_clones=1) 219 | 220 | self.assertEqual(slim.get_variables(), []) 221 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 222 | clone = clones[0] 223 | self.assertEqual(len(slim.get_variables()), 5) 224 | for v in slim.get_variables(): 225 | self.assertDeviceEqual(v.device, 'CPU:0') 226 | self.assertDeviceEqual(v.value().device, 'CPU:0') 227 | self.assertEqual(clone.outputs.op.name, 228 | 'BatchNormClassifier/fully_connected/Sigmoid') 229 | self.assertEqual(clone.scope, '') 230 | self.assertDeviceEqual(clone.device, '') 231 | self.assertEqual(len(slim.losses.get_losses()), 1) 232 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 233 | self.assertEqual(len(update_ops), 2) 234 | 235 | def testCreateMulticlone(self): 236 | g = tf.Graph() 237 | with g.as_default(): 238 | tf.set_random_seed(0) 239 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 240 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 241 | 242 | model_fn = BatchNormClassifier 243 | clone_args = (tf_inputs, tf_labels) 244 | num_clones = 4 245 | deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones) 246 | 247 | self.assertEqual(slim.get_variables(), []) 248 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 249 | self.assertEqual(len(slim.get_variables()), 5) 250 | for v in slim.get_variables(): 251 | self.assertDeviceEqual(v.device, 'CPU:0') 252 | self.assertDeviceEqual(v.value().device, 'CPU:0') 253 | self.assertEqual(len(clones), num_clones) 254 | for i, clone in enumerate(clones): 255 | self.assertEqual( 256 | clone.outputs.op.name, 257 | 
'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i) 258 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope) 259 | self.assertEqual(len(update_ops), 2) 260 | self.assertEqual(clone.scope, 'clone_%d/' % i) 261 | self.assertDeviceEqual(clone.device, 'GPU:%d' % i) 262 | 263 | def testCreateOnecloneWithPS(self): 264 | g = tf.Graph() 265 | with g.as_default(): 266 | tf.set_random_seed(0) 267 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 268 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 269 | 270 | model_fn = BatchNormClassifier 271 | clone_args = (tf_inputs, tf_labels) 272 | deploy_config = model_deploy.DeploymentConfig(num_clones=1, 273 | num_ps_tasks=1) 274 | 275 | self.assertEqual(slim.get_variables(), []) 276 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 277 | self.assertEqual(len(clones), 1) 278 | clone = clones[0] 279 | self.assertEqual(clone.outputs.op.name, 280 | 'BatchNormClassifier/fully_connected/Sigmoid') 281 | self.assertDeviceEqual(clone.device, '/job:worker') 282 | self.assertEqual(clone.scope, '') 283 | self.assertEqual(len(slim.get_variables()), 5) 284 | for v in slim.get_variables(): 285 | self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0') 286 | self.assertDeviceEqual(v.device, v.value().device) 287 | 288 | def testCreateMulticloneWithPS(self): 289 | g = tf.Graph() 290 | with g.as_default(): 291 | tf.set_random_seed(0) 292 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 293 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 294 | 295 | model_fn = BatchNormClassifier 296 | clone_args = (tf_inputs, tf_labels) 297 | deploy_config = model_deploy.DeploymentConfig(num_clones=2, 298 | num_ps_tasks=2) 299 | 300 | self.assertEqual(slim.get_variables(), []) 301 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 302 | self.assertEqual(len(slim.get_variables()), 5) 303 | for i, v in enumerate(slim.get_variables()): 304 | t = i % 2 305 | self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t) 306 | self.assertDeviceEqual(v.device, v.value().device) 307 | self.assertEqual(len(clones), 2) 308 | for i, clone in enumerate(clones): 309 | self.assertEqual( 310 | clone.outputs.op.name, 311 | 'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i) 312 | self.assertEqual(clone.scope, 'clone_%d/' % i) 313 | self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i) 314 | 315 | 316 | class OptimizeclonesTest(tf.test.TestCase): 317 | 318 | def setUp(self): 319 | # Create an easy training set: 320 | np.random.seed(0) 321 | 322 | self._inputs = np.zeros((16, 4)) 323 | self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32) 324 | self._logdir = self.get_temp_dir() 325 | 326 | for i in range(16): 327 | j = int(2 * self._labels[i] + np.random.randint(0, 2)) 328 | self._inputs[i, j] = 1 329 | 330 | def testCreateLogisticClassifier(self): 331 | g = tf.Graph() 332 | with g.as_default(): 333 | tf.set_random_seed(0) 334 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 335 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 336 | 337 | model_fn = LogisticClassifier 338 | clone_args = (tf_inputs, tf_labels) 339 | deploy_config = model_deploy.DeploymentConfig(num_clones=1) 340 | 341 | self.assertEqual(slim.get_variables(), []) 342 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 343 | self.assertEqual(len(slim.get_variables()), 2) 344 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 345 | 
self.assertEqual(update_ops, []) 346 | 347 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) 348 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones, 349 | optimizer) 350 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) 351 | self.assertEqual(total_loss.op.name, 'total_loss') 352 | for g, v in grads_and_vars: 353 | self.assertDeviceEqual(g.device, '') 354 | self.assertDeviceEqual(v.device, 'CPU:0') 355 | 356 | def testCreateSingleclone(self): 357 | g = tf.Graph() 358 | with g.as_default(): 359 | tf.set_random_seed(0) 360 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 361 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 362 | 363 | model_fn = BatchNormClassifier 364 | clone_args = (tf_inputs, tf_labels) 365 | deploy_config = model_deploy.DeploymentConfig(num_clones=1) 366 | 367 | self.assertEqual(slim.get_variables(), []) 368 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 369 | self.assertEqual(len(slim.get_variables()), 5) 370 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 371 | self.assertEqual(len(update_ops), 2) 372 | 373 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) 374 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones, 375 | optimizer) 376 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) 377 | self.assertEqual(total_loss.op.name, 'total_loss') 378 | for g, v in grads_and_vars: 379 | self.assertDeviceEqual(g.device, '') 380 | self.assertDeviceEqual(v.device, 'CPU:0') 381 | 382 | def testCreateMulticlone(self): 383 | g = tf.Graph() 384 | with g.as_default(): 385 | tf.set_random_seed(0) 386 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 387 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 388 | 389 | model_fn = BatchNormClassifier 390 | clone_args = (tf_inputs, tf_labels) 391 | num_clones = 4 392 | deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones) 393 | 394 | self.assertEqual(slim.get_variables(), []) 395 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) 396 | self.assertEqual(len(slim.get_variables()), 5) 397 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 398 | self.assertEqual(len(update_ops), num_clones * 2) 399 | 400 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) 401 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones, 402 | optimizer) 403 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) 404 | self.assertEqual(total_loss.op.name, 'total_loss') 405 | for g, v in grads_and_vars: 406 | self.assertDeviceEqual(g.device, '') 407 | self.assertDeviceEqual(v.device, 'CPU:0') 408 | 409 | def testCreateMulticloneCPU(self): 410 | g = tf.Graph() 411 | with g.as_default(): 412 | tf.set_random_seed(0) 413 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 414 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 415 | 416 | model_fn = BatchNormClassifier 417 | model_args = (tf_inputs, tf_labels) 418 | num_clones = 4 419 | deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones, 420 | clone_on_cpu=True) 421 | 422 | self.assertEqual(slim.get_variables(), []) 423 | clones = model_deploy.create_clones(deploy_config, model_fn, model_args) 424 | self.assertEqual(len(slim.get_variables()), 5) 425 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 426 | self.assertEqual(len(update_ops), num_clones * 2) 427 | 428 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) 
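      # optimize_clones() is expected to sum the per-clone losses into a single
      # total_loss tensor and return one aggregated (gradient, variable) pair per
      # trainable variable; the assertions below only check op naming and device
      # placement, not numerical gradient values.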
429 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones, 430 | optimizer) 431 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) 432 | self.assertEqual(total_loss.op.name, 'total_loss') 433 | for g, v in grads_and_vars: 434 | self.assertDeviceEqual(g.device, '') 435 | self.assertDeviceEqual(v.device, 'CPU:0') 436 | 437 | def testCreateOnecloneWithPS(self): 438 | g = tf.Graph() 439 | with g.as_default(): 440 | tf.set_random_seed(0) 441 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 442 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 443 | 444 | model_fn = BatchNormClassifier 445 | model_args = (tf_inputs, tf_labels) 446 | deploy_config = model_deploy.DeploymentConfig(num_clones=1, 447 | num_ps_tasks=1) 448 | 449 | self.assertEqual(slim.get_variables(), []) 450 | clones = model_deploy.create_clones(deploy_config, model_fn, model_args) 451 | self.assertEqual(len(slim.get_variables()), 5) 452 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 453 | self.assertEqual(len(update_ops), 2) 454 | 455 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) 456 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones, 457 | optimizer) 458 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) 459 | self.assertEqual(total_loss.op.name, 'total_loss') 460 | for g, v in grads_and_vars: 461 | self.assertDeviceEqual(g.device, '/job:worker') 462 | self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0') 463 | 464 | 465 | class DeployTest(tf.test.TestCase): 466 | 467 | def setUp(self): 468 | # Create an easy training set: 469 | np.random.seed(0) 470 | 471 | self._inputs = np.zeros((16, 4)) 472 | self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32) 473 | self._logdir = self.get_temp_dir() 474 | 475 | for i in range(16): 476 | j = int(2 * self._labels[i] + np.random.randint(0, 2)) 477 | self._inputs[i, j] = 1 478 | 479 | def testLocalTrainOp(self): 480 | g = tf.Graph() 481 | with g.as_default(): 482 | tf.set_random_seed(0) 483 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32) 484 | tf_labels = tf.constant(self._labels, dtype=tf.float32) 485 | 486 | model_fn = BatchNormClassifier 487 | model_args = (tf_inputs, tf_labels) 488 | deploy_config = model_deploy.DeploymentConfig(num_clones=2, 489 | clone_on_cpu=True) 490 | 491 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) 492 | 493 | self.assertEqual(slim.get_variables(), []) 494 | model = model_deploy.deploy(deploy_config, model_fn, model_args, 495 | optimizer=optimizer) 496 | 497 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 498 | self.assertEqual(len(update_ops), 4) 499 | self.assertEqual(len(model.clones), 2) 500 | self.assertEqual(model.total_loss.op.name, 'total_loss') 501 | self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op') 502 | self.assertEqual(model.train_op.op.name, 'train_op') 503 | 504 | with tf.Session() as sess: 505 | sess.run(tf.initialize_all_variables()) 506 | moving_mean = tf.contrib.framework.get_variables_by_name( 507 | 'moving_mean')[0] 508 | moving_variance = tf.contrib.framework.get_variables_by_name( 509 | 'moving_variance')[0] 510 | initial_loss = sess.run(model.total_loss) 511 | initial_mean, initial_variance = sess.run([moving_mean, 512 | moving_variance]) 513 | self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0]) 514 | self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0]) 515 | for _ in range(10): 516 | sess.run(model.train_op) 517 | final_loss = 
sess.run(model.total_loss) 518 | self.assertLess(final_loss, initial_loss / 10.0) 519 | 520 | final_mean, final_variance = sess.run([moving_mean, 521 | moving_variance]) 522 | self.assertAllClose(final_mean, [0.125, 0.25, 0.375, 0.25]) 523 | self.assertAllClose(final_variance, [0.109375, 0.1875, 524 | 0.234375, 0.1875]) 525 | 526 | def testNoSummariesOnGPU(self): 527 | with tf.Graph().as_default(): 528 | deploy_config = model_deploy.DeploymentConfig(num_clones=2) 529 | 530 | # clone function creates a fully_connected layer with a regularizer loss. 531 | def ModelFn(): 532 | inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32) 533 | reg = tf.contrib.layers.l2_regularizer(0.001) 534 | tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg) 535 | 536 | model = model_deploy.deploy( 537 | deploy_config, ModelFn, 538 | optimizer=tf.train.GradientDescentOptimizer(1.0)) 539 | # The model summary op should have a few summary inputs and all of them 540 | # should be on the CPU. 541 | self.assertTrue(model.summary_op.op.inputs) 542 | for inp in model.summary_op.op.inputs: 543 | self.assertEqual('/device:CPU:0', inp.device) 544 | 545 | def testNoSummariesOnGPUForEvals(self): 546 | with tf.Graph().as_default(): 547 | deploy_config = model_deploy.DeploymentConfig(num_clones=2) 548 | 549 | # clone function creates a fully_connected layer with a regularizer loss. 550 | def ModelFn(): 551 | inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32) 552 | reg = tf.contrib.layers.l2_regularizer(0.001) 553 | tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg) 554 | 555 | # No optimizer here, it's an eval. 556 | model = model_deploy.deploy(deploy_config, ModelFn) 557 | # The model summary op should have a few summary inputs and all of them 558 | # should be on the CPU. 559 | self.assertTrue(model.summary_op.op.inputs) 560 | for inp in model.summary_op.op.inputs: 561 | self.assertEqual('/device:CPU:0', inp.device) 562 | 563 | 564 | if __name__ == '__main__': 565 | tf.test.main() 566 | -------------------------------------------------------------------------------- /tf/nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tf/nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import overfeat 29 | from nets import resnet_v1 30 | from nets import resnet_v2 31 | from nets import vgg 32 | 33 | slim = tf.contrib.slim 34 | 35 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 36 | 'cifarnet': cifarnet.cifarnet, 37 | 'overfeat': overfeat.overfeat, 38 | 'vgg_a': vgg.vgg_a, 39 | 'vgg_16': vgg.vgg_16, 40 | 'vgg_19': vgg.vgg_19, 41 | 'inception_v1': inception.inception_v1, 42 | 'inception_v2': inception.inception_v2, 43 | 'inception_v3': inception.inception_v3, 44 | 'inception_v4': inception.inception_v4, 45 | 'inception_resnet_v2': inception.inception_resnet_v2, 46 | 'lenet': lenet.lenet, 47 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 48 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 49 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 50 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 51 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 52 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 53 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 54 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 55 | } 56 | 57 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 58 | 'cifarnet': cifarnet.cifarnet_arg_scope, 59 | 'overfeat': overfeat.overfeat_arg_scope, 60 | 'vgg_a': vgg.vgg_arg_scope, 61 | 'vgg_16': vgg.vgg_arg_scope, 62 | 'vgg_19': vgg.vgg_arg_scope, 63 | 'inception_v1': inception.inception_v3_arg_scope, 64 | 'inception_v2': inception.inception_v3_arg_scope, 65 | 'inception_v3': inception.inception_v3_arg_scope, 66 | 'inception_v4': inception.inception_v4_arg_scope, 67 | 'inception_resnet_v2': 68 | inception.inception_resnet_v2_arg_scope, 69 | 'lenet': lenet.lenet_arg_scope, 70 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 71 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 72 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 74 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 75 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 76 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 78 | } 79 | 80 | 81 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 82 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 83 | 84 | Args: 85 | name: The name of the network. 86 | num_classes: The number of classes to use for classification. 87 | weight_decay: The l2 coefficient for the model weights. 88 | is_training: `True` if the model is being used for training and `False` 89 | otherwise. 90 | 91 | Returns: 92 | network_fn: A function that applies the model to a batch of images. It has 93 | the following signature: 94 | logits, end_points = network_fn(images) 95 | Raises: 96 | ValueError: If network `name` is not recognized. 
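  Example (illustrative sketch only; assumes a 1000-class classification task):
    network_fn = get_network_fn('resnet_v1_50', num_classes=1000,
                                is_training=False)
    size = getattr(network_fn, 'default_image_size', 224)
    images = tf.placeholder(tf.float32, [None, size, size, 3])
    logits, end_points = network_fn(images)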
97 | """ 98 | if name not in networks_map: 99 | raise ValueError('Name of network unknown %s' % name) 100 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 101 | func = networks_map[name] 102 | @functools.wraps(func) 103 | def network_fn(images): 104 | with slim.arg_scope(arg_scope): 105 | return func(images, num_classes, is_training=is_training) 106 | if hasattr(func, 'default_image_size'): 107 | network_fn.default_image_size = func.default_image_size 108 | 109 | return network_fn 110 | -------------------------------------------------------------------------------- /tf/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for slim.inception.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFn(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in nets_factory.networks_map: 34 | with self.test_session(): 35 | net_fn = nets_factory.get_network_fn(net, num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 39 | logits, end_points = net_fn(inputs) 40 | self.assertTrue(isinstance(logits, tf.Tensor)) 41 | self.assertTrue(isinstance(end_points, dict)) 42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 44 | 45 | if __name__ == '__main__': 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /tf/nets/resnet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains building blocks for various versions of Residual Networks. 
16 | 17 | Residual networks (ResNets) were proposed in: 18 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 20 | 21 | More variants were introduced in: 22 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 23 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 24 | 25 | We can obtain different ResNet variants by changing the network depth, width, 26 | and form of residual unit. This module implements the infrastructure for 27 | building them. Concrete ResNet units and full ResNet networks are implemented in 28 | the accompanying resnet_v1.py and resnet_v2.py modules. 29 | 30 | Compared to https://github.com/KaimingHe/deep-residual-networks, in the current 31 | implementation we subsample the output activations in the last residual unit of 32 | each block, instead of subsampling the input activations in the first residual 33 | unit of each block. The two implementations give identical results but our 34 | implementation is more memory efficient. 35 | """ 36 | from __future__ import absolute_import 37 | from __future__ import division 38 | from __future__ import print_function 39 | 40 | import collections 41 | import tensorflow as tf 42 | 43 | slim = tf.contrib.slim 44 | 45 | 46 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 47 | """A named tuple describing a ResNet block. 48 | 49 | Its parts are: 50 | scope: The scope of the `Block`. 51 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 52 | returns another `Tensor` with the output of the ResNet unit. 53 | args: A list of length equal to the number of units in the `Block`. The list 54 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 55 | block to serve as argument to unit_fn. 56 | """ 57 | 58 | 59 | def subsample(inputs, factor, scope=None): 60 | """Subsamples the input along the spatial dimensions. 61 | 62 | Args: 63 | inputs: A `Tensor` of size [batch, height_in, width_in, channels]. 64 | factor: The subsampling factor. 65 | scope: Optional variable_scope. 66 | 67 | Returns: 68 | output: A `Tensor` of size [batch, height_out, width_out, channels] with the 69 | input, either intact (if factor == 1) or subsampled (if factor > 1). 70 | """ 71 | if factor == 1: 72 | return inputs 73 | else: 74 | return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) 75 | 76 | 77 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): 78 | """Strided 2-D convolution with 'SAME' padding. 79 | 80 | When stride > 1, then we do explicit zero-padding, followed by conv2d with 81 | 'VALID' padding. 82 | 83 | Note that 84 | 85 | net = conv2d_same(inputs, num_outputs, 3, stride=stride) 86 | 87 | is equivalent to 88 | 89 | net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME') 90 | net = subsample(net, factor=stride) 91 | 92 | whereas 93 | 94 | net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME') 95 | 96 | is different when the input's height or width is even, which is why we add the 97 | current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). 98 | 99 | Args: 100 | inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. 101 | num_outputs: An integer, the number of output filters. 102 | kernel_size: An int with the kernel_size of the filters. 103 | stride: An integer, the output stride. 104 | rate: An integer, rate for atrous convolution. 105 | scope: Scope. 
106 | 107 | Returns: 108 | output: A 4-D tensor of size [batch, height_out, width_out, channels] with 109 | the convolution output. 110 | """ 111 | if stride == 1: 112 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, 113 | padding='SAME', scope=scope) 114 | else: 115 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 116 | pad_total = kernel_size_effective - 1 117 | pad_beg = pad_total // 2 118 | pad_end = pad_total - pad_beg 119 | inputs = tf.pad(inputs, 120 | [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) 121 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, 122 | rate=rate, padding='VALID', scope=scope) 123 | 124 | 125 | @slim.add_arg_scope 126 | def stack_blocks_dense(net, blocks, output_stride=None, 127 | outputs_collections=None): 128 | """Stacks ResNet `Blocks` and controls output feature density. 129 | 130 | First, this function creates scopes for the ResNet in the form of 131 | 'block_name/unit_1', 'block_name/unit_2', etc. 132 | 133 | Second, this function allows the user to explicitly control the ResNet 134 | output_stride, which is the ratio of the input to output spatial resolution. 135 | This is useful for dense prediction tasks such as semantic segmentation or 136 | object detection. 137 | 138 | Most ResNets consist of 4 ResNet blocks and subsample the activations by a 139 | factor of 2 when transitioning between consecutive ResNet blocks. This results 140 | to a nominal ResNet output_stride equal to 8. If we set the output_stride to 141 | half the nominal network stride (e.g., output_stride=4), then we compute 142 | responses twice. 143 | 144 | Control of the output feature density is implemented by atrous convolution. 145 | 146 | Args: 147 | net: A `Tensor` of size [batch, height, width, channels]. 148 | blocks: A list of length equal to the number of ResNet `Blocks`. Each 149 | element is a ResNet `Block` object describing the units in the `Block`. 150 | output_stride: If `None`, then the output will be computed at the nominal 151 | network stride. If output_stride is not `None`, it specifies the requested 152 | ratio of input to output spatial resolution, which needs to be equal to 153 | the product of unit strides from the start up to some level of the ResNet. 154 | For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, 155 | then valid values for the output_stride are 1, 2, 6, 24 or None (which 156 | is equivalent to output_stride=24). 157 | outputs_collections: Collection to add the ResNet block outputs. 158 | 159 | Returns: 160 | net: Output tensor with stride equal to the specified output_stride. 161 | 162 | Raises: 163 | ValueError: If the target output_stride is not valid. 164 | """ 165 | # The current_stride variable keeps track of the effective stride of the 166 | # activations. This allows us to invoke atrous convolution whenever applying 167 | # the next residual unit would result in the activations having stride larger 168 | # than the target output_stride. 169 | current_stride = 1 170 | 171 | # The atrous convolution rate parameter. 
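  # Atrous (dilated) convolution with rate r effectively inserts r - 1 zeros
  # between consecutive filter taps, so the receptive field keeps growing even
  # though the spatial resolution is no longer reduced once the requested
  # output_stride has been reached.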
172 | rate = 1 173 | 174 | for block in blocks: 175 | with tf.variable_scope(block.scope, 'block', [net]) as sc: 176 | for i, unit in enumerate(block.args): 177 | if output_stride is not None and current_stride > output_stride: 178 | raise ValueError('The target output_stride cannot be reached.') 179 | 180 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]): 181 | unit_depth, unit_depth_bottleneck, unit_stride = unit 182 | 183 | # If we have reached the target output_stride, then we need to employ 184 | # atrous convolution with stride=1 and multiply the atrous rate by the 185 | # current unit's stride for use in subsequent layers. 186 | if output_stride is not None and current_stride == output_stride: 187 | net = block.unit_fn(net, 188 | depth=unit_depth, 189 | depth_bottleneck=unit_depth_bottleneck, 190 | stride=1, 191 | rate=rate) 192 | rate *= unit_stride 193 | 194 | else: 195 | net = block.unit_fn(net, 196 | depth=unit_depth, 197 | depth_bottleneck=unit_depth_bottleneck, 198 | stride=unit_stride, 199 | rate=1) 200 | current_stride *= unit_stride 201 | net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) 202 | 203 | if output_stride is not None and current_stride != output_stride: 204 | raise ValueError('The target output_stride cannot be reached.') 205 | 206 | return net 207 | 208 | 209 | def resnet_arg_scope(weight_decay=0.0001, 210 | batch_norm_decay=0.997, 211 | batch_norm_epsilon=1e-5, 212 | batch_norm_scale=True): 213 | """Defines the default ResNet arg scope. 214 | 215 | TODO(gpapan): The batch-normalization related default values above are 216 | appropriate for use in conjunction with the reference ResNet models 217 | released at https://github.com/KaimingHe/deep-residual-networks. When 218 | training ResNets from scratch, they might need to be tuned. 219 | 220 | Args: 221 | weight_decay: The weight decay to use for regularizing the model. 222 | batch_norm_decay: The moving average decay when estimating layer activation 223 | statistics in batch normalization. 224 | batch_norm_epsilon: Small constant to prevent division by zero when 225 | normalizing activations by their variance in batch normalization. 226 | batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the 227 | activations in the batch normalization layer. 228 | 229 | Returns: 230 | An `arg_scope` to use for the resnet models. 231 | """ 232 | batch_norm_params = { 233 | 'decay': batch_norm_decay, 234 | 'epsilon': batch_norm_epsilon, 235 | 'scale': batch_norm_scale, 236 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 237 | } 238 | 239 | with slim.arg_scope( 240 | [slim.conv2d], 241 | weights_regularizer=slim.l2_regularizer(weight_decay), 242 | weights_initializer=slim.variance_scaling_initializer(), 243 | activation_fn=tf.nn.relu, 244 | normalizer_fn=slim.batch_norm, 245 | normalizer_params=batch_norm_params): 246 | with slim.arg_scope([slim.batch_norm], **batch_norm_params): 247 | # The following implies padding='SAME' for pool1, which makes feature 248 | # alignment easier for dense prediction tasks. This is also used in 249 | # https://github.com/facebook/fb.resnet.torch. However the accompanying 250 | # code of 'Deep Residual Learning for Image Recognition' uses 251 | # padding='VALID' for pool1. You can switch to that choice by setting 252 | # slim.arg_scope([slim.max_pool2d], padding='VALID'). 
253 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: 254 | return arg_sc 255 | -------------------------------------------------------------------------------- /tf/nets/resnet_v1.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains definitions for the original form of Residual Networks. 16 | 17 | The 'v1' residual networks (ResNets) implemented in this module were proposed 18 | by: 19 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 20 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 21 | 22 | Other variants were introduced in: 23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 25 | 26 | The networks defined in this module utilize the bottleneck building block of 27 | [1] with projection shortcuts only for increasing depths. They employ batch 28 | normalization *after* every weight layer. This is the architecture used by 29 | MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and 30 | ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1' 31 | architecture and the alternative 'v2' architecture of [2] which uses batch 32 | normalization *before* every weight layer in the so-called full pre-activation 33 | units. 34 | 35 | Typical use: 36 | 37 | from tensorflow.contrib.slim.nets import resnet_v1 38 | 39 | ResNet-101 for image classification into 1000 classes: 40 | 41 | # inputs has shape [batch, 224, 224, 3] 42 | with slim.arg_scope(resnet_v1.resnet_arg_scope()): 43 | net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False) 44 | 45 | ResNet-101 for semantic segmentation into 21 classes: 46 | 47 | # inputs has shape [batch, 513, 513, 3] 48 | with slim.arg_scope(resnet_v1.resnet_arg_scope()): 49 | net, end_points = resnet_v1.resnet_v1_101(inputs, 50 | 21, 51 | is_training=False, 52 | global_pool=False, 53 | output_stride=16) 54 | """ 55 | from __future__ import absolute_import 56 | from __future__ import division 57 | from __future__ import print_function 58 | 59 | import tensorflow as tf 60 | 61 | from nets import resnet_utils 62 | 63 | 64 | resnet_arg_scope = resnet_utils.resnet_arg_scope 65 | slim = tf.contrib.slim 66 | 67 | 68 | @slim.add_arg_scope 69 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, 70 | outputs_collections=None, scope=None): 71 | """Bottleneck residual unit variant with BN after convolutions. 72 | 73 | This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for 74 | its definition. Note that we use here the bottleneck variant which has an 75 | extra bottleneck layer. 
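  Concretely, the unit applies a 1x1 convolution that reduces the depth to
  depth_bottleneck, a 3x3 convolution (strided and/or atrous), and a 1x1
  convolution that restores the depth to `depth`; the result is added to a
  shortcut branch and passed through a ReLU.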
76 | 77 | When putting together two consecutive ResNet blocks that use this unit, one 78 | should use stride = 2 in the last unit of the first block. 79 | 80 | Args: 81 | inputs: A tensor of size [batch, height, width, channels]. 82 | depth: The depth of the ResNet unit output. 83 | depth_bottleneck: The depth of the bottleneck layers. 84 | stride: The ResNet unit's stride. Determines the amount of downsampling of 85 | the units output compared to its input. 86 | rate: An integer, rate for atrous convolution. 87 | outputs_collections: Collection to add the ResNet unit output. 88 | scope: Optional variable_scope. 89 | 90 | Returns: 91 | The ResNet unit's output. 92 | """ 93 | with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: 94 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 95 | if depth == depth_in: 96 | shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') 97 | else: 98 | shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, 99 | activation_fn=None, scope='shortcut') 100 | 101 | residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, 102 | scope='conv1') 103 | residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, 104 | rate=rate, scope='conv2') 105 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, 106 | activation_fn=None, scope='conv3') 107 | 108 | output = tf.nn.relu(shortcut + residual) 109 | 110 | return slim.utils.collect_named_outputs(outputs_collections, 111 | sc.original_name_scope, 112 | output) 113 | 114 | 115 | def resnet_v1(inputs, 116 | blocks, 117 | num_classes=None, 118 | is_training=True, 119 | global_pool=True, 120 | output_stride=None, 121 | include_root_block=True, 122 | reuse=None, 123 | scope=None): 124 | """Generator for v1 ResNet models. 125 | 126 | This function generates a family of ResNet v1 models. See the resnet_v1_*() 127 | methods for specific model instantiations, obtained by selecting different 128 | block instantiations that produce ResNets of various depths. 129 | 130 | Training for image classification on Imagenet is usually done with [224, 224] 131 | inputs, resulting in [7, 7] feature maps at the output of the last ResNet 132 | block for the ResNets defined in [1] that have nominal stride equal to 32. 133 | However, for dense prediction tasks we advise that one uses inputs with 134 | spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In 135 | this case the feature maps at the ResNet output will have spatial shape 136 | [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] 137 | and corners exactly aligned with the input image corners, which greatly 138 | facilitates alignment of the features to the image. Using as input [225, 225] 139 | images results in [8, 8] feature maps at the output of the last ResNet block. 140 | 141 | For dense prediction tasks, the ResNet needs to run in fully-convolutional 142 | (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all 143 | have nominal stride equal to 32 and a good choice in FCN mode is to use 144 | output_stride=16 in order to increase the density of the computed features at 145 | small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. 146 | 147 | Args: 148 | inputs: A tensor of size [batch, height_in, width_in, channels]. 149 | blocks: A list of length equal to the number of ResNet blocks. Each element 150 | is a resnet_utils.Block object describing the units in the block. 
151 | num_classes: Number of predicted classes for classification tasks. If None 152 | we return the features before the logit layer. 153 | is_training: whether is training or not. 154 | global_pool: If True, we perform global average pooling before computing the 155 | logits. Set to True for image classification, False for dense prediction. 156 | output_stride: If None, then the output will be computed at the nominal 157 | network stride. If output_stride is not None, it specifies the requested 158 | ratio of input to output spatial resolution. 159 | include_root_block: If True, include the initial convolution followed by 160 | max-pooling, if False excludes it. 161 | reuse: whether or not the network and its variables should be reused. To be 162 | able to reuse 'scope' must be given. 163 | scope: Optional variable_scope. 164 | 165 | Returns: 166 | net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. 167 | If global_pool is False, then height_out and width_out are reduced by a 168 | factor of output_stride compared to the respective height_in and width_in, 169 | else both height_out and width_out equal one. If num_classes is None, then 170 | net is the output of the last ResNet block, potentially after global 171 | average pooling. If num_classes is not None, net contains the pre-softmax 172 | activations. 173 | end_points: A dictionary from components of the network to the corresponding 174 | activation. 175 | 176 | Raises: 177 | ValueError: If the target output_stride is not valid. 178 | """ 179 | with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: 180 | end_points_collection = sc.name + '_end_points' 181 | with slim.arg_scope([slim.conv2d, bottleneck, 182 | resnet_utils.stack_blocks_dense], 183 | outputs_collections=end_points_collection): 184 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 185 | net = inputs 186 | if include_root_block: 187 | if output_stride is not None: 188 | if output_stride % 4 != 0: 189 | raise ValueError('The output_stride needs to be a multiple of 4.') 190 | output_stride /= 4 191 | net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') 192 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 193 | net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) 194 | if global_pool: 195 | # Global average pooling. 196 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 197 | if num_classes is not None: 198 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, 199 | normalizer_fn=None, scope='logits') 200 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 201 | # Convert end_points_collection into a dictionary of end_points. 202 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 203 | if num_classes is not None: 204 | end_points['predictions'] = slim.softmax(net, scope='predictions') 205 | return net, end_points 206 | resnet_v1.default_image_size = 224 207 | 208 | 209 | def resnet_v1_50(inputs, 210 | num_classes=None, 211 | is_training=True, 212 | global_pool=True, 213 | output_stride=None, 214 | reuse=None, 215 | scope='resnet_v1_50'): 216 | """ResNet-50 model of [1]. 
See resnet_v1() for arg and return description.""" 217 | blocks = [ 218 | resnet_utils.Block( 219 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 220 | resnet_utils.Block( 221 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), 222 | resnet_utils.Block( 223 | 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), 224 | resnet_utils.Block( 225 | 'block4', bottleneck, [(2048, 512, 1)] * 3) 226 | ] 227 | return resnet_v1(inputs, blocks, num_classes, is_training, 228 | global_pool=global_pool, output_stride=output_stride, 229 | include_root_block=True, reuse=reuse, scope=scope) 230 | 231 | 232 | def resnet_v1_101(inputs, 233 | num_classes=None, 234 | is_training=True, 235 | global_pool=True, 236 | output_stride=None, 237 | reuse=None, 238 | scope='resnet_v1_101'): 239 | """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" 240 | blocks = [ 241 | resnet_utils.Block( 242 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 243 | resnet_utils.Block( 244 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), 245 | resnet_utils.Block( 246 | 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), 247 | resnet_utils.Block( 248 | 'block4', bottleneck, [(2048, 512, 1)] * 3) 249 | ] 250 | return resnet_v1(inputs, blocks, num_classes, is_training, 251 | global_pool=global_pool, output_stride=output_stride, 252 | include_root_block=True, reuse=reuse, scope=scope) 253 | 254 | 255 | def resnet_v1_152(inputs, 256 | num_classes=None, 257 | is_training=True, 258 | global_pool=True, 259 | output_stride=None, 260 | reuse=None, 261 | scope='resnet_v1_152'): 262 | """ResNet-152 model of [1]. See resnet_v1() for arg and return description.""" 263 | blocks = [ 264 | resnet_utils.Block( 265 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 266 | resnet_utils.Block( 267 | 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), 268 | resnet_utils.Block( 269 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), 270 | resnet_utils.Block( 271 | 'block4', bottleneck, [(2048, 512, 1)] * 3)] 272 | return resnet_v1(inputs, blocks, num_classes, is_training, 273 | global_pool=global_pool, output_stride=output_stride, 274 | include_root_block=True, reuse=reuse, scope=scope) 275 | 276 | 277 | def resnet_v1_200(inputs, 278 | num_classes=None, 279 | is_training=True, 280 | global_pool=True, 281 | output_stride=None, 282 | reuse=None, 283 | scope='resnet_v1_200'): 284 | """ResNet-200 model of [2]. See resnet_v1() for arg and return description.""" 285 | blocks = [ 286 | resnet_utils.Block( 287 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 288 | resnet_utils.Block( 289 | 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), 290 | resnet_utils.Block( 291 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), 292 | resnet_utils.Block( 293 | 'block4', bottleneck, [(2048, 512, 1)] * 3)] 294 | return resnet_v1(inputs, blocks, num_classes, is_training, 295 | global_pool=global_pool, output_stride=output_stride, 296 | include_root_block=True, reuse=reuse, scope=scope) 297 | -------------------------------------------------------------------------------- /tf/nets/resnet_v1_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.nets.resnet_v1.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | from nets import resnet_utils 25 | from nets import resnet_v1 26 | 27 | slim = tf.contrib.slim 28 | 29 | 30 | def create_test_input(batch_size, height, width, channels): 31 | """Create test input tensor. 32 | 33 | Args: 34 | batch_size: The number of images per batch or `None` if unknown. 35 | height: The height of each image or `None` if unknown. 36 | width: The width of each image or `None` if unknown. 37 | channels: The number of channels per image or `None` if unknown. 38 | 39 | Returns: 40 | Either a placeholder `Tensor` of dimension 41 | [batch_size, height, width, channels] if any of the inputs are `None` or a 42 | constant `Tensor` with the mesh grid values along the spatial dimensions. 43 | """ 44 | if None in [batch_size, height, width, channels]: 45 | return tf.placeholder(tf.float32, (batch_size, height, width, channels)) 46 | else: 47 | return tf.to_float( 48 | np.tile( 49 | np.reshape( 50 | np.reshape(np.arange(height), [height, 1]) + 51 | np.reshape(np.arange(width), [1, width]), 52 | [1, height, width, 1]), 53 | [batch_size, 1, 1, channels])) 54 | 55 | 56 | class ResnetUtilsTest(tf.test.TestCase): 57 | 58 | def testSubsampleThreeByThree(self): 59 | x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1]) 60 | x = resnet_utils.subsample(x, 2) 61 | expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1]) 62 | with self.test_session(): 63 | self.assertAllClose(x.eval(), expected.eval()) 64 | 65 | def testSubsampleFourByFour(self): 66 | x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1]) 67 | x = resnet_utils.subsample(x, 2) 68 | expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1]) 69 | with self.test_session(): 70 | self.assertAllClose(x.eval(), expected.eval()) 71 | 72 | def testConv2DSameEven(self): 73 | n, n2 = 4, 2 74 | 75 | # Input image. 76 | x = create_test_input(1, n, n, 1) 77 | 78 | # Convolution kernel. 
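    # create_test_input() fills the tensor with the mesh-grid values i + j, so
    # the 3x3 kernel below contains the values 0..4 and the expected outputs in
    # this test can be checked by hand.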
79 | w = create_test_input(1, 3, 3, 1) 80 | w = tf.reshape(w, [3, 3, 1, 1]) 81 | 82 | tf.get_variable('Conv/weights', initializer=w) 83 | tf.get_variable('Conv/biases', initializer=tf.zeros([1])) 84 | tf.get_variable_scope().reuse_variables() 85 | 86 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') 87 | y1_expected = tf.to_float([[14, 28, 43, 26], 88 | [28, 48, 66, 37], 89 | [43, 66, 84, 46], 90 | [26, 37, 46, 22]]) 91 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) 92 | 93 | y2 = resnet_utils.subsample(y1, 2) 94 | y2_expected = tf.to_float([[14, 43], 95 | [43, 84]]) 96 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) 97 | 98 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') 99 | y3_expected = y2_expected 100 | 101 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') 102 | y4_expected = tf.to_float([[48, 37], 103 | [37, 22]]) 104 | y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1]) 105 | 106 | with self.test_session() as sess: 107 | sess.run(tf.initialize_all_variables()) 108 | self.assertAllClose(y1.eval(), y1_expected.eval()) 109 | self.assertAllClose(y2.eval(), y2_expected.eval()) 110 | self.assertAllClose(y3.eval(), y3_expected.eval()) 111 | self.assertAllClose(y4.eval(), y4_expected.eval()) 112 | 113 | def testConv2DSameOdd(self): 114 | n, n2 = 5, 3 115 | 116 | # Input image. 117 | x = create_test_input(1, n, n, 1) 118 | 119 | # Convolution kernel. 120 | w = create_test_input(1, 3, 3, 1) 121 | w = tf.reshape(w, [3, 3, 1, 1]) 122 | 123 | tf.get_variable('Conv/weights', initializer=w) 124 | tf.get_variable('Conv/biases', initializer=tf.zeros([1])) 125 | tf.get_variable_scope().reuse_variables() 126 | 127 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') 128 | y1_expected = tf.to_float([[14, 28, 43, 58, 34], 129 | [28, 48, 66, 84, 46], 130 | [43, 66, 84, 102, 55], 131 | [58, 84, 102, 120, 64], 132 | [34, 46, 55, 64, 30]]) 133 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) 134 | 135 | y2 = resnet_utils.subsample(y1, 2) 136 | y2_expected = tf.to_float([[14, 43, 34], 137 | [43, 84, 55], 138 | [34, 55, 30]]) 139 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) 140 | 141 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') 142 | y3_expected = y2_expected 143 | 144 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') 145 | y4_expected = y2_expected 146 | 147 | with self.test_session() as sess: 148 | sess.run(tf.initialize_all_variables()) 149 | self.assertAllClose(y1.eval(), y1_expected.eval()) 150 | self.assertAllClose(y2.eval(), y2_expected.eval()) 151 | self.assertAllClose(y3.eval(), y3_expected.eval()) 152 | self.assertAllClose(y4.eval(), y4_expected.eval()) 153 | 154 | def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): 155 | """A plain ResNet without extra layers before or after the ResNet blocks.""" 156 | with tf.variable_scope(scope, values=[inputs]): 157 | with slim.arg_scope([slim.conv2d], outputs_collections='end_points'): 158 | net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) 159 | end_points = dict(tf.get_collection('end_points')) 160 | return net, end_points 161 | 162 | def testEndPointsV1(self): 163 | """Test the end points of a tiny v1 bottleneck network.""" 164 | bottleneck = resnet_v1.bottleneck 165 | blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), 166 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])] 167 | inputs = create_test_input(2, 32, 16, 3) 168 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 169 | 
_, end_points = self._resnet_plain(inputs, blocks, scope='tiny') 170 | expected = [ 171 | 'tiny/block1/unit_1/bottleneck_v1/shortcut', 172 | 'tiny/block1/unit_1/bottleneck_v1/conv1', 173 | 'tiny/block1/unit_1/bottleneck_v1/conv2', 174 | 'tiny/block1/unit_1/bottleneck_v1/conv3', 175 | 'tiny/block1/unit_2/bottleneck_v1/conv1', 176 | 'tiny/block1/unit_2/bottleneck_v1/conv2', 177 | 'tiny/block1/unit_2/bottleneck_v1/conv3', 178 | 'tiny/block2/unit_1/bottleneck_v1/shortcut', 179 | 'tiny/block2/unit_1/bottleneck_v1/conv1', 180 | 'tiny/block2/unit_1/bottleneck_v1/conv2', 181 | 'tiny/block2/unit_1/bottleneck_v1/conv3', 182 | 'tiny/block2/unit_2/bottleneck_v1/conv1', 183 | 'tiny/block2/unit_2/bottleneck_v1/conv2', 184 | 'tiny/block2/unit_2/bottleneck_v1/conv3'] 185 | self.assertItemsEqual(expected, end_points) 186 | 187 | def _stack_blocks_nondense(self, net, blocks): 188 | """A simplified ResNet Block stacker without output stride control.""" 189 | for block in blocks: 190 | with tf.variable_scope(block.scope, 'block', [net]): 191 | for i, unit in enumerate(block.args): 192 | depth, depth_bottleneck, stride = unit 193 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]): 194 | net = block.unit_fn(net, 195 | depth=depth, 196 | depth_bottleneck=depth_bottleneck, 197 | stride=stride, 198 | rate=1) 199 | return net 200 | 201 | def _atrousValues(self, bottleneck): 202 | """Verify the values of dense feature extraction by atrous convolution. 203 | 204 | Make sure that dense feature extraction by stack_blocks_dense() followed by 205 | subsampling gives identical results to feature extraction at the nominal 206 | network output stride using the simple self._stack_blocks_nondense() above. 207 | 208 | Args: 209 | bottleneck: The bottleneck function. 210 | """ 211 | blocks = [ 212 | resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), 213 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]), 214 | resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]), 215 | resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)]) 216 | ] 217 | nominal_stride = 8 218 | 219 | # Test both odd and even input dimensions. 220 | height = 30 221 | width = 31 222 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 223 | with slim.arg_scope([slim.batch_norm], is_training=False): 224 | for output_stride in [1, 2, 4, 8, None]: 225 | with tf.Graph().as_default(): 226 | with self.test_session() as sess: 227 | tf.set_random_seed(0) 228 | inputs = create_test_input(1, height, width, 3) 229 | # Dense feature extraction followed by subsampling. 230 | output = resnet_utils.stack_blocks_dense(inputs, 231 | blocks, 232 | output_stride) 233 | if output_stride is None: 234 | factor = 1 235 | else: 236 | factor = nominal_stride // output_stride 237 | 238 | output = resnet_utils.subsample(output, factor) 239 | # Make the two networks use the same weights. 240 | tf.get_variable_scope().reuse_variables() 241 | # Feature extraction at the nominal network rate. 
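# Because the variable scope is now in reuse mode, this second pass rebuilds the
# graph with the very same variables, so any mismatch in the comparison below
# reflects the block-stacking logic rather than different random initializations.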
242 | expected = self._stack_blocks_nondense(inputs, blocks) 243 | sess.run(tf.initialize_all_variables()) 244 | output, expected = sess.run([output, expected]) 245 | self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4) 246 | 247 | def testAtrousValuesBottleneck(self): 248 | self._atrousValues(resnet_v1.bottleneck) 249 | 250 | 251 | class ResnetCompleteNetworkTest(tf.test.TestCase): 252 | """Tests with complete small ResNet v1 networks.""" 253 | 254 | def _resnet_small(self, 255 | inputs, 256 | num_classes=None, 257 | is_training=True, 258 | global_pool=True, 259 | output_stride=None, 260 | include_root_block=True, 261 | reuse=None, 262 | scope='resnet_v1_small'): 263 | """A shallow and thin ResNet v1 for faster tests.""" 264 | bottleneck = resnet_v1.bottleneck 265 | blocks = [ 266 | resnet_utils.Block( 267 | 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]), 268 | resnet_utils.Block( 269 | 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]), 270 | resnet_utils.Block( 271 | 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]), 272 | resnet_utils.Block( 273 | 'block4', bottleneck, [(32, 8, 1)] * 2)] 274 | return resnet_v1.resnet_v1(inputs, blocks, num_classes, 275 | is_training=is_training, 276 | global_pool=global_pool, 277 | output_stride=output_stride, 278 | include_root_block=include_root_block, 279 | reuse=reuse, 280 | scope=scope) 281 | 282 | def testClassificationEndPoints(self): 283 | global_pool = True 284 | num_classes = 10 285 | inputs = create_test_input(2, 224, 224, 3) 286 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 287 | logits, end_points = self._resnet_small(inputs, num_classes, 288 | global_pool=global_pool, 289 | scope='resnet') 290 | self.assertTrue(logits.op.name.startswith('resnet/logits')) 291 | self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) 292 | self.assertTrue('predictions' in end_points) 293 | self.assertListEqual(end_points['predictions'].get_shape().as_list(), 294 | [2, 1, 1, num_classes]) 295 | 296 | def testClassificationShapes(self): 297 | global_pool = True 298 | num_classes = 10 299 | inputs = create_test_input(2, 224, 224, 3) 300 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 301 | _, end_points = self._resnet_small(inputs, num_classes, 302 | global_pool=global_pool, 303 | scope='resnet') 304 | endpoint_to_shape = { 305 | 'resnet/block1': [2, 28, 28, 4], 306 | 'resnet/block2': [2, 14, 14, 8], 307 | 'resnet/block3': [2, 7, 7, 16], 308 | 'resnet/block4': [2, 7, 7, 32]} 309 | for endpoint in endpoint_to_shape: 310 | shape = endpoint_to_shape[endpoint] 311 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 312 | 313 | def testFullyConvolutionalEndpointShapes(self): 314 | global_pool = False 315 | num_classes = 10 316 | inputs = create_test_input(2, 321, 321, 3) 317 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 318 | _, end_points = self._resnet_small(inputs, num_classes, 319 | global_pool=global_pool, 320 | scope='resnet') 321 | endpoint_to_shape = { 322 | 'resnet/block1': [2, 41, 41, 4], 323 | 'resnet/block2': [2, 21, 21, 8], 324 | 'resnet/block3': [2, 11, 11, 16], 325 | 'resnet/block4': [2, 11, 11, 32]} 326 | for endpoint in endpoint_to_shape: 327 | shape = endpoint_to_shape[endpoint] 328 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 329 | 330 | def testRootlessFullyConvolutionalEndpointShapes(self): 331 | global_pool = False 332 | num_classes = 10 333 | inputs = create_test_input(2, 128, 128, 3) 334 | with 
slim.arg_scope(resnet_utils.resnet_arg_scope()): 335 | _, end_points = self._resnet_small(inputs, num_classes, 336 | global_pool=global_pool, 337 | include_root_block=False, 338 | scope='resnet') 339 | endpoint_to_shape = { 340 | 'resnet/block1': [2, 64, 64, 4], 341 | 'resnet/block2': [2, 32, 32, 8], 342 | 'resnet/block3': [2, 16, 16, 16], 343 | 'resnet/block4': [2, 16, 16, 32]} 344 | for endpoint in endpoint_to_shape: 345 | shape = endpoint_to_shape[endpoint] 346 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 347 | 348 | def testAtrousFullyConvolutionalEndpointShapes(self): 349 | global_pool = False 350 | num_classes = 10 351 | output_stride = 8 352 | inputs = create_test_input(2, 321, 321, 3) 353 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 354 | _, end_points = self._resnet_small(inputs, 355 | num_classes, 356 | global_pool=global_pool, 357 | output_stride=output_stride, 358 | scope='resnet') 359 | endpoint_to_shape = { 360 | 'resnet/block1': [2, 41, 41, 4], 361 | 'resnet/block2': [2, 41, 41, 8], 362 | 'resnet/block3': [2, 41, 41, 16], 363 | 'resnet/block4': [2, 41, 41, 32]} 364 | for endpoint in endpoint_to_shape: 365 | shape = endpoint_to_shape[endpoint] 366 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 367 | 368 | def testAtrousFullyConvolutionalValues(self): 369 | """Verify dense feature extraction with atrous convolution.""" 370 | nominal_stride = 32 371 | for output_stride in [4, 8, 16, 32, None]: 372 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 373 | with tf.Graph().as_default(): 374 | with self.test_session() as sess: 375 | tf.set_random_seed(0) 376 | inputs = create_test_input(2, 81, 81, 3) 377 | # Dense feature extraction followed by subsampling. 378 | output, _ = self._resnet_small(inputs, None, is_training=False, 379 | global_pool=False, 380 | output_stride=output_stride) 381 | if output_stride is None: 382 | factor = 1 383 | else: 384 | factor = nominal_stride // output_stride 385 | output = resnet_utils.subsample(output, factor) 386 | # Make the two networks use the same weights. 387 | tf.get_variable_scope().reuse_variables() 388 | # Feature extraction at the nominal network rate. 
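# The small ResNet has nominal stride 32, so subsampling the dense output by
# 32 // output_stride should reproduce the features computed at the nominal
# rate below.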
389 | expected, _ = self._resnet_small(inputs, None, is_training=False, 390 | global_pool=False) 391 | sess.run(tf.initialize_all_variables()) 392 | self.assertAllClose(output.eval(), expected.eval(), 393 | atol=1e-4, rtol=1e-4) 394 | 395 | def testUnknownBatchSize(self): 396 | batch = 2 397 | height, width = 65, 65 398 | global_pool = True 399 | num_classes = 10 400 | inputs = create_test_input(None, height, width, 3) 401 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 402 | logits, _ = self._resnet_small(inputs, num_classes, 403 | global_pool=global_pool, 404 | scope='resnet') 405 | self.assertTrue(logits.op.name.startswith('resnet/logits')) 406 | self.assertListEqual(logits.get_shape().as_list(), 407 | [None, 1, 1, num_classes]) 408 | images = create_test_input(batch, height, width, 3) 409 | with self.test_session() as sess: 410 | sess.run(tf.initialize_all_variables()) 411 | output = sess.run(logits, {inputs: images.eval()}) 412 | self.assertEqual(output.shape, (batch, 1, 1, num_classes)) 413 | 414 | def testFullyConvolutionalUnknownHeightWidth(self): 415 | batch = 2 416 | height, width = 65, 65 417 | global_pool = False 418 | inputs = create_test_input(batch, None, None, 3) 419 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 420 | output, _ = self._resnet_small(inputs, None, global_pool=global_pool) 421 | self.assertListEqual(output.get_shape().as_list(), 422 | [batch, None, None, 32]) 423 | images = create_test_input(batch, height, width, 3) 424 | with self.test_session() as sess: 425 | sess.run(tf.initialize_all_variables()) 426 | output = sess.run(output, {inputs: images.eval()}) 427 | self.assertEqual(output.shape, (batch, 3, 3, 32)) 428 | 429 | def testAtrousFullyConvolutionalUnknownHeightWidth(self): 430 | batch = 2 431 | height, width = 65, 65 432 | global_pool = False 433 | output_stride = 8 434 | inputs = create_test_input(batch, None, None, 3) 435 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 436 | output, _ = self._resnet_small(inputs, 437 | None, 438 | global_pool=global_pool, 439 | output_stride=output_stride) 440 | self.assertListEqual(output.get_shape().as_list(), 441 | [batch, None, None, 32]) 442 | images = create_test_input(batch, height, width, 3) 443 | with self.test_session() as sess: 444 | sess.run(tf.initialize_all_variables()) 445 | output = sess.run(output, {inputs: images.eval()}) 446 | self.assertEqual(output.shape, (batch, 9, 9, 32)) 447 | 448 | 449 | if __name__ == '__main__': 450 | tf.test.main() 451 | -------------------------------------------------------------------------------- /tf/nets/resnet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains definitions for the preactivation form of Residual Networks. 
16 | 17 | Residual networks (ResNets) were originally proposed in: 18 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 20 | 21 | The full preactivation 'v2' ResNet variant implemented in this module was 22 | introduced by: 23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 25 | 26 | The key difference of the full preactivation 'v2' variant compared to the 27 | 'v1' variant in [1] is the use of batch normalization before every weight layer. 28 | Another difference is that 'v2' ResNets do not include an activation function in 29 | the main pathway. Also see [2; Fig. 4e]. 30 | 31 | Typical use: 32 | 33 | from tensorflow.contrib.slim.nets import resnet_v2 34 | 35 | ResNet-101 for image classification into 1000 classes: 36 | 37 | # inputs has shape [batch, 224, 224, 3] 38 | with slim.arg_scope(resnet_v2.resnet_arg_scope()): 39 | net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False) 40 | 41 | ResNet-101 for semantic segmentation into 21 classes: 42 | 43 | # inputs has shape [batch, 513, 513, 3] 44 | with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)): 45 | net, end_points = resnet_v2.resnet_v2_101(inputs, 46 | 21, 47 | is_training=False, 48 | global_pool=False, 49 | output_stride=16) 50 | """ 51 | from __future__ import absolute_import 52 | from __future__ import division 53 | from __future__ import print_function 54 | 55 | import tensorflow as tf 56 | 57 | from nets import resnet_utils 58 | 59 | slim = tf.contrib.slim 60 | resnet_arg_scope = resnet_utils.resnet_arg_scope 61 | 62 | 63 | @slim.add_arg_scope 64 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, 65 | outputs_collections=None, scope=None): 66 | """Bottleneck residual unit variant with BN before convolutions. 67 | 68 | This is the full preactivation residual unit variant proposed in [2]. See 69 | Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck 70 | variant which has an extra bottleneck layer. 71 | 72 | When putting together two consecutive ResNet blocks that use this unit, one 73 | should use stride = 2 in the last unit of the first block. 74 | 75 | Args: 76 | inputs: A tensor of size [batch, height, width, channels]. 77 | depth: The depth of the ResNet unit output. 78 | depth_bottleneck: The depth of the bottleneck layers. 79 | stride: The ResNet unit's stride. Determines the amount of downsampling of 80 | the units output compared to its input. 81 | rate: An integer, rate for atrous convolution. 82 | outputs_collections: Collection to add the ResNet unit output. 83 | scope: Optional variable_scope. 84 | 85 | Returns: 86 | The ResNet unit's output. 
87 | """ 88 | with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: 89 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 90 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact') 91 | if depth == depth_in: 92 | shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') 93 | else: 94 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, 95 | normalizer_fn=None, activation_fn=None, 96 | scope='shortcut') 97 | 98 | residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, 99 | scope='conv1') 100 | residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, 101 | rate=rate, scope='conv2') 102 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, 103 | normalizer_fn=None, activation_fn=None, 104 | scope='conv3') 105 | 106 | output = shortcut + residual 107 | 108 | return slim.utils.collect_named_outputs(outputs_collections, 109 | sc.original_name_scope, 110 | output) 111 | 112 | 113 | def resnet_v2(inputs, 114 | blocks, 115 | num_classes=None, 116 | is_training=True, 117 | global_pool=True, 118 | output_stride=None, 119 | include_root_block=True, 120 | reuse=None, 121 | scope=None): 122 | """Generator for v2 (preactivation) ResNet models. 123 | 124 | This function generates a family of ResNet v2 models. See the resnet_v2_*() 125 | methods for specific model instantiations, obtained by selecting different 126 | block instantiations that produce ResNets of various depths. 127 | 128 | Training for image classification on Imagenet is usually done with [224, 224] 129 | inputs, resulting in [7, 7] feature maps at the output of the last ResNet 130 | block for the ResNets defined in [1] that have nominal stride equal to 32. 131 | However, for dense prediction tasks we advise that one uses inputs with 132 | spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In 133 | this case the feature maps at the ResNet output will have spatial shape 134 | [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] 135 | and corners exactly aligned with the input image corners, which greatly 136 | facilitates alignment of the features to the image. Using as input [225, 225] 137 | images results in [8, 8] feature maps at the output of the last ResNet block. 138 | 139 | For dense prediction tasks, the ResNet needs to run in fully-convolutional 140 | (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all 141 | have nominal stride equal to 32 and a good choice in FCN mode is to use 142 | output_stride=16 in order to increase the density of the computed features at 143 | small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. 144 | 145 | Args: 146 | inputs: A tensor of size [batch, height_in, width_in, channels]. 147 | blocks: A list of length equal to the number of ResNet blocks. Each element 148 | is a resnet_utils.Block object describing the units in the block. 149 | num_classes: Number of predicted classes for classification tasks. If None 150 | we return the features before the logit layer. 151 | is_training: whether is training or not. 152 | global_pool: If True, we perform global average pooling before computing the 153 | logits. Set to True for image classification, False for dense prediction. 154 | output_stride: If None, then the output will be computed at the nominal 155 | network stride. If output_stride is not None, it specifies the requested 156 | ratio of input to output spatial resolution. 
157 | include_root_block: If True, include the initial convolution followed by 158 | max-pooling, if False excludes it. If excluded, `inputs` should be the 159 | results of an activation-less convolution. 160 | reuse: whether or not the network and its variables should be reused. To be 161 | able to reuse 'scope' must be given. 162 | scope: Optional variable_scope. 163 | 164 | 165 | Returns: 166 | net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. 167 | If global_pool is False, then height_out and width_out are reduced by a 168 | factor of output_stride compared to the respective height_in and width_in, 169 | else both height_out and width_out equal one. If num_classes is None, then 170 | net is the output of the last ResNet block, potentially after global 171 | average pooling. If num_classes is not None, net contains the pre-softmax 172 | activations. 173 | end_points: A dictionary from components of the network to the corresponding 174 | activation. 175 | 176 | Raises: 177 | ValueError: If the target output_stride is not valid. 178 | """ 179 | with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: 180 | end_points_collection = sc.name + '_end_points' 181 | with slim.arg_scope([slim.conv2d, bottleneck, 182 | resnet_utils.stack_blocks_dense], 183 | outputs_collections=end_points_collection): 184 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 185 | net = inputs 186 | if include_root_block: 187 | if output_stride is not None: 188 | if output_stride % 4 != 0: 189 | raise ValueError('The output_stride needs to be a multiple of 4.') 190 | output_stride /= 4 191 | # We do not include batch normalization or activation functions in 192 | # conv1 because the first ResNet unit will perform these. Cf. 193 | # Appendix of [2]. 194 | with slim.arg_scope([slim.conv2d], 195 | activation_fn=None, normalizer_fn=None): 196 | net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') 197 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 198 | net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) 199 | # This is needed because the pre-activation variant does not have batch 200 | # normalization or activation functions in the residual unit output. See 201 | # Appendix of [2]. 202 | net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') 203 | if global_pool: 204 | # Global average pooling. 205 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 206 | if num_classes is not None: 207 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, 208 | normalizer_fn=None, scope='logits') 209 | # Convert end_points_collection into a dictionary of end_points. 210 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 211 | if num_classes is not None: 212 | end_points['predictions'] = slim.softmax(net, scope='predictions') 213 | return net, end_points 214 | resnet_v2.default_image_size = 224 215 | 216 | 217 | def resnet_v2_50(inputs, 218 | num_classes=None, 219 | is_training=True, 220 | global_pool=True, 221 | output_stride=None, 222 | reuse=None, 223 | scope='resnet_v2_50'): 224 | """ResNet-50 model of [1]. 
See resnet_v2() for arg and return description.""" 225 | blocks = [ 226 | resnet_utils.Block( 227 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 228 | resnet_utils.Block( 229 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), 230 | resnet_utils.Block( 231 | 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), 232 | resnet_utils.Block( 233 | 'block4', bottleneck, [(2048, 512, 1)] * 3)] 234 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, 235 | global_pool=global_pool, output_stride=output_stride, 236 | include_root_block=True, reuse=reuse, scope=scope) 237 | 238 | 239 | def resnet_v2_101(inputs, 240 | num_classes=None, 241 | is_training=True, 242 | global_pool=True, 243 | output_stride=None, 244 | reuse=None, 245 | scope='resnet_v2_101'): 246 | """ResNet-101 model of [1]. See resnet_v2() for arg and return description.""" 247 | blocks = [ 248 | resnet_utils.Block( 249 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 250 | resnet_utils.Block( 251 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), 252 | resnet_utils.Block( 253 | 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), 254 | resnet_utils.Block( 255 | 'block4', bottleneck, [(2048, 512, 1)] * 3)] 256 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, 257 | global_pool=global_pool, output_stride=output_stride, 258 | include_root_block=True, reuse=reuse, scope=scope) 259 | 260 | 261 | def resnet_v2_152(inputs, 262 | num_classes=None, 263 | is_training=True, 264 | global_pool=True, 265 | output_stride=None, 266 | reuse=None, 267 | scope='resnet_v2_152'): 268 | """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" 269 | blocks = [ 270 | resnet_utils.Block( 271 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 272 | resnet_utils.Block( 273 | 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), 274 | resnet_utils.Block( 275 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), 276 | resnet_utils.Block( 277 | 'block4', bottleneck, [(2048, 512, 1)] * 3)] 278 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, 279 | global_pool=global_pool, output_stride=output_stride, 280 | include_root_block=True, reuse=reuse, scope=scope) 281 | 282 | 283 | def resnet_v2_200(inputs, 284 | num_classes=None, 285 | is_training=True, 286 | global_pool=True, 287 | output_stride=None, 288 | reuse=None, 289 | scope='resnet_v2_200'): 290 | """ResNet-200 model of [2]. See resnet_v2() for arg and return description.""" 291 | blocks = [ 292 | resnet_utils.Block( 293 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), 294 | resnet_utils.Block( 295 | 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), 296 | resnet_utils.Block( 297 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), 298 | resnet_utils.Block( 299 | 'block4', bottleneck, [(2048, 512, 1)] * 3)] 300 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, 301 | global_pool=global_pool, output_stride=output_stride, 302 | include_root_block=True, reuse=reuse, scope=scope) 303 | -------------------------------------------------------------------------------- /tf/nets/resnet_v2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.nets.resnet_v2.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import numpy as np 22 | import tensorflow as tf 23 | 24 | from nets import resnet_utils 25 | from nets import resnet_v2 26 | 27 | slim = tf.contrib.slim 28 | 29 | 30 | def create_test_input(batch_size, height, width, channels): 31 | """Create test input tensor. 32 | 33 | Args: 34 | batch_size: The number of images per batch or `None` if unknown. 35 | height: The height of each image or `None` if unknown. 36 | width: The width of each image or `None` if unknown. 37 | channels: The number of channels per image or `None` if unknown. 38 | 39 | Returns: 40 | Either a placeholder `Tensor` of dimension 41 | [batch_size, height, width, channels] if any of the inputs are `None` or a 42 | constant `Tensor` with the mesh grid values along the spatial dimensions. 43 | """ 44 | if None in [batch_size, height, width, channels]: 45 | return tf.placeholder(tf.float32, (batch_size, height, width, channels)) 46 | else: 47 | return tf.to_float( 48 | np.tile( 49 | np.reshape( 50 | np.reshape(np.arange(height), [height, 1]) + 51 | np.reshape(np.arange(width), [1, width]), 52 | [1, height, width, 1]), 53 | [batch_size, 1, 1, channels])) 54 | 55 | 56 | class ResnetUtilsTest(tf.test.TestCase): 57 | 58 | def testSubsampleThreeByThree(self): 59 | x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1]) 60 | x = resnet_utils.subsample(x, 2) 61 | expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1]) 62 | with self.test_session(): 63 | self.assertAllClose(x.eval(), expected.eval()) 64 | 65 | def testSubsampleFourByFour(self): 66 | x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1]) 67 | x = resnet_utils.subsample(x, 2) 68 | expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1]) 69 | with self.test_session(): 70 | self.assertAllClose(x.eval(), expected.eval()) 71 | 72 | def testConv2DSameEven(self): 73 | n, n2 = 4, 2 74 | 75 | # Input image. 76 | x = create_test_input(1, n, n, 1) 77 | 78 | # Convolution kernel. 
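# The 3x3 kernel below reuses the mesh-grid test-input helper and is reshaped to
# the [height, width, in_channels, out_channels] layout expected by conv2d;
# registering it as 'Conv/weights' and enabling variable reuse makes slim.conv2d
# and resnet_utils.conv2d_same operate on identical weights.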
79 | w = create_test_input(1, 3, 3, 1) 80 | w = tf.reshape(w, [3, 3, 1, 1]) 81 | 82 | tf.get_variable('Conv/weights', initializer=w) 83 | tf.get_variable('Conv/biases', initializer=tf.zeros([1])) 84 | tf.get_variable_scope().reuse_variables() 85 | 86 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') 87 | y1_expected = tf.to_float([[14, 28, 43, 26], 88 | [28, 48, 66, 37], 89 | [43, 66, 84, 46], 90 | [26, 37, 46, 22]]) 91 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) 92 | 93 | y2 = resnet_utils.subsample(y1, 2) 94 | y2_expected = tf.to_float([[14, 43], 95 | [43, 84]]) 96 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) 97 | 98 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') 99 | y3_expected = y2_expected 100 | 101 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') 102 | y4_expected = tf.to_float([[48, 37], 103 | [37, 22]]) 104 | y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1]) 105 | 106 | with self.test_session() as sess: 107 | sess.run(tf.initialize_all_variables()) 108 | self.assertAllClose(y1.eval(), y1_expected.eval()) 109 | self.assertAllClose(y2.eval(), y2_expected.eval()) 110 | self.assertAllClose(y3.eval(), y3_expected.eval()) 111 | self.assertAllClose(y4.eval(), y4_expected.eval()) 112 | 113 | def testConv2DSameOdd(self): 114 | n, n2 = 5, 3 115 | 116 | # Input image. 117 | x = create_test_input(1, n, n, 1) 118 | 119 | # Convolution kernel. 120 | w = create_test_input(1, 3, 3, 1) 121 | w = tf.reshape(w, [3, 3, 1, 1]) 122 | 123 | tf.get_variable('Conv/weights', initializer=w) 124 | tf.get_variable('Conv/biases', initializer=tf.zeros([1])) 125 | tf.get_variable_scope().reuse_variables() 126 | 127 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') 128 | y1_expected = tf.to_float([[14, 28, 43, 58, 34], 129 | [28, 48, 66, 84, 46], 130 | [43, 66, 84, 102, 55], 131 | [58, 84, 102, 120, 64], 132 | [34, 46, 55, 64, 30]]) 133 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) 134 | 135 | y2 = resnet_utils.subsample(y1, 2) 136 | y2_expected = tf.to_float([[14, 43, 34], 137 | [43, 84, 55], 138 | [34, 55, 30]]) 139 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) 140 | 141 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') 142 | y3_expected = y2_expected 143 | 144 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') 145 | y4_expected = y2_expected 146 | 147 | with self.test_session() as sess: 148 | sess.run(tf.initialize_all_variables()) 149 | self.assertAllClose(y1.eval(), y1_expected.eval()) 150 | self.assertAllClose(y2.eval(), y2_expected.eval()) 151 | self.assertAllClose(y3.eval(), y3_expected.eval()) 152 | self.assertAllClose(y4.eval(), y4_expected.eval()) 153 | 154 | def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): 155 | """A plain ResNet without extra layers before or after the ResNet blocks.""" 156 | with tf.variable_scope(scope, values=[inputs]): 157 | with slim.arg_scope([slim.conv2d], outputs_collections='end_points'): 158 | net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) 159 | end_points = dict(tf.get_collection('end_points')) 160 | return net, end_points 161 | 162 | def testEndPointsV2(self): 163 | """Test the end points of a tiny v2 bottleneck network.""" 164 | bottleneck = resnet_v2.bottleneck 165 | blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), 166 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])] 167 | inputs = create_test_input(2, 32, 16, 3) 168 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 169 | 
_, end_points = self._resnet_plain(inputs, blocks, scope='tiny') 170 | expected = [ 171 | 'tiny/block1/unit_1/bottleneck_v2/shortcut', 172 | 'tiny/block1/unit_1/bottleneck_v2/conv1', 173 | 'tiny/block1/unit_1/bottleneck_v2/conv2', 174 | 'tiny/block1/unit_1/bottleneck_v2/conv3', 175 | 'tiny/block1/unit_2/bottleneck_v2/conv1', 176 | 'tiny/block1/unit_2/bottleneck_v2/conv2', 177 | 'tiny/block1/unit_2/bottleneck_v2/conv3', 178 | 'tiny/block2/unit_1/bottleneck_v2/shortcut', 179 | 'tiny/block2/unit_1/bottleneck_v2/conv1', 180 | 'tiny/block2/unit_1/bottleneck_v2/conv2', 181 | 'tiny/block2/unit_1/bottleneck_v2/conv3', 182 | 'tiny/block2/unit_2/bottleneck_v2/conv1', 183 | 'tiny/block2/unit_2/bottleneck_v2/conv2', 184 | 'tiny/block2/unit_2/bottleneck_v2/conv3'] 185 | self.assertItemsEqual(expected, end_points) 186 | 187 | def _stack_blocks_nondense(self, net, blocks): 188 | """A simplified ResNet Block stacker without output stride control.""" 189 | for block in blocks: 190 | with tf.variable_scope(block.scope, 'block', [net]): 191 | for i, unit in enumerate(block.args): 192 | depth, depth_bottleneck, stride = unit 193 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]): 194 | net = block.unit_fn(net, 195 | depth=depth, 196 | depth_bottleneck=depth_bottleneck, 197 | stride=stride, 198 | rate=1) 199 | return net 200 | 201 | def _atrousValues(self, bottleneck): 202 | """Verify the values of dense feature extraction by atrous convolution. 203 | 204 | Make sure that dense feature extraction by stack_blocks_dense() followed by 205 | subsampling gives identical results to feature extraction at the nominal 206 | network output stride using the simple self._stack_blocks_nondense() above. 207 | 208 | Args: 209 | bottleneck: The bottleneck function. 210 | """ 211 | blocks = [ 212 | resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), 213 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]), 214 | resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]), 215 | resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)]) 216 | ] 217 | nominal_stride = 8 218 | 219 | # Test both odd and even input dimensions. 220 | height = 30 221 | width = 31 222 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 223 | with slim.arg_scope([slim.batch_norm], is_training=False): 224 | for output_stride in [1, 2, 4, 8, None]: 225 | with tf.Graph().as_default(): 226 | with self.test_session() as sess: 227 | tf.set_random_seed(0) 228 | inputs = create_test_input(1, height, width, 3) 229 | # Dense feature extraction followed by subsampling. 230 | output = resnet_utils.stack_blocks_dense(inputs, 231 | blocks, 232 | output_stride) 233 | if output_stride is None: 234 | factor = 1 235 | else: 236 | factor = nominal_stride // output_stride 237 | 238 | output = resnet_utils.subsample(output, factor) 239 | # Make the two networks use the same weights. 240 | tf.get_variable_scope().reuse_variables() 241 | # Feature extraction at the nominal network rate. 
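# batch_norm runs with is_training=False throughout this test, so both passes
# use the same fixed moving statistics and the comparison depends only on the
# shared weights.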
242 | expected = self._stack_blocks_nondense(inputs, blocks) 243 | sess.run(tf.initialize_all_variables()) 244 | output, expected = sess.run([output, expected]) 245 | self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4) 246 | 247 | def testAtrousValuesBottleneck(self): 248 | self._atrousValues(resnet_v2.bottleneck) 249 | 250 | 251 | class ResnetCompleteNetworkTest(tf.test.TestCase): 252 | """Tests with complete small ResNet v2 networks.""" 253 | 254 | def _resnet_small(self, 255 | inputs, 256 | num_classes=None, 257 | is_training=True, 258 | global_pool=True, 259 | output_stride=None, 260 | include_root_block=True, 261 | reuse=None, 262 | scope='resnet_v2_small'): 263 | """A shallow and thin ResNet v2 for faster tests.""" 264 | bottleneck = resnet_v2.bottleneck 265 | blocks = [ 266 | resnet_utils.Block( 267 | 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]), 268 | resnet_utils.Block( 269 | 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]), 270 | resnet_utils.Block( 271 | 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]), 272 | resnet_utils.Block( 273 | 'block4', bottleneck, [(32, 8, 1)] * 2)] 274 | return resnet_v2.resnet_v2(inputs, blocks, num_classes, 275 | is_training=is_training, 276 | global_pool=global_pool, 277 | output_stride=output_stride, 278 | include_root_block=include_root_block, 279 | reuse=reuse, 280 | scope=scope) 281 | 282 | def testClassificationEndPoints(self): 283 | global_pool = True 284 | num_classes = 10 285 | inputs = create_test_input(2, 224, 224, 3) 286 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 287 | logits, end_points = self._resnet_small(inputs, num_classes, 288 | global_pool=global_pool, 289 | scope='resnet') 290 | self.assertTrue(logits.op.name.startswith('resnet/logits')) 291 | self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) 292 | self.assertTrue('predictions' in end_points) 293 | self.assertListEqual(end_points['predictions'].get_shape().as_list(), 294 | [2, 1, 1, num_classes]) 295 | 296 | def testClassificationShapes(self): 297 | global_pool = True 298 | num_classes = 10 299 | inputs = create_test_input(2, 224, 224, 3) 300 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 301 | _, end_points = self._resnet_small(inputs, num_classes, 302 | global_pool=global_pool, 303 | scope='resnet') 304 | endpoint_to_shape = { 305 | 'resnet/block1': [2, 28, 28, 4], 306 | 'resnet/block2': [2, 14, 14, 8], 307 | 'resnet/block3': [2, 7, 7, 16], 308 | 'resnet/block4': [2, 7, 7, 32]} 309 | for endpoint in endpoint_to_shape: 310 | shape = endpoint_to_shape[endpoint] 311 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 312 | 313 | def testFullyConvolutionalEndpointShapes(self): 314 | global_pool = False 315 | num_classes = 10 316 | inputs = create_test_input(2, 321, 321, 3) 317 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 318 | _, end_points = self._resnet_small(inputs, num_classes, 319 | global_pool=global_pool, 320 | scope='resnet') 321 | endpoint_to_shape = { 322 | 'resnet/block1': [2, 41, 41, 4], 323 | 'resnet/block2': [2, 21, 21, 8], 324 | 'resnet/block3': [2, 11, 11, 16], 325 | 'resnet/block4': [2, 11, 11, 32]} 326 | for endpoint in endpoint_to_shape: 327 | shape = endpoint_to_shape[endpoint] 328 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 329 | 330 | def testRootlessFullyConvolutionalEndpointShapes(self): 331 | global_pool = False 332 | num_classes = 10 333 | inputs = create_test_input(2, 128, 128, 3) 334 | with 
slim.arg_scope(resnet_utils.resnet_arg_scope()): 335 | _, end_points = self._resnet_small(inputs, num_classes, 336 | global_pool=global_pool, 337 | include_root_block=False, 338 | scope='resnet') 339 | endpoint_to_shape = { 340 | 'resnet/block1': [2, 64, 64, 4], 341 | 'resnet/block2': [2, 32, 32, 8], 342 | 'resnet/block3': [2, 16, 16, 16], 343 | 'resnet/block4': [2, 16, 16, 32]} 344 | for endpoint in endpoint_to_shape: 345 | shape = endpoint_to_shape[endpoint] 346 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 347 | 348 | def testAtrousFullyConvolutionalEndpointShapes(self): 349 | global_pool = False 350 | num_classes = 10 351 | output_stride = 8 352 | inputs = create_test_input(2, 321, 321, 3) 353 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 354 | _, end_points = self._resnet_small(inputs, 355 | num_classes, 356 | global_pool=global_pool, 357 | output_stride=output_stride, 358 | scope='resnet') 359 | endpoint_to_shape = { 360 | 'resnet/block1': [2, 41, 41, 4], 361 | 'resnet/block2': [2, 41, 41, 8], 362 | 'resnet/block3': [2, 41, 41, 16], 363 | 'resnet/block4': [2, 41, 41, 32]} 364 | for endpoint in endpoint_to_shape: 365 | shape = endpoint_to_shape[endpoint] 366 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) 367 | 368 | def testAtrousFullyConvolutionalValues(self): 369 | """Verify dense feature extraction with atrous convolution.""" 370 | nominal_stride = 32 371 | for output_stride in [4, 8, 16, 32, None]: 372 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 373 | with tf.Graph().as_default(): 374 | with self.test_session() as sess: 375 | tf.set_random_seed(0) 376 | inputs = create_test_input(2, 81, 81, 3) 377 | # Dense feature extraction followed by subsampling. 378 | output, _ = self._resnet_small(inputs, None, 379 | is_training=False, 380 | global_pool=False, 381 | output_stride=output_stride) 382 | if output_stride is None: 383 | factor = 1 384 | else: 385 | factor = nominal_stride // output_stride 386 | output = resnet_utils.subsample(output, factor) 387 | # Make the two networks use the same weights. 388 | tf.get_variable_scope().reuse_variables() 389 | # Feature extraction at the nominal network rate. 
390 | expected, _ = self._resnet_small(inputs, None, 391 | is_training=False, 392 | global_pool=False) 393 | sess.run(tf.initialize_all_variables()) 394 | self.assertAllClose(output.eval(), expected.eval(), 395 | atol=1e-4, rtol=1e-4) 396 | 397 | def testUnknownBatchSize(self): 398 | batch = 2 399 | height, width = 65, 65 400 | global_pool = True 401 | num_classes = 10 402 | inputs = create_test_input(None, height, width, 3) 403 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 404 | logits, _ = self._resnet_small(inputs, num_classes, 405 | global_pool=global_pool, 406 | scope='resnet') 407 | self.assertTrue(logits.op.name.startswith('resnet/logits')) 408 | self.assertListEqual(logits.get_shape().as_list(), 409 | [None, 1, 1, num_classes]) 410 | images = create_test_input(batch, height, width, 3) 411 | with self.test_session() as sess: 412 | sess.run(tf.initialize_all_variables()) 413 | output = sess.run(logits, {inputs: images.eval()}) 414 | self.assertEqual(output.shape, (batch, 1, 1, num_classes)) 415 | 416 | def testFullyConvolutionalUnknownHeightWidth(self): 417 | batch = 2 418 | height, width = 65, 65 419 | global_pool = False 420 | inputs = create_test_input(batch, None, None, 3) 421 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 422 | output, _ = self._resnet_small(inputs, None, 423 | global_pool=global_pool) 424 | self.assertListEqual(output.get_shape().as_list(), 425 | [batch, None, None, 32]) 426 | images = create_test_input(batch, height, width, 3) 427 | with self.test_session() as sess: 428 | sess.run(tf.initialize_all_variables()) 429 | output = sess.run(output, {inputs: images.eval()}) 430 | self.assertEqual(output.shape, (batch, 3, 3, 32)) 431 | 432 | def testAtrousFullyConvolutionalUnknownHeightWidth(self): 433 | batch = 2 434 | height, width = 65, 65 435 | global_pool = False 436 | output_stride = 8 437 | inputs = create_test_input(batch, None, None, 3) 438 | with slim.arg_scope(resnet_utils.resnet_arg_scope()): 439 | output, _ = self._resnet_small(inputs, 440 | None, 441 | global_pool=global_pool, 442 | output_stride=output_stride) 443 | self.assertListEqual(output.get_shape().as_list(), 444 | [batch, None, None, 32]) 445 | images = create_test_input(batch, height, width, 3) 446 | with self.test_session() as sess: 447 | sess.run(tf.initialize_all_variables()) 448 | output = sess.run(output, {inputs: images.eval()}) 449 | self.assertEqual(output.shape, (batch, 9, 9, 32)) 450 | 451 | 452 | if __name__ == '__main__': 453 | tf.test.main() 454 | -------------------------------------------------------------------------------- /tf/nets/vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains model definitions for versions of the Oxford VGG network. 
16 | 17 | These model definitions were introduced in the following technical report: 18 | 19 | Very Deep Convolutional Networks For Large-Scale Image Recognition 20 | Karen Simonyan and Andrew Zisserman 21 | arXiv technical report, 2015 22 | PDF: http://arxiv.org/pdf/1409.1556.pdf 23 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf 24 | CC-BY-4.0 25 | 26 | More information can be obtained from the VGG website: 27 | www.robots.ox.ac.uk/~vgg/research/very_deep/ 28 | 29 | Usage: 30 | with slim.arg_scope(vgg.vgg_arg_scope()): 31 | outputs, end_points = vgg.vgg_a(inputs) 32 | 33 | with slim.arg_scope(vgg.vgg_arg_scope()): 34 | outputs, end_points = vgg.vgg_16(inputs) 35 | 36 | @@vgg_a 37 | @@vgg_16 38 | @@vgg_19 39 | """ 40 | from __future__ import absolute_import 41 | from __future__ import division 42 | from __future__ import print_function 43 | 44 | import tensorflow as tf 45 | 46 | slim = tf.contrib.slim 47 | 48 | 49 | def vgg_arg_scope(weight_decay=0.0005): 50 | """Defines the VGG arg scope. 51 | 52 | Args: 53 | weight_decay: The l2 regularization coefficient. 54 | 55 | Returns: 56 | An arg_scope. 57 | """ 58 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 59 | activation_fn=tf.nn.relu, 60 | weights_regularizer=slim.l2_regularizer(weight_decay), 61 | biases_initializer=tf.zeros_initializer): 62 | with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: 63 | return arg_sc 64 | 65 | 66 | def vgg_a(inputs, 67 | num_classes=1000, 68 | is_training=True, 69 | dropout_keep_prob=0.5, 70 | spatial_squeeze=True, 71 | scope='vgg_a'): 72 | """Oxford Net VGG 11-Layers version A Example. 73 | 74 | Note: All the fully_connected layers have been transformed to conv2d layers. 75 | To use in classification mode, resize input to 224x224. 76 | 77 | Args: 78 | inputs: a tensor of size [batch_size, height, width, channels]. 79 | num_classes: number of predicted classes. 80 | is_training: whether or not the model is being trained. 81 | dropout_keep_prob: the probability that activations are kept in the dropout 82 | layers during training. 83 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 84 | outputs. Useful to remove unnecessary dimensions for classification. 85 | scope: Optional scope for the variables. 86 | 87 | Returns: 88 | the last op containing the log predictions and end_points dict. 89 | """ 90 | with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc: 91 | end_points_collection = sc.name + '_end_points' 92 | # Collect outputs for conv2d, fully_connected and max_pool2d. 93 | with slim.arg_scope([slim.conv2d, slim.max_pool2d], 94 | outputs_collections=end_points_collection): 95 | net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 97 | net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2') 98 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 99 | net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') 100 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 101 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4') 102 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 103 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5') 104 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 105 | # Use conv2d instead of fully_connected layers. 
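# On a 224x224 input the pool5 output is 7x7, so a 7x7 VALID convolution behaves
# like the original fc6 layer while keeping the network usable on larger inputs
# in fully-convolutional mode.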
106 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 107 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 108 | scope='dropout6') 109 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 110 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 111 | scope='dropout7') 112 | net = slim.conv2d(net, num_classes, [1, 1], 113 | activation_fn=None, 114 | normalizer_fn=None, 115 | scope='fc8') 116 | # Convert end_points_collection into a end_point dict. 117 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 118 | if spatial_squeeze: 119 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 120 | end_points[sc.name + '/fc8'] = net 121 | return net, end_points 122 | vgg_a.default_image_size = 224 123 | 124 | 125 | def vgg_16(inputs, 126 | num_classes=1000, 127 | is_training=True, 128 | dropout_keep_prob=0.5, 129 | spatial_squeeze=True, 130 | scope='vgg_16'): 131 | """Oxford Net VGG 16-Layers version D Example. 132 | 133 | Note: All the fully_connected layers have been transformed to conv2d layers. 134 | To use in classification mode, resize input to 224x224. 135 | 136 | Args: 137 | inputs: a tensor of size [batch_size, height, width, channels]. 138 | num_classes: number of predicted classes. 139 | is_training: whether or not the model is being trained. 140 | dropout_keep_prob: the probability that activations are kept in the dropout 141 | layers during training. 142 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 143 | outputs. Useful to remove unnecessary dimensions for classification. 144 | scope: Optional scope for the variables. 145 | 146 | Returns: 147 | the last op containing the log predictions and end_points dict. 148 | """ 149 | with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc: 150 | end_points_collection = sc.name + '_end_points' 151 | # Collect outputs for conv2d, fully_connected and max_pool2d. 152 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 153 | outputs_collections=end_points_collection): 154 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 155 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 156 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 157 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 158 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') 159 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 160 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') 161 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 162 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') 163 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 164 | # Use conv2d instead of fully_connected layers. 165 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 166 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 167 | scope='dropout6') 168 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 169 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 170 | scope='dropout7') 171 | net = slim.conv2d(net, num_classes, [1, 1], 172 | activation_fn=None, 173 | normalizer_fn=None, 174 | scope='fc8') 175 | # Convert end_points_collection into a end_point dict. 
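# The resulting dict maps each layer's scope name to its activation; for 224x224
# inputs the optional squeeze below drops the 1x1 spatial dimensions so the
# classification logits have shape [batch_size, num_classes].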
176 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 177 | if spatial_squeeze: 178 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 179 | end_points[sc.name + '/fc8'] = net 180 | return net, end_points 181 | vgg_16.default_image_size = 224 182 | 183 | 184 | def vgg_19(inputs, 185 | num_classes=1000, 186 | is_training=True, 187 | dropout_keep_prob=0.5, 188 | spatial_squeeze=True, 189 | scope='vgg_19'): 190 | """Oxford Net VGG 19-Layers version E Example. 191 | 192 | Note: All the fully_connected layers have been transformed to conv2d layers. 193 | To use in classification mode, resize input to 224x224. 194 | 195 | Args: 196 | inputs: a tensor of size [batch_size, height, width, channels]. 197 | num_classes: number of predicted classes. 198 | is_training: whether or not the model is being trained. 199 | dropout_keep_prob: the probability that activations are kept in the dropout 200 | layers during training. 201 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 202 | outputs. Useful to remove unnecessary dimensions for classification. 203 | scope: Optional scope for the variables. 204 | 205 | Returns: 206 | the last op containing the log predictions and end_points dict. 207 | """ 208 | with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc: 209 | end_points_collection = sc.name + '_end_points' 210 | # Collect outputs for conv2d, fully_connected and max_pool2d. 211 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 212 | outputs_collections=end_points_collection): 213 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 214 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 215 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 216 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 217 | net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3') 218 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 219 | net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4') 220 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 221 | net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5') 222 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 223 | # Use conv2d instead of fully_connected layers. 224 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 225 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 226 | scope='dropout6') 227 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 228 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 229 | scope='dropout7') 230 | net = slim.conv2d(net, num_classes, [1, 1], 231 | activation_fn=None, 232 | normalizer_fn=None, 233 | scope='fc8') 234 | # Convert end_points_collection into a end_point dict. 235 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 236 | if spatial_squeeze: 237 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 238 | end_points[sc.name + '/fc8'] = net 239 | return net, end_points 240 | vgg_19.default_image_size = 224 241 | 242 | # Alias 243 | vgg_d = vgg_16 244 | vgg_e = vgg_19 245 | -------------------------------------------------------------------------------- /tf/nets/vgg_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.nets.vgg.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import vgg 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class VGGATest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = vgg.vgg_a(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 256, 256 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 224, 224 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = vgg.vgg_a(inputs, num_classes) 58 | expected_names = ['vgg_a/conv1/conv1_1', 59 | 'vgg_a/pool1', 60 | 'vgg_a/conv2/conv2_1', 61 | 'vgg_a/pool2', 62 | 'vgg_a/conv3/conv3_1', 63 | 'vgg_a/conv3/conv3_2', 64 | 'vgg_a/pool3', 65 | 'vgg_a/conv4/conv4_1', 66 | 'vgg_a/conv4/conv4_2', 67 | 'vgg_a/pool4', 68 | 'vgg_a/conv5/conv5_1', 69 | 'vgg_a/conv5/conv5_2', 70 | 'vgg_a/pool5', 71 | 'vgg_a/fc6', 72 | 'vgg_a/fc7', 73 | 'vgg_a/fc8' 74 | ] 75 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 76 | 77 | def testModelVariables(self): 78 | batch_size = 5 79 | height, width = 224, 224 80 | num_classes = 1000 81 | with self.test_session(): 82 | inputs = tf.random_uniform((batch_size, height, width, 3)) 83 | vgg.vgg_a(inputs, num_classes) 84 | expected_names = ['vgg_a/conv1/conv1_1/weights', 85 | 'vgg_a/conv1/conv1_1/biases', 86 | 'vgg_a/conv2/conv2_1/weights', 87 | 'vgg_a/conv2/conv2_1/biases', 88 | 'vgg_a/conv3/conv3_1/weights', 89 | 'vgg_a/conv3/conv3_1/biases', 90 | 'vgg_a/conv3/conv3_2/weights', 91 | 'vgg_a/conv3/conv3_2/biases', 92 | 'vgg_a/conv4/conv4_1/weights', 93 | 'vgg_a/conv4/conv4_1/biases', 94 | 'vgg_a/conv4/conv4_2/weights', 95 | 'vgg_a/conv4/conv4_2/biases', 96 | 'vgg_a/conv5/conv5_1/weights', 97 | 'vgg_a/conv5/conv5_1/biases', 98 | 'vgg_a/conv5/conv5_2/weights', 99 | 'vgg_a/conv5/conv5_2/biases', 100 | 'vgg_a/fc6/weights', 101 | 'vgg_a/fc6/biases', 102 | 'vgg_a/fc7/weights', 103 | 'vgg_a/fc7/biases', 104 | 'vgg_a/fc8/weights', 105 | 'vgg_a/fc8/biases', 106 | ] 107 | model_variables = [v.op.name for v in slim.get_model_variables()] 108 | self.assertSetEqual(set(model_variables), set(expected_names)) 109 | 110 | 
def testEvaluation(self): 111 | batch_size = 2 112 | height, width = 224, 224 113 | num_classes = 1000 114 | with self.test_session(): 115 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 116 | logits, _ = vgg.vgg_a(eval_inputs, is_training=False) 117 | self.assertListEqual(logits.get_shape().as_list(), 118 | [batch_size, num_classes]) 119 | predictions = tf.argmax(logits, 1) 120 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 121 | 122 | def testTrainEvalWithReuse(self): 123 | train_batch_size = 2 124 | eval_batch_size = 1 125 | train_height, train_width = 224, 224 126 | eval_height, eval_width = 256, 256 127 | num_classes = 1000 128 | with self.test_session(): 129 | train_inputs = tf.random_uniform( 130 | (train_batch_size, train_height, train_width, 3)) 131 | logits, _ = vgg.vgg_a(train_inputs) 132 | self.assertListEqual(logits.get_shape().as_list(), 133 | [train_batch_size, num_classes]) 134 | tf.get_variable_scope().reuse_variables() 135 | eval_inputs = tf.random_uniform( 136 | (eval_batch_size, eval_height, eval_width, 3)) 137 | logits, _ = vgg.vgg_a(eval_inputs, is_training=False, 138 | spatial_squeeze=False) 139 | self.assertListEqual(logits.get_shape().as_list(), 140 | [eval_batch_size, 2, 2, num_classes]) 141 | logits = tf.reduce_mean(logits, [1, 2]) 142 | predictions = tf.argmax(logits, 1) 143 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 144 | 145 | def testForward(self): 146 | batch_size = 1 147 | height, width = 224, 224 148 | with self.test_session() as sess: 149 | inputs = tf.random_uniform((batch_size, height, width, 3)) 150 | logits, _ = vgg.vgg_a(inputs) 151 | sess.run(tf.initialize_all_variables()) 152 | output = sess.run(logits) 153 | self.assertTrue(output.any()) 154 | 155 | 156 | class VGG16Test(tf.test.TestCase): 157 | 158 | def testBuild(self): 159 | batch_size = 5 160 | height, width = 224, 224 161 | num_classes = 1000 162 | with self.test_session(): 163 | inputs = tf.random_uniform((batch_size, height, width, 3)) 164 | logits, _ = vgg.vgg_16(inputs, num_classes) 165 | self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed') 166 | self.assertListEqual(logits.get_shape().as_list(), 167 | [batch_size, num_classes]) 168 | 169 | def testFullyConvolutional(self): 170 | batch_size = 1 171 | height, width = 256, 256 172 | num_classes = 1000 173 | with self.test_session(): 174 | inputs = tf.random_uniform((batch_size, height, width, 3)) 175 | logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False) 176 | self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd') 177 | self.assertListEqual(logits.get_shape().as_list(), 178 | [batch_size, 2, 2, num_classes]) 179 | 180 | def testEndPoints(self): 181 | batch_size = 5 182 | height, width = 224, 224 183 | num_classes = 1000 184 | with self.test_session(): 185 | inputs = tf.random_uniform((batch_size, height, width, 3)) 186 | _, end_points = vgg.vgg_16(inputs, num_classes) 187 | expected_names = ['vgg_16/conv1/conv1_1', 188 | 'vgg_16/conv1/conv1_2', 189 | 'vgg_16/pool1', 190 | 'vgg_16/conv2/conv2_1', 191 | 'vgg_16/conv2/conv2_2', 192 | 'vgg_16/pool2', 193 | 'vgg_16/conv3/conv3_1', 194 | 'vgg_16/conv3/conv3_2', 195 | 'vgg_16/conv3/conv3_3', 196 | 'vgg_16/pool3', 197 | 'vgg_16/conv4/conv4_1', 198 | 'vgg_16/conv4/conv4_2', 199 | 'vgg_16/conv4/conv4_3', 200 | 'vgg_16/pool4', 201 | 'vgg_16/conv5/conv5_1', 202 | 'vgg_16/conv5/conv5_2', 203 | 'vgg_16/conv5/conv5_3', 204 | 'vgg_16/pool5', 205 | 'vgg_16/fc6', 206 | 'vgg_16/fc7', 207 | 'vgg_16/fc8' 208 | ] 209 | 
self.assertSetEqual(set(end_points.keys()), set(expected_names)) 210 | 211 | def testModelVariables(self): 212 | batch_size = 5 213 | height, width = 224, 224 214 | num_classes = 1000 215 | with self.test_session(): 216 | inputs = tf.random_uniform((batch_size, height, width, 3)) 217 | vgg.vgg_16(inputs, num_classes) 218 | expected_names = ['vgg_16/conv1/conv1_1/weights', 219 | 'vgg_16/conv1/conv1_1/biases', 220 | 'vgg_16/conv1/conv1_2/weights', 221 | 'vgg_16/conv1/conv1_2/biases', 222 | 'vgg_16/conv2/conv2_1/weights', 223 | 'vgg_16/conv2/conv2_1/biases', 224 | 'vgg_16/conv2/conv2_2/weights', 225 | 'vgg_16/conv2/conv2_2/biases', 226 | 'vgg_16/conv3/conv3_1/weights', 227 | 'vgg_16/conv3/conv3_1/biases', 228 | 'vgg_16/conv3/conv3_2/weights', 229 | 'vgg_16/conv3/conv3_2/biases', 230 | 'vgg_16/conv3/conv3_3/weights', 231 | 'vgg_16/conv3/conv3_3/biases', 232 | 'vgg_16/conv4/conv4_1/weights', 233 | 'vgg_16/conv4/conv4_1/biases', 234 | 'vgg_16/conv4/conv4_2/weights', 235 | 'vgg_16/conv4/conv4_2/biases', 236 | 'vgg_16/conv4/conv4_3/weights', 237 | 'vgg_16/conv4/conv4_3/biases', 238 | 'vgg_16/conv5/conv5_1/weights', 239 | 'vgg_16/conv5/conv5_1/biases', 240 | 'vgg_16/conv5/conv5_2/weights', 241 | 'vgg_16/conv5/conv5_2/biases', 242 | 'vgg_16/conv5/conv5_3/weights', 243 | 'vgg_16/conv5/conv5_3/biases', 244 | 'vgg_16/fc6/weights', 245 | 'vgg_16/fc6/biases', 246 | 'vgg_16/fc7/weights', 247 | 'vgg_16/fc7/biases', 248 | 'vgg_16/fc8/weights', 249 | 'vgg_16/fc8/biases', 250 | ] 251 | model_variables = [v.op.name for v in slim.get_model_variables()] 252 | self.assertSetEqual(set(model_variables), set(expected_names)) 253 | 254 | def testEvaluation(self): 255 | batch_size = 2 256 | height, width = 224, 224 257 | num_classes = 1000 258 | with self.test_session(): 259 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 260 | logits, _ = vgg.vgg_16(eval_inputs, is_training=False) 261 | self.assertListEqual(logits.get_shape().as_list(), 262 | [batch_size, num_classes]) 263 | predictions = tf.argmax(logits, 1) 264 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 265 | 266 | def testTrainEvalWithReuse(self): 267 | train_batch_size = 2 268 | eval_batch_size = 1 269 | train_height, train_width = 224, 224 270 | eval_height, eval_width = 256, 256 271 | num_classes = 1000 272 | with self.test_session(): 273 | train_inputs = tf.random_uniform( 274 | (train_batch_size, train_height, train_width, 3)) 275 | logits, _ = vgg.vgg_16(train_inputs) 276 | self.assertListEqual(logits.get_shape().as_list(), 277 | [train_batch_size, num_classes]) 278 | tf.get_variable_scope().reuse_variables() 279 | eval_inputs = tf.random_uniform( 280 | (eval_batch_size, eval_height, eval_width, 3)) 281 | logits, _ = vgg.vgg_16(eval_inputs, is_training=False, 282 | spatial_squeeze=False) 283 | self.assertListEqual(logits.get_shape().as_list(), 284 | [eval_batch_size, 2, 2, num_classes]) 285 | logits = tf.reduce_mean(logits, [1, 2]) 286 | predictions = tf.argmax(logits, 1) 287 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 288 | 289 | def testForward(self): 290 | batch_size = 1 291 | height, width = 224, 224 292 | with self.test_session() as sess: 293 | inputs = tf.random_uniform((batch_size, height, width, 3)) 294 | logits, _ = vgg.vgg_16(inputs) 295 | sess.run(tf.initialize_all_variables()) 296 | output = sess.run(logits) 297 | self.assertTrue(output.any()) 298 | 299 | 300 | class VGG19Test(tf.test.TestCase): 301 | 302 | def testBuild(self): 303 | batch_size = 5 304 | height, width = 
224, 224 305 | num_classes = 1000 306 | with self.test_session(): 307 | inputs = tf.random_uniform((batch_size, height, width, 3)) 308 | logits, _ = vgg.vgg_19(inputs, num_classes) 309 | self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed') 310 | self.assertListEqual(logits.get_shape().as_list(), 311 | [batch_size, num_classes]) 312 | 313 | def testFullyConvolutional(self): 314 | batch_size = 1 315 | height, width = 256, 256 316 | num_classes = 1000 317 | with self.test_session(): 318 | inputs = tf.random_uniform((batch_size, height, width, 3)) 319 | logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False) 320 | self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd') 321 | self.assertListEqual(logits.get_shape().as_list(), 322 | [batch_size, 2, 2, num_classes]) 323 | 324 | def testEndPoints(self): 325 | batch_size = 5 326 | height, width = 224, 224 327 | num_classes = 1000 328 | with self.test_session(): 329 | inputs = tf.random_uniform((batch_size, height, width, 3)) 330 | _, end_points = vgg.vgg_19(inputs, num_classes) 331 | expected_names = [ 332 | 'vgg_19/conv1/conv1_1', 333 | 'vgg_19/conv1/conv1_2', 334 | 'vgg_19/pool1', 335 | 'vgg_19/conv2/conv2_1', 336 | 'vgg_19/conv2/conv2_2', 337 | 'vgg_19/pool2', 338 | 'vgg_19/conv3/conv3_1', 339 | 'vgg_19/conv3/conv3_2', 340 | 'vgg_19/conv3/conv3_3', 341 | 'vgg_19/conv3/conv3_4', 342 | 'vgg_19/pool3', 343 | 'vgg_19/conv4/conv4_1', 344 | 'vgg_19/conv4/conv4_2', 345 | 'vgg_19/conv4/conv4_3', 346 | 'vgg_19/conv4/conv4_4', 347 | 'vgg_19/pool4', 348 | 'vgg_19/conv5/conv5_1', 349 | 'vgg_19/conv5/conv5_2', 350 | 'vgg_19/conv5/conv5_3', 351 | 'vgg_19/conv5/conv5_4', 352 | 'vgg_19/pool5', 353 | 'vgg_19/fc6', 354 | 'vgg_19/fc7', 355 | 'vgg_19/fc8' 356 | ] 357 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 358 | 359 | def testModelVariables(self): 360 | batch_size = 5 361 | height, width = 224, 224 362 | num_classes = 1000 363 | with self.test_session(): 364 | inputs = tf.random_uniform((batch_size, height, width, 3)) 365 | vgg.vgg_19(inputs, num_classes) 366 | expected_names = [ 367 | 'vgg_19/conv1/conv1_1/weights', 368 | 'vgg_19/conv1/conv1_1/biases', 369 | 'vgg_19/conv1/conv1_2/weights', 370 | 'vgg_19/conv1/conv1_2/biases', 371 | 'vgg_19/conv2/conv2_1/weights', 372 | 'vgg_19/conv2/conv2_1/biases', 373 | 'vgg_19/conv2/conv2_2/weights', 374 | 'vgg_19/conv2/conv2_2/biases', 375 | 'vgg_19/conv3/conv3_1/weights', 376 | 'vgg_19/conv3/conv3_1/biases', 377 | 'vgg_19/conv3/conv3_2/weights', 378 | 'vgg_19/conv3/conv3_2/biases', 379 | 'vgg_19/conv3/conv3_3/weights', 380 | 'vgg_19/conv3/conv3_3/biases', 381 | 'vgg_19/conv3/conv3_4/weights', 382 | 'vgg_19/conv3/conv3_4/biases', 383 | 'vgg_19/conv4/conv4_1/weights', 384 | 'vgg_19/conv4/conv4_1/biases', 385 | 'vgg_19/conv4/conv4_2/weights', 386 | 'vgg_19/conv4/conv4_2/biases', 387 | 'vgg_19/conv4/conv4_3/weights', 388 | 'vgg_19/conv4/conv4_3/biases', 389 | 'vgg_19/conv4/conv4_4/weights', 390 | 'vgg_19/conv4/conv4_4/biases', 391 | 'vgg_19/conv5/conv5_1/weights', 392 | 'vgg_19/conv5/conv5_1/biases', 393 | 'vgg_19/conv5/conv5_2/weights', 394 | 'vgg_19/conv5/conv5_2/biases', 395 | 'vgg_19/conv5/conv5_3/weights', 396 | 'vgg_19/conv5/conv5_3/biases', 397 | 'vgg_19/conv5/conv5_4/weights', 398 | 'vgg_19/conv5/conv5_4/biases', 399 | 'vgg_19/fc6/weights', 400 | 'vgg_19/fc6/biases', 401 | 'vgg_19/fc7/weights', 402 | 'vgg_19/fc7/biases', 403 | 'vgg_19/fc8/weights', 404 | 'vgg_19/fc8/biases', 405 | ] 406 | model_variables = [v.op.name for v in slim.get_model_variables()] 407 | 
self.assertSetEqual(set(model_variables), set(expected_names)) 408 | 409 | def testEvaluation(self): 410 | batch_size = 2 411 | height, width = 224, 224 412 | num_classes = 1000 413 | with self.test_session(): 414 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 415 | logits, _ = vgg.vgg_19(eval_inputs, is_training=False) 416 | self.assertListEqual(logits.get_shape().as_list(), 417 | [batch_size, num_classes]) 418 | predictions = tf.argmax(logits, 1) 419 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 420 | 421 | def testTrainEvalWithReuse(self): 422 | train_batch_size = 2 423 | eval_batch_size = 1 424 | train_height, train_width = 224, 224 425 | eval_height, eval_width = 256, 256 426 | num_classes = 1000 427 | with self.test_session(): 428 | train_inputs = tf.random_uniform( 429 | (train_batch_size, train_height, train_width, 3)) 430 | logits, _ = vgg.vgg_19(train_inputs) 431 | self.assertListEqual(logits.get_shape().as_list(), 432 | [train_batch_size, num_classes]) 433 | tf.get_variable_scope().reuse_variables() 434 | eval_inputs = tf.random_uniform( 435 | (eval_batch_size, eval_height, eval_width, 3)) 436 | logits, _ = vgg.vgg_19(eval_inputs, is_training=False, 437 | spatial_squeeze=False) 438 | self.assertListEqual(logits.get_shape().as_list(), 439 | [eval_batch_size, 2, 2, num_classes]) 440 | logits = tf.reduce_mean(logits, [1, 2]) 441 | predictions = tf.argmax(logits, 1) 442 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 443 | 444 | def testForward(self): 445 | batch_size = 1 446 | height, width = 224, 224 447 | with self.test_session() as sess: 448 | inputs = tf.random_uniform((batch_size, height, width, 3)) 449 | logits, _ = vgg.vgg_19(inputs) 450 | sess.run(tf.initialize_all_variables()) 451 | output = sess.run(logits) 452 | self.assertTrue(output.any()) 453 | 454 | if __name__ == '__main__': 455 | tf.test.main() 456 | -------------------------------------------------------------------------------- /tf/retrain.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # Modified 2017 Microsoft Corporation. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | """Generic training script that trains a model using a given dataset.""" 17 | 18 | import tensorflow as tf 19 | import pandas as pd 20 | import numpy as np 21 | import os 22 | import functools 23 | 24 | from tensorflow.python.ops import control_flow_ops 25 | from deployment import model_deploy 26 | from nets import resnet_v1 # Needed to be modified, see https://github.com/tensorflow/models/issues/533 27 | from tensorflow.contrib.training.python.training import evaluation 28 | 29 | slim = tf.contrib.slim 30 | 31 | ''' Enumerate the flags ''' 32 | tf.app.flags.DEFINE_string('train_dir', 33 | 'D:\\tf\\models', 34 | 'Directory where checkpoints and event logs are written to.') 35 | tf.app.flags.DEFINE_string('dataset_name', 'aerial', 'The name of the dataset to load.') 36 | tf.app.flags.DEFINE_string('dataset_dir', 37 | 'D:\\combined\\train_subsample', 38 | 'The directory where the dataset files are stored.') 39 | tf.app.flags.DEFINE_string('checkpoint_path', 40 | 'D:\\tf\\resnet_v1_50.ckpt', 41 | 'The path to a checkpoint from which to fine-tune.') 42 | 43 | tf.app.flags.DEFINE_string('checkpoint_exclude_scopes', 'resnet_v1_50/logits', 44 | 'Comma-separated list of scopes of variables to exclude when restoring ' 45 | 'from a checkpoint.') 46 | tf.app.flags.DEFINE_string('trainable_scopes', 'resnet_v1_50/logits', 47 | 'Comma-separated list of scopes to filter the set of variables to train.' 48 | 'By default, None would train all the variables.') 49 | 50 | tf.app.flags.DEFINE_integer('num_clones', 1, 'Number of model clones to deploy.') 51 | tf.app.flags.DEFINE_boolean('clone_on_cpu', False, 'Use CPUs to deploy clones.') 52 | tf.app.flags.DEFINE_integer('num_readers', 4, 'The number of parallel readers that read data from the dataset.') 53 | tf.app.flags.DEFINE_integer('num_preprocessing_threads', 4, 'The number of threads used to create the batches.') 54 | tf.app.flags.DEFINE_integer('log_every_n_steps', 10, 'The frequency with which logs are printed.') 55 | tf.app.flags.DEFINE_integer('save_summaries_secs', 600, 'The frequency with which summaries are saved, in seconds.') 56 | tf.app.flags.DEFINE_integer('save_interval_secs', 600, 'The frequency with which the model is saved, in seconds.') 57 | 58 | tf.app.flags.DEFINE_float('weight_decay', 0.00004, 'The weight decay on the model weights.') 59 | tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.') 60 | tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.') 61 | tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.') 62 | tf.app.flags.DEFINE_float('learning_rate', 0.02, 'Initial learning rate.') 63 | tf.app.flags.DEFINE_float('label_smoothing', 0.0, 'The amount of label smoothing.') 64 | tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.9, 'Learning rate decay factor.') 65 | tf.app.flags.DEFINE_float('num_epochs_per_decay', 2.0, 'Number of epochs after which learning rate decays.') 66 | tf.app.flags.DEFINE_integer('replicas_to_aggregate', 1, 'The number of gradients to collect before updating params.') 67 | tf.app.flags.DEFINE_integer('batch_size', 32, 'The number of samples in each batch.') 68 | tf.app.flags.DEFINE_integer('max_number_of_steps', 4000, 'The maximum number of training steps.') 69 | 70 | FLAGS = tf.app.flags.FLAGS 71 | 72 | def get_image_and_class_count(dataset_dir, split_name): 73 | df = pd.read_csv(os.path.join(dataset_dir, 'dataset_split_info.csv')) 74 | image_count = 
len(df.loc[df['split_name'] == split_name].index) 75 | class_count = len(df['class_name'].unique()) 76 | return(image_count, class_count) 77 | 78 | def read_label_file(dataset_dir, filename='labels.txt'): 79 | labels_filename = os.path.join(dataset_dir, filename) 80 | with tf.gfile.Open(labels_filename, 'r') as f: 81 | lines = f.read() 82 | lines = lines.split('\n') 83 | lines = filter(None, lines) 84 | 85 | labels_to_class_names = {} 86 | for line in lines: 87 | index = line.index(':') 88 | labels_to_class_names[line[:index]] = line[index+1:] 89 | return(labels_to_class_names) 90 | 91 | def mean_image_subtraction(image, means): 92 | if image.get_shape().ndims != 3: 93 | raise ValueError('Input must be of size [height, width, C>0]') 94 | num_channels = image.get_shape().as_list()[-1] 95 | if len(means) != num_channels: 96 | raise ValueError('len(means) must match the number of channels') 97 | 98 | channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) 99 | for i in range(num_channels): 100 | channels[i] -= means[i] 101 | return(tf.concat(axis=2, values=channels)) 102 | 103 | def get_preprocessing(): 104 | def preprocessing_fn(image, output_height=224, output_width=224): 105 | ''' Resize the image and subtract "mean" RGB values ''' 106 | _R_MEAN = 123.68 107 | _G_MEAN = 116.78 108 | _B_MEAN = 103.94 109 | #image = tf.expand_dims(image, 0) 110 | 111 | temp_dim = np.random.randint(175, 223) 112 | distorted_image = tf.random_crop(image, [output_height, output_width, 3]) 113 | distorted_image = tf.expand_dims(distorted_image, 0) 114 | resized_image = tf.image.resize_bilinear(distorted_image, [output_height, output_width], align_corners=False) 115 | resized_image = tf.squeeze(resized_image) 116 | resized_image.set_shape([output_height, output_width, 3]) 117 | resized_image = tf.image.random_flip_left_right(resized_image) 118 | 119 | image = tf.to_float(resized_image) 120 | return(mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])) 121 | return(preprocessing_fn) 122 | 123 | def get_network_fn(num_classes, weight_decay=0.0): 124 | arg_scope = resnet_v1.resnet_arg_scope(weight_decay=weight_decay) 125 | func = resnet_v1.resnet_v1_50 126 | @functools.wraps(func) 127 | def network_fn(images): 128 | with slim.arg_scope(arg_scope): 129 | return func(images, num_classes) 130 | if hasattr(func, 'default_image_size'): 131 | network_fn.default_image_size = func.default_image_size 132 | return(network_fn) 133 | 134 | def _add_variables_summaries(learning_rate): 135 | summaries = [] 136 | for variable in slim.get_model_variables(): 137 | summaries.append(tf.summary.histogram(variable.op.name, variable)) 138 | summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate)) 139 | return(summaries) 140 | 141 | def _get_init_fn(): 142 | if (FLAGS.checkpoint_path is None) or (tf.train.latest_checkpoint(FLAGS.train_dir)): 143 | return None 144 | 145 | exclusions = [] 146 | if FLAGS.checkpoint_exclude_scopes: 147 | exclusions = [scope.strip() for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 148 | 149 | variables_to_restore = [] 150 | for var in slim.get_model_variables(): 151 | excluded = False 152 | for exclusion in exclusions: 153 | if var.op.name.startswith(exclusion): 154 | excluded = True 155 | break 156 | if not excluded: 157 | variables_to_restore.append(var) 158 | 159 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 160 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 161 | else: 162 | checkpoint_path = FLAGS.checkpoint_path 163 | 164 |
tf.logging.info('Fine-tuning from {}'.format(checkpoint_path)) 165 | 166 | return(slim.assign_from_checkpoint_fn(checkpoint_path, 167 | variables_to_restore, 168 | ignore_missing_vars=False)) 169 | 170 | def _get_variables_to_train(): 171 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')] 172 | variables_to_train = [] 173 | for scope in scopes: 174 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 175 | variables_to_train.extend(variables) 176 | return(variables_to_train) 177 | 178 | def get_dataset(dataset_name, dataset_dir, image_count, class_count, split_name): 179 | slim = tf.contrib.slim 180 | items_to_descriptions = {'image': 'A color image.', 181 | 'label': 'An integer in range(0, class_count)'} 182 | file_pattern = os.path.join(dataset_dir, '{}_{}_*.tfrecord'.format(dataset_name, split_name)) 183 | reader = tf.TFRecordReader 184 | keys_to_features = {'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 185 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 186 | 'image/class/label': tf.FixedLenFeature([], tf.int64, 187 | default_value=tf.zeros([], dtype=tf.int64))} 188 | items_to_handlers = {'image': slim.tfexample_decoder.Image(), 189 | 'label': slim.tfexample_decoder.Tensor('image/class/label')} 190 | decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) 191 | labels_to_names = read_label_file(dataset_dir) 192 | return(slim.dataset.Dataset(data_sources=file_pattern, 193 | reader=reader, 194 | decoder=decoder, 195 | num_samples=image_count, 196 | items_to_descriptions=items_to_descriptions, 197 | num_classes=class_count, 198 | labels_to_names=labels_to_names, 199 | shuffle=True)) 200 | 201 | def main(_): 202 | tf.logging.set_verbosity(tf.logging.INFO) 203 | with tf.Graph().as_default(): 204 | deploy_config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones, 205 | clone_on_cpu=FLAGS.clone_on_cpu, 206 | replica_id=0, 207 | num_replicas=1, 208 | num_ps_tasks=0) 209 | 210 | with tf.device(deploy_config.variables_device()): 211 | global_step = slim.create_global_step() 212 | 213 | image_count, class_count = get_image_and_class_count(FLAGS.dataset_dir, 'train') 214 | dataset = get_dataset('aerial', FLAGS.dataset_dir, image_count, class_count, 'train') 215 | network_fn = get_network_fn(num_classes=(dataset.num_classes), weight_decay=FLAGS.weight_decay) 216 | image_preprocessing_fn = get_preprocessing() 217 | 218 | with tf.device(deploy_config.inputs_device()): 219 | provider = slim.dataset_data_provider.DatasetDataProvider(dataset, 220 | num_readers=FLAGS.num_readers, 221 | common_queue_capacity=20 * FLAGS.batch_size, 222 | common_queue_min=10 * FLAGS.batch_size) 223 | [image, label] = provider.get(['image', 'label']) 224 | image = image_preprocessing_fn(image, 224, 224) 225 | images, labels = tf.train.batch([image, label], 226 | batch_size=FLAGS.batch_size, 227 | num_threads=FLAGS.num_preprocessing_threads, 228 | capacity=5 * FLAGS.batch_size) 229 | labels = slim.one_hot_encoding(labels, dataset.num_classes) 230 | batch_queue = slim.prefetch_queue.prefetch_queue([images, labels], capacity=2 * deploy_config.num_clones) 231 | 232 | def clone_fn(batch_queue): 233 | images, labels = batch_queue.dequeue() 234 | logits, end_points = network_fn(images) 235 | logits = tf.squeeze(logits) # resnet_v1 emits logits of shape [batch, 1, 1, num_classes]; squeeze to [batch, num_classes] before computing the loss
236 | slim.losses.softmax_cross_entropy(logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0) 237 | return(end_points) 238 | 239 | summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) 240 | 241 | clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) 242 | first_clone_scope = deploy_config.clone_scope(0) 243 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) 244 | 245 | end_points = clones[0].outputs 246 | for end_point in end_points: 247 | x = end_points[end_point] 248 | summaries.add(tf.summary.histogram('activations/' + end_point, x)) 249 | summaries.add(tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x))) 250 | for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): 251 | summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) 252 | for variable in slim.get_model_variables(): 253 | summaries.add(tf.summary.histogram(variable.op.name, variable)) 254 | 255 | with tf.device(deploy_config.optimizer_device()): 256 | decay_steps = int(dataset.num_samples / FLAGS.batch_size * FLAGS.num_epochs_per_decay) 257 | learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, 258 | global_step, 259 | decay_steps, 260 | FLAGS.learning_rate_decay_factor, 261 | staircase=True, 262 | name='exponential_decay_learning_rate') 263 | optimizer = tf.train.RMSPropOptimizer(learning_rate, 264 | decay=FLAGS.rmsprop_decay, 265 | momentum=FLAGS.rmsprop_momentum, 266 | epsilon=FLAGS.opt_epsilon) 267 | summaries.add(tf.summary.scalar('learning_rate', learning_rate)) 268 | 269 | 270 | 271 | variables_to_train = _get_variables_to_train() 272 | total_loss, clones_gradients = model_deploy.optimize_clones(clones, optimizer, var_list=variables_to_train) 273 | summaries.add(tf.summary.scalar('total_loss', total_loss)) 274 | 275 | grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step) 276 | update_ops.append(grad_updates) 277 | 278 | update_op = tf.group(*update_ops) 279 | train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, name='train_op') 280 | 281 | summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) 282 | summary_op = tf.summary.merge(list(summaries), name='summary_op') 283 | 284 | slim.learning.train(train_tensor, 285 | logdir=FLAGS.train_dir, 286 | master='', 287 | is_chief=True, 288 | init_fn=_get_init_fn(), 289 | summary_op=summary_op, 290 | number_of_steps=FLAGS.max_number_of_steps, 291 | log_every_n_steps=FLAGS.log_every_n_steps, 292 | save_summaries_secs=FLAGS.save_summaries_secs, 293 | save_interval_secs=FLAGS.save_interval_secs, 294 | sync_optimizer=None) 295 | 296 | 297 | if __name__ == '__main__': 298 | tf.app.run() --------------------------------------------------------------------------------
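Note on the training schedule in tf/retrain.py (illustrative, not part of the repository): the script decays the learning rate with tf.train.exponential_decay, deriving decay_steps from the training-set size, batch_size, and num_epochs_per_decay. The plain-Python sketch below shows how the default flag values translate into concrete learning-rate values over a 4,000-step run; the 40,000-tile training-set size is a made-up placeholder, since the real count is read from dataset_split_info.csv at run time.

# Minimal sketch of the staircase exponential-decay schedule built by retrain.py.
# Assumes a hypothetical training set of 40,000 tiles; all other values are the
# script's default flag values.
num_samples = 40000            # hypothetical; retrain.py reads the real count from dataset_split_info.csv
batch_size = 32                # FLAGS.batch_size
num_epochs_per_decay = 2.0     # FLAGS.num_epochs_per_decay
initial_lr = 0.02              # FLAGS.learning_rate
decay_factor = 0.9             # FLAGS.learning_rate_decay_factor
max_steps = 4000               # FLAGS.max_number_of_steps

decay_steps = int(num_samples / batch_size * num_epochs_per_decay)   # same formula as retrain.py
for step in range(0, max_steps + 1, 1000):
    lr = initial_lr * decay_factor ** (step // decay_steps)          # staircase=True floors the exponent
    print('step {:>4}: learning rate = {:.4f}'.format(step, lr))

With these defaults and the assumed dataset size, decay_steps works out to 2,500, so the rate drops from 0.02 to 0.018 exactly once during the 4,000-step run; a smaller num_epochs_per_decay (or more training steps) would produce additional decay steps.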