├── .gitignore
├── LICENSE
├── README.md
├── cntk
│   └── retrain.py
├── image_set_preparation.ipynb
├── img
│   ├── data_overview
│   │   ├── mediumnaip_white.png
│   │   ├── mediumnlcd.png
│   │   └── middlesex_ma.png
│   ├── extraction
│   │   ├── common_naip_tiled.png
│   │   ├── common_points.png
│   │   ├── common_tiled_only.png
│   │   └── sample_tile.png
│   ├── middlesex
│   │   ├── 20655.png
│   │   ├── 20655_small.png
│   │   ├── 33308.png
│   │   ├── 33308_small.png
│   │   ├── 36083.png
│   │   ├── 37002.png
│   │   ├── 47331.png
│   │   ├── true_and_predicted_labels.png
│   │   └── true_and_predicted_labels_smoothened.png
│   ├── scoring
│   │   ├── balanced_cm.pdf
│   │   ├── balanced_cm.png
│   │   ├── balanced_cm_small.png
│   │   └── scaling.png
│   └── spark_adls_provisioning
│       ├── ambari_configs_tab.GIF
│       ├── ambari_custom_spark2_defaults.GIF
│       ├── ambari_spark2.GIF
│       ├── cluster_type_settings.GIF
│       ├── create.GIF
│       ├── new_resource_button.GIF
│       ├── resource_search_box.GIF
│       ├── resource_search_box_adls.GIF
│       ├── resource_search_result.GIF
│       ├── resource_search_result_adls.GIF
│       ├── spark_basics_screenshot.GIF
│       └── spark_basics_screenshot2.GIF
├── land_use_prediction.md
├── model_training.ipynb
├── scoring
│   └── script_action.sh
├── scoring_on_spark.ipynb
└── tf
    ├── deployment
    │   ├── __init__.py
    │   ├── model_deploy.py
    │   └── model_deploy_test.py
    ├── nets
    │   ├── __init__.py
    │   ├── nets_factory.py
    │   ├── nets_factory_test.py
    │   ├── resnet_utils.py
    │   ├── resnet_v1.py
    │   ├── resnet_v1_test.py
    │   ├── resnet_v2.py
    │   ├── resnet_v2_test.py
    │   ├── vgg.py
    │   └── vgg_test.py
    └── retrain.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation. All rights reserved.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE
22 |
23 | ==========================================================================
24 |
25 | Copyright 2017 Microsoft Corporation. All Rights Reserved.
26 |
27 | Licensed under the Apache License, Version 2.0 (the "License");
28 | you may not use this file except in compliance with the License.
29 | You may obtain a copy of the License at
30 |
31 | http://www.apache.org/licenses/LICENSE-2.0
32 |
33 | Unless required by applicable law or agreed to in writing, software
34 | distributed under the License is distributed on an "AS IS" BASIS,
35 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36 | See the License for the specific language governing permissions and
37 | limitations under the License.
38 | ==========================================================================
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Embarrassingly Parallel Image Classification: Inferring Land Use from Aerial Imagery
2 |
3 | ## Introduction
4 |
5 | Deep neural networks (DNNs) are extraordinarily versatile artificial intelligence models that have achieved widespread use over the last five years. These neural networks excel at automated feature creation and processing of complex data types like images, audio, and free-form text. Common business use cases for DNNs include:
6 |
7 | - Determining whether an uploaded video, audio, or text file contains inappropriate content
8 | - Inferring a user's intent from their spoken or typed input
9 | - Identifying objects or persons in a still image
10 | - Translating speech or text between languages or modalities
11 |
12 | Unfortunately, DNNs are also among the most time- and resource-intensive machine learning models. Whereas a trained linear regression model can typically score input in negligible time, applying a DNN to a single file of interest may take hundreds or thousands of milliseconds -- a processing rate insufficient for some business needs. Fortunately, DNNs can be applied in a parallel and scalable fashion when evaluation is performed on Spark clusters.
13 |
14 | This repository demonstrates how trained DNNs produced with two common deep learning frameworks, Microsoft's [Cognitive Toolkit (CNTK)](https://github.com/Microsoft/CNTK/wiki) and Google's [TensorFlow](https://github.com/tensorflow/tensorflow), can be operationalized on Spark to score a large image set. Files stored on [Azure Data Lake Store](https://azure.microsoft.com/en-us/services/data-lake-store/), Microsoft's HDFS-based cloud storage resource, are processed in parallel by workers on the Spark cluster. The guide follows a specific example use case: land use classification from aerial imagery.
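To make the pattern concrete, below is a minimal sketch of per-partition scoring with PySpark and a retrained CNTK model. It is illustrative only -- the path-list location, model path, and preprocessing steps are placeholder assumptions -- and the [Scoring on Spark](./scoring_on_spark.ipynb) notebook contains the version used in this guide.

```python
# Minimal sketch (not this guide's exact code) of scoring an image set in parallel on Spark.
# Assumes each worker can read the listed image files and the retrained CNTK model from its
# local filesystem; both paths below are placeholders.
import numpy as np
from PIL import Image
from pyspark import SparkContext
import cntk

sc = SparkContext.getOrCreate()
image_paths = sc.textFile('/example/image_path_list.txt')  # one image path per line

def score_partition(paths):
    model = cntk.load_model('/tmp/models/retrained.model')  # load the model once per partition, not per image
    for path in paths:
        # HWC uint8 image -> CHW float32 array, matching the model's (channels, height, width) input
        pixels = np.asarray(Image.open(path)).transpose(2, 0, 1).astype(np.float32)
        probs = np.squeeze(model.eval({model.arguments[0]: [pixels]}))
        yield (path, int(np.argmax(probs)))

predictions = image_paths.mapPartitions(score_partition).collect()
```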
15 |
16 | ## Fast start
17 |
18 | To get started right away,
19 | * Follow the instructions in the [Image Set Preparation](./image_set_preparation.ipynb) notebook to generate the training and validation datasets.
20 | * If you will use our provided image sets, you only need to complete the "Prepare an Azure Data Science Virtual Machine for image extraction" and "Dataset preparation for deep learning" sections.
21 | * If you seek a CNTK Spark operationalization example that doesn't require image set preparation or VM deployment, you may prefer [this walkthrough](https://github.com/Azure-Samples/hdinsight-pyspark-cntk-integration) instead. A brief description of the technique is included in [this blog post](https://blogs.technet.microsoft.com/machinelearning/2017/04/25/using-microsofts-deep-learning-toolkit-with-spark-on-azure-hdinsight-clusters/).
22 | * If you want to retrain an image classification DNN using transfer learning, complete the [Model Training](./model_training.ipynb) notebook.
23 | * You can skip this step if you choose to use our example DNNs.
24 | * If you want to operationalize trained DNNs on Spark, complete the [Scoring on Spark](./scoring_on_spark.ipynb) notebook.
25 | * If you want to learn how the retrained DNN can be used to study urban development trends, see the [Middlesex County Land Use Prediction](./land_use_prediction.md) page.
26 | * For the motivation and summary of our work, see below.
27 |
28 | ## Land use classification from aerial imagery
29 |
30 | In this guide, we develop a classifier that can predict how a parcel of land has been used -- e.g., whether it is developed, cultivated, forested, etc. -- from an aerial image. We apply the classifier to track recent land development in Middlesex County, MA: the home of Microsoft's New England Research and Development (NERD) Center. Aerial image classification has many important applications in industry and government, including:
31 | - Enforcing tax codes (cf. [identification of home pools in Greece](http://www.nytimes.com/2010/05/02/world/europe/02evasion.html))
32 | - Monitoring agricultural crop performance
33 | - Quantifying the impact of climate change on natural resources
34 | - Property value estimation and feature tracking for marketing purposes
35 | - Geopolitical surveillance
36 |
37 | This use case was chosen because sample images and ground-truth labels are available in abundance. We use aerial imagery provided by the U.S. [National Agriculture Imagery Program](https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/), and land use labels from the [National Land Cover Database](https://www.mrlc.gov/). NLCD labels are published roughly every five years, while NAIP data are collected more frequently: we were able to apply our land use classification DNN to images collected five years after the most recent training data available. For more information on dataset creation, please see the [Image Set Preparation](./image_set_preparation.ipynb) Jupyter notebook.
38 |
39 | ## Model training and validation
40 |
41 | We applied transfer learning to retrain the final layers of existing TensorFlow ([ResNet](https://github.com/tensorflow/models/tree/master/slim)) and CNTK ([AlexNet](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN)) models for classification of 1-meter resolution NAIP aerial images of 224 meter x 224 meter regions selected from across the United States. Retraining was performed on [Azure N-Series GPU VMs](http://gpu.azure.com/) with the [Deep Learning Toolkit](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/microsoft-ads.dsvm-deep-learning) pre-installed. We created balanced training and validation sets containing aerial images in six major land use categories (Developed, Cultivated, Forest, Shrub, Barren, and Herbaceous) from non-neighboring counties and collection years. For more information on model creation, please see the [Model Training](./model_training.ipynb) Jupyter notebook.
42 |
43 | We used Spark to apply the trained CNTK and TensorFlow models to the 11,760 images in the validation set. Spreading the scoring task across multiple worker nodes allowed us to decrease the total time required to under one minute:
44 |
45 |
46 |
47 | Our retrained models achieved an overall classification accuracy of ~80% on these six categories, with the majority of errors occurring between different types of undeveloped land (see the confusion matrix for the CNTK model's predictions, below):
48 |
49 |
50 |
51 | For a subsequent application -- identifying and quantifying recently-developed land -- we further grouped these land use labels into "Developed," "Cultivated," and "Undeveloped" classes. Our model's overall accuracy at predicting these higher-level labels was roughly 95% in our validation set. For more information on model validation on Spark, see the [Scoring on Spark](./scoring_on_spark.ipynb) Jupyter notebook.
52 |
53 | ## Inferring recent land development
54 |
55 | The trained land use models were applied to 2016 aerial images tiling Middlesex County. The predicted 2016 labels were then compared to the ground-truth 2011 labels to identify putative regions of recent development: such an application may be useful for regulatory bodies seeking to automatically identify new structures or cultivated land in remote locations. Example results (with surrounding tiles for context) are included below:
56 |
57 |
58 |
59 |
60 | Development could also be visualized and quantified at the county level. In the figure below, regions classified as developed land are represented by red pixels, cultivated land by white pixels, and undeveloped land by green pixels.
61 |
62 |
63 |
64 | The predicted land classes largely matched the true 2011 labels. Unfortunately, noisy year-to-year variation (likely reflecting differences in coloration and vegetation) was too large in magnitude to quantify general trends in development.
65 |
66 | For more information on inferring recent land development with our trained DNNs, please see the [Middlesex County Land Use Prediction](./land_use_prediction.md) page.
67 |
68 | ## Contributing and Adapting
69 |
70 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
71 |
72 | The code in this repository is shared under the [MIT and Apache licenses](./LICENSE) included in this directory. Some TensorFlow scripts have been adapted from the [TensorFlow Models repository's slim](https://github.com/tensorflow/models/tree/master/slim) subdirectory (indicated where applicable). Cognitive Toolkit (CNTK) scripts for network definition and training have been adapted from the [CIFAR-10 Image Classification](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/ResNet/Python) example.
73 |
--------------------------------------------------------------------------------
/cntk/retrain.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | #
3 | # Modified by Mary Wahl from work by Patrick Buehler, cf.
4 | # https://github.com/Microsoft/CNTK/blob/master/Examples/Image/Detection/FastRCNN/A2_RunCntk_py3.py
5 | #
6 | # Licensed under the MIT license. See LICENSE.md file in the project root
7 | # for full license information.
8 | # ==============================================================================
9 |
10 | import cntk.io.transforms as xforms
11 | from cntk.train.training_session import CheckpointConfig, training_session
12 | import numpy as np
13 | import os, sys, argparse, cntk
14 | from PIL import Image
15 |
16 | def create_reader(map_filename, image_height, image_width, num_channels,
17 | num_classes):
18 | transforms = [xforms.crop(crop_type='randomside',
19 | side_ratio=0.85,
20 | jitter_type='uniratio'),
21 | xforms.scale(width=image_width,
22 | height=image_height,
23 | channels=num_channels,
24 | interpolations='linear'),
25 | xforms.color(brightness_radius=0.2,
26 | contrast_radius=0.2,
27 | saturation_radius=0.2)]
28 | return(cntk.io.MinibatchSource(
29 | cntk.io.ImageDeserializer(map_filename, cntk.io.StreamDefs(
30 | features=cntk.io.StreamDef(
31 | field='image', transforms=transforms, is_sparse=False),
32 | labels=cntk.io.StreamDef(
33 | field='label', shape=num_classes, is_sparse=False)))))
34 |
35 | def modify_model(pretrained_model_filename, features, num_classes):
36 | loaded_model = cntk.load_model(pretrained_model_filename)
37 | feature_node = cntk.logging.graph.find_by_name(loaded_model, 'features')
38 | last_node = cntk.logging.graph.find_by_name(loaded_model, 'h2_d')
39 | all_layers = cntk.ops.combine([last_node.owner]).clone(
40 | cntk.ops.functions.CloneMethod.freeze,
41 | {feature_node: cntk.ops.placeholder()})
42 |
43 | feat_norm = features - cntk.layers.Constant(114)
44 | fc_out = all_layers(feat_norm)
45 | z = cntk.layers.Dense(num_classes)(fc_out)
46 |
47 | return(z)
48 |
49 | def main(map_filename, output_dir, pretrained_model_filename):
50 | ''' Retrain and save the existing AlexNet model '''
51 | num_epochs = 50
52 | mb_size = 16
53 |
54 | # Find the number of classes and the number of samples per epoch
55 | labels = set([])
56 | epoch_size = 0
57 | with open(map_filename, 'r') as f:
58 | for line in f:
59 | labels.add(line.strip().split('\t')[1])
60 | epoch_size += 1
61 | sample_image_filename = line.strip().split('\t')[0]
62 | num_classes = len(labels)
63 | num_minibatches = int(epoch_size // mb_size)
64 |
65 | # find the typical image dimensions
66 | image_height, image_width, num_channels = np.asarray(
67 | Image.open(sample_image_filename)).shape
68 | assert num_channels == 3, 'Expected to find images with 3 color channels'
69 | assert (image_height == 224) and (image_width == 224), \
70 | 'Expected to find images of size 224 pixels x 224 pixels'
71 |
72 | # Create the minibatch source
73 | minibatch_source = create_reader(map_filename, image_height, image_width,
74 | num_channels, num_classes)
75 |
76 | # Input variables denoting the image features and label data
77 | image_input = cntk.ops.input_variable(
78 | (num_channels, image_height, image_width))
79 | label_input = cntk.ops.input_variable((num_classes))
80 |
81 | # define mapping from reader streams to network inputs
82 | input_map = {image_input: minibatch_source.streams.features,
83 | label_input: minibatch_source.streams.labels}
84 |
85 | # Instantiate the transfer-learning model, loss function, and error metric
86 | model = modify_model(pretrained_model_filename, image_input, num_classes)
87 | ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
88 | pe = cntk.metrics.classification_error(model, label_input)
89 |
90 | # Set learning parameters
91 | l2_reg_weight = 0.0005
92 | lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
93 | momentum_time_constant = 10
94 | lr_schedule = cntk.learners.learning_rate_schedule(lr_per_sample,
95 | unit=cntk.UnitType.sample)
96 | mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
97 | momentum_time_constant)
98 |
99 | # Instantiate the trainer object
100 | progress_writers = [cntk.logging.progress_print.ProgressPrinter(
101 | tag='Training',
102 | num_epochs=num_epochs,
103 | freq=num_minibatches)]
104 | learner = cntk.learners.momentum_sgd(model.parameters,
105 | lr_schedule,
106 | mm_schedule,
107 | l2_regularization_weight=l2_reg_weight)
108 | trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)
109 |
110 | # Perform retraining and save the resulting model
111 | cntk.logging.progress_print.log_number_of_parameters(model)
112 | training_session(
113 | trainer=trainer,
114 | max_samples=num_epochs*epoch_size,
115 | mb_source=minibatch_source,
116 | mb_size=mb_size,
117 | model_inputs_to_streams=input_map,
118 | checkpoint_config=CheckpointConfig(
119 | frequency=epoch_size,
120 | filename=os.path.join(output_dir,
121 | 'retrained_checkpoint.model')),
122 | progress_frequency=epoch_size
123 | ).train()
124 | model.save(os.path.join(output_dir, 'retrained.model'))
125 | return
126 |
127 | if __name__ == '__main__':
128 | parser = argparse.ArgumentParser(description='''
129 | Retrains a pretrained Alexnet model to label aerial images according to land
130 | use.
131 | ''')
132 | parser.add_argument('-i', '--input_map_file', type=str, required=True,
133 | help='MAP file listing training images and labels.')
134 | parser.add_argument('-o', '--output_dir',
135 | type=str, required=True,
136 | help='Output directory where model will be saved.')
137 | parser.add_argument('-p', '--pretrained_model_filename',
138 | type=str, required=True,
139 | help='Filepath of the pretrained AlexNet model.')
140 | args = parser.parse_args()
141 |
142 | # Ensure argument values are acceptable before proceeding
143 | assert os.path.exists(args.input_map_file), \
144 | 'Input MAP file {} does not exist'.format(args.input_map_file)
145 | os.makedirs(args.output_dir, exist_ok=True)
146 | assert os.path.exists(args.pretrained_model_filename), \
147 | 'Model file {} does not exist'.format(args.pretrained_model_filename)
148 |
149 | main(args.input_map_file, args.output_dir, args.pretrained_model_filename)
--------------------------------------------------------------------------------
/img/data_overview/mediumnaip_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/data_overview/mediumnaip_white.png
--------------------------------------------------------------------------------
/img/data_overview/mediumnlcd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/data_overview/mediumnlcd.png
--------------------------------------------------------------------------------
/img/data_overview/middlesex_ma.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/data_overview/middlesex_ma.png
--------------------------------------------------------------------------------
/img/extraction/common_naip_tiled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/common_naip_tiled.png
--------------------------------------------------------------------------------
/img/extraction/common_points.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/common_points.png
--------------------------------------------------------------------------------
/img/extraction/common_tiled_only.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/common_tiled_only.png
--------------------------------------------------------------------------------
/img/extraction/sample_tile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/extraction/sample_tile.png
--------------------------------------------------------------------------------
/img/middlesex/20655.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/20655.png
--------------------------------------------------------------------------------
/img/middlesex/20655_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/20655_small.png
--------------------------------------------------------------------------------
/img/middlesex/33308.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/33308.png
--------------------------------------------------------------------------------
/img/middlesex/33308_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/33308_small.png
--------------------------------------------------------------------------------
/img/middlesex/36083.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/36083.png
--------------------------------------------------------------------------------
/img/middlesex/37002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/37002.png
--------------------------------------------------------------------------------
/img/middlesex/47331.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/47331.png
--------------------------------------------------------------------------------
/img/middlesex/true_and_predicted_labels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/true_and_predicted_labels.png
--------------------------------------------------------------------------------
/img/middlesex/true_and_predicted_labels_smoothened.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/middlesex/true_and_predicted_labels_smoothened.png
--------------------------------------------------------------------------------
/img/scoring/balanced_cm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/balanced_cm.pdf
--------------------------------------------------------------------------------
/img/scoring/balanced_cm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/balanced_cm.png
--------------------------------------------------------------------------------
/img/scoring/balanced_cm_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/balanced_cm_small.png
--------------------------------------------------------------------------------
/img/scoring/scaling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/scoring/scaling.png
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/ambari_configs_tab.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/ambari_configs_tab.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/ambari_custom_spark2_defaults.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/ambari_custom_spark2_defaults.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/ambari_spark2.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/ambari_spark2.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/cluster_type_settings.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/cluster_type_settings.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/create.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/create.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/new_resource_button.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/new_resource_button.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/resource_search_box.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_box.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/resource_search_box_adls.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_box_adls.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/resource_search_result.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_result.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/resource_search_result_adls.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/resource_search_result_adls.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/spark_basics_screenshot.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/spark_basics_screenshot.GIF
--------------------------------------------------------------------------------
/img/spark_adls_provisioning/spark_basics_screenshot2.GIF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/Embarrassingly-Parallel-Image-Classification/72e394f99b526f548b27316fcb2fe8e0d488b12d/img/spark_adls_provisioning/spark_basics_screenshot2.GIF
--------------------------------------------------------------------------------
/land_use_prediction.md:
--------------------------------------------------------------------------------
1 | # Middlesex County Land Use Prediction
2 |
3 | This notebook illustrates how trained Cognitive Toolkit (CNTK) and TensorFlow models can be applied to predict current land usage from recent aerial imagery. For more detail on image set creation, model training, and Spark cluster deployment, please see the rest of the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository.
4 |
5 |
6 |
7 | ## Image preparation and labeling
8 |
9 | We have used National Land Cover Database (NLCD) data for our ground truth labels during model training and evaluation. The most recent NLCD dataset was published in 2011, but aerial images from the National Agriculture Imagery Program (NAIP) are available for 2016. Our trained models therefore allow us to bridge a five-year data gap by predicting land use in 2016.
10 |
11 | To demonstrate this approach, we extracted a set of 65,563 images tiling Middlesex County, MA (home to Microsoft's New England Research and Development Center) at one-meter resolution from 2010 and 2016 NAIP data as [described previously](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/image_set_preparation.ipynb). Note that unlike the image set used in training and evaluation, some of these images have ambiguous land use types: for example, they may depict the boundary between a forest and developed land. These images were then scored with [trained CNTK and TensorFlow land use classification models](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/model_training.ipynb) applied in [parallel fashion using Spark](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb). Both models performed similarly; results for the CNTK model are shown.
12 |
13 | For those unfamiliar with the region, we include below an aerial view of an 80 km x 70 km region covering the county. The Greater Boston Area is centered along the ESE border of the county and extends through all but the northernmost regions.
14 |
15 |
16 | ## Visualizing land use
17 |
18 | To visualize the results, we represent the labels of each 224 m x 224 m tile with a single color-coded pixel:
19 | - Red represents developed regions (NLCD codes 21-24; see [legend](https://www.mrlc.gov/nlcd11_leg.php))
20 | - White represents cultivated regions (NLCD codes 81-82)
21 | - Green represents undeveloped and uncultivated regions (all other NLCD codes)
22 |
23 | Below left, the plurality NLCD 2011 label is shown for each tile. (NLCD data is provided at 30-meter resolution, so any tile may contain multiple land use labels.) The predicted labels for each tile in 2010 (most directly comparable to the NLCD labels) and 2016 (most recent available) are shown at center and right, respectively.
24 |
25 |
26 |
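For concreteness, the sketch below shows one way to compute such a plurality label; it is illustrative rather than the repository's exact code, and the roughly 7 x 7 block of 30 m NLCD cells per 224 m tile is an approximation.

```python
# Illustrative sketch: group NLCD codes into the three display classes described above,
# then take the plurality class over the 30 m NLCD cells covering a single 224 m tile.
from collections import Counter
import numpy as np

def nlcd_to_class(code):
    if 21 <= code <= 24:
        return 'Developed'     # shown in red
    if code in (81, 82):
        return 'Cultivated'    # shown in white
    return 'Undeveloped'       # shown in green

def plurality_label(tile_codes):
    """tile_codes: 2-D array of NLCD codes for the ~7x7 cells covering one tile."""
    counts = Counter(nlcd_to_class(c) for c in np.ravel(tile_codes))
    return counts.most_common(1)[0][0]
```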
27 | We found a striking correspondence between true and predicted labels at both timepoints. The classification error for 2010 predictions (the most contemporary image set for the ground-truth 2011 labels) was ~4%. An uptick in the fraction of developed land was observed between 2010 and 2016 (see table below), but we believe this change is attributable in large part to the impact of image coloration and vegetation differences (e.g. browning in drought conditions) on labeling. Some systematic errors are noticeable in the predictions, including the apparent mislabeling of some highways as cultivated land (white lines in the 2016 image).
28 |
29 | | |No. developed tiles (%) |No. cultivated tiles (%) |No. undeveloped tiles (%) |
30 | |--- |--- |--- |--- |
31 | |2010 predicted labels |27,584 (42.1%) |941 (1.4%) |37,038 (56.4%) |
32 | |NLCD 2011 labels |28,537 (43.5%) |2,337 (3.6%) |34,689 (52.9%) |
33 | |2016 predicted labels |28,911 (44.1%) |4,011 (6.1%) |32,641 (49.8%) |
34 |
35 | For the purposes of mapping and quantifying land use, it may be preferable to discount isolated patches of differing land use. For example, an urban park may not be considered undeveloped land for the purposes of habitat conservation, and construction of a rural homestead may not indicate substantial development in an otherwise cultivated region. We note that isolated tiles of land use can be removed by applying a 3x3 plurality-voting filter (with added weight for the center tile's own predicted label) to the raw predictions. The results of such a smoothing operation are shown below:
36 |
37 |
38 |
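A minimal sketch of such a smoothing filter is shown here; the specific center weight and the edge handling are assumptions rather than the repository's exact settings, and tile labels are assumed to be small non-negative integer class indices.

```python
# Illustrative 3x3 plurality-voting smoother with extra weight for the center tile's own label.
import numpy as np

def smooth_labels(label_grid, center_weight=2):
    """label_grid: 2-D array of integer class indices, one entry per tile."""
    rows, cols = label_grid.shape
    padded = np.pad(label_grid, 1, mode='edge')           # repeat border tiles at the edges
    smoothed = np.empty_like(label_grid)
    for i in range(rows):
        for j in range(cols):
            window = padded[i:i + 3, j:j + 3].ravel()     # the tile and its eight neighbors
            votes = np.bincount(window)
            votes[label_grid[i, j]] += center_weight - 1  # extra vote(s) for the tile's own label
            smoothed[i, j] = np.argmax(votes)
    return smoothed
```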
39 | After smoothing, the classification error for predictions on 2010 images was reduced from 20% to 17%.
40 |
41 | ## Identifying newly developed regions
42 |
43 | The ability to programmatically identify new development and cultivation in remote areas may be useful to government agencies that regulate housing and commerce, e.g. to identify tax evasion or enforce land use regulations. Roughly 400 regions of putative new development were identified in Middlesex County based on a change in their label from "Undeveloped" in 2011 to "Developed" in our 2016 predictions. A few examples (including bordering tiles for context) are shown below:
44 |
45 |
46 |
47 |
48 |
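(As an aside, the selection step itself reduces to a simple filter over the per-tile labels; the sketch below uses a hypothetical pandas table whose filename and column names are illustrative only.)

```python
# Hypothetical per-tile label table; the filename and column names are placeholders.
import pandas as pd

tiles = pd.read_csv('middlesex_tile_labels.csv')
newly_developed = tiles[(tiles['nlcd_2011_class'] == 'Undeveloped') &
                        (tiles['predicted_2016_class'] == 'Developed')]
print('{} candidate tiles of putative new development'.format(len(newly_developed)))
```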
49 | In some cases, our land use classifier was sensitive enough to identify the development of single properties within a tile:
50 |
51 |
52 |
53 |
54 | A visual comparison of the ~400 candidate tiles in 2010 vs. 2016 NAIP images reveals that roughly one-third have truly been developed; the false positives may reflect differences in lighting and drought conditions between the 2016 images and the training data.
--------------------------------------------------------------------------------
/model_training.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Producing CNTK and TensorFlow models for image classification\n",
8 | "\n",
9 | "In this notebook, we illustrate how one can produce residual networks (ResNets) to classify aerial images based on land use type (developed, forested, cultivated, etc.). We apply transfer learning with Microsoft Cognitive Toolkit (CNTK) and TensorFlow (TF) to adapt pretrained models for our classification use case. The CNTK and TF sections of this notebook can be completed in either order, or even concurrently.\n",
10 | "\n",
11 | "This notebook is part of the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) git repository. It assumes that a dataset and Azure N-series GPU VM have already been created for model training as described in the previous [Image Set Preparation](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/image_set_preparation.ipynb) notebook. Note that an abbreviated instruction set is mentioned in that notebook for users who would like to employ our sample image set rather than generating their own.\n",
12 | "\n",
13 | "For instructions on applying the trained models to large image sets using Spark, see the [Scoring on Spark](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb) notebook. It is not necessary to complete this notebook before proceeding to [Scoring on Spark](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb), as we have provided sample retrained DNNs for your use."
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "## Outline\n",
21 | "- [Prepare the VM and training data](#input)\n",
22 | "- [Clone or download this repository](#repo)\n",
23 | "- [Retrain an AlexNet with Microsoft Cognitive Toolkit (CNTK)](#cntk)\n",
24 | " - [Download the pretrained model](#alexnet)\n",
25 | " - [Update and run the training script](#cntkrun)\n",
26 | "- [Retrain a pretrained ResNet with TensorFlow](#tensorflow)\n",
27 | " - [Download a pretrained model](#tfmodel)\n",
28 | " - [Run the training script](#tfrun)\n",
29 | "- [Next Steps](#nextsteps)"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "\n",
37 | "## Prepare the VM and training data\n",
38 | "\n",
39 | "If you have not done so already, please complete the instructions in the [Image Set Preparation](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/image_set_preparation.ipynb) notebook to prepare an Azure Data Science VM with the Deep Learning Toolkit and the necessary training data for this tutorial. Note that if you will use our provided training and validation images, it is sufficient to complete the \"Prepare an Azure Data Science Virtual Machine for image extraction\" and \"Dataset preparation for deep learning\" sections."
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "\n",
47 | "## Clone or download this repository\n",
48 | "\n",
49 | "This repository ([Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification)) contains Python scripts that will be referenced by the code cells below. Clone or download/decompress the repository's contents to a directory on your Azure GPU VM and make note of the path."
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "\n",
57 | "## Retrain an AlexNet with Microsoft Cognitive Toolkit (CNTK)\n",
58 | "\n",
59 | "At the time of this writing, the Windows 2016 DSVM comes pre-installed with CNTK 2.0. The CNTK code in this repo is therefore designed for version 2.0, and has not been tested with more recent CNTK versions. You can use the code cell below to check when CNTK version has been installed on your DSVM:"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "import cntk\n",
71 | "print(cntk.__version__)"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "\n",
79 | "### Download the pretrained model\n",
80 | "You will need to download [the pretrained AlexNet model](https://mawahstorage.blob.core.windows.net/aerialimageclassification/models/AlexNet_cntk2beta15.model) and save the file to a new directory on your temporary storage drive, `D:\\models`."
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "\n",
88 | "### Update and run the training script\n",
89 | "The `retrain.py` script in the `cntk` subfolder of this repo can be used to retrain an AlexNet for aerial image classification. The script is adapted from the [Object Detection using Fast-R-CNN](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN) example in the [CNTK](https://github.com/Microsoft/CNTK) repository. This code has been written for single-GPU training: if using a multi-GPU VM, see the [CNTK ResNet/CIFAR10 image classification](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Classification/ResNet/Python) use case for example code illustrating distributed training.\n",
90 | "\n",
91 | "Run the `retrain.py` script in the `cntk` subfolder from an Anaconda prompt as follows:"
92 | ]
93 | },
94 | {
95 | "cell_type": "raw",
96 | "metadata": {
97 | "collapsed": true
98 | },
99 | "source": [
100 | "activate py35\n",
101 | "python \\retrain.py --input_map_file D:\\balanced_training_set\\map.txt --output_dir D:\\retrained_models --pretrained_model_file D:\\models\\AlexNet_cntk2beta15.model"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "The training script will load the pretrained AlexNet model, removing the final layer and freezing the weights in all retained layer. A transfer learning model is then created by subtracting an approximate mean value from the RGB channels of the input image, applying the frozen retained layers of AlexNet, and finally applying a dense, trainable last layer. The transfer learning model's output label is given by the index of the maximally-activated node in the final layer, which can be converted to a descriptive string using the mapping in `D:\\balanced_training_set\\labels.txt` (created previously by the image set preparation notebook).\n",
109 | "\n",
110 | "The training script applies several transforms when each minibatch's images are loaded, including a random crop/rescaling and random colorization. These transforms generate variety in the input set, limiting the degree of overfitting.\n",
111 | "\n",
112 | "For details of the model evaluation process, please see the scoring notebook in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository."
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "\n",
120 | "## Retrain a pretrained ResNet with TensorFlow\n",
121 | "\n",
122 | "We made use of the [`tf-slim` API](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim) for TensorFlow, which provides pre-trained ResNet models and helpful scripts for retraining and scoring. During training set preparation, we created the [TFRecords](https://www.tensorflow.org/how_tos/reading_data/#file_formats) that the training script will use as input. For more details on the training data, please see the image preparation notebook in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository. \n",
123 | "\n",
124 | "Our retraining script, `retrain.py` in the `tf` folder of [this repository](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification), is a modified version of `train_image_classifier.py` from the [TensorFlow models repo's slim subdirectory](https://github.com/tensorflow/models/tree/master/slim)."
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "\n",
132 | "### Download a pretrained model\n",
133 | "\n",
134 | "We obtained a 50-layer ResNet pretrained on ImageNet from a link in the [TensorFlow models repo's slim subdirectory](https://github.com/tensorflow/models/tree/master/slim). The pretrained model can be obtained and unpacked with the code snippet below. Note that if you have not already done so, you will first need to [download or clone this repo](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification), then update the variable name `repo_dir` below to point to the repo's root folder."
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 1,
140 | "metadata": {
141 | "collapsed": true
142 | },
143 | "outputs": [],
144 | "source": [
145 | "import urllib.request\n",
146 | "import tarfile\n",
147 | "import os\n",
148 | "\n",
149 | "# Change this directory to point to the location where you downloaded or cloned this git repo\n",
150 | "repo_dir = 'C:\\\\dsvm\\\\notebooks'\n",
151 | "\n",
152 | "os.makedirs(os.path.join(repo_dir, 'tf'), exist_ok=True)\n",
153 | "urllib.request.urlretrieve('http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz',\n",
154 | " os.path.join(repo_dir, 'tf', 'resnet_v1_50_2016_08_28.tar.gz'))\n",
155 | "with tarfile.open(os.path.join(repo_dir, 'tf', 'resnet_v1_50_2016_08_28.tar.gz'), 'r:gz') as f:\n",
156 | " f.extractall(path=os.path.join(repo_dir, 'tf'))\n",
157 | "os.remove(os.path.join(repo_dir, 'tf', 'resnet_v1_50_2016_08_28.tar.gz'))"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "\n",
165 | "### Run the training script\n",
166 | "\n",
167 | "We recommend that you run the training script from an Anaconda prompt. The code cell below will help you generate the appropriate command based on your file locations."
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 4,
173 | "metadata": {
174 | "collapsed": false
175 | },
176 | "outputs": [
177 | {
178 | "name": "stdout",
179 | "output_type": "stream",
180 | "text": [
181 | "activate py35\n",
182 | "python D:\\repo\\tf\\retrain.py --train_dir=D:\\repo\\tf\\models --dataset_name=aerial --dataset_split_name=train --dataset_dir=D:\\balanced_training_set --checkpoint_path=D:\\repo\\tf\\resnet_v1_50.ckpt\n",
183 | "\n"
184 | ]
185 | }
186 | ],
187 | "source": [
188 | "# path where retrained model and logs will be saved during training\n",
189 | "train_dir = os.path.join(repo_dir, 'tf', 'models')\n",
190 | "if not os.path.exists(train_dir):\n",
191 | " os.makedirs(train_dir)\n",
192 | " \n",
193 | "# location of the unpacked pretrained model\n",
194 | "checkpoint_path = os.path.join(repo_dir, 'tf', 'resnet_v1_50.ckpt')\n",
195 | "\n",
196 | "# Location of the TFRecords and other files generated during image set preparation\n",
197 | "training_image_dir = 'D:\\\\balanced_training_set'\n",
198 | "\n",
199 | "command = '''activate py35\n",
200 | "python {0} --train_dir={1} --dataset_name=aerial --dataset_split_name=train --dataset_dir={2} --checkpoint_path={3}\n",
201 | "'''.format(os.path.join(repo_dir, 'tf', 'retrain.py'),\n",
202 | " train_dir,\n",
203 | " training_image_dir,\n",
204 | " checkpoint_path)\n",
205 | "\n",
206 | "print(command)"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {},
212 | "source": [
213 | "The training script will load the pretrained ResNet model, freezing the weights for all but the final logits layer. The transfer learning model's output label is taken to be the index of the maximally-activated node in the final layer.\n",
214 | "\n",
215 | "The training script applies several transforms when each minibatch's images are loaded, including subtracting an approximation of the mean values for each channel (red, blue, and green) and randomly cropping/colorizing the image. These transforms generate variety in the input set, limiting the degree of overfitting.\n",
216 | "\n",
217 | "For details of the model evaluation process, please see the scoring notebook in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository."
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "\n",
225 | "## Next Steps\n",
226 | "\n",
227 | "Each training step above should take under one hour when performed alone. Please note that the apparent performance of your retrained models on the training set may be significantly better than the models' performance on the independent validation set of images. (We saw ~6% and ~20% classification error on the training set and validation sets, respectively.)\n",
228 | "\n",
229 | "For details on evaluating the trained models, please see the [Scoring on Spark notebook](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification/blob/master/scoring_on_spark.ipynb) in the [Embarrassingly Parallel Image Classification](https://github.com/Azure/Embarrassingly-Parallel-Image-Classification) repository. Note that you can proceed using our provided sample retrained DNNs if you prefer not to wait for model training to complete."
230 | ]
231 | }
232 | ],
233 | "metadata": {
234 | "anaconda-cloud": {},
235 | "kernelspec": {
236 | "display_name": "Python 3",
237 | "language": "python",
238 | "name": "python3"
239 | },
240 | "language_info": {
241 | "codemirror_mode": {
242 | "name": "ipython",
243 | "version": 3
244 | },
245 | "file_extension": ".py",
246 | "mimetype": "text/x-python",
247 | "name": "python",
248 | "nbconvert_exporter": "python",
249 | "pygments_lexer": "ipython3",
250 | "version": "3.6.0"
251 | }
252 | },
253 | "nbformat": 4,
254 | "nbformat_minor": 1
255 | }
256 |
--------------------------------------------------------------------------------
/scoring/script_action.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # This install script generously shared by Miruna Oprescu
4 | # (then lightly modified by Mary Wahl), Microsoft Corporation, 2017
5 |
6 | cntk_home="/usr/hdp/current"  # install CNTK alongside the HDP components
7 | cd $cntk_home
8 | curl "https://cntk.ai/BinaryDrop/CNTK-2-1-Linux-64bit-CPU-Only.tar.gz" | tar xzf -  # download and unpack the CPU-only CNTK 2.1 binaries
9 | cd ./cntk/Scripts/install/linux
10 | sed -i "s#"ANACONDA_PREFIX=\"\$HOME/anaconda3\""#"ANACONDA_PREFIX=\"\/usr/bin/anaconda\""#g" install-cntk.sh  # point the installer at the cluster's Anaconda rather than $HOME/anaconda3
11 | sed -i "s#"\$HOME/anaconda3"#"\$ANACONDA_PREFIX"#g" install-cntk.sh
12 | ./install-cntk.sh --py-version 35  # create the cntk-py35 Python 3.5 environment
13 | 
14 | sudo /usr/bin/anaconda/envs/cntk-py35/bin/pip install pillow  # image handling for the scoring scripts
15 | sudo /usr/bin/anaconda/envs/cntk-py35/bin/pip install tensorflow
16 | 
17 | sudo mkdir /tmp/models
18 | cd /tmp/models
19 | wget https://mawahstorage.blob.core.windows.net/models/20170906/tf.zip -P /tmp/models  # retrained TensorFlow model
20 | unzip /tmp/models/tf.zip
21 | wget https://mawahstorage.blob.core.windows.net/models/20170906/retrained.model -P /tmp/models  # retrained CNTK model
22 | sudo chmod -R 777 /tmp/models  # make the models readable by the Spark worker processes
--------------------------------------------------------------------------------
/tf/deployment/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tf/deployment/model_deploy_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for model_deploy."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import numpy as np
22 | import tensorflow as tf
23 |
24 | from deployment import model_deploy
25 |
26 | slim = tf.contrib.slim
27 |
28 |
29 | class DeploymentConfigTest(tf.test.TestCase):
30 |
31 | def testDefaults(self):
32 | deploy_config = model_deploy.DeploymentConfig()
33 |
34 | self.assertEqual(slim.get_variables(), [])
35 | self.assertEqual(deploy_config.caching_device(), None)
36 | self.assertDeviceEqual(deploy_config.clone_device(0), '')
37 | self.assertEqual(deploy_config.clone_scope(0), '')
38 | self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
39 | self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
40 | self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
41 |
42 | def testCPUonly(self):
43 | deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True)
44 |
45 | self.assertEqual(deploy_config.caching_device(), None)
46 | self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0')
47 | self.assertEqual(deploy_config.clone_scope(0), '')
48 | self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
49 | self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
50 | self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
51 |
52 | def testMultiGPU(self):
53 | deploy_config = model_deploy.DeploymentConfig(num_clones=2)
54 |
55 | self.assertEqual(deploy_config.caching_device(), None)
56 | self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')
57 | self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1')
58 | self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
59 | self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
60 | self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
61 | self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
62 | self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
63 |
64 | def testPS(self):
65 | deploy_config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)
66 |
67 | self.assertDeviceEqual(deploy_config.clone_device(0),
68 | '/job:worker')
69 | self.assertEqual(deploy_config.clone_scope(0), '')
70 | self.assertDeviceEqual(deploy_config.optimizer_device(),
71 | '/job:worker/device:CPU:0')
72 | self.assertDeviceEqual(deploy_config.inputs_device(),
73 | '/job:worker/device:CPU:0')
74 | with tf.device(deploy_config.variables_device()):
75 | a = tf.Variable(0)
76 | b = tf.Variable(0)
77 | c = tf.no_op()
78 | d = slim.variable('a', [],
79 | caching_device=deploy_config.caching_device())
80 | self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
81 | self.assertDeviceEqual(a.device, a.value().device)
82 | self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0')
83 | self.assertDeviceEqual(b.device, b.value().device)
84 | self.assertDeviceEqual(c.device, '')
85 | self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
86 | self.assertDeviceEqual(d.value().device, '')
87 |
88 | def testMultiGPUPS(self):
89 | deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1)
90 |
91 | self.assertEqual(deploy_config.caching_device()(tf.no_op()), '')
92 | self.assertDeviceEqual(deploy_config.clone_device(0),
93 | '/job:worker/device:GPU:0')
94 | self.assertDeviceEqual(deploy_config.clone_device(1),
95 | '/job:worker/device:GPU:1')
96 | self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
97 | self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
98 | self.assertDeviceEqual(deploy_config.optimizer_device(),
99 | '/job:worker/device:CPU:0')
100 | self.assertDeviceEqual(deploy_config.inputs_device(),
101 | '/job:worker/device:CPU:0')
102 |
103 | def testReplicasPS(self):
104 | deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
105 | num_ps_tasks=2)
106 |
107 | self.assertDeviceEqual(deploy_config.clone_device(0),
108 | '/job:worker')
109 | self.assertEqual(deploy_config.clone_scope(0), '')
110 | self.assertDeviceEqual(deploy_config.optimizer_device(),
111 | '/job:worker/device:CPU:0')
112 | self.assertDeviceEqual(deploy_config.inputs_device(),
113 | '/job:worker/device:CPU:0')
114 |
115 | def testReplicasMultiGPUPS(self):
116 | deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
117 | num_clones=2,
118 | num_ps_tasks=2)
119 | self.assertDeviceEqual(deploy_config.clone_device(0),
120 | '/job:worker/device:GPU:0')
121 | self.assertDeviceEqual(deploy_config.clone_device(1),
122 | '/job:worker/device:GPU:1')
123 | self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
124 | self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
125 | self.assertDeviceEqual(deploy_config.optimizer_device(),
126 | '/job:worker/device:CPU:0')
127 | self.assertDeviceEqual(deploy_config.inputs_device(),
128 | '/job:worker/device:CPU:0')
129 |
130 | def testVariablesPS(self):
131 | deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2)
132 |
133 | with tf.device(deploy_config.variables_device()):
134 | a = tf.Variable(0)
135 | b = tf.Variable(0)
136 | c = tf.no_op()
137 | d = slim.variable('a', [],
138 | caching_device=deploy_config.caching_device())
139 |
140 | self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
141 | self.assertDeviceEqual(a.device, a.value().device)
142 | self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0')
143 | self.assertDeviceEqual(b.device, b.value().device)
144 | self.assertDeviceEqual(c.device, '')
145 | self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
146 | self.assertDeviceEqual(d.value().device, '')
147 |
148 |
149 | def LogisticClassifier(inputs, labels, scope=None, reuse=None):
150 | with tf.variable_scope(scope, 'LogisticClassifier', [inputs, labels],
151 | reuse=reuse):
152 | predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid,
153 | scope='fully_connected')
154 | slim.losses.log_loss(predictions, labels)
155 | return predictions
156 |
157 |
158 | def BatchNormClassifier(inputs, labels, scope=None, reuse=None):
159 | with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels],
160 | reuse=reuse):
161 | inputs = slim.batch_norm(inputs, decay=0.1)
162 | predictions = slim.fully_connected(inputs, 1,
163 | activation_fn=tf.sigmoid,
164 | scope='fully_connected')
165 | slim.losses.log_loss(predictions, labels)
166 | return predictions
167 |
168 |
169 | class CreatecloneTest(tf.test.TestCase):
170 |
171 | def setUp(self):
172 | # Create an easy training set:
173 | np.random.seed(0)
174 |
175 | self._inputs = np.zeros((16, 4))
176 | self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
177 | self._logdir = self.get_temp_dir()
178 |
179 | for i in range(16):
180 | j = int(2 * self._labels[i] + np.random.randint(0, 2))
181 | self._inputs[i, j] = 1
182 |
183 | def testCreateLogisticClassifier(self):
184 | g = tf.Graph()
185 | with g.as_default():
186 | tf.set_random_seed(0)
187 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
188 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
189 |
190 | model_fn = LogisticClassifier
191 | clone_args = (tf_inputs, tf_labels)
192 | deploy_config = model_deploy.DeploymentConfig(num_clones=1)
193 |
194 | self.assertEqual(slim.get_variables(), [])
195 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
196 | clone = clones[0]
197 | self.assertEqual(len(slim.get_variables()), 2)
198 | for v in slim.get_variables():
199 | self.assertDeviceEqual(v.device, 'CPU:0')
200 | self.assertDeviceEqual(v.value().device, 'CPU:0')
201 | self.assertEqual(clone.outputs.op.name,
202 | 'LogisticClassifier/fully_connected/Sigmoid')
203 | self.assertEqual(clone.scope, '')
204 | self.assertDeviceEqual(clone.device, '')
205 | self.assertEqual(len(slim.losses.get_losses()), 1)
206 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
207 | self.assertEqual(update_ops, [])
208 |
209 | def testCreateSingleclone(self):
210 | g = tf.Graph()
211 | with g.as_default():
212 | tf.set_random_seed(0)
213 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
214 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
215 |
216 | model_fn = BatchNormClassifier
217 | clone_args = (tf_inputs, tf_labels)
218 | deploy_config = model_deploy.DeploymentConfig(num_clones=1)
219 |
220 | self.assertEqual(slim.get_variables(), [])
221 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
222 | clone = clones[0]
223 | self.assertEqual(len(slim.get_variables()), 5)
224 | for v in slim.get_variables():
225 | self.assertDeviceEqual(v.device, 'CPU:0')
226 | self.assertDeviceEqual(v.value().device, 'CPU:0')
227 | self.assertEqual(clone.outputs.op.name,
228 | 'BatchNormClassifier/fully_connected/Sigmoid')
229 | self.assertEqual(clone.scope, '')
230 | self.assertDeviceEqual(clone.device, '')
231 | self.assertEqual(len(slim.losses.get_losses()), 1)
232 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
233 | self.assertEqual(len(update_ops), 2)
234 |
235 | def testCreateMulticlone(self):
236 | g = tf.Graph()
237 | with g.as_default():
238 | tf.set_random_seed(0)
239 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
240 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
241 |
242 | model_fn = BatchNormClassifier
243 | clone_args = (tf_inputs, tf_labels)
244 | num_clones = 4
245 | deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)
246 |
247 | self.assertEqual(slim.get_variables(), [])
248 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
249 | self.assertEqual(len(slim.get_variables()), 5)
250 | for v in slim.get_variables():
251 | self.assertDeviceEqual(v.device, 'CPU:0')
252 | self.assertDeviceEqual(v.value().device, 'CPU:0')
253 | self.assertEqual(len(clones), num_clones)
254 | for i, clone in enumerate(clones):
255 | self.assertEqual(
256 | clone.outputs.op.name,
257 | 'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
258 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
259 | self.assertEqual(len(update_ops), 2)
260 | self.assertEqual(clone.scope, 'clone_%d/' % i)
261 | self.assertDeviceEqual(clone.device, 'GPU:%d' % i)
262 |
263 | def testCreateOnecloneWithPS(self):
264 | g = tf.Graph()
265 | with g.as_default():
266 | tf.set_random_seed(0)
267 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
268 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
269 |
270 | model_fn = BatchNormClassifier
271 | clone_args = (tf_inputs, tf_labels)
272 | deploy_config = model_deploy.DeploymentConfig(num_clones=1,
273 | num_ps_tasks=1)
274 |
275 | self.assertEqual(slim.get_variables(), [])
276 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
277 | self.assertEqual(len(clones), 1)
278 | clone = clones[0]
279 | self.assertEqual(clone.outputs.op.name,
280 | 'BatchNormClassifier/fully_connected/Sigmoid')
281 | self.assertDeviceEqual(clone.device, '/job:worker')
282 | self.assertEqual(clone.scope, '')
283 | self.assertEqual(len(slim.get_variables()), 5)
284 | for v in slim.get_variables():
285 | self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
286 | self.assertDeviceEqual(v.device, v.value().device)
287 |
288 | def testCreateMulticloneWithPS(self):
289 | g = tf.Graph()
290 | with g.as_default():
291 | tf.set_random_seed(0)
292 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
293 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
294 |
295 | model_fn = BatchNormClassifier
296 | clone_args = (tf_inputs, tf_labels)
297 | deploy_config = model_deploy.DeploymentConfig(num_clones=2,
298 | num_ps_tasks=2)
299 |
300 | self.assertEqual(slim.get_variables(), [])
301 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
302 | self.assertEqual(len(slim.get_variables()), 5)
303 | for i, v in enumerate(slim.get_variables()):
304 | t = i % 2
305 | self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t)
306 | self.assertDeviceEqual(v.device, v.value().device)
307 | self.assertEqual(len(clones), 2)
308 | for i, clone in enumerate(clones):
309 | self.assertEqual(
310 | clone.outputs.op.name,
311 | 'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
312 | self.assertEqual(clone.scope, 'clone_%d/' % i)
313 | self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i)
314 |
315 |
316 | class OptimizeclonesTest(tf.test.TestCase):
317 |
318 | def setUp(self):
319 | # Create an easy training set:
320 | np.random.seed(0)
321 |
322 | self._inputs = np.zeros((16, 4))
323 | self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
324 | self._logdir = self.get_temp_dir()
325 |
326 | for i in range(16):
327 | j = int(2 * self._labels[i] + np.random.randint(0, 2))
328 | self._inputs[i, j] = 1
329 |
330 | def testCreateLogisticClassifier(self):
331 | g = tf.Graph()
332 | with g.as_default():
333 | tf.set_random_seed(0)
334 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
335 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
336 |
337 | model_fn = LogisticClassifier
338 | clone_args = (tf_inputs, tf_labels)
339 | deploy_config = model_deploy.DeploymentConfig(num_clones=1)
340 |
341 | self.assertEqual(slim.get_variables(), [])
342 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
343 | self.assertEqual(len(slim.get_variables()), 2)
344 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
345 | self.assertEqual(update_ops, [])
346 |
347 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
348 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
349 | optimizer)
350 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
351 | self.assertEqual(total_loss.op.name, 'total_loss')
352 | for g, v in grads_and_vars:
353 | self.assertDeviceEqual(g.device, '')
354 | self.assertDeviceEqual(v.device, 'CPU:0')
355 |
356 | def testCreateSingleclone(self):
357 | g = tf.Graph()
358 | with g.as_default():
359 | tf.set_random_seed(0)
360 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
361 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
362 |
363 | model_fn = BatchNormClassifier
364 | clone_args = (tf_inputs, tf_labels)
365 | deploy_config = model_deploy.DeploymentConfig(num_clones=1)
366 |
367 | self.assertEqual(slim.get_variables(), [])
368 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
369 | self.assertEqual(len(slim.get_variables()), 5)
370 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
371 | self.assertEqual(len(update_ops), 2)
372 |
373 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
374 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
375 | optimizer)
376 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
377 | self.assertEqual(total_loss.op.name, 'total_loss')
378 | for g, v in grads_and_vars:
379 | self.assertDeviceEqual(g.device, '')
380 | self.assertDeviceEqual(v.device, 'CPU:0')
381 |
382 | def testCreateMulticlone(self):
383 | g = tf.Graph()
384 | with g.as_default():
385 | tf.set_random_seed(0)
386 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
387 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
388 |
389 | model_fn = BatchNormClassifier
390 | clone_args = (tf_inputs, tf_labels)
391 | num_clones = 4
392 | deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)
393 |
394 | self.assertEqual(slim.get_variables(), [])
395 | clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
396 | self.assertEqual(len(slim.get_variables()), 5)
397 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
398 | self.assertEqual(len(update_ops), num_clones * 2)
399 |
400 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
401 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
402 | optimizer)
403 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
404 | self.assertEqual(total_loss.op.name, 'total_loss')
405 | for g, v in grads_and_vars:
406 | self.assertDeviceEqual(g.device, '')
407 | self.assertDeviceEqual(v.device, 'CPU:0')
408 |
409 | def testCreateMulticloneCPU(self):
410 | g = tf.Graph()
411 | with g.as_default():
412 | tf.set_random_seed(0)
413 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
414 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
415 |
416 | model_fn = BatchNormClassifier
417 | model_args = (tf_inputs, tf_labels)
418 | num_clones = 4
419 | deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones,
420 | clone_on_cpu=True)
421 |
422 | self.assertEqual(slim.get_variables(), [])
423 | clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
424 | self.assertEqual(len(slim.get_variables()), 5)
425 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
426 | self.assertEqual(len(update_ops), num_clones * 2)
427 |
428 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
429 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
430 | optimizer)
431 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
432 | self.assertEqual(total_loss.op.name, 'total_loss')
433 | for g, v in grads_and_vars:
434 | self.assertDeviceEqual(g.device, '')
435 | self.assertDeviceEqual(v.device, 'CPU:0')
436 |
437 | def testCreateOnecloneWithPS(self):
438 | g = tf.Graph()
439 | with g.as_default():
440 | tf.set_random_seed(0)
441 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
442 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
443 |
444 | model_fn = BatchNormClassifier
445 | model_args = (tf_inputs, tf_labels)
446 | deploy_config = model_deploy.DeploymentConfig(num_clones=1,
447 | num_ps_tasks=1)
448 |
449 | self.assertEqual(slim.get_variables(), [])
450 | clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
451 | self.assertEqual(len(slim.get_variables()), 5)
452 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
453 | self.assertEqual(len(update_ops), 2)
454 |
455 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
456 | total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
457 | optimizer)
458 | self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
459 | self.assertEqual(total_loss.op.name, 'total_loss')
460 | for g, v in grads_and_vars:
461 | self.assertDeviceEqual(g.device, '/job:worker')
462 | self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
463 |
464 |
465 | class DeployTest(tf.test.TestCase):
466 |
467 | def setUp(self):
468 | # Create an easy training set:
469 | np.random.seed(0)
470 |
471 | self._inputs = np.zeros((16, 4))
472 | self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
473 | self._logdir = self.get_temp_dir()
474 |
475 | for i in range(16):
476 | j = int(2 * self._labels[i] + np.random.randint(0, 2))
477 | self._inputs[i, j] = 1
478 |
479 | def testLocalTrainOp(self):
480 | g = tf.Graph()
481 | with g.as_default():
482 | tf.set_random_seed(0)
483 | tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
484 | tf_labels = tf.constant(self._labels, dtype=tf.float32)
485 |
486 | model_fn = BatchNormClassifier
487 | model_args = (tf_inputs, tf_labels)
488 | deploy_config = model_deploy.DeploymentConfig(num_clones=2,
489 | clone_on_cpu=True)
490 |
491 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
492 |
493 | self.assertEqual(slim.get_variables(), [])
494 | model = model_deploy.deploy(deploy_config, model_fn, model_args,
495 | optimizer=optimizer)
496 |
497 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
498 | self.assertEqual(len(update_ops), 4)
499 | self.assertEqual(len(model.clones), 2)
500 | self.assertEqual(model.total_loss.op.name, 'total_loss')
501 | self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
502 | self.assertEqual(model.train_op.op.name, 'train_op')
503 |
504 | with tf.Session() as sess:
505 | sess.run(tf.initialize_all_variables())
506 | moving_mean = tf.contrib.framework.get_variables_by_name(
507 | 'moving_mean')[0]
508 | moving_variance = tf.contrib.framework.get_variables_by_name(
509 | 'moving_variance')[0]
510 | initial_loss = sess.run(model.total_loss)
511 | initial_mean, initial_variance = sess.run([moving_mean,
512 | moving_variance])
513 | self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
514 | self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
515 | for _ in range(10):
516 | sess.run(model.train_op)
517 | final_loss = sess.run(model.total_loss)
518 | self.assertLess(final_loss, initial_loss / 10.0)
519 |
520 | final_mean, final_variance = sess.run([moving_mean,
521 | moving_variance])
522 | self.assertAllClose(final_mean, [0.125, 0.25, 0.375, 0.25])
523 | self.assertAllClose(final_variance, [0.109375, 0.1875,
524 | 0.234375, 0.1875])
525 |
526 | def testNoSummariesOnGPU(self):
527 | with tf.Graph().as_default():
528 | deploy_config = model_deploy.DeploymentConfig(num_clones=2)
529 |
530 | # clone function creates a fully_connected layer with a regularizer loss.
531 | def ModelFn():
532 | inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
533 | reg = tf.contrib.layers.l2_regularizer(0.001)
534 | tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)
535 |
536 | model = model_deploy.deploy(
537 | deploy_config, ModelFn,
538 | optimizer=tf.train.GradientDescentOptimizer(1.0))
539 | # The model summary op should have a few summary inputs and all of them
540 | # should be on the CPU.
541 | self.assertTrue(model.summary_op.op.inputs)
542 | for inp in model.summary_op.op.inputs:
543 | self.assertEqual('/device:CPU:0', inp.device)
544 |
545 | def testNoSummariesOnGPUForEvals(self):
546 | with tf.Graph().as_default():
547 | deploy_config = model_deploy.DeploymentConfig(num_clones=2)
548 |
549 | # clone function creates a fully_connected layer with a regularizer loss.
550 | def ModelFn():
551 | inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
552 | reg = tf.contrib.layers.l2_regularizer(0.001)
553 | tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)
554 |
555 | # No optimizer here, it's an eval.
556 | model = model_deploy.deploy(deploy_config, ModelFn)
557 | # The model summary op should have a few summary inputs and all of them
558 | # should be on the CPU.
559 | self.assertTrue(model.summary_op.op.inputs)
560 | for inp in model.summary_op.op.inputs:
561 | self.assertEqual('/device:CPU:0', inp.device)
562 |
563 |
564 | if __name__ == '__main__':
565 | tf.test.main()
566 |
--------------------------------------------------------------------------------
/tf/nets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tf/nets/nets_factory.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a factory for building various models."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | import functools
21 |
22 | import tensorflow as tf
23 |
24 | from nets import alexnet
25 | from nets import cifarnet
26 | from nets import inception
27 | from nets import lenet
28 | from nets import overfeat
29 | from nets import resnet_v1
30 | from nets import resnet_v2
31 | from nets import vgg
32 |
33 | slim = tf.contrib.slim
34 |
35 | networks_map = {'alexnet_v2': alexnet.alexnet_v2,
36 | 'cifarnet': cifarnet.cifarnet,
37 | 'overfeat': overfeat.overfeat,
38 | 'vgg_a': vgg.vgg_a,
39 | 'vgg_16': vgg.vgg_16,
40 | 'vgg_19': vgg.vgg_19,
41 | 'inception_v1': inception.inception_v1,
42 | 'inception_v2': inception.inception_v2,
43 | 'inception_v3': inception.inception_v3,
44 | 'inception_v4': inception.inception_v4,
45 | 'inception_resnet_v2': inception.inception_resnet_v2,
46 | 'lenet': lenet.lenet,
47 | 'resnet_v1_50': resnet_v1.resnet_v1_50,
48 | 'resnet_v1_101': resnet_v1.resnet_v1_101,
49 | 'resnet_v1_152': resnet_v1.resnet_v1_152,
50 | 'resnet_v1_200': resnet_v1.resnet_v1_200,
51 | 'resnet_v2_50': resnet_v2.resnet_v2_50,
52 | 'resnet_v2_101': resnet_v2.resnet_v2_101,
53 | 'resnet_v2_152': resnet_v2.resnet_v2_152,
54 | 'resnet_v2_200': resnet_v2.resnet_v2_200,
55 | }
56 |
57 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,
58 | 'cifarnet': cifarnet.cifarnet_arg_scope,
59 | 'overfeat': overfeat.overfeat_arg_scope,
60 | 'vgg_a': vgg.vgg_arg_scope,
61 | 'vgg_16': vgg.vgg_arg_scope,
62 | 'vgg_19': vgg.vgg_arg_scope,
63 | 'inception_v1': inception.inception_v3_arg_scope,
64 | 'inception_v2': inception.inception_v3_arg_scope,
65 | 'inception_v3': inception.inception_v3_arg_scope,
66 | 'inception_v4': inception.inception_v4_arg_scope,
67 | 'inception_resnet_v2':
68 | inception.inception_resnet_v2_arg_scope,
69 | 'lenet': lenet.lenet_arg_scope,
70 | 'resnet_v1_50': resnet_v1.resnet_arg_scope,
71 | 'resnet_v1_101': resnet_v1.resnet_arg_scope,
72 | 'resnet_v1_152': resnet_v1.resnet_arg_scope,
73 | 'resnet_v1_200': resnet_v1.resnet_arg_scope,
74 | 'resnet_v2_50': resnet_v2.resnet_arg_scope,
75 | 'resnet_v2_101': resnet_v2.resnet_arg_scope,
76 | 'resnet_v2_152': resnet_v2.resnet_arg_scope,
77 | 'resnet_v2_200': resnet_v2.resnet_arg_scope,
78 | }
79 |
80 |
81 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
82 | """Returns a network_fn such as `logits, end_points = network_fn(images)`.
83 |
84 | Args:
85 | name: The name of the network.
86 | num_classes: The number of classes to use for classification.
87 | weight_decay: The l2 coefficient for the model weights.
88 | is_training: `True` if the model is being used for training and `False`
89 | otherwise.
90 |
91 | Returns:
92 | network_fn: A function that applies the model to a batch of images. It has
93 | the following signature:
94 | logits, end_points = network_fn(images)
95 | Raises:
96 | ValueError: If network `name` is not recognized.
97 | """
98 | if name not in networks_map:
99 | raise ValueError('Name of network unknown %s' % name)
100 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay)
101 | func = networks_map[name]
102 | @functools.wraps(func)
103 | def network_fn(images):
104 | with slim.arg_scope(arg_scope):
105 | return func(images, num_classes, is_training=is_training)
106 | if hasattr(func, 'default_image_size'):
107 | network_fn.default_image_size = func.default_image_size
108 |
109 | return network_fn
110 |
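111 | # Usage sketch (illustrative): the factory returns a callable that applies the
112 | # chosen network to a batch of images; `images` below is a hypothetical
113 | # [batch, 224, 224, 3] float tensor that is not defined in this module.
114 | #
115 | #   network_fn = get_network_fn('resnet_v1_50', num_classes=1000,
116 | #                               is_training=False)
117 | #   logits, end_points = network_fn(images)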
--------------------------------------------------------------------------------
/tf/nets/nets_factory_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for slim.inception."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 |
23 | import tensorflow as tf
24 |
25 | from nets import nets_factory
26 |
27 |
28 | class NetworksTest(tf.test.TestCase):
29 |
30 | def testGetNetworkFn(self):
31 | batch_size = 5
32 | num_classes = 1000
33 | for net in nets_factory.networks_map:
34 | with self.test_session():
35 | net_fn = nets_factory.get_network_fn(net, num_classes)
36 | # Most networks use 224 as their default_image_size
37 | image_size = getattr(net_fn, 'default_image_size', 224)
38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
39 | logits, end_points = net_fn(inputs)
40 | self.assertTrue(isinstance(logits, tf.Tensor))
41 | self.assertTrue(isinstance(end_points, dict))
42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size)
43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes)
44 |
45 | if __name__ == '__main__':
46 | tf.test.main()
47 |
--------------------------------------------------------------------------------
/tf/nets/resnet_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains building blocks for various versions of Residual Networks.
16 |
17 | Residual networks (ResNets) were proposed in:
18 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015
20 |
21 | More variants were introduced in:
22 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
23 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016
24 |
25 | We can obtain different ResNet variants by changing the network depth, width,
26 | and form of residual unit. This module implements the infrastructure for
27 | building them. Concrete ResNet units and full ResNet networks are implemented in
28 | the accompanying resnet_v1.py and resnet_v2.py modules.
29 |
30 | Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
31 | implementation we subsample the output activations in the last residual unit of
32 | each block, instead of subsampling the input activations in the first residual
33 | unit of each block. The two implementations give identical results but our
34 | implementation is more memory efficient.
35 | """
36 | from __future__ import absolute_import
37 | from __future__ import division
38 | from __future__ import print_function
39 |
40 | import collections
41 | import tensorflow as tf
42 |
43 | slim = tf.contrib.slim
44 |
45 |
46 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
47 | """A named tuple describing a ResNet block.
48 |
49 | Its parts are:
50 | scope: The scope of the `Block`.
51 | unit_fn: The ResNet unit function which takes as input a `Tensor` and
52 | returns another `Tensor` with the output of the ResNet unit.
53 | args: A list of length equal to the number of units in the `Block`. The list
54 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the
55 | block to serve as argument to unit_fn.
56 | """
57 |
58 |
59 | def subsample(inputs, factor, scope=None):
60 | """Subsamples the input along the spatial dimensions.
61 |
62 | Args:
63 | inputs: A `Tensor` of size [batch, height_in, width_in, channels].
64 | factor: The subsampling factor.
65 | scope: Optional variable_scope.
66 |
67 | Returns:
68 | output: A `Tensor` of size [batch, height_out, width_out, channels] with the
69 | input, either intact (if factor == 1) or subsampled (if factor > 1).
70 | """
71 | if factor == 1:
72 | return inputs
73 | else:
74 | return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
75 |
76 |
77 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
78 | """Strided 2-D convolution with 'SAME' padding.
79 |
80 | When stride > 1, then we do explicit zero-padding, followed by conv2d with
81 | 'VALID' padding.
82 |
83 | Note that
84 |
85 | net = conv2d_same(inputs, num_outputs, 3, stride=stride)
86 |
87 | is equivalent to
88 |
89 | net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
90 | net = subsample(net, factor=stride)
91 |
92 | whereas
93 |
94 | net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')
95 |
96 | is different when the input's height or width is even, which is why we add the
97 | current function. For more details, see ResnetUtilsTest.testConv2DSameEven().
98 |
99 | Args:
100 | inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
101 | num_outputs: An integer, the number of output filters.
102 | kernel_size: An int with the kernel_size of the filters.
103 | stride: An integer, the output stride.
104 | rate: An integer, rate for atrous convolution.
105 | scope: Scope.
106 |
107 | Returns:
108 | output: A 4-D tensor of size [batch, height_out, width_out, channels] with
109 | the convolution output.
110 | """
111 | if stride == 1:
112 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
113 | padding='SAME', scope=scope)
114 | else:
115 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
116 | pad_total = kernel_size_effective - 1
117 | pad_beg = pad_total // 2
118 | pad_end = pad_total - pad_beg
119 | inputs = tf.pad(inputs,
120 | [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
121 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
122 | rate=rate, padding='VALID', scope=scope)
123 |
124 |
125 | @slim.add_arg_scope
126 | def stack_blocks_dense(net, blocks, output_stride=None,
127 | outputs_collections=None):
128 | """Stacks ResNet `Blocks` and controls output feature density.
129 |
130 | First, this function creates scopes for the ResNet in the form of
131 | 'block_name/unit_1', 'block_name/unit_2', etc.
132 |
133 | Second, this function allows the user to explicitly control the ResNet
134 | output_stride, which is the ratio of the input to output spatial resolution.
135 | This is useful for dense prediction tasks such as semantic segmentation or
136 | object detection.
137 |
138 | Most ResNets consist of 4 ResNet blocks and subsample the activations by a
139 | factor of 2 when transitioning between consecutive ResNet blocks. This results
140 | in a nominal ResNet output_stride equal to 8. If we set the output_stride to
141 | half the nominal network stride (e.g., output_stride=4), then we compute
142 | responses twice.
143 |
144 | Control of the output feature density is implemented by atrous convolution.
145 |
146 | Args:
147 | net: A `Tensor` of size [batch, height, width, channels].
148 | blocks: A list of length equal to the number of ResNet `Blocks`. Each
149 | element is a ResNet `Block` object describing the units in the `Block`.
150 | output_stride: If `None`, then the output will be computed at the nominal
151 | network stride. If output_stride is not `None`, it specifies the requested
152 | ratio of input to output spatial resolution, which needs to be equal to
153 | the product of unit strides from the start up to some level of the ResNet.
154 | For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
155 | then valid values for the output_stride are 1, 2, 6, 24 or None (which
156 | is equivalent to output_stride=24).
157 | outputs_collections: Collection to add the ResNet block outputs.
158 |
159 | Returns:
160 | net: Output tensor with stride equal to the specified output_stride.
161 |
162 | Raises:
163 | ValueError: If the target output_stride is not valid.
164 | """
165 | # The current_stride variable keeps track of the effective stride of the
166 | # activations. This allows us to invoke atrous convolution whenever applying
167 | # the next residual unit would result in the activations having stride larger
168 | # than the target output_stride.
169 | current_stride = 1
170 |
171 | # The atrous convolution rate parameter.
172 | rate = 1
173 |
174 | for block in blocks:
175 | with tf.variable_scope(block.scope, 'block', [net]) as sc:
176 | for i, unit in enumerate(block.args):
177 | if output_stride is not None and current_stride > output_stride:
178 | raise ValueError('The target output_stride cannot be reached.')
179 |
180 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
181 | unit_depth, unit_depth_bottleneck, unit_stride = unit
182 |
183 | # If we have reached the target output_stride, then we need to employ
184 | # atrous convolution with stride=1 and multiply the atrous rate by the
185 | # current unit's stride for use in subsequent layers.
186 | if output_stride is not None and current_stride == output_stride:
187 | net = block.unit_fn(net,
188 | depth=unit_depth,
189 | depth_bottleneck=unit_depth_bottleneck,
190 | stride=1,
191 | rate=rate)
192 | rate *= unit_stride
193 |
194 | else:
195 | net = block.unit_fn(net,
196 | depth=unit_depth,
197 | depth_bottleneck=unit_depth_bottleneck,
198 | stride=unit_stride,
199 | rate=1)
200 | current_stride *= unit_stride
201 | net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
202 |
203 | if output_stride is not None and current_stride != output_stride:
204 | raise ValueError('The target output_stride cannot be reached.')
205 |
206 | return net
207 |
208 |
209 | def resnet_arg_scope(weight_decay=0.0001,
210 | batch_norm_decay=0.997,
211 | batch_norm_epsilon=1e-5,
212 | batch_norm_scale=True):
213 | """Defines the default ResNet arg scope.
214 |
215 | TODO(gpapan): The batch-normalization related default values above are
216 | appropriate for use in conjunction with the reference ResNet models
217 | released at https://github.com/KaimingHe/deep-residual-networks. When
218 | training ResNets from scratch, they might need to be tuned.
219 |
220 | Args:
221 | weight_decay: The weight decay to use for regularizing the model.
222 | batch_norm_decay: The moving average decay when estimating layer activation
223 | statistics in batch normalization.
224 | batch_norm_epsilon: Small constant to prevent division by zero when
225 | normalizing activations by their variance in batch normalization.
226 | batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
227 | activations in the batch normalization layer.
228 |
229 | Returns:
230 | An `arg_scope` to use for the resnet models.
231 | """
232 | batch_norm_params = {
233 | 'decay': batch_norm_decay,
234 | 'epsilon': batch_norm_epsilon,
235 | 'scale': batch_norm_scale,
236 | 'updates_collections': tf.GraphKeys.UPDATE_OPS,
237 | }
238 |
239 | with slim.arg_scope(
240 | [slim.conv2d],
241 | weights_regularizer=slim.l2_regularizer(weight_decay),
242 | weights_initializer=slim.variance_scaling_initializer(),
243 | activation_fn=tf.nn.relu,
244 | normalizer_fn=slim.batch_norm,
245 | normalizer_params=batch_norm_params):
246 | with slim.arg_scope([slim.batch_norm], **batch_norm_params):
247 | # The following implies padding='SAME' for pool1, which makes feature
248 | # alignment easier for dense prediction tasks. This is also used in
249 | # https://github.com/facebook/fb.resnet.torch. However the accompanying
250 | # code of 'Deep Residual Learning for Image Recognition' uses
251 | # padding='VALID' for pool1. You can switch to that choice by setting
252 | # slim.arg_scope([slim.max_pool2d], padding='VALID').
253 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
254 | return arg_sc
255 |
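256 | # Usage sketch (illustrative): stacking two small blocks under the default arg
257 | # scope. `bottleneck` is a unit function such as resnet_v1.bottleneck and
258 | # `inputs` is a 4-D image tensor; both are assumed to exist in the calling code.
259 | #
260 | #   blocks = [Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
261 | #             Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)])]
262 | #   with slim.arg_scope(resnet_arg_scope()):
263 | #     # output_stride=2 keeps 1/2-resolution features via atrous convolution.
264 | #     net = stack_blocks_dense(inputs, blocks, output_stride=2)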
--------------------------------------------------------------------------------
/tf/nets/resnet_v1.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains definitions for the original form of Residual Networks.
16 |
17 | The 'v1' residual networks (ResNets) implemented in this module were proposed
18 | by:
19 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
20 | Deep Residual Learning for Image Recognition. arXiv:1512.03385
21 |
22 | Other variants were introduced in:
23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
25 |
26 | The networks defined in this module utilize the bottleneck building block of
27 | [1] with projection shortcuts only for increasing depths. They employ batch
28 | normalization *after* every weight layer. This is the architecture used by
29 | MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
30 | ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
31 | architecture and the alternative 'v2' architecture of [2] which uses batch
32 | normalization *before* every weight layer in the so-called full pre-activation
33 | units.
34 |
35 | Typical use:
36 |
37 | from tensorflow.contrib.slim.nets import resnet_v1
38 |
39 | ResNet-101 for image classification into 1000 classes:
40 |
41 | # inputs has shape [batch, 224, 224, 3]
42 | with slim.arg_scope(resnet_v1.resnet_arg_scope()):
43 | net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)
44 |
45 | ResNet-101 for semantic segmentation into 21 classes:
46 |
47 | # inputs has shape [batch, 513, 513, 3]
48 | with slim.arg_scope(resnet_v1.resnet_arg_scope()):
49 | net, end_points = resnet_v1.resnet_v1_101(inputs,
50 | 21,
51 | is_training=False,
52 | global_pool=False,
53 | output_stride=16)
54 | """
55 | from __future__ import absolute_import
56 | from __future__ import division
57 | from __future__ import print_function
58 |
59 | import tensorflow as tf
60 |
61 | from nets import resnet_utils
62 |
63 |
64 | resnet_arg_scope = resnet_utils.resnet_arg_scope
65 | slim = tf.contrib.slim
66 |
67 |
68 | @slim.add_arg_scope
69 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
70 | outputs_collections=None, scope=None):
71 | """Bottleneck residual unit variant with BN after convolutions.
72 |
73 | This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
74 | its definition. Note that we use here the bottleneck variant which has an
75 | extra bottleneck layer.
76 |
77 | When putting together two consecutive ResNet blocks that use this unit, one
78 | should use stride = 2 in the last unit of the first block.
79 |
80 | Args:
81 | inputs: A tensor of size [batch, height, width, channels].
82 | depth: The depth of the ResNet unit output.
83 | depth_bottleneck: The depth of the bottleneck layers.
84 | stride: The ResNet unit's stride. Determines the amount of downsampling of
85 |       the unit's output compared to its input.
86 | rate: An integer, rate for atrous convolution.
87 | outputs_collections: Collection to add the ResNet unit output.
88 | scope: Optional variable_scope.
89 |
90 | Returns:
91 | The ResNet unit's output.
92 | """
93 | with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
94 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
95 | if depth == depth_in:
96 | shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
97 | else:
98 | shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride,
99 | activation_fn=None, scope='shortcut')
100 |
101 | residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
102 | scope='conv1')
103 | residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
104 | rate=rate, scope='conv2')
105 | residual = slim.conv2d(residual, depth, [1, 1], stride=1,
106 | activation_fn=None, scope='conv3')
107 |
108 | output = tf.nn.relu(shortcut + residual)
109 |
110 | return slim.utils.collect_named_outputs(outputs_collections,
111 | sc.original_name_scope,
112 | output)
113 |
114 |
115 | def resnet_v1(inputs,
116 | blocks,
117 | num_classes=None,
118 | is_training=True,
119 | global_pool=True,
120 | output_stride=None,
121 | include_root_block=True,
122 | reuse=None,
123 | scope=None):
124 | """Generator for v1 ResNet models.
125 |
126 | This function generates a family of ResNet v1 models. See the resnet_v1_*()
127 | methods for specific model instantiations, obtained by selecting different
128 | block instantiations that produce ResNets of various depths.
129 |
130 | Training for image classification on Imagenet is usually done with [224, 224]
131 | inputs, resulting in [7, 7] feature maps at the output of the last ResNet
132 | block for the ResNets defined in [1] that have nominal stride equal to 32.
133 | However, for dense prediction tasks we advise that one uses inputs with
134 | spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
135 | this case the feature maps at the ResNet output will have spatial shape
136 | [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
137 | and corners exactly aligned with the input image corners, which greatly
138 | facilitates alignment of the features to the image. Using as input [225, 225]
139 | images results in [8, 8] feature maps at the output of the last ResNet block.
140 |
141 | For dense prediction tasks, the ResNet needs to run in fully-convolutional
142 | (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
143 | have nominal stride equal to 32 and a good choice in FCN mode is to use
144 | output_stride=16 in order to increase the density of the computed features at
145 | small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
146 |
147 | Args:
148 | inputs: A tensor of size [batch, height_in, width_in, channels].
149 | blocks: A list of length equal to the number of ResNet blocks. Each element
150 | is a resnet_utils.Block object describing the units in the block.
151 | num_classes: Number of predicted classes for classification tasks. If None
152 | we return the features before the logit layer.
153 |     is_training: whether the network is in training mode or not.
154 | global_pool: If True, we perform global average pooling before computing the
155 | logits. Set to True for image classification, False for dense prediction.
156 | output_stride: If None, then the output will be computed at the nominal
157 | network stride. If output_stride is not None, it specifies the requested
158 | ratio of input to output spatial resolution.
159 | include_root_block: If True, include the initial convolution followed by
160 |       max-pooling; if False, excludes it.
161 | reuse: whether or not the network and its variables should be reused. To be
162 |       able to reuse, 'scope' must be given.
163 | scope: Optional variable_scope.
164 |
165 | Returns:
166 | net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
167 | If global_pool is False, then height_out and width_out are reduced by a
168 | factor of output_stride compared to the respective height_in and width_in,
169 | else both height_out and width_out equal one. If num_classes is None, then
170 | net is the output of the last ResNet block, potentially after global
171 | average pooling. If num_classes is not None, net contains the pre-softmax
172 | activations.
173 | end_points: A dictionary from components of the network to the corresponding
174 | activation.
175 |
176 | Raises:
177 | ValueError: If the target output_stride is not valid.
178 | """
179 | with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
180 | end_points_collection = sc.name + '_end_points'
181 | with slim.arg_scope([slim.conv2d, bottleneck,
182 | resnet_utils.stack_blocks_dense],
183 | outputs_collections=end_points_collection):
184 | with slim.arg_scope([slim.batch_norm], is_training=is_training):
185 | net = inputs
186 | if include_root_block:
187 | if output_stride is not None:
188 | if output_stride % 4 != 0:
189 | raise ValueError('The output_stride needs to be a multiple of 4.')
190 | output_stride /= 4
191 | net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
192 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
193 | net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
194 | if global_pool:
195 | # Global average pooling.
196 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
197 | if num_classes is not None:
198 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
199 | normalizer_fn=None, scope='logits')
200 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
201 | # Convert end_points_collection into a dictionary of end_points.
202 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
203 | if num_classes is not None:
204 | end_points['predictions'] = slim.softmax(net, scope='predictions')
205 | return net, end_points
206 | resnet_v1.default_image_size = 224
207 |
208 |
209 | def resnet_v1_50(inputs,
210 | num_classes=None,
211 | is_training=True,
212 | global_pool=True,
213 | output_stride=None,
214 | reuse=None,
215 | scope='resnet_v1_50'):
216 | """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
217 | blocks = [
218 | resnet_utils.Block(
219 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
220 | resnet_utils.Block(
221 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
222 | resnet_utils.Block(
223 | 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
224 | resnet_utils.Block(
225 | 'block4', bottleneck, [(2048, 512, 1)] * 3)
226 | ]
227 | return resnet_v1(inputs, blocks, num_classes, is_training,
228 | global_pool=global_pool, output_stride=output_stride,
229 | include_root_block=True, reuse=reuse, scope=scope)
230 |
231 |
232 | def resnet_v1_101(inputs,
233 | num_classes=None,
234 | is_training=True,
235 | global_pool=True,
236 | output_stride=None,
237 | reuse=None,
238 | scope='resnet_v1_101'):
239 | """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
240 | blocks = [
241 | resnet_utils.Block(
242 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
243 | resnet_utils.Block(
244 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
245 | resnet_utils.Block(
246 | 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
247 | resnet_utils.Block(
248 | 'block4', bottleneck, [(2048, 512, 1)] * 3)
249 | ]
250 | return resnet_v1(inputs, blocks, num_classes, is_training,
251 | global_pool=global_pool, output_stride=output_stride,
252 | include_root_block=True, reuse=reuse, scope=scope)
253 |
254 |
255 | def resnet_v1_152(inputs,
256 | num_classes=None,
257 | is_training=True,
258 | global_pool=True,
259 | output_stride=None,
260 | reuse=None,
261 | scope='resnet_v1_152'):
262 | """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
263 | blocks = [
264 | resnet_utils.Block(
265 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
266 | resnet_utils.Block(
267 | 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
268 | resnet_utils.Block(
269 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
270 | resnet_utils.Block(
271 | 'block4', bottleneck, [(2048, 512, 1)] * 3)]
272 | return resnet_v1(inputs, blocks, num_classes, is_training,
273 | global_pool=global_pool, output_stride=output_stride,
274 | include_root_block=True, reuse=reuse, scope=scope)
275 |
276 |
277 | def resnet_v1_200(inputs,
278 | num_classes=None,
279 | is_training=True,
280 | global_pool=True,
281 | output_stride=None,
282 | reuse=None,
283 | scope='resnet_v1_200'):
284 | """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
285 | blocks = [
286 | resnet_utils.Block(
287 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
288 | resnet_utils.Block(
289 | 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
290 | resnet_utils.Block(
291 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
292 | resnet_utils.Block(
293 | 'block4', bottleneck, [(2048, 512, 1)] * 3)]
294 | return resnet_v1(inputs, blocks, num_classes, is_training,
295 | global_pool=global_pool, output_stride=output_stride,
296 | include_root_block=True, reuse=reuse, scope=scope)
297 |
--------------------------------------------------------------------------------
/tf/nets/resnet_v1_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for slim.nets.resnet_v1."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import numpy as np
22 | import tensorflow as tf
23 |
24 | from nets import resnet_utils
25 | from nets import resnet_v1
26 |
27 | slim = tf.contrib.slim
28 |
29 |
30 | def create_test_input(batch_size, height, width, channels):
31 | """Create test input tensor.
32 |
33 | Args:
34 | batch_size: The number of images per batch or `None` if unknown.
35 | height: The height of each image or `None` if unknown.
36 | width: The width of each image or `None` if unknown.
37 | channels: The number of channels per image or `None` if unknown.
38 |
39 | Returns:
40 | Either a placeholder `Tensor` of dimension
41 | [batch_size, height, width, channels] if any of the inputs are `None` or a
42 | constant `Tensor` with the mesh grid values along the spatial dimensions.
43 | """
44 | if None in [batch_size, height, width, channels]:
45 | return tf.placeholder(tf.float32, (batch_size, height, width, channels))
46 | else:
47 | return tf.to_float(
48 | np.tile(
49 | np.reshape(
50 | np.reshape(np.arange(height), [height, 1]) +
51 | np.reshape(np.arange(width), [1, width]),
52 | [1, height, width, 1]),
53 | [batch_size, 1, 1, channels]))
54 |
55 |
56 | class ResnetUtilsTest(tf.test.TestCase):
57 |
58 | def testSubsampleThreeByThree(self):
59 | x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
60 | x = resnet_utils.subsample(x, 2)
61 | expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
62 | with self.test_session():
63 | self.assertAllClose(x.eval(), expected.eval())
64 |
65 | def testSubsampleFourByFour(self):
66 | x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
67 | x = resnet_utils.subsample(x, 2)
68 | expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
69 | with self.test_session():
70 | self.assertAllClose(x.eval(), expected.eval())
71 |
72 | def testConv2DSameEven(self):
73 | n, n2 = 4, 2
74 |
75 | # Input image.
76 | x = create_test_input(1, n, n, 1)
77 |
78 | # Convolution kernel.
79 | w = create_test_input(1, 3, 3, 1)
80 | w = tf.reshape(w, [3, 3, 1, 1])
81 |
82 | tf.get_variable('Conv/weights', initializer=w)
83 | tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
84 | tf.get_variable_scope().reuse_variables()
85 |
86 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
87 | y1_expected = tf.to_float([[14, 28, 43, 26],
88 | [28, 48, 66, 37],
89 | [43, 66, 84, 46],
90 | [26, 37, 46, 22]])
91 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
92 |
93 | y2 = resnet_utils.subsample(y1, 2)
94 | y2_expected = tf.to_float([[14, 43],
95 | [43, 84]])
96 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
97 |
98 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
99 | y3_expected = y2_expected
100 |
101 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
102 | y4_expected = tf.to_float([[48, 37],
103 | [37, 22]])
104 | y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])
105 |
106 | with self.test_session() as sess:
107 | sess.run(tf.initialize_all_variables())
108 | self.assertAllClose(y1.eval(), y1_expected.eval())
109 | self.assertAllClose(y2.eval(), y2_expected.eval())
110 | self.assertAllClose(y3.eval(), y3_expected.eval())
111 | self.assertAllClose(y4.eval(), y4_expected.eval())
112 |
113 | def testConv2DSameOdd(self):
114 | n, n2 = 5, 3
115 |
116 | # Input image.
117 | x = create_test_input(1, n, n, 1)
118 |
119 | # Convolution kernel.
120 | w = create_test_input(1, 3, 3, 1)
121 | w = tf.reshape(w, [3, 3, 1, 1])
122 |
123 | tf.get_variable('Conv/weights', initializer=w)
124 | tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
125 | tf.get_variable_scope().reuse_variables()
126 |
127 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
128 | y1_expected = tf.to_float([[14, 28, 43, 58, 34],
129 | [28, 48, 66, 84, 46],
130 | [43, 66, 84, 102, 55],
131 | [58, 84, 102, 120, 64],
132 | [34, 46, 55, 64, 30]])
133 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
134 |
135 | y2 = resnet_utils.subsample(y1, 2)
136 | y2_expected = tf.to_float([[14, 43, 34],
137 | [43, 84, 55],
138 | [34, 55, 30]])
139 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
140 |
141 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
142 | y3_expected = y2_expected
143 |
144 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
145 | y4_expected = y2_expected
146 |
147 | with self.test_session() as sess:
148 | sess.run(tf.initialize_all_variables())
149 | self.assertAllClose(y1.eval(), y1_expected.eval())
150 | self.assertAllClose(y2.eval(), y2_expected.eval())
151 | self.assertAllClose(y3.eval(), y3_expected.eval())
152 | self.assertAllClose(y4.eval(), y4_expected.eval())
153 |
154 | def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
155 | """A plain ResNet without extra layers before or after the ResNet blocks."""
156 | with tf.variable_scope(scope, values=[inputs]):
157 | with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
158 | net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
159 | end_points = dict(tf.get_collection('end_points'))
160 | return net, end_points
161 |
162 | def testEndPointsV1(self):
163 | """Test the end points of a tiny v1 bottleneck network."""
164 | bottleneck = resnet_v1.bottleneck
165 | blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
166 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
167 | inputs = create_test_input(2, 32, 16, 3)
168 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
169 | _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
170 | expected = [
171 | 'tiny/block1/unit_1/bottleneck_v1/shortcut',
172 | 'tiny/block1/unit_1/bottleneck_v1/conv1',
173 | 'tiny/block1/unit_1/bottleneck_v1/conv2',
174 | 'tiny/block1/unit_1/bottleneck_v1/conv3',
175 | 'tiny/block1/unit_2/bottleneck_v1/conv1',
176 | 'tiny/block1/unit_2/bottleneck_v1/conv2',
177 | 'tiny/block1/unit_2/bottleneck_v1/conv3',
178 | 'tiny/block2/unit_1/bottleneck_v1/shortcut',
179 | 'tiny/block2/unit_1/bottleneck_v1/conv1',
180 | 'tiny/block2/unit_1/bottleneck_v1/conv2',
181 | 'tiny/block2/unit_1/bottleneck_v1/conv3',
182 | 'tiny/block2/unit_2/bottleneck_v1/conv1',
183 | 'tiny/block2/unit_2/bottleneck_v1/conv2',
184 | 'tiny/block2/unit_2/bottleneck_v1/conv3']
185 | self.assertItemsEqual(expected, end_points)
186 |
187 | def _stack_blocks_nondense(self, net, blocks):
188 | """A simplified ResNet Block stacker without output stride control."""
189 | for block in blocks:
190 | with tf.variable_scope(block.scope, 'block', [net]):
191 | for i, unit in enumerate(block.args):
192 | depth, depth_bottleneck, stride = unit
193 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
194 | net = block.unit_fn(net,
195 | depth=depth,
196 | depth_bottleneck=depth_bottleneck,
197 | stride=stride,
198 | rate=1)
199 | return net
200 |
201 | def _atrousValues(self, bottleneck):
202 | """Verify the values of dense feature extraction by atrous convolution.
203 |
204 | Make sure that dense feature extraction by stack_blocks_dense() followed by
205 | subsampling gives identical results to feature extraction at the nominal
206 | network output stride using the simple self._stack_blocks_nondense() above.
207 |
208 | Args:
209 | bottleneck: The bottleneck function.
210 | """
211 | blocks = [
212 | resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
213 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
214 | resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
215 | resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
216 | ]
217 | nominal_stride = 8
218 |
219 | # Test both odd and even input dimensions.
220 | height = 30
221 | width = 31
222 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
223 | with slim.arg_scope([slim.batch_norm], is_training=False):
224 | for output_stride in [1, 2, 4, 8, None]:
225 | with tf.Graph().as_default():
226 | with self.test_session() as sess:
227 | tf.set_random_seed(0)
228 | inputs = create_test_input(1, height, width, 3)
229 | # Dense feature extraction followed by subsampling.
230 | output = resnet_utils.stack_blocks_dense(inputs,
231 | blocks,
232 | output_stride)
233 | if output_stride is None:
234 | factor = 1
235 | else:
236 | factor = nominal_stride // output_stride
237 |
238 | output = resnet_utils.subsample(output, factor)
239 | # Make the two networks use the same weights.
240 | tf.get_variable_scope().reuse_variables()
241 | # Feature extraction at the nominal network rate.
242 | expected = self._stack_blocks_nondense(inputs, blocks)
243 | sess.run(tf.initialize_all_variables())
244 | output, expected = sess.run([output, expected])
245 | self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
246 |
247 | def testAtrousValuesBottleneck(self):
248 | self._atrousValues(resnet_v1.bottleneck)
249 |
250 |
251 | class ResnetCompleteNetworkTest(tf.test.TestCase):
252 | """Tests with complete small ResNet v1 networks."""
253 |
254 | def _resnet_small(self,
255 | inputs,
256 | num_classes=None,
257 | is_training=True,
258 | global_pool=True,
259 | output_stride=None,
260 | include_root_block=True,
261 | reuse=None,
262 | scope='resnet_v1_small'):
263 | """A shallow and thin ResNet v1 for faster tests."""
264 | bottleneck = resnet_v1.bottleneck
265 | blocks = [
266 | resnet_utils.Block(
267 | 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
268 | resnet_utils.Block(
269 | 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
270 | resnet_utils.Block(
271 | 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
272 | resnet_utils.Block(
273 | 'block4', bottleneck, [(32, 8, 1)] * 2)]
274 | return resnet_v1.resnet_v1(inputs, blocks, num_classes,
275 | is_training=is_training,
276 | global_pool=global_pool,
277 | output_stride=output_stride,
278 | include_root_block=include_root_block,
279 | reuse=reuse,
280 | scope=scope)
281 |
282 | def testClassificationEndPoints(self):
283 | global_pool = True
284 | num_classes = 10
285 | inputs = create_test_input(2, 224, 224, 3)
286 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
287 | logits, end_points = self._resnet_small(inputs, num_classes,
288 | global_pool=global_pool,
289 | scope='resnet')
290 | self.assertTrue(logits.op.name.startswith('resnet/logits'))
291 | self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
292 | self.assertTrue('predictions' in end_points)
293 | self.assertListEqual(end_points['predictions'].get_shape().as_list(),
294 | [2, 1, 1, num_classes])
295 |
296 | def testClassificationShapes(self):
297 | global_pool = True
298 | num_classes = 10
299 | inputs = create_test_input(2, 224, 224, 3)
300 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
301 | _, end_points = self._resnet_small(inputs, num_classes,
302 | global_pool=global_pool,
303 | scope='resnet')
304 | endpoint_to_shape = {
305 | 'resnet/block1': [2, 28, 28, 4],
306 | 'resnet/block2': [2, 14, 14, 8],
307 | 'resnet/block3': [2, 7, 7, 16],
308 | 'resnet/block4': [2, 7, 7, 32]}
309 | for endpoint in endpoint_to_shape:
310 | shape = endpoint_to_shape[endpoint]
311 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
312 |
313 | def testFullyConvolutionalEndpointShapes(self):
314 | global_pool = False
315 | num_classes = 10
316 | inputs = create_test_input(2, 321, 321, 3)
317 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
318 | _, end_points = self._resnet_small(inputs, num_classes,
319 | global_pool=global_pool,
320 | scope='resnet')
321 | endpoint_to_shape = {
322 | 'resnet/block1': [2, 41, 41, 4],
323 | 'resnet/block2': [2, 21, 21, 8],
324 | 'resnet/block3': [2, 11, 11, 16],
325 | 'resnet/block4': [2, 11, 11, 32]}
326 | for endpoint in endpoint_to_shape:
327 | shape = endpoint_to_shape[endpoint]
328 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
329 |
330 | def testRootlessFullyConvolutionalEndpointShapes(self):
331 | global_pool = False
332 | num_classes = 10
333 | inputs = create_test_input(2, 128, 128, 3)
334 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
335 | _, end_points = self._resnet_small(inputs, num_classes,
336 | global_pool=global_pool,
337 | include_root_block=False,
338 | scope='resnet')
339 | endpoint_to_shape = {
340 | 'resnet/block1': [2, 64, 64, 4],
341 | 'resnet/block2': [2, 32, 32, 8],
342 | 'resnet/block3': [2, 16, 16, 16],
343 | 'resnet/block4': [2, 16, 16, 32]}
344 | for endpoint in endpoint_to_shape:
345 | shape = endpoint_to_shape[endpoint]
346 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
347 |
348 | def testAtrousFullyConvolutionalEndpointShapes(self):
349 | global_pool = False
350 | num_classes = 10
351 | output_stride = 8
352 | inputs = create_test_input(2, 321, 321, 3)
353 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
354 | _, end_points = self._resnet_small(inputs,
355 | num_classes,
356 | global_pool=global_pool,
357 | output_stride=output_stride,
358 | scope='resnet')
359 | endpoint_to_shape = {
360 | 'resnet/block1': [2, 41, 41, 4],
361 | 'resnet/block2': [2, 41, 41, 8],
362 | 'resnet/block3': [2, 41, 41, 16],
363 | 'resnet/block4': [2, 41, 41, 32]}
364 | for endpoint in endpoint_to_shape:
365 | shape = endpoint_to_shape[endpoint]
366 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
367 |
368 | def testAtrousFullyConvolutionalValues(self):
369 | """Verify dense feature extraction with atrous convolution."""
370 | nominal_stride = 32
371 | for output_stride in [4, 8, 16, 32, None]:
372 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
373 | with tf.Graph().as_default():
374 | with self.test_session() as sess:
375 | tf.set_random_seed(0)
376 | inputs = create_test_input(2, 81, 81, 3)
377 | # Dense feature extraction followed by subsampling.
378 | output, _ = self._resnet_small(inputs, None, is_training=False,
379 | global_pool=False,
380 | output_stride=output_stride)
381 | if output_stride is None:
382 | factor = 1
383 | else:
384 | factor = nominal_stride // output_stride
385 | output = resnet_utils.subsample(output, factor)
386 | # Make the two networks use the same weights.
387 | tf.get_variable_scope().reuse_variables()
388 | # Feature extraction at the nominal network rate.
389 | expected, _ = self._resnet_small(inputs, None, is_training=False,
390 | global_pool=False)
391 | sess.run(tf.initialize_all_variables())
392 | self.assertAllClose(output.eval(), expected.eval(),
393 | atol=1e-4, rtol=1e-4)
394 |
395 | def testUnknownBatchSize(self):
396 | batch = 2
397 | height, width = 65, 65
398 | global_pool = True
399 | num_classes = 10
400 | inputs = create_test_input(None, height, width, 3)
401 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
402 | logits, _ = self._resnet_small(inputs, num_classes,
403 | global_pool=global_pool,
404 | scope='resnet')
405 | self.assertTrue(logits.op.name.startswith('resnet/logits'))
406 | self.assertListEqual(logits.get_shape().as_list(),
407 | [None, 1, 1, num_classes])
408 | images = create_test_input(batch, height, width, 3)
409 | with self.test_session() as sess:
410 | sess.run(tf.initialize_all_variables())
411 | output = sess.run(logits, {inputs: images.eval()})
412 | self.assertEqual(output.shape, (batch, 1, 1, num_classes))
413 |
414 | def testFullyConvolutionalUnknownHeightWidth(self):
415 | batch = 2
416 | height, width = 65, 65
417 | global_pool = False
418 | inputs = create_test_input(batch, None, None, 3)
419 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
420 | output, _ = self._resnet_small(inputs, None, global_pool=global_pool)
421 | self.assertListEqual(output.get_shape().as_list(),
422 | [batch, None, None, 32])
423 | images = create_test_input(batch, height, width, 3)
424 | with self.test_session() as sess:
425 | sess.run(tf.initialize_all_variables())
426 | output = sess.run(output, {inputs: images.eval()})
427 | self.assertEqual(output.shape, (batch, 3, 3, 32))
428 |
429 | def testAtrousFullyConvolutionalUnknownHeightWidth(self):
430 | batch = 2
431 | height, width = 65, 65
432 | global_pool = False
433 | output_stride = 8
434 | inputs = create_test_input(batch, None, None, 3)
435 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
436 | output, _ = self._resnet_small(inputs,
437 | None,
438 | global_pool=global_pool,
439 | output_stride=output_stride)
440 | self.assertListEqual(output.get_shape().as_list(),
441 | [batch, None, None, 32])
442 | images = create_test_input(batch, height, width, 3)
443 | with self.test_session() as sess:
444 | sess.run(tf.initialize_all_variables())
445 | output = sess.run(output, {inputs: images.eval()})
446 | self.assertEqual(output.shape, (batch, 9, 9, 32))
447 |
448 |
449 | if __name__ == '__main__':
450 | tf.test.main()
451 |
--------------------------------------------------------------------------------
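The subsample tests above pin down the behavior being verified: subsampling a feature map by a factor of 2 keeps every other row and column, so a 3x3 grid of the values 0..8 reduces to [0, 2, 6, 8]. A minimal NumPy sketch of that selection pattern, for illustration only (the helper `subsample_like` below is hypothetical and not part of resnet_utils):

import numpy as np

def subsample_like(x, factor):
    # Keep every `factor`-th row and column, matching the values the
    # testSubsample* cases above expect from resnet_utils.subsample.
    return x[:, ::factor, ::factor, :]

grid = np.arange(9, dtype=np.float32).reshape(1, 3, 3, 1)
print(subsample_like(grid, 2).reshape(2, 2))  # [[0. 2.] [6. 8.]]
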
/tf/nets/resnet_v2.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains definitions for the preactivation form of Residual Networks.
16 |
17 | Residual networks (ResNets) were originally proposed in:
18 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385
20 |
21 | The full preactivation 'v2' ResNet variant implemented in this module was
22 | introduced by:
23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
25 |
26 | The key difference of the full preactivation 'v2' variant compared to the
27 | 'v1' variant in [1] is the use of batch normalization before every weight layer.
28 | Another difference is that 'v2' ResNets do not include an activation function in
29 | the main pathway. Also see [2; Fig. 4e].
30 |
31 | Typical use:
32 |
33 | from tensorflow.contrib.slim.nets import resnet_v2
34 |
35 | ResNet-101 for image classification into 1000 classes:
36 |
37 | # inputs has shape [batch, 224, 224, 3]
38 | with slim.arg_scope(resnet_v2.resnet_arg_scope()):
39 | net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)
40 |
41 | ResNet-101 for semantic segmentation into 21 classes:
42 |
43 | # inputs has shape [batch, 513, 513, 3]
44 |    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
45 | net, end_points = resnet_v2.resnet_v2_101(inputs,
46 | 21,
47 | is_training=False,
48 | global_pool=False,
49 | output_stride=16)
50 | """
51 | from __future__ import absolute_import
52 | from __future__ import division
53 | from __future__ import print_function
54 |
55 | import tensorflow as tf
56 |
57 | from nets import resnet_utils
58 |
59 | slim = tf.contrib.slim
60 | resnet_arg_scope = resnet_utils.resnet_arg_scope
61 |
62 |
63 | @slim.add_arg_scope
64 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
65 | outputs_collections=None, scope=None):
66 | """Bottleneck residual unit variant with BN before convolutions.
67 |
68 | This is the full preactivation residual unit variant proposed in [2]. See
69 | Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
70 | variant which has an extra bottleneck layer.
71 |
72 | When putting together two consecutive ResNet blocks that use this unit, one
73 | should use stride = 2 in the last unit of the first block.
74 |
75 | Args:
76 | inputs: A tensor of size [batch, height, width, channels].
77 | depth: The depth of the ResNet unit output.
78 | depth_bottleneck: The depth of the bottleneck layers.
79 | stride: The ResNet unit's stride. Determines the amount of downsampling of
80 |       the unit's output compared to its input.
81 | rate: An integer, rate for atrous convolution.
82 | outputs_collections: Collection to add the ResNet unit output.
83 | scope: Optional variable_scope.
84 |
85 | Returns:
86 | The ResNet unit's output.
87 | """
88 | with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
89 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
90 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
91 | if depth == depth_in:
92 | shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
93 | else:
94 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
95 | normalizer_fn=None, activation_fn=None,
96 | scope='shortcut')
97 |
98 | residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
99 | scope='conv1')
100 | residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
101 | rate=rate, scope='conv2')
102 | residual = slim.conv2d(residual, depth, [1, 1], stride=1,
103 | normalizer_fn=None, activation_fn=None,
104 | scope='conv3')
105 |
106 | output = shortcut + residual
107 |
108 | return slim.utils.collect_named_outputs(outputs_collections,
109 | sc.original_name_scope,
110 | output)
111 |
112 |
113 | def resnet_v2(inputs,
114 | blocks,
115 | num_classes=None,
116 | is_training=True,
117 | global_pool=True,
118 | output_stride=None,
119 | include_root_block=True,
120 | reuse=None,
121 | scope=None):
122 | """Generator for v2 (preactivation) ResNet models.
123 |
124 | This function generates a family of ResNet v2 models. See the resnet_v2_*()
125 | methods for specific model instantiations, obtained by selecting different
126 | block instantiations that produce ResNets of various depths.
127 |
128 | Training for image classification on Imagenet is usually done with [224, 224]
129 | inputs, resulting in [7, 7] feature maps at the output of the last ResNet
130 | block for the ResNets defined in [1] that have nominal stride equal to 32.
131 | However, for dense prediction tasks we advise using inputs with
132 | spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
133 | this case the feature maps at the ResNet output will have spatial shape
134 | [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
135 | and corners exactly aligned with the input image corners, which greatly
136 | facilitates alignment of the features to the image. Using as input [225, 225]
137 | images results in [8, 8] feature maps at the output of the last ResNet block.
138 |
139 | For dense prediction tasks, the ResNet needs to run in fully-convolutional
140 | (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
141 | have nominal stride equal to 32 and a good choice in FCN mode is to use
142 | output_stride=16 in order to increase the density of the computed features at
143 | small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
144 |
145 | Args:
146 | inputs: A tensor of size [batch, height_in, width_in, channels].
147 | blocks: A list of length equal to the number of ResNet blocks. Each element
148 | is a resnet_utils.Block object describing the units in the block.
149 | num_classes: Number of predicted classes for classification tasks. If None
150 | we return the features before the logit layer.
151 |     is_training: whether batch_norm layers are in training mode.
152 | global_pool: If True, we perform global average pooling before computing the
153 | logits. Set to True for image classification, False for dense prediction.
154 | output_stride: If None, then the output will be computed at the nominal
155 | network stride. If output_stride is not None, it specifies the requested
156 | ratio of input to output spatial resolution.
157 |     include_root_block: If True, include the initial convolution followed by
158 |       max-pooling; if False, exclude it. If excluded, `inputs` should be the
159 |       result of an activation-less convolution.
160 |     reuse: whether or not the network and its variables should be reused. To be
161 |       able to reuse, 'scope' must be given.
162 | scope: Optional variable_scope.
163 |
164 |
165 | Returns:
166 | net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
167 | If global_pool is False, then height_out and width_out are reduced by a
168 | factor of output_stride compared to the respective height_in and width_in,
169 | else both height_out and width_out equal one. If num_classes is None, then
170 | net is the output of the last ResNet block, potentially after global
171 | average pooling. If num_classes is not None, net contains the pre-softmax
172 | activations.
173 | end_points: A dictionary from components of the network to the corresponding
174 | activation.
175 |
176 | Raises:
177 | ValueError: If the target output_stride is not valid.
178 | """
179 | with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
180 | end_points_collection = sc.name + '_end_points'
181 | with slim.arg_scope([slim.conv2d, bottleneck,
182 | resnet_utils.stack_blocks_dense],
183 | outputs_collections=end_points_collection):
184 | with slim.arg_scope([slim.batch_norm], is_training=is_training):
185 | net = inputs
186 | if include_root_block:
187 | if output_stride is not None:
188 | if output_stride % 4 != 0:
189 | raise ValueError('The output_stride needs to be a multiple of 4.')
190 | output_stride /= 4
191 | # We do not include batch normalization or activation functions in
192 | # conv1 because the first ResNet unit will perform these. Cf.
193 | # Appendix of [2].
194 | with slim.arg_scope([slim.conv2d],
195 | activation_fn=None, normalizer_fn=None):
196 | net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
197 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
198 | net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
199 | # This is needed because the pre-activation variant does not have batch
200 | # normalization or activation functions in the residual unit output. See
201 | # Appendix of [2].
202 | net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
203 | if global_pool:
204 | # Global average pooling.
205 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
206 | if num_classes is not None:
207 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
208 | normalizer_fn=None, scope='logits')
209 | # Convert end_points_collection into a dictionary of end_points.
210 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
211 | if num_classes is not None:
212 | end_points['predictions'] = slim.softmax(net, scope='predictions')
213 | return net, end_points
214 | resnet_v2.default_image_size = 224
215 |
216 |
217 | def resnet_v2_50(inputs,
218 | num_classes=None,
219 | is_training=True,
220 | global_pool=True,
221 | output_stride=None,
222 | reuse=None,
223 | scope='resnet_v2_50'):
224 | """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
225 | blocks = [
226 | resnet_utils.Block(
227 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
228 | resnet_utils.Block(
229 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
230 | resnet_utils.Block(
231 | 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
232 | resnet_utils.Block(
233 | 'block4', bottleneck, [(2048, 512, 1)] * 3)]
234 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
235 | global_pool=global_pool, output_stride=output_stride,
236 | include_root_block=True, reuse=reuse, scope=scope)
237 |
238 |
239 | def resnet_v2_101(inputs,
240 | num_classes=None,
241 | is_training=True,
242 | global_pool=True,
243 | output_stride=None,
244 | reuse=None,
245 | scope='resnet_v2_101'):
246 | """ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
247 | blocks = [
248 | resnet_utils.Block(
249 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
250 | resnet_utils.Block(
251 | 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
252 | resnet_utils.Block(
253 | 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
254 | resnet_utils.Block(
255 | 'block4', bottleneck, [(2048, 512, 1)] * 3)]
256 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
257 | global_pool=global_pool, output_stride=output_stride,
258 | include_root_block=True, reuse=reuse, scope=scope)
259 |
260 |
261 | def resnet_v2_152(inputs,
262 | num_classes=None,
263 | is_training=True,
264 | global_pool=True,
265 | output_stride=None,
266 | reuse=None,
267 | scope='resnet_v2_152'):
268 | """ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
269 | blocks = [
270 | resnet_utils.Block(
271 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
272 | resnet_utils.Block(
273 | 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
274 | resnet_utils.Block(
275 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
276 | resnet_utils.Block(
277 | 'block4', bottleneck, [(2048, 512, 1)] * 3)]
278 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
279 | global_pool=global_pool, output_stride=output_stride,
280 | include_root_block=True, reuse=reuse, scope=scope)
281 |
282 |
283 | def resnet_v2_200(inputs,
284 | num_classes=None,
285 | is_training=True,
286 | global_pool=True,
287 | output_stride=None,
288 | reuse=None,
289 | scope='resnet_v2_200'):
290 | """ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
291 | blocks = [
292 | resnet_utils.Block(
293 | 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
294 | resnet_utils.Block(
295 | 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
296 | resnet_utils.Block(
297 | 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
298 | resnet_utils.Block(
299 | 'block4', bottleneck, [(2048, 512, 1)] * 3)]
300 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
301 | global_pool=global_pool, output_stride=output_stride,
302 | include_root_block=True, reuse=reuse, scope=scope)
303 |
--------------------------------------------------------------------------------
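The resnet_v2 docstring above gives the spatial-shape rule for dense prediction: with inputs whose height and width are a multiple of 32 plus 1, the output feature maps have shape [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]. A small sketch of that arithmetic (the helper `dense_output_size` is illustrative and not part of this repository):

def dense_output_size(height, width, output_stride):
    # Feature-map shape for 'multiple of 32 plus 1' inputs, per the
    # resnet_v2 docstring above.
    return ((height - 1) // output_stride + 1,
            (width - 1) // output_stride + 1)

print(dense_output_size(321, 321, 16))  # (21, 21)
print(dense_output_size(321, 321, 8))   # (41, 41), as asserted in the tests below
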
/tf/nets/resnet_v2_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for slim.nets.resnet_v2."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import numpy as np
22 | import tensorflow as tf
23 |
24 | from nets import resnet_utils
25 | from nets import resnet_v2
26 |
27 | slim = tf.contrib.slim
28 |
29 |
30 | def create_test_input(batch_size, height, width, channels):
31 | """Create test input tensor.
32 |
33 | Args:
34 | batch_size: The number of images per batch or `None` if unknown.
35 | height: The height of each image or `None` if unknown.
36 | width: The width of each image or `None` if unknown.
37 | channels: The number of channels per image or `None` if unknown.
38 |
39 | Returns:
40 | Either a placeholder `Tensor` of dimension
41 | [batch_size, height, width, channels] if any of the inputs are `None` or a
42 | constant `Tensor` with the mesh grid values along the spatial dimensions.
43 | """
44 | if None in [batch_size, height, width, channels]:
45 | return tf.placeholder(tf.float32, (batch_size, height, width, channels))
46 | else:
47 | return tf.to_float(
48 | np.tile(
49 | np.reshape(
50 | np.reshape(np.arange(height), [height, 1]) +
51 | np.reshape(np.arange(width), [1, width]),
52 | [1, height, width, 1]),
53 | [batch_size, 1, 1, channels]))
54 |
55 |
56 | class ResnetUtilsTest(tf.test.TestCase):
57 |
58 | def testSubsampleThreeByThree(self):
59 | x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
60 | x = resnet_utils.subsample(x, 2)
61 | expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
62 | with self.test_session():
63 | self.assertAllClose(x.eval(), expected.eval())
64 |
65 | def testSubsampleFourByFour(self):
66 | x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
67 | x = resnet_utils.subsample(x, 2)
68 | expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
69 | with self.test_session():
70 | self.assertAllClose(x.eval(), expected.eval())
71 |
72 | def testConv2DSameEven(self):
73 | n, n2 = 4, 2
74 |
75 | # Input image.
76 | x = create_test_input(1, n, n, 1)
77 |
78 | # Convolution kernel.
79 | w = create_test_input(1, 3, 3, 1)
80 | w = tf.reshape(w, [3, 3, 1, 1])
81 |
82 | tf.get_variable('Conv/weights', initializer=w)
83 | tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
84 | tf.get_variable_scope().reuse_variables()
85 |
86 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
87 | y1_expected = tf.to_float([[14, 28, 43, 26],
88 | [28, 48, 66, 37],
89 | [43, 66, 84, 46],
90 | [26, 37, 46, 22]])
91 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
92 |
93 | y2 = resnet_utils.subsample(y1, 2)
94 | y2_expected = tf.to_float([[14, 43],
95 | [43, 84]])
96 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
97 |
98 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
99 | y3_expected = y2_expected
100 |
101 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
102 | y4_expected = tf.to_float([[48, 37],
103 | [37, 22]])
104 | y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])
105 |
106 | with self.test_session() as sess:
107 | sess.run(tf.initialize_all_variables())
108 | self.assertAllClose(y1.eval(), y1_expected.eval())
109 | self.assertAllClose(y2.eval(), y2_expected.eval())
110 | self.assertAllClose(y3.eval(), y3_expected.eval())
111 | self.assertAllClose(y4.eval(), y4_expected.eval())
112 |
113 | def testConv2DSameOdd(self):
114 | n, n2 = 5, 3
115 |
116 | # Input image.
117 | x = create_test_input(1, n, n, 1)
118 |
119 | # Convolution kernel.
120 | w = create_test_input(1, 3, 3, 1)
121 | w = tf.reshape(w, [3, 3, 1, 1])
122 |
123 | tf.get_variable('Conv/weights', initializer=w)
124 | tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
125 | tf.get_variable_scope().reuse_variables()
126 |
127 | y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
128 | y1_expected = tf.to_float([[14, 28, 43, 58, 34],
129 | [28, 48, 66, 84, 46],
130 | [43, 66, 84, 102, 55],
131 | [58, 84, 102, 120, 64],
132 | [34, 46, 55, 64, 30]])
133 | y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
134 |
135 | y2 = resnet_utils.subsample(y1, 2)
136 | y2_expected = tf.to_float([[14, 43, 34],
137 | [43, 84, 55],
138 | [34, 55, 30]])
139 | y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
140 |
141 | y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
142 | y3_expected = y2_expected
143 |
144 | y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
145 | y4_expected = y2_expected
146 |
147 | with self.test_session() as sess:
148 | sess.run(tf.initialize_all_variables())
149 | self.assertAllClose(y1.eval(), y1_expected.eval())
150 | self.assertAllClose(y2.eval(), y2_expected.eval())
151 | self.assertAllClose(y3.eval(), y3_expected.eval())
152 | self.assertAllClose(y4.eval(), y4_expected.eval())
153 |
154 | def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
155 | """A plain ResNet without extra layers before or after the ResNet blocks."""
156 | with tf.variable_scope(scope, values=[inputs]):
157 | with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
158 | net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
159 | end_points = dict(tf.get_collection('end_points'))
160 | return net, end_points
161 |
162 | def testEndPointsV2(self):
163 | """Test the end points of a tiny v2 bottleneck network."""
164 | bottleneck = resnet_v2.bottleneck
165 | blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
166 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
167 | inputs = create_test_input(2, 32, 16, 3)
168 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
169 | _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
170 | expected = [
171 | 'tiny/block1/unit_1/bottleneck_v2/shortcut',
172 | 'tiny/block1/unit_1/bottleneck_v2/conv1',
173 | 'tiny/block1/unit_1/bottleneck_v2/conv2',
174 | 'tiny/block1/unit_1/bottleneck_v2/conv3',
175 | 'tiny/block1/unit_2/bottleneck_v2/conv1',
176 | 'tiny/block1/unit_2/bottleneck_v2/conv2',
177 | 'tiny/block1/unit_2/bottleneck_v2/conv3',
178 | 'tiny/block2/unit_1/bottleneck_v2/shortcut',
179 | 'tiny/block2/unit_1/bottleneck_v2/conv1',
180 | 'tiny/block2/unit_1/bottleneck_v2/conv2',
181 | 'tiny/block2/unit_1/bottleneck_v2/conv3',
182 | 'tiny/block2/unit_2/bottleneck_v2/conv1',
183 | 'tiny/block2/unit_2/bottleneck_v2/conv2',
184 | 'tiny/block2/unit_2/bottleneck_v2/conv3']
185 | self.assertItemsEqual(expected, end_points)
186 |
187 | def _stack_blocks_nondense(self, net, blocks):
188 | """A simplified ResNet Block stacker without output stride control."""
189 | for block in blocks:
190 | with tf.variable_scope(block.scope, 'block', [net]):
191 | for i, unit in enumerate(block.args):
192 | depth, depth_bottleneck, stride = unit
193 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
194 | net = block.unit_fn(net,
195 | depth=depth,
196 | depth_bottleneck=depth_bottleneck,
197 | stride=stride,
198 | rate=1)
199 | return net
200 |
201 | def _atrousValues(self, bottleneck):
202 | """Verify the values of dense feature extraction by atrous convolution.
203 |
204 | Make sure that dense feature extraction by stack_blocks_dense() followed by
205 | subsampling gives identical results to feature extraction at the nominal
206 | network output stride using the simple self._stack_blocks_nondense() above.
207 |
208 | Args:
209 | bottleneck: The bottleneck function.
210 | """
211 | blocks = [
212 | resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
213 | resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
214 | resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
215 | resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
216 | ]
217 | nominal_stride = 8
218 |
219 | # Test both odd and even input dimensions.
220 | height = 30
221 | width = 31
222 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
223 | with slim.arg_scope([slim.batch_norm], is_training=False):
224 | for output_stride in [1, 2, 4, 8, None]:
225 | with tf.Graph().as_default():
226 | with self.test_session() as sess:
227 | tf.set_random_seed(0)
228 | inputs = create_test_input(1, height, width, 3)
229 | # Dense feature extraction followed by subsampling.
230 | output = resnet_utils.stack_blocks_dense(inputs,
231 | blocks,
232 | output_stride)
233 | if output_stride is None:
234 | factor = 1
235 | else:
236 | factor = nominal_stride // output_stride
237 |
238 | output = resnet_utils.subsample(output, factor)
239 | # Make the two networks use the same weights.
240 | tf.get_variable_scope().reuse_variables()
241 | # Feature extraction at the nominal network rate.
242 | expected = self._stack_blocks_nondense(inputs, blocks)
243 | sess.run(tf.initialize_all_variables())
244 | output, expected = sess.run([output, expected])
245 | self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
246 |
247 | def testAtrousValuesBottleneck(self):
248 | self._atrousValues(resnet_v2.bottleneck)
249 |
250 |
251 | class ResnetCompleteNetworkTest(tf.test.TestCase):
252 | """Tests with complete small ResNet v2 networks."""
253 |
254 | def _resnet_small(self,
255 | inputs,
256 | num_classes=None,
257 | is_training=True,
258 | global_pool=True,
259 | output_stride=None,
260 | include_root_block=True,
261 | reuse=None,
262 | scope='resnet_v2_small'):
263 | """A shallow and thin ResNet v2 for faster tests."""
264 | bottleneck = resnet_v2.bottleneck
265 | blocks = [
266 | resnet_utils.Block(
267 | 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
268 | resnet_utils.Block(
269 | 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
270 | resnet_utils.Block(
271 | 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
272 | resnet_utils.Block(
273 | 'block4', bottleneck, [(32, 8, 1)] * 2)]
274 | return resnet_v2.resnet_v2(inputs, blocks, num_classes,
275 | is_training=is_training,
276 | global_pool=global_pool,
277 | output_stride=output_stride,
278 | include_root_block=include_root_block,
279 | reuse=reuse,
280 | scope=scope)
281 |
282 | def testClassificationEndPoints(self):
283 | global_pool = True
284 | num_classes = 10
285 | inputs = create_test_input(2, 224, 224, 3)
286 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
287 | logits, end_points = self._resnet_small(inputs, num_classes,
288 | global_pool=global_pool,
289 | scope='resnet')
290 | self.assertTrue(logits.op.name.startswith('resnet/logits'))
291 | self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
292 | self.assertTrue('predictions' in end_points)
293 | self.assertListEqual(end_points['predictions'].get_shape().as_list(),
294 | [2, 1, 1, num_classes])
295 |
296 | def testClassificationShapes(self):
297 | global_pool = True
298 | num_classes = 10
299 | inputs = create_test_input(2, 224, 224, 3)
300 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
301 | _, end_points = self._resnet_small(inputs, num_classes,
302 | global_pool=global_pool,
303 | scope='resnet')
304 | endpoint_to_shape = {
305 | 'resnet/block1': [2, 28, 28, 4],
306 | 'resnet/block2': [2, 14, 14, 8],
307 | 'resnet/block3': [2, 7, 7, 16],
308 | 'resnet/block4': [2, 7, 7, 32]}
309 | for endpoint in endpoint_to_shape:
310 | shape = endpoint_to_shape[endpoint]
311 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
312 |
313 | def testFullyConvolutionalEndpointShapes(self):
314 | global_pool = False
315 | num_classes = 10
316 | inputs = create_test_input(2, 321, 321, 3)
317 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
318 | _, end_points = self._resnet_small(inputs, num_classes,
319 | global_pool=global_pool,
320 | scope='resnet')
321 | endpoint_to_shape = {
322 | 'resnet/block1': [2, 41, 41, 4],
323 | 'resnet/block2': [2, 21, 21, 8],
324 | 'resnet/block3': [2, 11, 11, 16],
325 | 'resnet/block4': [2, 11, 11, 32]}
326 | for endpoint in endpoint_to_shape:
327 | shape = endpoint_to_shape[endpoint]
328 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
329 |
330 | def testRootlessFullyConvolutionalEndpointShapes(self):
331 | global_pool = False
332 | num_classes = 10
333 | inputs = create_test_input(2, 128, 128, 3)
334 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
335 | _, end_points = self._resnet_small(inputs, num_classes,
336 | global_pool=global_pool,
337 | include_root_block=False,
338 | scope='resnet')
339 | endpoint_to_shape = {
340 | 'resnet/block1': [2, 64, 64, 4],
341 | 'resnet/block2': [2, 32, 32, 8],
342 | 'resnet/block3': [2, 16, 16, 16],
343 | 'resnet/block4': [2, 16, 16, 32]}
344 | for endpoint in endpoint_to_shape:
345 | shape = endpoint_to_shape[endpoint]
346 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
347 |
348 | def testAtrousFullyConvolutionalEndpointShapes(self):
349 | global_pool = False
350 | num_classes = 10
351 | output_stride = 8
352 | inputs = create_test_input(2, 321, 321, 3)
353 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
354 | _, end_points = self._resnet_small(inputs,
355 | num_classes,
356 | global_pool=global_pool,
357 | output_stride=output_stride,
358 | scope='resnet')
359 | endpoint_to_shape = {
360 | 'resnet/block1': [2, 41, 41, 4],
361 | 'resnet/block2': [2, 41, 41, 8],
362 | 'resnet/block3': [2, 41, 41, 16],
363 | 'resnet/block4': [2, 41, 41, 32]}
364 | for endpoint in endpoint_to_shape:
365 | shape = endpoint_to_shape[endpoint]
366 | self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
367 |
368 | def testAtrousFullyConvolutionalValues(self):
369 | """Verify dense feature extraction with atrous convolution."""
370 | nominal_stride = 32
371 | for output_stride in [4, 8, 16, 32, None]:
372 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
373 | with tf.Graph().as_default():
374 | with self.test_session() as sess:
375 | tf.set_random_seed(0)
376 | inputs = create_test_input(2, 81, 81, 3)
377 | # Dense feature extraction followed by subsampling.
378 | output, _ = self._resnet_small(inputs, None,
379 | is_training=False,
380 | global_pool=False,
381 | output_stride=output_stride)
382 | if output_stride is None:
383 | factor = 1
384 | else:
385 | factor = nominal_stride // output_stride
386 | output = resnet_utils.subsample(output, factor)
387 | # Make the two networks use the same weights.
388 | tf.get_variable_scope().reuse_variables()
389 | # Feature extraction at the nominal network rate.
390 | expected, _ = self._resnet_small(inputs, None,
391 | is_training=False,
392 | global_pool=False)
393 | sess.run(tf.initialize_all_variables())
394 | self.assertAllClose(output.eval(), expected.eval(),
395 | atol=1e-4, rtol=1e-4)
396 |
397 | def testUnknownBatchSize(self):
398 | batch = 2
399 | height, width = 65, 65
400 | global_pool = True
401 | num_classes = 10
402 | inputs = create_test_input(None, height, width, 3)
403 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
404 | logits, _ = self._resnet_small(inputs, num_classes,
405 | global_pool=global_pool,
406 | scope='resnet')
407 | self.assertTrue(logits.op.name.startswith('resnet/logits'))
408 | self.assertListEqual(logits.get_shape().as_list(),
409 | [None, 1, 1, num_classes])
410 | images = create_test_input(batch, height, width, 3)
411 | with self.test_session() as sess:
412 | sess.run(tf.initialize_all_variables())
413 | output = sess.run(logits, {inputs: images.eval()})
414 | self.assertEqual(output.shape, (batch, 1, 1, num_classes))
415 |
416 | def testFullyConvolutionalUnknownHeightWidth(self):
417 | batch = 2
418 | height, width = 65, 65
419 | global_pool = False
420 | inputs = create_test_input(batch, None, None, 3)
421 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
422 | output, _ = self._resnet_small(inputs, None,
423 | global_pool=global_pool)
424 | self.assertListEqual(output.get_shape().as_list(),
425 | [batch, None, None, 32])
426 | images = create_test_input(batch, height, width, 3)
427 | with self.test_session() as sess:
428 | sess.run(tf.initialize_all_variables())
429 | output = sess.run(output, {inputs: images.eval()})
430 | self.assertEqual(output.shape, (batch, 3, 3, 32))
431 |
432 | def testAtrousFullyConvolutionalUnknownHeightWidth(self):
433 | batch = 2
434 | height, width = 65, 65
435 | global_pool = False
436 | output_stride = 8
437 | inputs = create_test_input(batch, None, None, 3)
438 | with slim.arg_scope(resnet_utils.resnet_arg_scope()):
439 | output, _ = self._resnet_small(inputs,
440 | None,
441 | global_pool=global_pool,
442 | output_stride=output_stride)
443 | self.assertListEqual(output.get_shape().as_list(),
444 | [batch, None, None, 32])
445 | images = create_test_input(batch, height, width, 3)
446 | with self.test_session() as sess:
447 | sess.run(tf.initialize_all_variables())
448 | output = sess.run(output, {inputs: images.eval()})
449 | self.assertEqual(output.shape, (batch, 9, 9, 32))
450 |
451 |
452 | if __name__ == '__main__':
453 | tf.test.main()
454 |
--------------------------------------------------------------------------------
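create_test_input returns a placeholder whenever any dimension is unknown, which is what lets the tests above build the graph with a None batch size or None spatial dimensions and feed concrete images only at session run time. A minimal sketch of that pattern under TF 1.x, using a toy reduce_mean op as a stand-in for the ResNet (not the actual network):

import numpy as np
import tensorflow as tf

# Unknown batch dimension at graph-construction time.
inputs = tf.placeholder(tf.float32, (None, 65, 65, 3))
# Stand-in for the network; keep_dims mirrors the global pooling in resnet_v2.
pooled = tf.reduce_mean(inputs, [1, 2], keep_dims=True)
with tf.Session() as sess:
    images = np.zeros((2, 65, 65, 3), dtype=np.float32)
    # Static shape is (?, 1, 1, 3); feeding two images yields (2, 1, 1, 3).
    print(sess.run(pooled, {inputs: images}).shape)
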
/tf/nets/vgg.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains model definitions for versions of the Oxford VGG network.
16 |
17 | These model definitions were introduced in the following technical report:
18 |
19 | Very Deep Convolutional Networks For Large-Scale Image Recognition
20 | Karen Simonyan and Andrew Zisserman
21 | arXiv technical report, 2015
22 | PDF: http://arxiv.org/pdf/1409.1556.pdf
23 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
24 | CC-BY-4.0
25 |
26 | More information can be obtained from the VGG website:
27 | www.robots.ox.ac.uk/~vgg/research/very_deep/
28 |
29 | Usage:
30 | with slim.arg_scope(vgg.vgg_arg_scope()):
31 | outputs, end_points = vgg.vgg_a(inputs)
32 |
33 | with slim.arg_scope(vgg.vgg_arg_scope()):
34 | outputs, end_points = vgg.vgg_16(inputs)
35 |
36 | @@vgg_a
37 | @@vgg_16
38 | @@vgg_19
39 | """
40 | from __future__ import absolute_import
41 | from __future__ import division
42 | from __future__ import print_function
43 |
44 | import tensorflow as tf
45 |
46 | slim = tf.contrib.slim
47 |
48 |
49 | def vgg_arg_scope(weight_decay=0.0005):
50 | """Defines the VGG arg scope.
51 |
52 | Args:
53 | weight_decay: The l2 regularization coefficient.
54 |
55 | Returns:
56 | An arg_scope.
57 | """
58 | with slim.arg_scope([slim.conv2d, slim.fully_connected],
59 | activation_fn=tf.nn.relu,
60 | weights_regularizer=slim.l2_regularizer(weight_decay),
61 | biases_initializer=tf.zeros_initializer):
62 | with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
63 | return arg_sc
64 |
65 |
66 | def vgg_a(inputs,
67 | num_classes=1000,
68 | is_training=True,
69 | dropout_keep_prob=0.5,
70 | spatial_squeeze=True,
71 | scope='vgg_a'):
72 | """Oxford Net VGG 11-Layers version A Example.
73 |
74 | Note: All the fully_connected layers have been transformed to conv2d layers.
75 | To use in classification mode, resize input to 224x224.
76 |
77 | Args:
78 | inputs: a tensor of size [batch_size, height, width, channels].
79 | num_classes: number of predicted classes.
80 | is_training: whether or not the model is being trained.
81 | dropout_keep_prob: the probability that activations are kept in the dropout
82 | layers during training.
83 |     spatial_squeeze: whether or not to squeeze the spatial dimensions of the
84 | outputs. Useful to remove unnecessary dimensions for classification.
85 | scope: Optional scope for the variables.
86 |
87 | Returns:
88 | the last op containing the log predictions and end_points dict.
89 | """
90 | with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
91 | end_points_collection = sc.name + '_end_points'
92 | # Collect outputs for conv2d, fully_connected and max_pool2d.
93 | with slim.arg_scope([slim.conv2d, slim.max_pool2d],
94 | outputs_collections=end_points_collection):
95 | net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
96 | net = slim.max_pool2d(net, [2, 2], scope='pool1')
97 | net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
98 | net = slim.max_pool2d(net, [2, 2], scope='pool2')
99 | net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
100 | net = slim.max_pool2d(net, [2, 2], scope='pool3')
101 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
102 | net = slim.max_pool2d(net, [2, 2], scope='pool4')
103 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
104 | net = slim.max_pool2d(net, [2, 2], scope='pool5')
105 | # Use conv2d instead of fully_connected layers.
106 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
107 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
108 | scope='dropout6')
109 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
110 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
111 | scope='dropout7')
112 | net = slim.conv2d(net, num_classes, [1, 1],
113 | activation_fn=None,
114 | normalizer_fn=None,
115 | scope='fc8')
116 |       # Convert end_points_collection into an end_point dict.
117 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
118 | if spatial_squeeze:
119 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
120 | end_points[sc.name + '/fc8'] = net
121 | return net, end_points
122 | vgg_a.default_image_size = 224
123 |
124 |
125 | def vgg_16(inputs,
126 | num_classes=1000,
127 | is_training=True,
128 | dropout_keep_prob=0.5,
129 | spatial_squeeze=True,
130 | scope='vgg_16'):
131 | """Oxford Net VGG 16-Layers version D Example.
132 |
133 | Note: All the fully_connected layers have been transformed to conv2d layers.
134 | To use in classification mode, resize input to 224x224.
135 |
136 | Args:
137 | inputs: a tensor of size [batch_size, height, width, channels].
138 | num_classes: number of predicted classes.
139 | is_training: whether or not the model is being trained.
140 | dropout_keep_prob: the probability that activations are kept in the dropout
141 | layers during training.
142 |     spatial_squeeze: whether or not to squeeze the spatial dimensions of the
143 | outputs. Useful to remove unnecessary dimensions for classification.
144 | scope: Optional scope for the variables.
145 |
146 | Returns:
147 | the last op containing the log predictions and end_points dict.
148 | """
149 | with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
150 | end_points_collection = sc.name + '_end_points'
151 | # Collect outputs for conv2d, fully_connected and max_pool2d.
152 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
153 | outputs_collections=end_points_collection):
154 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
155 | net = slim.max_pool2d(net, [2, 2], scope='pool1')
156 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
157 | net = slim.max_pool2d(net, [2, 2], scope='pool2')
158 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
159 | net = slim.max_pool2d(net, [2, 2], scope='pool3')
160 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
161 | net = slim.max_pool2d(net, [2, 2], scope='pool4')
162 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
163 | net = slim.max_pool2d(net, [2, 2], scope='pool5')
164 | # Use conv2d instead of fully_connected layers.
165 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
166 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
167 | scope='dropout6')
168 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
169 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
170 | scope='dropout7')
171 | net = slim.conv2d(net, num_classes, [1, 1],
172 | activation_fn=None,
173 | normalizer_fn=None,
174 | scope='fc8')
175 | # Convert end_points_collection into a end_point dict.
176 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
177 | if spatial_squeeze:
178 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
179 | end_points[sc.name + '/fc8'] = net
180 | return net, end_points
181 | vgg_16.default_image_size = 224
182 |
183 |
184 | def vgg_19(inputs,
185 | num_classes=1000,
186 | is_training=True,
187 | dropout_keep_prob=0.5,
188 | spatial_squeeze=True,
189 | scope='vgg_19'):
190 | """Oxford Net VGG 19-Layers version E Example.
191 |
192 | Note: All the fully_connected layers have been transformed to conv2d layers.
193 | To use in classification mode, resize input to 224x224.
194 |
195 | Args:
196 | inputs: a tensor of size [batch_size, height, width, channels].
197 | num_classes: number of predicted classes.
198 | is_training: whether or not the model is being trained.
199 | dropout_keep_prob: the probability that activations are kept in the dropout
200 | layers during training.
201 |     spatial_squeeze: whether or not to squeeze the spatial dimensions of the
202 | outputs. Useful to remove unnecessary dimensions for classification.
203 | scope: Optional scope for the variables.
204 |
205 | Returns:
206 | the last op containing the log predictions and end_points dict.
207 | """
208 | with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
209 | end_points_collection = sc.name + '_end_points'
210 | # Collect outputs for conv2d, fully_connected and max_pool2d.
211 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
212 | outputs_collections=end_points_collection):
213 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
214 | net = slim.max_pool2d(net, [2, 2], scope='pool1')
215 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
216 | net = slim.max_pool2d(net, [2, 2], scope='pool2')
217 | net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
218 | net = slim.max_pool2d(net, [2, 2], scope='pool3')
219 | net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
220 | net = slim.max_pool2d(net, [2, 2], scope='pool4')
221 | net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
222 | net = slim.max_pool2d(net, [2, 2], scope='pool5')
223 | # Use conv2d instead of fully_connected layers.
224 | net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
225 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
226 | scope='dropout6')
227 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
228 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
229 | scope='dropout7')
230 | net = slim.conv2d(net, num_classes, [1, 1],
231 | activation_fn=None,
232 | normalizer_fn=None,
233 | scope='fc8')
234 |       # Convert end_points_collection into an end_point dict.
235 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
236 | if spatial_squeeze:
237 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
238 | end_points[sc.name + '/fc8'] = net
239 | return net, end_points
240 | vgg_19.default_image_size = 224
241 |
242 | # Alias
243 | vgg_d = vgg_16
244 | vgg_e = vgg_19
245 |
--------------------------------------------------------------------------------
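Because vgg.py expresses the fully connected layers as convolutions, the same definitions can be run fully convolutionally on inputs larger than 224x224 by passing spatial_squeeze=False. A brief sketch of that use, mirroring the vgg_a testFullyConvolutional case below, where 256x256 inputs yield a 2x2 grid of class scores:

import tensorflow as tf
from nets import vgg

slim = tf.contrib.slim

inputs = tf.random_uniform((1, 256, 256, 3))
with slim.arg_scope(vgg.vgg_arg_scope()):
    # Without the spatial squeeze, the logits keep their 2x2 spatial grid.
    logits, _ = vgg.vgg_a(inputs, num_classes=1000, spatial_squeeze=False)
print(logits.get_shape().as_list())  # [1, 2, 2, 1000]
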
/tf/nets/vgg_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for slim.nets.vgg."""
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from nets import vgg
23 |
24 | slim = tf.contrib.slim
25 |
26 |
27 | class VGGATest(tf.test.TestCase):
28 |
29 | def testBuild(self):
30 | batch_size = 5
31 | height, width = 224, 224
32 | num_classes = 1000
33 | with self.test_session():
34 | inputs = tf.random_uniform((batch_size, height, width, 3))
35 | logits, _ = vgg.vgg_a(inputs, num_classes)
36 | self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed')
37 | self.assertListEqual(logits.get_shape().as_list(),
38 | [batch_size, num_classes])
39 |
40 | def testFullyConvolutional(self):
41 | batch_size = 1
42 | height, width = 256, 256
43 | num_classes = 1000
44 | with self.test_session():
45 | inputs = tf.random_uniform((batch_size, height, width, 3))
46 | logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False)
47 | self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd')
48 | self.assertListEqual(logits.get_shape().as_list(),
49 | [batch_size, 2, 2, num_classes])
50 |
51 | def testEndPoints(self):
52 | batch_size = 5
53 | height, width = 224, 224
54 | num_classes = 1000
55 | with self.test_session():
56 | inputs = tf.random_uniform((batch_size, height, width, 3))
57 | _, end_points = vgg.vgg_a(inputs, num_classes)
58 | expected_names = ['vgg_a/conv1/conv1_1',
59 | 'vgg_a/pool1',
60 | 'vgg_a/conv2/conv2_1',
61 | 'vgg_a/pool2',
62 | 'vgg_a/conv3/conv3_1',
63 | 'vgg_a/conv3/conv3_2',
64 | 'vgg_a/pool3',
65 | 'vgg_a/conv4/conv4_1',
66 | 'vgg_a/conv4/conv4_2',
67 | 'vgg_a/pool4',
68 | 'vgg_a/conv5/conv5_1',
69 | 'vgg_a/conv5/conv5_2',
70 | 'vgg_a/pool5',
71 | 'vgg_a/fc6',
72 | 'vgg_a/fc7',
73 | 'vgg_a/fc8'
74 | ]
75 | self.assertSetEqual(set(end_points.keys()), set(expected_names))
76 |
77 | def testModelVariables(self):
78 | batch_size = 5
79 | height, width = 224, 224
80 | num_classes = 1000
81 | with self.test_session():
82 | inputs = tf.random_uniform((batch_size, height, width, 3))
83 | vgg.vgg_a(inputs, num_classes)
84 | expected_names = ['vgg_a/conv1/conv1_1/weights',
85 | 'vgg_a/conv1/conv1_1/biases',
86 | 'vgg_a/conv2/conv2_1/weights',
87 | 'vgg_a/conv2/conv2_1/biases',
88 | 'vgg_a/conv3/conv3_1/weights',
89 | 'vgg_a/conv3/conv3_1/biases',
90 | 'vgg_a/conv3/conv3_2/weights',
91 | 'vgg_a/conv3/conv3_2/biases',
92 | 'vgg_a/conv4/conv4_1/weights',
93 | 'vgg_a/conv4/conv4_1/biases',
94 | 'vgg_a/conv4/conv4_2/weights',
95 | 'vgg_a/conv4/conv4_2/biases',
96 | 'vgg_a/conv5/conv5_1/weights',
97 | 'vgg_a/conv5/conv5_1/biases',
98 | 'vgg_a/conv5/conv5_2/weights',
99 | 'vgg_a/conv5/conv5_2/biases',
100 | 'vgg_a/fc6/weights',
101 | 'vgg_a/fc6/biases',
102 | 'vgg_a/fc7/weights',
103 | 'vgg_a/fc7/biases',
104 | 'vgg_a/fc8/weights',
105 | 'vgg_a/fc8/biases',
106 | ]
107 | model_variables = [v.op.name for v in slim.get_model_variables()]
108 | self.assertSetEqual(set(model_variables), set(expected_names))
109 |
110 | def testEvaluation(self):
111 | batch_size = 2
112 | height, width = 224, 224
113 | num_classes = 1000
114 | with self.test_session():
115 | eval_inputs = tf.random_uniform((batch_size, height, width, 3))
116 | logits, _ = vgg.vgg_a(eval_inputs, is_training=False)
117 | self.assertListEqual(logits.get_shape().as_list(),
118 | [batch_size, num_classes])
119 | predictions = tf.argmax(logits, 1)
120 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
121 |
122 | def testTrainEvalWithReuse(self):
123 | train_batch_size = 2
124 | eval_batch_size = 1
125 | train_height, train_width = 224, 224
126 | eval_height, eval_width = 256, 256
127 | num_classes = 1000
128 | with self.test_session():
129 | train_inputs = tf.random_uniform(
130 | (train_batch_size, train_height, train_width, 3))
131 | logits, _ = vgg.vgg_a(train_inputs)
132 | self.assertListEqual(logits.get_shape().as_list(),
133 | [train_batch_size, num_classes])
134 | tf.get_variable_scope().reuse_variables()
135 | eval_inputs = tf.random_uniform(
136 | (eval_batch_size, eval_height, eval_width, 3))
137 | logits, _ = vgg.vgg_a(eval_inputs, is_training=False,
138 | spatial_squeeze=False)
139 | self.assertListEqual(logits.get_shape().as_list(),
140 | [eval_batch_size, 2, 2, num_classes])
141 | logits = tf.reduce_mean(logits, [1, 2])
142 | predictions = tf.argmax(logits, 1)
143 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
144 |
145 | def testForward(self):
146 | batch_size = 1
147 | height, width = 224, 224
148 | with self.test_session() as sess:
149 | inputs = tf.random_uniform((batch_size, height, width, 3))
150 | logits, _ = vgg.vgg_a(inputs)
151 | sess.run(tf.initialize_all_variables())
152 | output = sess.run(logits)
153 | self.assertTrue(output.any())
154 |
155 |
156 | class VGG16Test(tf.test.TestCase):
157 |
158 | def testBuild(self):
159 | batch_size = 5
160 | height, width = 224, 224
161 | num_classes = 1000
162 | with self.test_session():
163 | inputs = tf.random_uniform((batch_size, height, width, 3))
164 | logits, _ = vgg.vgg_16(inputs, num_classes)
165 | self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
166 | self.assertListEqual(logits.get_shape().as_list(),
167 | [batch_size, num_classes])
168 |
169 | def testFullyConvolutional(self):
170 | batch_size = 1
171 | height, width = 256, 256
172 | num_classes = 1000
173 | with self.test_session():
174 | inputs = tf.random_uniform((batch_size, height, width, 3))
175 | logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
176 | self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
177 | self.assertListEqual(logits.get_shape().as_list(),
178 | [batch_size, 2, 2, num_classes])
179 |
180 | def testEndPoints(self):
181 | batch_size = 5
182 | height, width = 224, 224
183 | num_classes = 1000
184 | with self.test_session():
185 | inputs = tf.random_uniform((batch_size, height, width, 3))
186 | _, end_points = vgg.vgg_16(inputs, num_classes)
187 | expected_names = ['vgg_16/conv1/conv1_1',
188 | 'vgg_16/conv1/conv1_2',
189 | 'vgg_16/pool1',
190 | 'vgg_16/conv2/conv2_1',
191 | 'vgg_16/conv2/conv2_2',
192 | 'vgg_16/pool2',
193 | 'vgg_16/conv3/conv3_1',
194 | 'vgg_16/conv3/conv3_2',
195 | 'vgg_16/conv3/conv3_3',
196 | 'vgg_16/pool3',
197 | 'vgg_16/conv4/conv4_1',
198 | 'vgg_16/conv4/conv4_2',
199 | 'vgg_16/conv4/conv4_3',
200 | 'vgg_16/pool4',
201 | 'vgg_16/conv5/conv5_1',
202 | 'vgg_16/conv5/conv5_2',
203 | 'vgg_16/conv5/conv5_3',
204 | 'vgg_16/pool5',
205 | 'vgg_16/fc6',
206 | 'vgg_16/fc7',
207 | 'vgg_16/fc8'
208 | ]
209 | self.assertSetEqual(set(end_points.keys()), set(expected_names))
210 |
211 | def testModelVariables(self):
212 | batch_size = 5
213 | height, width = 224, 224
214 | num_classes = 1000
215 | with self.test_session():
216 | inputs = tf.random_uniform((batch_size, height, width, 3))
217 | vgg.vgg_16(inputs, num_classes)
218 | expected_names = ['vgg_16/conv1/conv1_1/weights',
219 | 'vgg_16/conv1/conv1_1/biases',
220 | 'vgg_16/conv1/conv1_2/weights',
221 | 'vgg_16/conv1/conv1_2/biases',
222 | 'vgg_16/conv2/conv2_1/weights',
223 | 'vgg_16/conv2/conv2_1/biases',
224 | 'vgg_16/conv2/conv2_2/weights',
225 | 'vgg_16/conv2/conv2_2/biases',
226 | 'vgg_16/conv3/conv3_1/weights',
227 | 'vgg_16/conv3/conv3_1/biases',
228 | 'vgg_16/conv3/conv3_2/weights',
229 | 'vgg_16/conv3/conv3_2/biases',
230 | 'vgg_16/conv3/conv3_3/weights',
231 | 'vgg_16/conv3/conv3_3/biases',
232 | 'vgg_16/conv4/conv4_1/weights',
233 | 'vgg_16/conv4/conv4_1/biases',
234 | 'vgg_16/conv4/conv4_2/weights',
235 | 'vgg_16/conv4/conv4_2/biases',
236 | 'vgg_16/conv4/conv4_3/weights',
237 | 'vgg_16/conv4/conv4_3/biases',
238 | 'vgg_16/conv5/conv5_1/weights',
239 | 'vgg_16/conv5/conv5_1/biases',
240 | 'vgg_16/conv5/conv5_2/weights',
241 | 'vgg_16/conv5/conv5_2/biases',
242 | 'vgg_16/conv5/conv5_3/weights',
243 | 'vgg_16/conv5/conv5_3/biases',
244 | 'vgg_16/fc6/weights',
245 | 'vgg_16/fc6/biases',
246 | 'vgg_16/fc7/weights',
247 | 'vgg_16/fc7/biases',
248 | 'vgg_16/fc8/weights',
249 | 'vgg_16/fc8/biases',
250 | ]
251 | model_variables = [v.op.name for v in slim.get_model_variables()]
252 | self.assertSetEqual(set(model_variables), set(expected_names))
253 |
254 | def testEvaluation(self):
255 | batch_size = 2
256 | height, width = 224, 224
257 | num_classes = 1000
258 | with self.test_session():
259 | eval_inputs = tf.random_uniform((batch_size, height, width, 3))
260 | logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
261 | self.assertListEqual(logits.get_shape().as_list(),
262 | [batch_size, num_classes])
263 | predictions = tf.argmax(logits, 1)
264 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
265 |
266 | def testTrainEvalWithReuse(self):
267 | train_batch_size = 2
268 | eval_batch_size = 1
269 | train_height, train_width = 224, 224
270 | eval_height, eval_width = 256, 256
271 | num_classes = 1000
272 | with self.test_session():
273 | train_inputs = tf.random_uniform(
274 | (train_batch_size, train_height, train_width, 3))
275 | logits, _ = vgg.vgg_16(train_inputs)
276 | self.assertListEqual(logits.get_shape().as_list(),
277 | [train_batch_size, num_classes])
278 | tf.get_variable_scope().reuse_variables()
279 | eval_inputs = tf.random_uniform(
280 | (eval_batch_size, eval_height, eval_width, 3))
281 | logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
282 | spatial_squeeze=False)
283 | self.assertListEqual(logits.get_shape().as_list(),
284 | [eval_batch_size, 2, 2, num_classes])
285 | logits = tf.reduce_mean(logits, [1, 2])
286 | predictions = tf.argmax(logits, 1)
287 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
288 |
289 | def testForward(self):
290 | batch_size = 1
291 | height, width = 224, 224
292 | with self.test_session() as sess:
293 | inputs = tf.random_uniform((batch_size, height, width, 3))
294 | logits, _ = vgg.vgg_16(inputs)
295 | sess.run(tf.initialize_all_variables())
296 | output = sess.run(logits)
297 | self.assertTrue(output.any())
298 |
299 |
300 | class VGG19Test(tf.test.TestCase):
301 |
302 | def testBuild(self):
303 | batch_size = 5
304 | height, width = 224, 224
305 | num_classes = 1000
306 | with self.test_session():
307 | inputs = tf.random_uniform((batch_size, height, width, 3))
308 | logits, _ = vgg.vgg_19(inputs, num_classes)
309 | self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed')
310 | self.assertListEqual(logits.get_shape().as_list(),
311 | [batch_size, num_classes])
312 |
313 | def testFullyConvolutional(self):
314 | batch_size = 1
315 | height, width = 256, 256
316 | num_classes = 1000
317 | with self.test_session():
318 | inputs = tf.random_uniform((batch_size, height, width, 3))
319 | logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False)
320 | self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd')
321 | self.assertListEqual(logits.get_shape().as_list(),
322 | [batch_size, 2, 2, num_classes])
323 |
324 | def testEndPoints(self):
325 | batch_size = 5
326 | height, width = 224, 224
327 | num_classes = 1000
328 | with self.test_session():
329 | inputs = tf.random_uniform((batch_size, height, width, 3))
330 | _, end_points = vgg.vgg_19(inputs, num_classes)
331 | expected_names = [
332 | 'vgg_19/conv1/conv1_1',
333 | 'vgg_19/conv1/conv1_2',
334 | 'vgg_19/pool1',
335 | 'vgg_19/conv2/conv2_1',
336 | 'vgg_19/conv2/conv2_2',
337 | 'vgg_19/pool2',
338 | 'vgg_19/conv3/conv3_1',
339 | 'vgg_19/conv3/conv3_2',
340 | 'vgg_19/conv3/conv3_3',
341 | 'vgg_19/conv3/conv3_4',
342 | 'vgg_19/pool3',
343 | 'vgg_19/conv4/conv4_1',
344 | 'vgg_19/conv4/conv4_2',
345 | 'vgg_19/conv4/conv4_3',
346 | 'vgg_19/conv4/conv4_4',
347 | 'vgg_19/pool4',
348 | 'vgg_19/conv5/conv5_1',
349 | 'vgg_19/conv5/conv5_2',
350 | 'vgg_19/conv5/conv5_3',
351 | 'vgg_19/conv5/conv5_4',
352 | 'vgg_19/pool5',
353 | 'vgg_19/fc6',
354 | 'vgg_19/fc7',
355 | 'vgg_19/fc8'
356 | ]
357 | self.assertSetEqual(set(end_points.keys()), set(expected_names))
358 |
359 | def testModelVariables(self):
360 | batch_size = 5
361 | height, width = 224, 224
362 | num_classes = 1000
363 | with self.test_session():
364 | inputs = tf.random_uniform((batch_size, height, width, 3))
365 | vgg.vgg_19(inputs, num_classes)
366 | expected_names = [
367 | 'vgg_19/conv1/conv1_1/weights',
368 | 'vgg_19/conv1/conv1_1/biases',
369 | 'vgg_19/conv1/conv1_2/weights',
370 | 'vgg_19/conv1/conv1_2/biases',
371 | 'vgg_19/conv2/conv2_1/weights',
372 | 'vgg_19/conv2/conv2_1/biases',
373 | 'vgg_19/conv2/conv2_2/weights',
374 | 'vgg_19/conv2/conv2_2/biases',
375 | 'vgg_19/conv3/conv3_1/weights',
376 | 'vgg_19/conv3/conv3_1/biases',
377 | 'vgg_19/conv3/conv3_2/weights',
378 | 'vgg_19/conv3/conv3_2/biases',
379 | 'vgg_19/conv3/conv3_3/weights',
380 | 'vgg_19/conv3/conv3_3/biases',
381 | 'vgg_19/conv3/conv3_4/weights',
382 | 'vgg_19/conv3/conv3_4/biases',
383 | 'vgg_19/conv4/conv4_1/weights',
384 | 'vgg_19/conv4/conv4_1/biases',
385 | 'vgg_19/conv4/conv4_2/weights',
386 | 'vgg_19/conv4/conv4_2/biases',
387 | 'vgg_19/conv4/conv4_3/weights',
388 | 'vgg_19/conv4/conv4_3/biases',
389 | 'vgg_19/conv4/conv4_4/weights',
390 | 'vgg_19/conv4/conv4_4/biases',
391 | 'vgg_19/conv5/conv5_1/weights',
392 | 'vgg_19/conv5/conv5_1/biases',
393 | 'vgg_19/conv5/conv5_2/weights',
394 | 'vgg_19/conv5/conv5_2/biases',
395 | 'vgg_19/conv5/conv5_3/weights',
396 | 'vgg_19/conv5/conv5_3/biases',
397 | 'vgg_19/conv5/conv5_4/weights',
398 | 'vgg_19/conv5/conv5_4/biases',
399 | 'vgg_19/fc6/weights',
400 | 'vgg_19/fc6/biases',
401 | 'vgg_19/fc7/weights',
402 | 'vgg_19/fc7/biases',
403 | 'vgg_19/fc8/weights',
404 | 'vgg_19/fc8/biases',
405 | ]
406 | model_variables = [v.op.name for v in slim.get_model_variables()]
407 | self.assertSetEqual(set(model_variables), set(expected_names))
408 |
409 | def testEvaluation(self):
410 | batch_size = 2
411 | height, width = 224, 224
412 | num_classes = 1000
413 | with self.test_session():
414 | eval_inputs = tf.random_uniform((batch_size, height, width, 3))
415 | logits, _ = vgg.vgg_19(eval_inputs, is_training=False)
416 | self.assertListEqual(logits.get_shape().as_list(),
417 | [batch_size, num_classes])
418 | predictions = tf.argmax(logits, 1)
419 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
420 |
421 | def testTrainEvalWithReuse(self):
422 | train_batch_size = 2
423 | eval_batch_size = 1
424 | train_height, train_width = 224, 224
425 | eval_height, eval_width = 256, 256
426 | num_classes = 1000
427 | with self.test_session():
428 | train_inputs = tf.random_uniform(
429 | (train_batch_size, train_height, train_width, 3))
430 | logits, _ = vgg.vgg_19(train_inputs)
431 | self.assertListEqual(logits.get_shape().as_list(),
432 | [train_batch_size, num_classes])
433 | tf.get_variable_scope().reuse_variables()
434 | eval_inputs = tf.random_uniform(
435 | (eval_batch_size, eval_height, eval_width, 3))
436 | logits, _ = vgg.vgg_19(eval_inputs, is_training=False,
437 | spatial_squeeze=False)
438 | self.assertListEqual(logits.get_shape().as_list(),
439 | [eval_batch_size, 2, 2, num_classes])
440 | logits = tf.reduce_mean(logits, [1, 2])
441 | predictions = tf.argmax(logits, 1)
442 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
443 |
444 | def testForward(self):
445 | batch_size = 1
446 | height, width = 224, 224
447 | with self.test_session() as sess:
448 | inputs = tf.random_uniform((batch_size, height, width, 3))
449 | logits, _ = vgg.vgg_19(inputs)
450 | sess.run(tf.initialize_all_variables())
451 | output = sess.run(logits)
452 | self.assertTrue(output.any())
453 |
454 | if __name__ == '__main__':
455 | tf.test.main()
456 |
--------------------------------------------------------------------------------
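Note: the testEndPoints cases above enumerate the keys of the end_points dictionary each network returns, which makes it straightforward to tap intermediate activations. A small sketch under the same TF 1.x assumptions (shapes and variable names are illustrative):

import tensorflow as tf
from nets import vgg

images = tf.random_uniform((1, 224, 224, 3))
_, end_points = vgg.vgg_16(images, num_classes=1000, is_training=False)

pool5 = end_points['vgg_16/pool5']   # [1, 7, 7, 512] convolutional feature map
fc7 = end_points['vgg_16/fc7']       # [1, 1, 1, 4096] image descriptor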
/tf/retrain.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | # Modified 2017 Microsoft Corporation.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 | """Generic training script that trains a model using a given dataset."""
17 |
18 | import tensorflow as tf
19 | import pandas as pd
20 | import numpy as np
21 | import os
22 | import functools
23 |
24 | from tensorflow.python.ops import control_flow_ops
25 | from deployment import model_deploy
26 | from nets import resnet_v1  # this module required modification; see https://github.com/tensorflow/models/issues/533
27 | from tensorflow.contrib.training.python.training import evaluation
28 |
29 | slim = tf.contrib.slim
30 |
31 | ''' Define the command-line flags '''
32 | tf.app.flags.DEFINE_string('train_dir',
33 | 'D:\\tf\\models',
34 | 'Directory where checkpoints and event logs are written to.')
35 | tf.app.flags.DEFINE_string('dataset_name', 'aerial', 'The name of the dataset to load.')
36 | tf.app.flags.DEFINE_string('dataset_dir',
37 | 'D:\\combined\\train_subsample',
38 | 'The directory where the dataset files are stored.')
39 | tf.app.flags.DEFINE_string('checkpoint_path',
40 | 'D:\\tf\\resnet_v1_50.ckpt',
41 | 'The path to a checkpoint from which to fine-tune.')
42 |
43 | tf.app.flags.DEFINE_string('checkpoint_exclude_scopes', 'resnet_v1_50/logits',
44 | 'Comma-separated list of scopes of variables to exclude when restoring '
45 | 'from a checkpoint.')
46 | tf.app.flags.DEFINE_string('trainable_scopes', 'resnet_v1_50/logits',
47 |                            'Comma-separated list of scopes to filter the set of variables to train. '
48 | 'By default, None would train all the variables.')
49 |
50 | tf.app.flags.DEFINE_integer('num_clones', 1, 'Number of model clones to deploy.')
51 | tf.app.flags.DEFINE_boolean('clone_on_cpu', False, 'Use CPUs to deploy clones.')
52 | tf.app.flags.DEFINE_integer('num_readers', 4, 'The number of parallel readers that read data from the dataset.')
53 | tf.app.flags.DEFINE_integer('num_preprocessing_threads', 4, 'The number of threads used to create the batches.')
54 | tf.app.flags.DEFINE_integer('log_every_n_steps', 10, 'The frequency with which logs are printed.')
55 | tf.app.flags.DEFINE_integer('save_summaries_secs', 600, 'The frequency with which summaries are saved, in seconds.')
56 | tf.app.flags.DEFINE_integer('save_interval_secs', 600, 'The frequency with which the model is saved, in seconds.')
57 |
58 | tf.app.flags.DEFINE_float('weight_decay', 0.00004, 'The weight decay on the model weights.')
59 | tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')
60 | tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.')
61 | tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')
62 | tf.app.flags.DEFINE_float('learning_rate', 0.02, 'Initial learning rate.')
63 | tf.app.flags.DEFINE_float('label_smoothing', 0.0, 'The amount of label smoothing.')
64 | tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.9, 'Learning rate decay factor.')
65 | tf.app.flags.DEFINE_float('num_epochs_per_decay', 2.0, 'Number of epochs after which learning rate decays.')
66 | tf.app.flags.DEFINE_integer('replicas_to_aggregate', 1, 'The number of gradients to collect before updating params.')
67 | tf.app.flags.DEFINE_integer('batch_size', 32, 'The number of samples in each batch.')
68 | tf.app.flags.DEFINE_integer('max_number_of_steps', 4000, 'The maximum number of training steps.')
69 |
70 | FLAGS = tf.app.flags.FLAGS
71 |
72 | def get_image_and_class_count(dataset_dir, split_name):
73 | df = pd.read_csv(os.path.join(dataset_dir, 'dataset_split_info.csv'))
74 | image_count = len(df.loc[df['split_name'] == split_name].index)
75 | class_count = len(df['class_name'].unique())
76 | return(image_count, class_count)
77 |
78 | def read_label_file(dataset_dir, filename='labels.txt'):
79 | labels_filename = os.path.join(dataset_dir, filename)
80 | with tf.gfile.Open(labels_filename, 'r') as f:
81 | lines = f.read()
82 | lines = lines.split('\n')
83 | lines = filter(None, lines)
84 |
85 | labels_to_class_names = {}
86 | for line in lines:
87 | index = line.index(':')
88 | labels_to_class_names[line[:index]] = line[index+1:]
89 | return(labels_to_class_names)
90 |
91 | def mean_image_subtraction(image, means):
92 | if image.get_shape().ndims != 3:
93 | raise ValueError('Input must be of size [height, width, C>0]')
94 | num_channels = image.get_shape().as_list()[-1]
95 | if len(means) != num_channels:
96 | raise ValueError('len(means) must match the number of channels')
97 |
98 | channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
99 | for i in range(num_channels):
100 | channels[i] -= means[i]
101 | return(tf.concat(axis=2, values=channels))
102 |
103 | def get_preprocessing():
104 | def preprocessing_fn(image, output_height=224, output_width=224):
105 |         ''' Randomly crop, resize, and flip the image, then subtract the "mean" RGB values '''
106 | _R_MEAN = 123.68
107 | _G_MEAN = 116.78
108 | _B_MEAN = 103.94
109 | #image = tf.expand_dims(image, 0)
110 |
111 |         temp_dim = np.random.randint(175, 223)  # note: not used below; the crop size is fixed at output_height x output_width
112 | distorted_image = tf.random_crop(image, [output_height, output_width, 3])
113 | distorted_image = tf.expand_dims(distorted_image, 0)
114 | resized_image = tf.image.resize_bilinear(distorted_image, [output_height, output_width], align_corners=False)
115 | resized_image = tf.squeeze(resized_image)
116 | resized_image.set_shape([output_height, output_width, 3])
117 | resized_image = tf.image.random_flip_left_right(resized_image)
118 |
119 | image = tf.to_float(resized_image)
120 | return(mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]))
121 | return(preprocessing_fn)
122 |
123 | def get_network_fn(num_classes, weight_decay=0.0):
124 | arg_scope = resnet_v1.resnet_arg_scope(weight_decay=weight_decay)
125 | func = resnet_v1.resnet_v1_50
126 | @functools.wraps(func)
127 | def network_fn(images):
128 | with slim.arg_scope(arg_scope):
129 | return func(images, num_classes)
130 | if hasattr(func, 'default_image_size'):
131 | network_fn.default_image_size = func.default_image_size
132 | return(network_fn)
133 |
134 | def _add_variables_summaries(learning_rate):
135 | summaries = []
136 | for variable in slim.get_model_variables():
137 |         summaries.append(tf.summary.histogram(variable.op.name, variable))
138 |     summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate))
139 | return(summaries)
140 |
141 | def _get_init_fn():
142 | if (FLAGS.checkpoint_path is None) or (tf.train.latest_checkpoint(FLAGS.train_dir)):
143 | return None
144 |
145 | exclusions = []
146 | if FLAGS.checkpoint_exclude_scopes:
147 | exclusions = [scope.strip() for scope in FLAGS.checkpoint_exclude_scopes.split(',')]
148 |
149 | variables_to_restore = []
150 | for var in slim.get_model_variables():
151 | excluded = False
152 | for exclusion in exclusions:
153 | if var.op.name.startswith(exclusion):
154 | excluded = True
155 | break
156 | if not excluded:
157 | variables_to_restore.append(var)
158 |
159 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
160 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
161 | else:
162 | checkpoint_path = FLAGS.checkpoint_path
163 |
164 | tf.logging.info('Fine-tuning from {}'.format(checkpoint_path))
165 |
166 | return(slim.assign_from_checkpoint_fn(checkpoint_path,
167 | variables_to_restore,
168 | ignore_missing_vars=False))
169 |
170 | def _get_variables_to_train():
171 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
172 | variables_to_train = []
173 | for scope in scopes:
174 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
175 | variables_to_train.extend(variables)
176 | return(variables_to_train)
177 |
178 | def get_dataset(dataset_name, dataset_dir, image_count, class_count, split_name):
179 | slim = tf.contrib.slim
180 | items_to_descriptions = {'image': 'A color image.',
181 | 'label': 'An integer in range(0, class_count)'}
182 | file_pattern = os.path.join(dataset_dir, '{}_{}_*.tfrecord'.format(dataset_name, split_name))
183 | reader = tf.TFRecordReader
184 | keys_to_features = {'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
185 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
186 | 'image/class/label': tf.FixedLenFeature([], tf.int64,
187 | default_value=tf.zeros([], dtype=tf.int64))}
188 | items_to_handlers = {'image': slim.tfexample_decoder.Image(),
189 | 'label': slim.tfexample_decoder.Tensor('image/class/label')}
190 | decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
191 | labels_to_names = read_label_file(dataset_dir)
192 | return(slim.dataset.Dataset(data_sources=file_pattern,
193 | reader=reader,
194 | decoder=decoder,
195 | num_samples=image_count,
196 | items_to_descriptions=items_to_descriptions,
197 | num_classes=class_count,
198 | labels_to_names=labels_to_names,
199 | shuffle=True))
200 |
201 | def main(_):
202 | tf.logging.set_verbosity(tf.logging.INFO)
203 | with tf.Graph().as_default():
204 | deploy_config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
205 | clone_on_cpu=FLAGS.clone_on_cpu,
206 | replica_id=0,
207 | num_replicas=1,
208 | num_ps_tasks=0)
209 |
210 | with tf.device(deploy_config.variables_device()):
211 | global_step = slim.create_global_step()
212 |
213 | image_count, class_count = get_image_and_class_count(FLAGS.dataset_dir, 'train')
214 | dataset = get_dataset('aerial', FLAGS.dataset_dir, image_count, class_count, 'train')
215 | network_fn = get_network_fn(num_classes=(dataset.num_classes), weight_decay=FLAGS.weight_decay)
216 | image_preprocessing_fn = get_preprocessing()
217 |
218 | with tf.device(deploy_config.inputs_device()):
219 | provider = slim.dataset_data_provider.DatasetDataProvider(dataset,
220 | num_readers=FLAGS.num_readers,
221 | common_queue_capacity=20 * FLAGS.batch_size,
222 | common_queue_min=10 * FLAGS.batch_size)
223 | [image, label] = provider.get(['image', 'label'])
224 | image = image_preprocessing_fn(image, 224, 224)
225 | images, labels = tf.train.batch([image, label],
226 | batch_size=FLAGS.batch_size,
227 | num_threads=FLAGS.num_preprocessing_threads,
228 | capacity=5 * FLAGS.batch_size)
229 | labels = slim.one_hot_encoding(labels, dataset.num_classes)
230 | batch_queue = slim.prefetch_queue.prefetch_queue([images, labels], capacity=2 * deploy_config.num_clones)
231 |
232 | def clone_fn(batch_queue):
233 | images, labels = batch_queue.dequeue()
234 | logits, end_points = network_fn(images)
235 |             logits = tf.squeeze(logits)  # remove the singleton spatial dimensions so the logits match the [batch, num_classes] one-hot labels
236 | slim.losses.softmax_cross_entropy(logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
237 | return(end_points)
238 |
239 | summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
240 |
241 | clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
242 | first_clone_scope = deploy_config.clone_scope(0)
243 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)
244 |
245 | end_points = clones[0].outputs
246 | for end_point in end_points:
247 | x = end_points[end_point]
248 | summaries.add(tf.summary.histogram('activations/' + end_point, x))
249 | summaries.add(tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x)))
250 | for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
251 | summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))
252 | for variable in slim.get_model_variables():
253 | summaries.add(tf.summary.histogram(variable.op.name, variable))
254 |
255 | with tf.device(deploy_config.optimizer_device()):
256 | decay_steps = int(dataset.num_samples / FLAGS.batch_size * FLAGS.num_epochs_per_decay)
257 | learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
258 | global_step,
259 | decay_steps,
260 | FLAGS.learning_rate_decay_factor,
261 | staircase=True,
262 | name='exponential_decay_learning_rate')
263 | optimizer = tf.train.RMSPropOptimizer(learning_rate,
264 | decay=FLAGS.rmsprop_decay,
265 | momentum=FLAGS.rmsprop_momentum,
266 | epsilon=FLAGS.opt_epsilon)
267 | summaries.add(tf.summary.scalar('learning_rate', learning_rate))
268 |
269 |
270 |
271 | variables_to_train = _get_variables_to_train()
272 | total_loss, clones_gradients = model_deploy.optimize_clones(clones, optimizer, var_list=variables_to_train)
273 | summaries.add(tf.summary.scalar('total_loss', total_loss))
274 |
275 | grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step)
276 | update_ops.append(grad_updates)
277 |
278 | update_op = tf.group(*update_ops)
279 | train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, name='train_op')
280 |
281 | summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
282 | summary_op = tf.summary.merge(list(summaries), name='summary_op')
283 |
284 | slim.learning.train(train_tensor,
285 | logdir=FLAGS.train_dir,
286 | master='',
287 | is_chief=True,
288 | init_fn=_get_init_fn(),
289 | summary_op=summary_op,
290 | number_of_steps=FLAGS.max_number_of_steps,
291 | log_every_n_steps=FLAGS.log_every_n_steps,
292 | save_summaries_secs=FLAGS.save_summaries_secs,
293 | save_interval_secs=FLAGS.save_interval_secs,
294 | sync_optimizer=None)
295 |
296 |
297 | if __name__ == '__main__':
298 | tf.app.run()
--------------------------------------------------------------------------------
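Note: because every hyperparameter in retrain.py is exposed through tf.app.flags, the script is normally driven from the command line rather than edited in place. A minimal launch sketch (the paths simply repeat the Windows-style defaults in the file and are placeholders, not values specific to any one run):

# Launch a fine-tuning run with explicitly overridden flag values.
# Flag names come from retrain.py above; paths are placeholders.
import subprocess

subprocess.check_call([
    'python', 'retrain.py',
    '--train_dir', 'D:\\tf\\models',
    '--dataset_dir', 'D:\\combined\\train_subsample',
    '--checkpoint_path', 'D:\\tf\\resnet_v1_50.ckpt',
    '--trainable_scopes', 'resnet_v1_50/logits',
    '--max_number_of_steps', '4000',
])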