├── PyTorch
│   ├── GCT.py
│   ├── README.md
│   ├── main.py
│   ├── resnet.py
│   └── run.sh
├── README.md
├── TensorFlow
│   ├── LICENSE
│   ├── README.md
│   ├── all_reduce_benchmark.py
│   ├── all_reduce_benchmark_test.py
│   ├── allreduce.py
│   ├── allreduce_test.py
│   ├── batch_allreduce.py
│   ├── benchmark_cnn.py
│   ├── benchmark_cnn_distributed_test.py
│   ├── benchmark_cnn_distributed_test_runner.py
│   ├── benchmark_cnn_test.py
│   ├── cnn_util.py
│   ├── cnn_util_test.py
│   ├── constants.py
│   ├── convnet_builder.py
│   ├── data_utils.py
│   ├── datasets.py
│   ├── eval_all_ckpt.py
│   ├── eval_gct_resnet50.sh
│   ├── flags.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── alexnet_model.py
│   │   ├── densenet_model.py
│   │   ├── googlenet_model.py
│   │   ├── inception_model.py
│   │   ├── lenet_model.py
│   │   ├── mobilenet.py
│   │   ├── mobilenet_conv_blocks.py
│   │   ├── mobilenet_test.py
│   │   ├── mobilenet_v2.py
│   │   ├── model.py
│   │   ├── model_config.py
│   │   ├── nasnet_model.py
│   │   ├── nasnet_test.py
│   │   ├── nasnet_utils.py
│   │   ├── official_resnet_model.py
│   │   ├── overfeat_model.py
│   │   ├── resnet_model.py
│   │   ├── trivial_model.py
│   │   └── vgg_model.py
│   ├── platforms
│   │   ├── __init__.py
│   │   ├── default
│   │   │   ├── __init__.py
│   │   │   └── util.py
│   │   └── util.py
│   ├── preprocessing.py
│   ├── run_tests.py
│   ├── test_data
│   │   ├── __init__.py
│   │   ├── fake_tf_record_data
│   │   │   ├── train-00000-of-00008
│   │   │   ├── train-00001-of-00008
│   │   │   ├── train-00002-of-00008
│   │   │   ├── train-00003-of-00008
│   │   │   ├── train-00004-of-00008
│   │   │   ├── train-00005-of-00008
│   │   │   ├── train-00006-of-00008
│   │   │   ├── train-00007-of-00008
│   │   │   ├── validation-00000-of-00002
│   │   │   └── validation-00001-of-00002
│   │   └── images
│   │       ├── black_image.jpg
│   │       └── white_image.jpg
│   ├── test_util.py
│   ├── tf_cnn_benchmarks.py
│   ├── train_gct_resnet50.sh
│   ├── variable_mgr.py
│   ├── variable_mgr_util.py
│   └── variable_mgr_util_test.py
└── overview.png
/PyTorch/GCT.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import math
4 | from torch import nn
5 |
6 |
7 | class GCT(nn.Module):
8 |
9 | def __init__(self, num_channels, epsilon=1e-5, mode='l2', after_relu=False):
10 | super(GCT, self).__init__()
11 |
12 | self.alpha = nn.Parameter(torch.ones(1, num_channels, 1, 1))
13 | self.gamma = nn.Parameter(torch.zeros(1, num_channels, 1, 1))
14 | self.beta = nn.Parameter(torch.zeros(1, num_channels, 1, 1))
15 | self.epsilon = epsilon
16 | self.mode = mode
17 | self.after_relu = after_relu
18 |
19 | def forward(self, x):
20 |
21 | if self.mode == 'l2':
22 | embedding = (x.pow(2).sum((2,3), keepdim=True) + self.epsilon).pow(0.5) * self.alpha
23 | norm = self.gamma / (embedding.pow(2).mean(dim=1, keepdim=True) + self.epsilon).pow(0.5)
24 |
25 | elif self.mode == 'l1':
26 | if not self.after_relu:
27 | _x = torch.abs(x)
28 | else:
29 | _x = x
30 | embedding = _x.sum((2,3), keepdim=True) * self.alpha
31 | norm = self.gamma / (torch.abs(embedding).mean(dim=1, keepdim=True) + self.epsilon)
  32 |         else:
  33 |             # sys is never imported above, so raise instead of sys.exit().
  34 |             raise ValueError('Unknown mode!')
35 |
36 | gate = 1. + torch.tanh(embedding * norm + self.beta)
37 |
38 | return x * gate
--------------------------------------------------------------------------------
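
A minimal usage sketch of the `GCT` module above (the shapes are illustrative, not from the repo). In `l2` mode, each channel `c` gets an embedding `s_c = alpha_c * ||x_c||_2` and a gate `1 + tanh(gamma_c * s_c / sqrt(mean_c(s^2) + eps) + beta_c)`; since `gamma` and `beta` start at zero, the gate starts at exactly 1:

```
# Minimal usage sketch for GCT.py above; shapes are illustrative.
import torch
from GCT import GCT

gct = GCT(num_channels=64)         # alpha=1, gamma=0, beta=0 at init
x = torch.randn(2, 64, 56, 56)     # (batch, channels, height, width)
y = gct(x)                         # channel-wise gating, shape preserved
assert y.shape == x.shape
# gamma = beta = 0 makes the gate exactly 1, so GCT starts as an identity map.
print(torch.allclose(x, y))        # True at initialization
```
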
/PyTorch/README.md:
--------------------------------------------------------------------------------
1 | ## Gated Channel Transformation for Visual Recognition (GCT)
2 | The PyTorch implementation of Gated Channel Transformation for Visual Recognition (CVPR 2020) [[paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Yang_Gated_Channel_Transformation_for_Visual_Recognition_CVPR_2020_paper.pdf)].
3 |
4 | The training code is based on [ImageNet training in PyTorch](https://github.com/pytorch/examples/tree/master/imagenet). The code of the ResNet backbone is from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark).
5 |
6 | ## Getting Started
7 | First, please install torch and torchvision.
8 |
9 | To train and evaluate GCT-ResNet50, run
10 | ```
11 | bash run.sh
12 | ```
  13 | After training finishes, the best accuracy should be around 77.2%, which is slightly lower than our TensorFlow version. The gap comes from the data augmentation in this simple PyTorch version, which differs slightly from the TensorFlow version.
14 |
15 | ## Object Detection
  16 | If you want to use GCT-ResNet in [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark), an object detection framework, please follow the guidance below.
17 |
  18 | First, you need to replace the backbone file of maskrcnn-benchmark, [resnet.py](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/resnet.py), with our backbone. Notably, the batch normalization in [Line 1](https://github.com/z-x-yang/GCT/blob/dc69cc83513fd04b1960512644693aaa15020b67/PyTorch/resnet.py#L403) and [Line 2](https://github.com/z-x-yang/GCT/blob/dc69cc83513fd04b1960512644693aaa15020b67/PyTorch/resnet.py#L410) **should** be replaced by **frozen batch normalization**, as done [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/57eec25b75144d9fb1a6857f32553e1574177daf/maskrcnn_benchmark/modeling/backbone/resnet.py#L397) in maskrcnn-benchmark.
19 |
  20 | Second, you need to **remove** the weight decay on the **beta** parameters of GCT, following the default setting in our paper. In detail, you need to modify maskrcnn-benchmark's code for applying weight decay, [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/solver/build.py). For how to remove the weight decay on beta, you can refer to our code [here](https://github.com/z-x-yang/GCT/blob/78a0b863d6b5cd28cb417ab6c573c3c3364d8825/PyTorch/main.py#L184).
21 |
  22 | After running Mask-RCNN & ResNet-50 with its default training schedule ([here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml)) on COCO, the performance should be around 39.8 (box AP) and 36.0 (mask AP), while the baseline without GCT should be 37.8 (box AP) and 34.2 (mask AP).
23 |
24 | More results based on Mask-RCNN are below:
25 | | Backbone | box AP | mask AP |
26 | | --------- | -------- | ------------------- |
27 | | ResNet-50 | 37.8 | 34.2 |
28 | | GCT-ResNet-50 | **39.8** | **36.0** |
29 | | ResNet-101 | 40.1 | 36.1 |
30 | | GCT-ResNet-101 | **42.0** | **37.7** |
31 |
32 |
  33 | ## Pretrained Model
  34 | We also provide a pretrained model of GCT-ResNet50 (top-1 acc: 77.2%), which can be downloaded from [here](https://drive.google.com/file/d/1y5a56UzBjUWlWwlrU42lxueJY_cBpWLL/view?usp=sharing).
35 |
36 | ## Citation
37 | ```
38 | @inproceedings{yang2020gated,
39 | title={Gated Channel Transformation for Visual Recognition},
40 | author={Yang, Zongxin and Zhu, Linchao and Wu, Yu and Yang, Yi},
41 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
42 | pages={11794--11803},
43 | year={2020}
44 | }
45 | ```
46 |
47 |
--------------------------------------------------------------------------------
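
As a companion to the README's second detection step, here is a minimal sketch of excluding GCT's beta parameters from weight decay via optimizer parameter groups. It assumes the betas keep the `.beta` name they are registered under in `GCT.py`; the tiny `nn.Sequential` network is a stand-in, not from the repo (the repo's `main.py`, linked above, implements the equivalent logic):

```
# Sketch: no weight decay on GCT beta, via optimizer parameter groups.
import torch
from torch import nn
from GCT import GCT

def split_wd_params(model):
    # Assumes GCT betas are registered as '...beta', as in GCT.py above.
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        (no_decay if name.endswith('.beta') else decay).append(param)
    return decay, no_decay

model = nn.Sequential(GCT(3), nn.Conv2d(3, 8, 3))  # tiny stand-in network
decay, no_decay = split_wd_params(model)
optimizer = torch.optim.SGD(
    [{'params': decay, 'weight_decay': 1e-4},
     {'params': no_decay, 'weight_decay': 0.0}],
    lr=0.1, momentum=0.9)
```
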
/PyTorch/resnet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """
3 | Variant of the resnet module that takes cfg as an argument.
4 | Example usage. Strings may be specified in the config file.
5 | model = ResNet(
6 | "StemWithFixedBatchNorm",
7 | "BottleneckWithFixedBatchNorm",
8 | "ResNet50StagesTo4",
9 | )
10 | OR:
11 | model = ResNet(
12 | "StemWithGN",
13 | "BottleneckWithGN",
14 | "ResNet50StagesTo4",
15 | )
16 | Custom implementations may be written in user code and hooked in via the
17 | `register_*` functions.
18 | """
19 | from collections import namedtuple
20 |
21 | import torch
22 | import torch.nn.functional as F
23 | import math
24 | from torch import nn
25 | from GCT import GCT
26 |
27 | class _NewEmptyTensorOp(torch.autograd.Function):
28 | @staticmethod
29 | def forward(ctx, x, new_shape):
30 | ctx.shape = x.shape
31 | return x.new_empty(new_shape)
32 |
33 | @staticmethod
34 | def backward(ctx, grad):
35 | shape = ctx.shape
36 | return _NewEmptyTensorOp.apply(grad, shape), None
37 |
38 |
39 | class Conv2d(torch.nn.Conv2d):
40 | def forward(self, x):
41 | if x.numel() > 0:
42 | return super(Conv2d, self).forward(x)
43 | # get output shape
44 |
45 | output_shape = [
46 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1
47 | for i, p, di, k, d in zip(
48 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride
49 | )
50 | ]
51 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape
52 | return _NewEmptyTensorOp.apply(x, output_shape)
53 |
54 |
55 | # ResNet stage specification
56 | StageSpec = namedtuple(
57 | "StageSpec",
58 | [
59 | "index", # Index of the stage, eg 1, 2, ..,. 5
60 | "block_count", # Numer of residual blocks in the stage
61 | "return_features", # True => return the last feature map from this stage
62 | ],
63 | )
64 |
65 | # -----------------------------------------------------------------------------
66 | # Standard ResNet models
67 | # -----------------------------------------------------------------------------
68 | # ResNet-50 (including all stages)
69 | ResNet50StagesTo5 = tuple(
70 | StageSpec(index=i, block_count=c, return_features=r)
71 | for (i, c, r) in ((1, 3, False), (2, 4, False), (3, 6, False), (4, 3, True))
72 | )
73 | # ResNet-50 up to stage 4 (excludes stage 5)
74 | ResNet50StagesTo4 = tuple(
75 | StageSpec(index=i, block_count=c, return_features=r)
76 | for (i, c, r) in ((1, 3, False), (2, 4, False), (3, 6, True))
77 | )
78 | # ResNet-101 (including all stages)
79 | ResNet101StagesTo5 = tuple(
80 | StageSpec(index=i, block_count=c, return_features=r)
81 | for (i, c, r) in ((1, 3, False), (2, 4, False), (3, 23, False), (4, 3, True))
82 | )
83 | # ResNet-101 up to stage 4 (excludes stage 5)
84 | ResNet101StagesTo4 = tuple(
85 | StageSpec(index=i, block_count=c, return_features=r)
86 | for (i, c, r) in ((1, 3, False), (2, 4, False), (3, 23, True))
87 | )
88 | # ResNet-50-FPN (including all stages)
89 | ResNet50FPNStagesTo5 = tuple(
90 | StageSpec(index=i, block_count=c, return_features=r)
91 | for (i, c, r) in ((1, 3, True), (2, 4, True), (3, 6, True), (4, 3, True))
92 | )
93 | # ResNet-101-FPN (including all stages)
94 | ResNet101FPNStagesTo5 = tuple(
95 | StageSpec(index=i, block_count=c, return_features=r)
96 | for (i, c, r) in ((1, 3, True), (2, 4, True), (3, 23, True), (4, 3, True))
97 | )
98 |
99 |
100 |
101 | class ResNet(nn.Module):
102 | def __init__(self, cfg):
103 | super(ResNet, self).__init__()
104 |
105 | # If we want to use the cfg in forward(), then we should make a copy
106 | # of it and store it for later use:
107 | # self.cfg = cfg.clone()
108 |
109 | # Translate string names to implementations
110 | stem_module = StemWithFixedBatchNorm
111 | stage_specs = ResNet50FPNStagesTo5
112 | transformation_module = BottleneckWithFixedBatchNorm
113 |
114 | # Construct the stem module
115 | self.stem = stem_module(cfg)
116 |
 117 |         # Construct the specified ResNet stages
118 | num_groups = 1 # cfg.MODEL.RESNETS.NUM_GROUPS
119 | width_per_group = 64 # cfg.MODEL.RESNETS.WIDTH_PER_GROUP
120 | in_channels = 64 # cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
121 | stage2_bottleneck_channels = num_groups * width_per_group
122 | stage2_out_channels = 256 # cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
123 | self.stages = []
124 | self.return_features = {}
125 | for stage_spec in stage_specs:
126 | name = "layer" + str(stage_spec.index)
127 | stage2_relative_factor = 2 ** (stage_spec.index - 1)
128 | bottleneck_channels = stage2_bottleneck_channels * stage2_relative_factor
129 | out_channels = stage2_out_channels * stage2_relative_factor
130 | module = _make_stage(
131 | transformation_module,
132 | in_channels,
133 | bottleneck_channels,
134 | out_channels,
135 | stage_spec.block_count,
136 | num_groups,
137 | True, # cfg.MODEL.RESNETS.STRIDE_IN_1X1,
138 | first_stride=int(stage_spec.index > 1) + 1,
139 | )
140 | in_channels = out_channels
141 | self.add_module(name, module)
142 | self.stages.append(name)
143 | self.return_features[name] = stage_spec.return_features
144 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
145 | self.fc = nn.Linear(512 * 4, 1000)
146 |
147 | # Optionally freeze (requires_grad=False) parts of the backbone
148 | # self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT)
149 |
150 | def _freeze_backbone(self, freeze_at):
151 | if freeze_at < 0:
152 | return
153 | for stage_index in range(freeze_at):
154 | if stage_index == 0:
155 | m = self.stem # stage 0 is the stem
156 | else:
157 | m = getattr(self, "layer" + str(stage_index))
158 | for p in m.parameters():
159 | p.requires_grad = False
160 |
161 | def forward(self, x):
162 | outputs = []
163 | x = self.stem(x)
164 | for stage_name in self.stages:
165 | x = getattr(self, stage_name)(x)
166 | if self.return_features[stage_name]:
167 | outputs.append(x)
168 | x = self.avgpool(x)
169 | x = x.view(x.size(0), -1)
170 | x = self.fc(x)
171 | return x
172 |
173 |
174 | class ResNetHead(nn.Module):
175 | def __init__(
176 | self,
177 | block_module,
178 | stages,
179 | num_groups=1,
180 | width_per_group=64,
181 | stride_in_1x1=True,
182 | stride_init=None,
183 | res2_out_channels=256,
184 | dilation=1
185 | ):
186 | super(ResNetHead, self).__init__()
187 |
188 | stage2_relative_factor = 2 ** (stages[0].index - 1)
189 | stage2_bottleneck_channels = num_groups * width_per_group
190 | out_channels = res2_out_channels * stage2_relative_factor
191 | in_channels = out_channels // 2
192 | bottleneck_channels = stage2_bottleneck_channels * stage2_relative_factor
193 |
194 | block_module = _TRANSFORMATION_MODULES[block_module]
195 |
196 | self.stages = []
197 | stride = stride_init
198 | for stage in stages:
199 | name = "layer" + str(stage.index)
200 | if not stride:
201 | stride = int(stage.index > 1) + 1
202 | module = _make_stage(
203 | block_module,
204 | in_channels,
205 | bottleneck_channels,
206 | out_channels,
207 | stage.block_count,
208 | num_groups,
209 | stride_in_1x1,
210 | first_stride=stride,
211 | dilation=dilation
212 | )
213 | stride = None
214 | self.add_module(name, module)
215 | self.stages.append(name)
216 |
217 | def forward(self, x):
218 | for stage in self.stages:
219 | x = getattr(self, stage)(x)
220 | return x
221 |
222 |
223 | def _make_stage(
224 | transformation_module,
225 | in_channels,
226 | bottleneck_channels,
227 | out_channels,
228 | block_count,
229 | num_groups,
230 | stride_in_1x1,
231 | first_stride,
232 | dilation=1
233 | ):
234 | blocks = []
235 | stride = first_stride
236 | for _ in range(block_count):
237 | blocks.append(
238 | transformation_module(
239 | in_channels,
240 | bottleneck_channels,
241 | out_channels,
242 | num_groups,
243 | stride_in_1x1,
244 | stride,
245 | dilation=dilation
246 | )
247 | )
248 | stride = 1
249 | in_channels = out_channels
250 | return nn.Sequential(*blocks)
251 |
252 |
253 | class Bottleneck(nn.Module):
254 | def __init__(
255 | self,
256 | in_channels,
257 | bottleneck_channels,
258 | out_channels,
259 | num_groups,
260 | stride_in_1x1,
261 | stride,
262 | dilation,
263 | norm_func
264 | ):
265 | super(Bottleneck, self).__init__()
266 |
267 | self.downsample = None
268 | if in_channels != out_channels:
269 | down_stride = stride if dilation == 1 else 1
270 | downsample_bn = norm_func(out_channels)
271 | torch.nn.init.constant_(downsample_bn.weight, 1)
272 |
273 | self.downsample = nn.Sequential(
274 | GCT(in_channels),
275 | Conv2d(
276 | in_channels, out_channels,
277 | kernel_size=1, stride=down_stride, bias=False
278 | ),
279 | downsample_bn,
280 | )
281 | for modules in [self.downsample, ]:
282 | for l in modules.modules():
283 | if isinstance(l, Conv2d):
284 | nn.init.kaiming_uniform_(l.weight, a=1)
285 |
286 | if dilation > 1:
287 | stride = 1 # reset to be 1
288 |
289 | # The original MSRA ResNet models have stride in the first 1x1 conv
290 | # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
291 | # stride in the 3x3 conv
292 | stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
293 |
294 | self.conv1 = nn.Sequential(
295 | GCT(in_channels),
296 | Conv2d(
297 | in_channels,
298 | bottleneck_channels,
299 | kernel_size=1,
300 | stride=stride_1x1,
301 | bias=False,
302 | ),)
303 | self.bn1 = norm_func(bottleneck_channels)
304 | # TODO: specify init for the above
305 |
306 | self.conv2 = nn.Sequential(
307 | GCT(bottleneck_channels),
308 | Conv2d(
309 | bottleneck_channels,
310 | bottleneck_channels,
311 | kernel_size=3,
312 | stride=stride_3x3,
313 | padding=dilation,
314 | bias=False,
315 | groups=num_groups,
316 | dilation=dilation
317 | ),)
318 | self.bn2 = norm_func(bottleneck_channels)
319 |
320 | self.conv3 = nn.Sequential(
321 | GCT(bottleneck_channels),
322 | Conv2d(
323 | bottleneck_channels, out_channels, kernel_size=1, bias=False
324 | ),)
325 | self.bn3 = norm_func(out_channels)
326 |
327 | for modules in [self.conv1, self.conv2, self.conv3, ]:
328 | for l in modules.modules():
329 | if isinstance(l, Conv2d):
330 | nn.init.kaiming_uniform_(l.weight, a=1)
331 | for l in [self.bn1, self.bn2, self.bn3]:
332 | torch.nn.init.constant_(l.weight, 1)
333 |
334 | def forward(self, x):
335 | identity = x
336 |
337 | out = self.conv1(x)
338 | out = self.bn1(out)
339 | out = F.relu_(out)
340 |
341 | out = self.conv2(out)
342 | out = self.bn2(out)
343 | out = F.relu_(out)
344 |
345 | out0 = self.conv3(out)
346 | out = self.bn3(out0)
347 |
348 | if self.downsample is not None:
349 | identity = self.downsample(x)
350 |
351 | out += identity
352 | out = F.relu_(out)
353 |
354 | return out
355 |
356 |
357 | class BaseStem(nn.Module):
358 | def __init__(self, cfg, norm_func):
359 | super(BaseStem, self).__init__()
360 |
361 | out_channels = 64 # cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
362 |
363 | self.conv1 = nn.Sequential(
364 | GCT(3),
365 | Conv2d(
366 | 3, out_channels, kernel_size=7, stride=2, padding=3, bias=False
367 | ),)
368 | self.bn1 = norm_func(out_channels)
369 | torch.nn.init.constant_(self.bn1.weight, 1)
370 |
371 | for modules in [self.conv1, ]:
372 | for l in modules.modules():
373 | if isinstance(l, Conv2d):
374 | nn.init.kaiming_uniform_(l.weight, a=1)
375 |
376 | def forward(self, x):
377 | x = self.conv1(x)
378 | x = self.bn1(x)
379 | x = F.relu_(x)
380 | x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
381 | return x
382 |
383 |
384 | class BottleneckWithFixedBatchNorm(Bottleneck):
385 | def __init__(
386 | self,
387 | in_channels,
388 | bottleneck_channels,
389 | out_channels,
390 | num_groups=1,
391 | stride_in_1x1=True,
392 | stride=1,
393 | dilation=1
394 | ):
395 | super(BottleneckWithFixedBatchNorm, self).__init__(
396 | in_channels=in_channels,
397 | bottleneck_channels=bottleneck_channels,
398 | out_channels=out_channels,
399 | num_groups=num_groups,
400 | stride_in_1x1=stride_in_1x1,
401 | stride=stride,
402 | dilation=dilation,
403 | norm_func=nn.BatchNorm2d
404 | )
405 |
406 |
407 | class StemWithFixedBatchNorm(BaseStem):
408 | def __init__(self, cfg):
409 | super(StemWithFixedBatchNorm, self).__init__(
410 | cfg, norm_func=nn.BatchNorm2d
411 | )
412 |
413 |
414 | class BottleneckWithGN(Bottleneck):
415 | def __init__(
416 | self,
417 | in_channels,
418 | bottleneck_channels,
419 | out_channels,
420 | num_groups=1,
421 | stride_in_1x1=True,
422 | stride=1,
423 | dilation=1
424 | ):
425 | super(BottleneckWithGN, self).__init__(
426 | in_channels=in_channels,
427 | bottleneck_channels=bottleneck_channels,
428 | out_channels=out_channels,
429 | num_groups=num_groups,
430 | stride_in_1x1=stride_in_1x1,
431 | stride=stride,
432 | dilation=dilation,
 433 |             norm_func=group_norm  # NOTE: group_norm is not defined in this file; supply it (e.g. functools.partial(nn.GroupNorm, 32)) before using the GN variants.
434 | )
435 |
436 |
437 | class StemWithGN(BaseStem):
438 | def __init__(self, cfg):
 439 |         super(StemWithGN, self).__init__(cfg, norm_func=group_norm)  # requires group_norm; see note above
440 |
441 |
442 | # _TRANSFORMATION_MODULES = Registry({
443 | # "BottleneckWithFixedBatchNorm": BottleneckWithFixedBatchNorm,
444 | # "BottleneckWithGN": BottleneckWithGN,
445 | # })
446 | #
447 | # _STEM_MODULES = Registry({
448 | # "StemWithFixedBatchNorm": StemWithFixedBatchNorm,
449 | # "StemWithGN": StemWithGN,
450 | # })
451 | #
452 | # _STAGE_SPECS = Registry({
453 | # "R-50-C4": ResNet50StagesTo4,
454 | # "R-50-C5": ResNet50StagesTo5,
455 | # "R-101-C4": ResNet101StagesTo4,
456 | # "R-101-C5": ResNet101StagesTo5,
457 | # "R-50-FPN": ResNet50FPNStagesTo5,
458 | # "R-101-FPN": ResNet101FPNStagesTo5,
459 | # })
460 |
--------------------------------------------------------------------------------
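
A small sketch of driving the backbone above as an ImageNet classifier. In this standalone version the `cfg` argument is accepted but never read, so passing `None` works:

```
# Sketch: build the GCT-ResNet-50 defined in resnet.py and run a forward pass.
import torch
from resnet import ResNet

model = ResNet(cfg=None)  # cfg is unused in this standalone version
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])
```
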
/PyTorch/run.sh:
--------------------------------------------------------------------------------
1 | python main.py --dist-url 'tcp://127.0.0.1:FREEPORT' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 /path/to/imagenet
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Gated Channel Transformation for Visual Recognition (GCT)
2 | The TensorFlow (1.10) and PyTorch implementation of Gated Channel Transformation for Visual Recognition (CVPR 2020) [[paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Yang_Gated_Channel_Transformation_for_Visual_Recognition_CVPR_2020_paper.pdf)].
3 |
  4 | ![overview](overview.png)
  5 |
  6 | The TensorFlow implementation supports the ResNet-50/101/152, VGG-16 and Inception-V3 backbones. For the PyTorch implementation, we give an example of GCT-ResNet-50.
7 |
8 | ## Apply GCT in Your Network
  9 | First, we propose to apply GCT before convolutional layers (2D Conv or 3D Conv). Conveniently, you can apply GCT before every Conv layer in your network, which is the default setting in our paper. If you want to save memory, you can reduce the number of GCT modules: in our experiments, applying only one GCT per ResBlock (before the first or last 1x1 Conv) in ResNet-50 drops ImageNet performance by only 0.1~0.2% compared to the full GCT setting.
10 |
 11 | Second, we propose not to apply weight decay on the gating bias (beta parameter) of GCT. In most situations, applying weight decay on the gating bias will decrease the performance.
12 |
13 | ## Citation
14 | ```
15 | @inproceedings{yang2020gated,
16 | title={Gated Channel Transformation for Visual Recognition},
17 | author={Yang, Zongxin and Zhu, Linchao and Wu, Yu and Yang, Yi},
18 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
19 | pages={11794--11803},
20 | year={2020}
21 | }
22 | ```
23 |
24 |
25 |
--------------------------------------------------------------------------------
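
To make the first recommendation above concrete, here is a minimal sketch of inserting GCT before a conv layer. `ConvBlock` is a hypothetical example module, not part of the repo; keeping only one GCT per block gives the memory-saving variant described above:

```
# Sketch: apply GCT immediately before a conv, per the README's recommendation.
# ConvBlock is a hypothetical example module, not part of this repo.
import torch
from torch import nn
from GCT import GCT  # the PyTorch module from PyTorch/GCT.py

class ConvBlock(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(ConvBlock, self).__init__()
        self.body = nn.Sequential(
            GCT(in_ch),  # gate the input channels before the conv
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.body(x)

block = ConvBlock(64, 128)
print(block(torch.randn(2, 64, 32, 32)).shape)  # torch.Size([2, 128, 32, 32])
```
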
/TensorFlow/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 z-x-yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/TensorFlow/README.md:
--------------------------------------------------------------------------------
1 | ## Gated Channel Transformation for Visual Recognition (GCT)
2 | The TensorFlow implementation of Gated Channel Transformation for Visual Recognition (CVPR 2020) [[paper](http://openaccess.thecvf.com/content_CVPR_2020/papers/Yang_Gated_Channel_Transformation_for_Visual_Recognition_CVPR_2020_paper.pdf)].
3 |
  4 | The code is based on [tf_cnn_benchmarks](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks) (TF 1.10), which can be obtained via:
5 | ```
6 | git clone https://github.com/tensorflow/benchmarks
7 | ```
8 | Our GCT can be readily applied to [TF 2.0](https://github.com/tensorflow/models/tree/master/official/vision/image_classification) as well, but we take `tf_cnn_benchmarks` as an example here.
  9 | We made two changes to `tf_cnn_benchmarks`.
10 |
11 |
 12 | 1. We applied [GCT](https://github.com/z-x-yang/GCT/blob/master/TensorFlow/convnet_builder.py#L123) before every convolutional layer; this TensorFlow version does so automatically. The definition of GCT can be found [here](https://github.com/z-x-yang/GCT/blob/master/TensorFlow/convnet_builder.py#L147-L211).
13 |
14 | 2. We added a new argument [weight_decay_on_beta](https://github.com/z-x-yang/GCT/blob/master/TensorFlow/benchmark_cnn.py#L271-L272). When `weight_decay_on_beta` is `True`, weight decay (WD) is applied on the gating bias of GCT.
15 |
16 | ## Getting Started
 17 | First, please install TensorFlow 1.10. Then, follow the [instructions](https://github.com/tensorflow/models/tree/master/research/inception#getting-started) in [tf_cnn_benchmarks](https://github.com/awslabs/deeplearning-benchmark/tree/master/tensorflow_benchmark/tf_cnn_benchmarks) to prepare the ImageNet data in TFRecord format.
18 |
19 | To train GCT-ResNet50, run
20 | ```
21 | bash train_gct_resnet50.sh
22 | ```
23 |
24 | To evaluate the last checkpoint, run
25 | ```
26 | bash eval_gct_resnet50.sh
27 | ```
28 |
29 | To evaluate all the checkpoints, run
30 | ```
31 | python eval_all_ckpt.py
32 | ```
 33 | After running the script, the top-1 accuracy should be about 77.6%. If you remove the WD on the gating bias of GCT, the performance should be about 77.3%, matching the result in our paper. Without GCT, the top-1 accuracy of ResNet50 should be around 76.2%.
34 |
 35 | To avoid applying weight decay (WD) on the gating bias of GCT, which is the default setting in our paper, you can set
36 | ```
37 | --weight_decay_on_beta=False
38 | ```
 39 | in the training script. In this version, we apply WD on the gating bias by default, which we found works better on some backbones (such as ResNet-50).
40 |
 41 | To train other backbones, such as ResNet101 or Inception, you can change the model name in the example scripts above. All the names of available backbones can be found [here](https://github.com/z-x-yang/GCT/blob/59bba462bb2b9dd14425333625a2e59d6a5eb57d/models/model_config.py#L33).
42 |
43 | ## Performance
44 | The accuracy (top-1/top-5 %) should be close to the results below when using 4 GPUs.
45 |
46 | | Backbone | Original | +GCT (no WD on beta) | +GCT (WD on beta) |
47 | | --------- | -------- | ------------------- | ------------------- |
48 | | VGG-16 | 73.8/91.7 | 74.9/**92.5** | **75.0**/92.4 |
49 | | Inception-V3 | 75.7/92.7 | **76.3**/**92.9** | 76.2/**92.9** |
 50 | | ResNet-50 | 76.2/93.0 | 77.3/**93.7** | **77.7**/93.6 |
51 | | ResNet-101 | 77.8/93.8 | **78.6**/94.1 | 78.5/**94.3** |
52 | | ResNet-152 | 78.4/94.1 | **79.2**/**94.5** | 79.0/94.4 |
53 |
54 |
55 | ## Citation
56 | ```
57 | @inproceedings{yang2020gated,
58 | title={Gated Channel Transformation for Visual Recognition},
59 | author={Yang, Zongxin and Zhu, Linchao and Wu, Yu and Yang, Yi},
60 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
61 | pages={11794--11803},
62 | year={2020}
63 | }
64 | ```
65 |
66 |
67 |
--------------------------------------------------------------------------------
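
For reference, a hedged sketch of what an l2-mode GCT op looks like in TensorFlow 1.x, mirroring the PyTorch `GCT.py` above. This is an illustration only; the repo's actual definition lives in `convnet_builder.py` (linked in the README above) and may differ in details:

```
# Sketch: l2-mode GCT for an NHWC feature map in TensorFlow 1.x.
# Illustration only; the repo's definition is in convnet_builder.py.
import tensorflow as tf

def gct(x, epsilon=1e-5, name='gct'):
    num_channels = x.get_shape().as_list()[-1]
    with tf.variable_scope(name):
        alpha = tf.get_variable('alpha', [1, 1, 1, num_channels],
                                initializer=tf.ones_initializer())
        gamma = tf.get_variable('gamma', [1, 1, 1, num_channels],
                                initializer=tf.zeros_initializer())
        beta = tf.get_variable('beta', [1, 1, 1, num_channels],
                               initializer=tf.zeros_initializer())
        # Global context embedding: per-channel l2 norm over spatial dims.
        embedding = alpha * tf.sqrt(
            tf.reduce_sum(tf.square(x), axis=[1, 2], keepdims=True) + epsilon)
        # Channel normalization across the channel axis.
        norm = gamma / tf.sqrt(
            tf.reduce_mean(tf.square(embedding), axis=3, keepdims=True) + epsilon)
        # Gating: identity at init, since gamma = beta = 0.
        return x * (1.0 + tf.tanh(embedding * norm + beta))
```
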
/TensorFlow/all_reduce_benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Benchmarks the all-reduce algorithms of tf_cnn_benchmarks.
16 |
17 | tf_cnn_benchmarks uses all-reduce to aggregate gradients. This benchmark is
18 | useful for benchmarking the performance of just this gradient aggregation,
19 | instead of the entire model. All the flags that tf_cnn_benchmarks accepts are
20 | also accepted by this script, although many are silently ignored.
21 |
22 | The number and shapes of the tensors all-reduced are those of the variables of
23 | the model specified by the --model flag.
24 | TODO(reedwm): Allow custom sizes to be specified.
25 | """
26 |
27 | from __future__ import absolute_import
28 | from __future__ import division
29 | from __future__ import print_function
30 |
31 |
32 | import os
33 | import time
34 |
35 | from absl import app
36 | from absl import flags as absl_flags
37 | import tensorflow as tf
38 |
39 | from tensorflow.python.ops import control_flow_ops
40 | import benchmark_cnn
41 | import cnn_util
42 | import flags
43 | from cnn_util import log_fn
44 |
45 |
46 | absl_flags.DEFINE_integer('iters_per_step', 5,
47 | 'Number of iterations to run all-reduce for, per '
48 | 'step. Every step, a session will be run on a Graph '
49 | 'that contains this many copies of the all-reduce. '
50 | 'The copies are run sequentially. Setting this above '
51 | '1 is useful to lower the overhead of starting the '
52 | 'session run, running the VariableV2 ops at the '
53 | 'start of the step, etc.')
54 |
55 |
56 | flags.define_flags()
57 | for name in flags.param_specs.keys():
58 | absl_flags.declare_key_flag(name)
59 |
60 |
61 | def get_var_shapes(model):
62 | """Returns the list of variable shapes for a tf_cnn_benchmarks Model."""
63 | with tf.Graph().as_default():
64 | image_size = model.get_image_size()
65 | # The batch size of 2 is arbitrary, as the variable shapes do not depend on
66 | # the batch size.
67 | images = tf.placeholder(tf.float32, (2, image_size, image_size, 3))
68 | model.build_network(images)
69 | return [[int(d) for d in v.shape.dims] for v in tf.trainable_variables()]
70 |
71 |
72 | def all_reduce(all_device_tensors, variable_mgr):
73 | """Performs a single batch all-reduce.
74 |
75 | Args:
76 | all_device_tensors: List of lists of tensors. all_device_tensors[t][i] is
77 | a tensor, where t is the tower the tensor is on and i is the index of
78 | the tensor.
79 | variable_mgr: The VariableMgr to perform the all-reduce.
80 | Returns:
  81 |     List of lists of tensors, in the same form as `all_device_tensors`,
  82 |     except the tensors are aggregated across towers.
83 | """
84 | tower_grads = [[(g, None) for g in device_tensors] for
85 | device_tensors in all_device_tensors]
86 | _, aggregated_tower_grads = variable_mgr.preprocess_device_grads(tower_grads)
87 | return [
88 | [g for g, _ in agg_device_tensors]
89 | for agg_device_tensors in aggregated_tower_grads]
90 |
91 |
92 | def build_all_reduce_iterations(all_device_tensors, tower_devices, variable_mgr,
93 | num_iters):
94 | """Builds the all-reduce ops for multiple iterations to aggregate tensors.
95 |
96 | The tensors in `all_device_tensors` are aggregated `num_iters` times. Each
97 | iteration aggregates the results from the previous iteration. The iterations
98 | are run sequentially, so the aggregations for an iteration do not start
99 | running until the previous iteration has completed. Each iteration after the
100 | first is aggregating already-aggregated values, but it does not matter because
101 | we are only aggregating for benchmarking purposes.
102 |
103 | Args:
104 | all_device_tensors: List of lists of tensors. all_device_tensors[t][i] is
105 | a tensor, where t is the tower the tensor is on and i is the index of
106 | the tensor.
107 | tower_devices: A list of device strings. tower_devices[t] is the device
108 | of the tensors in all_device_tensors[t].
109 | variable_mgr: The VariableMgr to perform the all-reduce.
110 | num_iters: Number of iterations to aggregate tensors for.
111 | Returns:
112 | An op that when run, causes the all-reduce ops to run.
113 | """
114 | for i in range(num_iters):
115 | with tf.name_scope('iteration_%d' % i):
116 | # Step 1: Do the aggregation.
117 | with tf.name_scope('tensor_aggregation'):
118 | all_device_tensors = all_reduce(all_device_tensors, variable_mgr)
119 |
120 | # Step 2. Create identity ops, to bring the aggregated results back to
121 | # each device.
122 | new_all_device_tensors = []
123 | for device, device_tensors in zip(tower_devices, all_device_tensors):
124 | with tf.device(device):
125 | new_all_device_tensors.append([
126 | tf.identity(t, name='identity_after_allreduce')
127 | for t in device_tensors
128 | ])
129 | all_device_tensors = new_all_device_tensors
130 |
131 | # Step 3. Add control dependencies to delay the next iteration until this
132 | # iteration is complete. To avoid extra overhead, we do not have any
133 | # cross-device control dependencies, which means it's possible for two
134 | # iterations to slightly overlap.
135 | new_all_device_tensors = []
136 | for device_tensors in all_device_tensors:
137 | new_all_device_tensors.append([
138 | control_flow_ops.with_dependencies(
139 | device_tensors, t, name='identity_after_dependencies')
140 | for t in device_tensors
141 | ])
142 | all_device_tensors = new_all_device_tensors
143 |
144 | # To prevent the dependency optimizer from removing every op we created,
145 | # we store the results in variables.
146 | ops_to_run = []
147 | for device, device_tensors in zip(tower_devices, all_device_tensors):
148 | with tf.device(device):
149 | for t in device_tensors:
150 | # The placeholder initial value is never run.
151 | var = tf.Variable(tf.placeholder(tf.float32, t.shape), collections=[])
152 | ops_to_run.append(var.assign(t))
153 | return tf.group(*ops_to_run)
154 |
155 |
156 | def build_graph(tower_devices, tensor_shapes, variable_mgr, num_iters):
157 | """Builds the graph for the benchmark.
158 |
159 | Args:
160 | tower_devices: A list of device strings of the devices to run the all-reduce
161 | benchmark on.
162 | tensor_shapes: A list of shapes of the tensors that will be aggregated for
163 | the all-reduce.
164 | variable_mgr: The VariableMgr to perform the all-reduce.
165 | num_iters: Number of iterations to aggregate tensors for.
166 | Returns:
167 | An op that runs the benchmark.
168 | """
169 | all_device_tensors = []
170 | for i, tower_device in enumerate(tower_devices):
171 | with tf.device(tower_device):
172 | device_tensors = []
173 | for j, shape in enumerate(tensor_shapes):
174 | tensor = tf.Variable(tf.random_normal(shape, dtype=tf.float32),
175 | name='tensor_%d_on_device_%d' % (j, i))
176 | device_tensors.append(tensor)
177 | all_device_tensors.append(device_tensors)
178 |
179 | log_fn('Building all-reduce ops')
180 | benchmark_op = build_all_reduce_iterations(all_device_tensors, tower_devices,
181 | variable_mgr, num_iters)
182 | log_fn('Done building all-reduce ops')
183 | return benchmark_op
184 |
185 |
186 | def run_graph(benchmark_op, bench_cnn, init_ops, dummy_loss_op):
187 | """Runs the graph for the benchmark.
188 |
189 | Args:
190 | benchmark_op: An op that runs the benchmark.
191 | bench_cnn: The BenchmarkCNN where params and other attributes are obtained.
192 | init_ops: A list of ops that are run before `benchmark_op` for
193 | initialization.
194 | dummy_loss_op: Any op. We must pass a loss op to
195 | `benchmark_cnn.benchmark_one_step`, but the result of the op is never
196 | actually used.
197 | """
198 | config = benchmark_cnn.create_config_proto(bench_cnn.params)
199 | with tf.Session(config=config) as sess:
200 | for op in init_ops:
201 | sess.run(op)
202 | step_train_times = []
203 | fetches = {'average_loss': dummy_loss_op, 'benchmark_op': benchmark_op}
204 | log_fn('Running warmup')
205 | for i in range(-bench_cnn.num_warmup_batches, bench_cnn.num_batches):
206 | if i == 0:
207 | log_fn('Running all-reduce ops')
208 | start = time.time()
209 | if i > 0 and i % bench_cnn.params.display_every == 0:
210 | log_fn('Iteration: %d. Average time per step so far: %s' %
211 | (i, (time.time() - start) / i))
212 | # Call benchmark_one_step instead of directly calling sess.run(...), to
213 | # potentially get a trace file, partitioned graphs, etc.
214 | benchmark_cnn.benchmark_one_step(
215 | sess=sess,
216 | fetches=fetches,
217 | step=i,
218 | # The batch size is only used for the images/sec calculation, which is
219 | # not actually calculated because we pass show_images_per_sec=False.
220 | batch_size=None,
221 | step_train_times=step_train_times,
222 | trace_filename=bench_cnn.trace_filename,
223 | partitioned_graph_file_prefix=(
224 | bench_cnn.params.partitioned_graph_file_prefix),
225 | profiler=None,
226 | image_producer=None,
227 | params=bench_cnn.params,
228 | show_images_per_sec=False)
229 | log_fn('Average time per step: %s' %
230 | ((time.time() - start) / bench_cnn.num_batches))
231 |
232 |
233 | def run_benchmark(bench_cnn, num_iters):
234 | """Runs the all-reduce benchmark.
235 |
236 | Args:
237 | bench_cnn: The BenchmarkCNN where params, the variable manager, and other
238 | attributes are obtained.
 239 |     num_iters: Number of iterations to do all-reduce for.
240 |
241 | Raises:
242 | ValueError: Invalid params of bench_cnn.
243 | """
244 | if bench_cnn.params.variable_update != 'replicated':
 245 |     raise ValueError('--variable_update=replicated must be specified to use '
 246 |                      'the all-reduce benchmark')
247 | if bench_cnn.params.variable_consistency == 'relaxed':
248 | raise ValueError('--variable_consistency=relaxed is not supported')
249 |
250 | benchmark_op = build_graph(bench_cnn.raw_devices,
251 | get_var_shapes(bench_cnn.model),
252 | bench_cnn.variable_mgr, num_iters)
253 | init_ops = [
254 | tf.global_variables_initializer(),
255 | bench_cnn.variable_mgr.get_post_init_ops()
256 | ]
257 | loss_op = tf.no_op()
258 |
259 | if bench_cnn.graph_file:
260 | path, filename = os.path.split(bench_cnn.graph_file)
261 | as_text = filename.endswith('txt')
262 | log_fn('Writing GraphDef as %s to %s' % (
263 | 'text' if as_text else 'binary', bench_cnn.graph_file))
264 | tf.train.write_graph(tf.get_default_graph().as_graph_def(add_shapes=True),
265 | path, filename, as_text)
266 |
267 | run_graph(benchmark_op, bench_cnn, init_ops, loss_op)
268 |
269 |
270 | # TODO(reedwm): Reduce redundancy with tf_cnn_benchmarks
271 | def main(positional_arguments):
272 | # Command-line arguments like '--distortions False' are equivalent to
273 | # '--distortions=True False', where False is a positional argument. To prevent
274 | # this from silently running with distortions, we do not allow positional
275 | # arguments.
276 | assert len(positional_arguments) >= 1
277 | if len(positional_arguments) > 1:
278 | raise ValueError('Received unknown positional arguments: %s'
279 | % positional_arguments[1:])
280 |
281 | params = benchmark_cnn.make_params_from_flags()
282 | params = benchmark_cnn.setup(params)
283 | bench = benchmark_cnn.BenchmarkCNN(params)
284 |
285 | tfversion = cnn_util.tensorflow_version_tuple()
286 | log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
287 |
288 | run_benchmark(bench, absl_flags.FLAGS.iters_per_step)
289 |
290 | if __name__ == '__main__':
291 | app.run(main) # Raises error on invalid flags, unlike tf.app.run()
292 |
--------------------------------------------------------------------------------
/TensorFlow/all_reduce_benchmark_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for all_reduce_benchmark.py."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | import all_reduce_benchmark
24 | import benchmark_cnn
25 | import test_util
26 |
27 |
28 | class AllReduceBenchmarkTest(tf.test.TestCase):
29 | """Tests the all-reduce benchmark."""
30 |
31 | def _test_run_benchmark(self, params):
32 | """Tests that run_benchmark() runs successfully with the params."""
33 | logs = []
34 | with test_util.monkey_patch(all_reduce_benchmark,
35 | log_fn=test_util.print_and_add_to_list(logs)):
36 | bench_cnn = benchmark_cnn.BenchmarkCNN(params)
37 | all_reduce_benchmark.run_benchmark(bench_cnn, num_iters=5)
38 | self.assertRegexpMatches(logs[-1], '^Average time per step: [0-9.]+$')
39 |
40 | def test_run_benchmark(self):
41 | """Tests that run_benchmark() runs successfully."""
42 | params = benchmark_cnn.make_params(num_batches=10,
43 | variable_update='replicated',
44 | num_gpus=2)
45 | self._test_run_benchmark(params)
46 | params = params._replace(hierarchical_copy=True, gradient_repacking=8,
47 | num_gpus=8)
48 | self._test_run_benchmark(params)
49 |
50 | if __name__ == '__main__':
51 | tf.test.main()
52 |
--------------------------------------------------------------------------------
/TensorFlow/benchmark_cnn_distributed_test_runner.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Used to run benchmark_cnn for distributed tests.
17 |
18 | In distributed tests, we spawn processes to run tf_cnn_benchmark tasks. We could
19 | directly spawn tf_cnn_benchmark processes, but we want some added functionality,
20 | such as being able to inject custom images during training. So instead, this
21 | file is spawned as a Python process, which supports the added functionality.
22 | """
23 |
24 | from absl import flags as absl_flags
25 | import numpy as np
26 | import tensorflow as tf
27 | import benchmark_cnn
28 | import flags
29 | import preprocessing
30 | import test_util
31 |
32 |
33 | absl_flags.DEFINE_string('fake_input', 'none',
34 | """What fake input to inject into benchmark_cnn. This
35 | is ignored if --model=test_model.
36 | Options are:
37 | none: Do not inject any fake input.
38 | zeros_and_ones: Half the images will be all 0s with
39 | a label of 0. Half the images will be all 1s with a
40 | label of 1.""")
41 |
42 | flags.define_flags()
43 | FLAGS = flags.FLAGS
44 |
45 |
46 | def get_test_image_preprocessor(batch_size, params):
47 | """Returns the preprocessing.TestImagePreprocessor that should be injected.
48 |
49 | Returns None if no preprocessor should be injected.
50 |
51 | Args:
52 | batch_size: The batch size across all GPUs.
53 | params: BenchmarkCNN's parameters.
54 | Returns:
55 | Returns the preprocessing.TestImagePreprocessor that should be injected.
56 | Raises:
57 | ValueError: Flag --fake_input is an invalid value.
58 | """
59 | if FLAGS.fake_input == 'none':
60 | return None
61 | elif FLAGS.fake_input == 'zeros_and_ones':
62 | half_batch_size = batch_size // 2
63 | images = np.zeros((batch_size, 227, 227, 3), dtype=np.float32)
64 | images[half_batch_size:, :, :, :] = 1
65 | labels = np.array([0] * half_batch_size + [1] * half_batch_size,
66 | dtype=np.int32)
67 | preprocessor = preprocessing.TestImagePreprocessor(
68 | 227, 227, batch_size, params.num_gpus,
69 | benchmark_cnn.get_data_type(params))
70 | preprocessor.set_fake_data(images, labels)
71 | preprocessor.expected_subset = 'validation' if params.eval else 'train'
72 | return preprocessor
73 | else:
74 | raise ValueError('Invalid --fake_input: %s' % FLAGS.fake_input)
75 |
76 |
77 | def run_with_real_model(params):
78 | """Runs tf_cnn_benchmarks with a real model."""
79 | bench = benchmark_cnn.BenchmarkCNN(params)
80 | bench.print_info()
81 | preprocessor = get_test_image_preprocessor(bench.batch_size, params)
82 | if preprocessor is not None:
83 | # The test image preprocessor requires queue runners. Since this file is
84 | # used for testing, it is OK to access protected members.
85 | # pylint: disable=protected-access
86 | bench.dataset._queue_runner_required = True
87 | # pylint: enable=protected-access
88 | bench.image_preprocessor = preprocessor
89 | bench.run()
90 |
91 |
92 | def run_with_test_model(params):
93 | """Runs tf_cnn_benchmarks with a test model."""
94 | model = test_util.TestCNNModel()
95 | inputs = test_util.get_fake_var_update_inputs()
96 | with test_util.monkey_patch(benchmark_cnn,
97 | LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
98 | bench = benchmark_cnn.BenchmarkCNN(params, dataset=test_util.TestDataSet(),
99 | model=model)
100 | # The test model does not use labels when computing loss, so the label
101 | # values do not matter as long as it's the right shape.
102 | labels = np.array([1] * inputs.shape[0])
103 | bench.image_preprocessor.set_fake_data(inputs, labels)
104 | bench.run()
105 |
106 |
107 | def main(_):
108 | params = benchmark_cnn.make_params_from_flags()
109 | params = benchmark_cnn.setup(params)
110 | if params.model == 'test_model':
111 | run_with_test_model(params)
112 | else:
113 | run_with_real_model(params)
114 |
115 |
116 | if __name__ == '__main__':
117 | tf.app.run()
118 |
--------------------------------------------------------------------------------
/TensorFlow/cnn_util.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utilities for CNN benchmarks."""
17 | from __future__ import print_function
18 |
19 | import sys
20 | import threading
21 |
22 | import numpy as np
23 | import tensorflow as tf
24 |
25 |
26 | def tensorflow_version_tuple():
27 | v = tf.__version__
28 | major, minor, patch = v.split('.')
29 | return (int(major), int(minor), patch)
30 |
31 |
32 | def tensorflow_version():
33 | vt = tensorflow_version_tuple()
34 | return vt[0] * 1000 + vt[1]
35 |
36 |
37 | def log_fn(log):
38 | print(log)
39 |
40 |
41 | def roll_numpy_batches(array, batch_size, shift_ratio):
42 | """Moves a proportion of batches from start to the end of the array.
43 |
44 | This function moves a proportion of batches, specified by `shift_ratio`, from
45 | the starts of the array to the end. The number of batches moved is rounded
46 | down to the nearest integer. For example,
47 |
48 | ```
49 | roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
50 | ```
51 |
52 | Args:
53 | array: A Numpy array whose first dimension is the batch dimension.
54 | batch_size: The batch size.
55 | shift_ratio: Proportion of batches to move from the start of the array to
56 | the end of the array.
57 | Returns:
58 | A new Numpy array, with a proportion of the batches at the start of `array`
59 | moved to the end.
60 | """
61 | num_items = array.shape[0]
62 | assert num_items % batch_size == 0
63 | num_batches = num_items // batch_size
64 | starting_batch = int(num_batches * shift_ratio)
65 | starting_item = starting_batch * batch_size
66 | return np.roll(array, -starting_item, axis=0)
67 |
68 |
69 | # For Python 2.7 compatibility, we do not use threading.Barrier.
70 | class Barrier(object):
71 | """Implements a lightweight Barrier.
72 |
73 | Useful for synchronizing a fixed number of threads at known synchronization
74 | points. Threads block on 'wait()' and simultaneously return once they have
75 | all made that call.
76 |
77 | # Implementation adopted from boost/thread/barrier.hpp
78 | """
79 |
80 | def __init__(self, parties):
81 | """Create a barrier, initialised to 'parties' threads."""
82 | self.cond = threading.Condition(threading.Lock())
83 | self.parties = parties
84 | # Indicates the number of waiting parties.
85 | self.waiting = 0
86 | # generation is needed to deal with spurious wakeups. If self.cond.wait()
  87 |     # wakes up for other reasons, generation will force it to go back to wait().
88 | self.generation = 0
89 | self.broken = False
90 |
91 | def wait(self):
92 | """Wait for the barrier."""
93 | with self.cond:
94 | # Check if the barrier has been disabled or not.
95 | if self.broken:
96 | return
97 | gen = self.generation
98 | self.waiting += 1
99 | if self.waiting == self.parties:
100 | self.waiting = 0
101 | self.generation += 1
102 | self.cond.notify_all()
103 | # loop because of spurious wakeups
104 | while gen == self.generation:
105 | self.cond.wait()
106 |
107 | # TODO(huangyp): Remove this method once we find a way to know which step
108 | # is the last barrier.
109 | def abort(self):
110 | """Clear existing barrier and disable this barrier."""
111 | with self.cond:
112 | if self.waiting > 0:
113 | self.generation += 1
114 | self.cond.notify_all()
115 | self.broken = True
116 |
117 |
118 | class ImageProducer(object):
119 | """An image producer that puts images into a staging area periodically.
120 |
121 | This class is useful for periodically running a set of ops, `put_ops` on a
122 | different thread every `batch_group_size` steps.
123 |
124 | The notify_image_consumption() method is used to increment an internal counter
125 | so that every `batch_group_size` times it is called, `put_ops` is executed. A
126 | barrier is placed so that notify_image_consumption() will block until
127 | the previous call to `put_ops` has been executed.
128 |
129 | The start() method is used to start the thread that runs `put_ops`.
130 |
131 | The done() method waits until the last put_ops is executed and stops the
132 | thread.
133 |
134 | The purpose of this class is to fill an image input pipeline every
135 | `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size` images
136 | to the input pipeline when run, and that every step, 1 batch of images is
137 | consumed. Then, by calling notify_image_consumption() every step, images are
138 | supplied to the input pipeline at the same amount they are consumed.
139 |
140 | Example usage:
141 | ```
142 | put_ops = ... # Enqueues `batch_group_size` batches to a StagingArea
143 | get_op = ... # Dequeues 1 batch, and does some operations on it
144 | batch_group_size = 4
145 | with tf.Session() as sess:
146 | image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size)
147 | image_producer.start()
148 | for _ in range(100):
149 | sess.run(get_op)
150 | image_producer.notify_image_consumption()
151 | ```
152 | """
153 |
154 | def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
155 | self.sess = sess
156 | self.num_gets = 0
157 | self.put_ops = put_ops
158 | self.batch_group_size = batch_group_size
159 | self.done_event = threading.Event()
160 | if (use_python32_barrier and
161 | sys.version_info[0] == 3 and sys.version_info[1] >= 2):
162 | self.put_barrier = threading.Barrier(2)
163 | else:
164 | self.put_barrier = Barrier(2)
165 |
166 | def _should_put(self):
167 | return (self.num_gets + 1) % self.batch_group_size == 0
168 |
169 | def done(self):
170 | """Stop the image producer."""
171 | self.done_event.set()
172 | self.put_barrier.abort()
173 | self.thread.join()
174 |
175 | def start(self):
176 | """Start the image producer."""
177 | self.sess.run([self.put_ops])
178 | self.thread = threading.Thread(target=self._loop_producer)
179 | # Set daemon to true to allow Ctrl + C to terminate all threads.
180 | self.thread.daemon = True
181 | self.thread.start()
182 |
183 | def notify_image_consumption(self):
184 | """Increment the counter of image_producer by 1.
185 |
186 | This should only be called by the main thread that consumes images and runs
187 | the model computation. One batch of images should be consumed between
188 | calling start() and the first call to this method. Then, one batch of images
189 | should be consumed between any two successive calls to this method.
190 | """
191 | if self._should_put():
192 | self.put_barrier.wait()
193 | self.num_gets += 1
194 |
195 | def _loop_producer(self):
196 | while not self.done_event.isSet():
197 | self.sess.run([self.put_ops])
198 | self.put_barrier.wait()
199 |
200 |
201 | class BaseClusterManager(object):
202 | """The manager for the cluster of servers running the benchmark."""
203 |
204 | def __init__(self, params):
205 | worker_hosts = params.worker_hosts.split(',')
206 | ps_hosts = params.ps_hosts.split(',') if params.ps_hosts else []
207 | cluster = {'worker': worker_hosts}
208 | if ps_hosts:
209 | cluster['ps'] = ps_hosts
210 | self._cluster_spec = tf.train.ClusterSpec(cluster)
211 |
212 | def get_target(self):
213 | """Returns a target to be passed to tf.Session()."""
214 | raise NotImplementedError('get_target must be implemented by subclass')
215 |
216 | def join_server(self):
217 | raise NotImplementedError('join must be implemented by subclass')
218 |
219 | def get_cluster_spec(self):
220 | return self._cluster_spec
221 |
222 | def num_workers(self):
223 | return len(self._cluster_spec.job_tasks('worker'))
224 |
225 | def num_ps(self):
226 | if 'ps' in self._cluster_spec.jobs:
227 | return len(self._cluster_spec.job_tasks('ps'))
228 | else:
229 | return 0
230 |
231 |
232 | class GrpcClusterManager(BaseClusterManager):
233 | """A cluster manager for a cluster networked with gRPC."""
234 |
235 | def __init__(self, params, config_proto):
236 | super(GrpcClusterManager, self).__init__(params)
237 | if params.job_name == 'controller':
238 | self._target = 'grpc://%s' % self._cluster_spec.job_tasks('worker')[0]
239 | else:
240 | self._server = tf.train.Server(self._cluster_spec,
241 | job_name=params.job_name,
242 | task_index=params.task_index,
243 | config=config_proto,
244 | protocol=params.server_protocol)
245 | self._target = self._server.target
246 |
247 | def get_target(self):
248 | return self._target
249 |
250 | def join_server(self):
251 | return self._server.join()
252 |
--------------------------------------------------------------------------------
/TensorFlow/cnn_util_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for tf_cnn_benchmarks.cnn_util."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import threading
23 | import time
24 |
25 | import tensorflow as tf
26 |
27 | import cnn_util
28 |
29 |
30 | class CnnUtilBarrierTest(tf.test.TestCase):
31 |
32 | def testBarrier(self):
33 | num_tasks = 20
34 | num_waits = 4
35 | barrier = cnn_util.Barrier(num_tasks)
36 | threads = []
37 | sync_matrix = []
38 | for i in range(num_tasks):
39 | sync_times = [0] * num_waits
40 | thread = threading.Thread(
41 | target=self._run_task, args=(barrier, sync_times))
42 | thread.start()
43 | threads.append(thread)
44 | sync_matrix.append(sync_times)
45 | for thread in threads:
46 | thread.join()
47 | for wait_index in range(num_waits - 1):
48 | # Max of times at iteration i <= min of times at iteration i + 1
49 | self.assertLessEqual(
50 | max([sync_matrix[i][wait_index] for i in range(num_tasks)]),
51 | min([sync_matrix[i][wait_index + 1] for i in range(num_tasks)]))
52 |
53 | def _run_task(self, barrier, sync_times):
54 | for wait_index in range(len(sync_times)):
55 | sync_times[wait_index] = time.time()
56 | barrier.wait()
57 |
58 | def testBarrierAbort(self):
59 | num_tasks = 2
60 | num_waits = 1
61 | sync_times = [0] * num_waits
62 | barrier = cnn_util.Barrier(num_tasks)
63 | thread = threading.Thread(
64 | target=self._run_task, args=(barrier, sync_times))
65 | thread.start()
66 | barrier.abort()
67 | # thread won't be blocked by done barrier.
68 | thread.join()
69 |
70 |
71 | class ImageProducerTest(tf.test.TestCase):
72 |
73 | def _slow_tensorflow_op(self):
74 | """Returns a TensorFlow op that takes approximately 0.1s to complete."""
75 | def slow_func(v):
76 | time.sleep(0.1)
77 | return v
78 | return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op
79 |
80 | def _test_image_producer(self, batch_group_size, put_slower_than_get):
81 | # We use the variable x to simulate a staging area of images. x represents
82 | # the number of batches in the staging area.
83 | x = tf.Variable(0, dtype=tf.int32)
84 | if put_slower_than_get:
85 | put_dep = self._slow_tensorflow_op()
86 | get_dep = tf.no_op()
87 | else:
88 | put_dep = tf.no_op()
89 | get_dep = self._slow_tensorflow_op()
90 | with tf.control_dependencies([put_dep]):
91 | put_op = x.assign_add(batch_group_size, use_locking=True)
92 | with tf.control_dependencies([get_dep]):
93 | get_op = x.assign_sub(1, use_locking=True)
94 | with self.test_session() as sess:
95 | sess.run(tf.variables_initializer([x]))
96 | image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
97 | use_python32_barrier=False)
98 | image_producer.start()
99 | for _ in range(5 * batch_group_size):
100 | sess.run(get_op)
101 | # We assert x is nonnegative, to ensure image_producer never causes
102 | # an unstage op to block. We assert x is at most 2 * batch_group_size,
103 | # to ensure it doesn't use too much memory by storing too many batches
104 | # in the staging area.
105 | self.assertGreaterEqual(sess.run(x), 0)
106 | self.assertLessEqual(sess.run(x), 2 * batch_group_size)
107 | image_producer.notify_image_consumption()
108 | self.assertGreaterEqual(sess.run(x), 0)
109 | self.assertLessEqual(sess.run(x), 2 * batch_group_size)
110 |
111 | image_producer.done()
112 | time.sleep(0.1)
113 | self.assertGreaterEqual(sess.run(x), 0)
114 | self.assertLessEqual(sess.run(x), 2 * batch_group_size)
115 |
116 | def test_image_producer(self):
117 | self._test_image_producer(1, False)
118 | self._test_image_producer(1, True)
119 | self._test_image_producer(2, False)
120 | self._test_image_producer(2, True)
121 | self._test_image_producer(3, False)
122 | self._test_image_producer(3, True)
123 | self._test_image_producer(8, False)
124 | self._test_image_producer(8, True)
125 |
126 |
127 | if __name__ == '__main__':
128 | tf.test.main()
129 |
--------------------------------------------------------------------------------
/TensorFlow/constants.py:
--------------------------------------------------------------------------------
1 | """Constants used in tf_cnn_benchmarks."""
2 |
3 | from enum import Enum
4 |
5 |
6 | class NetworkTopology(str, Enum):
7 | """Network topology describes how multiple GPUs are inter-connected.
8 | """
9 | # DGX-1 uses hybrid cube mesh topology with the following device peer to peer
10 | # matrix:
11 | # DMA: 0 1 2 3 4 5 6 7
12 | # 0: Y Y Y Y Y N N N
13 | # 1: Y Y Y Y N Y N N
14 | # 2: Y Y Y Y N N Y N
15 | # 3: Y Y Y Y N N N Y
16 | # 4: Y N N N Y Y Y Y
17 | # 5: N Y N N Y Y Y Y
18 | # 6: N N Y N Y Y Y Y
19 | # 7: N N N Y Y Y Y Y
20 | DGX1 = "dgx1"
21 |
22 | # V100 in GCP are connected with the following device peer to peer matrix.
23 | # In this topology, bandwidth of the connection depends on if it uses NVLink
24 | # or PCIe link.
25 | # DMA: 0 1 2 3 4 5 6 7
26 | # 0: Y Y Y Y N Y N N
27 | # 1: Y Y Y Y N N N N
28 | # 2: Y Y Y Y N N N Y
29 | # 3: Y Y Y Y N N N N
30 | # 4: N N N N Y Y Y Y
31 | # 5: Y N N N Y Y Y Y
32 | # 6: N N N N Y Y Y Y
33 | # 7: N N Y N Y Y Y Y
34 | GCP_V100 = "gcp_v100"
35 |
--------------------------------------------------------------------------------
/TensorFlow/data_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """tf.data utility methods.
16 |
17 | Collection of utility methods that make CNN benchmark code use tf.data easier.
18 | """
19 | import tensorflow as tf
20 |
21 | from tensorflow.contrib.data.python.ops import batching
22 | from tensorflow.contrib.data.python.ops import interleave_ops
23 | from tensorflow.contrib.data.python.ops import prefetching_ops
24 | from tensorflow.contrib.data.python.ops import threadpool
25 | from tensorflow.python.framework import function
26 | from tensorflow.python.platform import gfile
27 |
28 |
29 | def build_prefetch_image_processing(height, width, batch_size, num_splits,
30 | preprocess_fn, cpu_device, params,
31 | gpu_devices, data_type, dataset):
32 | """"Returns FunctionBufferingResources that do image pre(processing)."""
33 | with tf.device(cpu_device):
34 | if params.eval:
35 | subset = 'validation'
36 | else:
37 | subset = 'train'
38 |
39 | function_buffering_resources = []
40 | remote_fn, args = minibatch_fn(
41 | height=height,
42 | width=width,
43 | batch_size=batch_size,
44 | num_splits=num_splits,
45 | preprocess_fn=preprocess_fn,
46 | dataset=dataset,
47 | subset=subset,
48 | train=(not params.eval),
49 | cache_data=params.cache_data,
50 | num_threads=params.datasets_num_private_threads)
51 | for device_num in range(len(gpu_devices)):
52 | with tf.device(gpu_devices[device_num]):
53 | buffer_resource_handle = prefetching_ops.function_buffering_resource(
54 | f=remote_fn,
55 | output_types=[data_type, tf.int32],
56 | target_device=cpu_device,
57 | string_arg=args[0],
58 | buffer_size=params.datasets_prefetch_buffer_size,
59 | shared_name=None)
60 | function_buffering_resources.append(buffer_resource_handle)
61 | return function_buffering_resources
62 |
63 |
64 | def get_images_and_labels(function_buffering_resource, data_type):
65 | """Given a FunctionBufferingResource obtains images and labels from it."""
66 | return prefetching_ops.function_buffering_resource_get_next(
67 | function_buffer_resource=function_buffering_resource,
68 | output_types=[data_type, tf.int32])
69 |
70 |
71 | def create_iterator(batch_size,
72 | num_splits,
73 | batch_size_per_split,
74 | preprocess_fn,
75 | dataset,
76 | subset,
77 | train,
78 | cache_data,
79 | num_threads=None):
80 | """Creates a dataset iterator for the benchmark."""
81 | glob_pattern = dataset.tf_record_pattern(subset)
82 | file_names = gfile.Glob(glob_pattern)
83 | if not file_names:
84 | raise ValueError('Found no files in --data_dir matching: {}'
85 | .format(glob_pattern))
86 | ds = tf.data.TFRecordDataset.list_files(file_names)
87 | ds = ds.apply(
88 | interleave_ops.parallel_interleave(
89 | tf.data.TFRecordDataset, cycle_length=10))
90 | if cache_data:
91 | ds = ds.take(1).cache().repeat()
92 | counter = tf.data.Dataset.range(batch_size)
93 | counter = counter.repeat()
94 | ds = tf.data.Dataset.zip((ds, counter))
95 | ds = ds.prefetch(buffer_size=batch_size)
96 | if train:
97 | ds = ds.shuffle(buffer_size=10000)
98 | ds = ds.repeat()
99 | ds = ds.apply(
100 | batching.map_and_batch(
101 | map_func=preprocess_fn,
102 | batch_size=batch_size_per_split,
103 | num_parallel_batches=num_splits))
104 | ds = ds.prefetch(buffer_size=num_splits)
105 | if num_threads:
106 | ds = threadpool.override_threadpool(
107 | ds,
108 | threadpool.PrivateThreadPool(
109 | num_threads, display_name='input_pipeline_thread_pool'))
110 | ds_iterator = ds.make_initializable_iterator()
111 | tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
112 | ds_iterator.initializer)
113 | else:
114 | ds_iterator = ds.make_one_shot_iterator()
115 | return ds_iterator
116 |
117 |
118 | def minibatch_fn(height, width, batch_size, num_splits, preprocess_fn, dataset,
119 | subset, train, cache_data, num_threads):
120 | """Returns a function and list of args for the fn to create a minibatch."""
121 | batch_size_per_split = batch_size // num_splits
122 | with tf.name_scope('batch_processing'):
123 | ds_iterator = create_iterator(batch_size, num_splits, batch_size_per_split,
124 | preprocess_fn, dataset, subset, train,
125 | cache_data, num_threads)
126 | ds_iterator_string_handle = ds_iterator.string_handle()
127 |
128 | @function.Defun(tf.string)
129 | def _fn(h):
130 | depth = 3
131 | remote_iterator = tf.data.Iterator.from_string_handle(
132 | h, ds_iterator.output_types, ds_iterator.output_shapes)
133 | labels, images = remote_iterator.get_next()
134 | images = tf.reshape(
135 | images, shape=[batch_size_per_split, height, width, depth])
136 | labels = tf.reshape(labels, [batch_size_per_split])
137 | return images, labels
138 |
139 | return _fn, [ds_iterator_string_handle]
140 |
--------------------------------------------------------------------------------
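The indirection in `minibatch_fn` above is the subtle part: the `Defun` receives only a scalar string handle and rebuilds an iterator from it, which is what lets `function_buffering_resource` execute the function body from another device. A stripped-down, runnable sketch of that handle round-trip, with a toy dataset standing in for the TFRecord pipeline:

```python
import tensorflow as tf
from tensorflow.python.framework import function

ds = tf.data.Dataset.from_tensor_slices(tf.range(10)).repeat().batch(4)
iterator = ds.make_one_shot_iterator()
handle = iterator.string_handle()  # scalar string identifying the iterator

@function.Defun(tf.string)
def _fn(h):
  # Rebuild an iterator from the handle; only types/shapes are needed here.
  remote_iterator = tf.data.Iterator.from_string_handle(
      h, iterator.output_types, iterator.output_shapes)
  return remote_iterator.get_next()

batch = _fn(handle)
with tf.Session() as sess:
  print(sess.run(batch))  # first batch, e.g. [0 1 2 3]
```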
/TensorFlow/datasets.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Benchmark dataset utilities.
17 | """
18 |
19 | from abc import abstractmethod
20 | import os
21 |
22 | import numpy as np
23 | from six.moves import cPickle
24 | from six.moves import xrange # pylint: disable=redefined-builtin
25 | import tensorflow as tf
26 |
27 | from tensorflow.python.platform import gfile
28 | import preprocessing
29 |
30 | IMAGENET_NUM_TRAIN_IMAGES = 1281167
31 | IMAGENET_NUM_VAL_IMAGES = 50000
32 |
33 |
34 | class Dataset(object):
35 | """Abstract class for cnn benchmarks dataset."""
36 |
37 | def __init__(self, name, height=None, width=None, depth=None, data_dir=None,
38 | queue_runner_required=False, num_classes=1001):
39 | self.name = name
40 | self.height = height
41 | self.width = width
42 | self.depth = depth or 3
43 |
44 | self.data_dir = data_dir
45 | self._queue_runner_required = queue_runner_required
46 | self._num_classes = num_classes
47 |
48 | def tf_record_pattern(self, subset):
49 | return os.path.join(self.data_dir, '%s-*-of-*' % subset)
50 |
51 | def reader(self):
52 | return tf.TFRecordReader()
53 |
54 | @property
55 | def num_classes(self):
56 | return self._num_classes
57 |
58 | @num_classes.setter
59 | def num_classes(self, val):
60 | self._num_classes = val
61 |
62 | @abstractmethod
63 | def num_examples_per_epoch(self, subset):
64 | pass
65 |
66 | def __str__(self):
67 | return self.name
68 |
69 | def get_image_preprocessor(self, input_preprocessor='default'):
70 | if self.use_synthetic_gpu_images():
71 | return preprocessing.SyntheticImagePreprocessor
72 | return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]
73 |
74 | def queue_runner_required(self):
75 | return self._queue_runner_required
76 |
77 | def use_synthetic_gpu_images(self):
78 | return not self.data_dir
79 |
80 |
81 | class ImagenetData(Dataset):
82 | """Configuration for Imagenet dataset."""
83 |
84 | def __init__(self, data_dir=None):
85 | super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir)
86 |
87 | def num_examples_per_epoch(self, subset='train'):
88 | if subset == 'train':
89 | return IMAGENET_NUM_TRAIN_IMAGES
90 | elif subset == 'validation':
91 | return IMAGENET_NUM_VAL_IMAGES
92 | else:
93 | raise ValueError('Invalid data subset "%s"' % subset)
94 |
95 |
96 | class Cifar10Data(Dataset):
97 | """Configuration for cifar 10 dataset.
98 |
99 | It loads all the input images into host memory.
100 | """
101 |
102 | def __init__(self, data_dir=None):
103 | super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir,
104 | queue_runner_required=True,
105 | num_classes=11)
106 |
107 | def read_data_files(self, subset='train'):
108 | """Reads from data file and returns images and labels in a numpy array."""
109 | assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
110 | 'data')
111 | if subset == 'train':
112 | filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
113 | for i in xrange(1, 6)]
114 | elif subset == 'validation':
115 | filenames = [os.path.join(self.data_dir, 'test_batch')]
116 | else:
117 | raise ValueError('Invalid data subset "%s"' % subset)
118 |
119 | inputs = []
120 | for filename in filenames:
121 | with gfile.Open(filename, 'rb') as f:  # binary mode: cPickle reads bytes
122 | inputs.append(cPickle.load(f))
123 | # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
124 | # input format.
125 | all_images = np.concatenate(
126 | [each_input['data'] for each_input in inputs]).astype(np.float32)
127 | all_labels = np.concatenate(
128 | [each_input['labels'] for each_input in inputs])
129 | return all_images, all_labels
130 |
131 | def num_examples_per_epoch(self, subset='train'):
132 | if subset == 'train':
133 | return 50000
134 | elif subset == 'validation':
135 | return 10000
136 | else:
137 | raise ValueError('Invalid data subset "%s"' % subset)
138 |
139 |
140 | _SUPPORTED_DATASETS = {
141 | 'imagenet': ImagenetData,
142 | 'cifar10': Cifar10Data,
143 | }
144 |
145 | _SUPPORTED_INPUT_PREPROCESSORS = {
146 | 'imagenet': {
147 | 'default': preprocessing.RecordInputImagePreprocessor,
148 | 'official_models_imagenet': preprocessing.ImagenetPreprocessor,
149 | },
150 | 'cifar10': {
151 | 'default': preprocessing.Cifar10ImagePreprocessor
152 | }
153 | }
154 |
155 |
156 | def create_dataset(data_dir, data_name):
157 | """Create a Dataset instance based on data_dir and data_name."""
158 | if not data_dir and not data_name:
159 | # When using synthetic data, use synthetic imagenet images by default.
160 | data_name = 'imagenet'
161 |
162 | # Infer dataset name from data_dir if data_name is not provided.
163 | if data_name is None:
164 | for supported_name in _SUPPORTED_DATASETS:
165 | if supported_name in data_dir:
166 | data_name = supported_name
167 | break
168 | else: # Failed to identify dataset name from data dir.
169 | raise ValueError('Could not identify name of dataset. '
170 | 'Please specify with --data_name option.')
171 | if data_name not in _SUPPORTED_DATASETS:
172 | raise ValueError('Unknown dataset. Must be one of %s' % ', '.join(
173 | sorted(_SUPPORTED_DATASETS.keys())))
174 |
175 | return _SUPPORTED_DATASETS[data_name](data_dir)
176 |
--------------------------------------------------------------------------------
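For reference, a short usage sketch of `create_dataset` and the name-inference rule above; the directory path here is hypothetical:

```python
import datasets

# No data_dir: synthetic ImageNet-shaped images are used by default.
synthetic = datasets.create_dataset(None, None)
print(synthetic.name, synthetic.use_synthetic_gpu_images())  # imagenet True

# 'imagenet' is inferred because the substring appears in the path.
real = datasets.create_dataset('/data/imagenet-tfrecords', None)
print(real.num_examples_per_epoch('train'))  # 1281167
```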
/TensorFlow/eval_all_ckpt.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import time
4 | import shutil
5 |
6 |
7 | ckpt_dir = './results/gct_resnet50'
8 | # Path to the TF checkpoint-state file (a small text proto), not a directory.
9 | ckpt_list_path = os.path.join(ckpt_dir, "checkpoint")
10 | backup_path = './checkpoint.backup'
11 |
12 | start_step = 0  # skip checkpoints at or below this global step
13 |
14 | # Back up the original checkpoint-state file; it is restored at the end.
15 | shutil.move(ckpt_list_path, backup_path)
16 |
17 | with open(backup_path) as f:
18 | line = f.readline()
19 | while line:
20 | # Lines listing checkpoints start with 'all_model_checkpoint_paths'.
21 | if line[:3] == 'all':
22 | ckpt_index = line.split('\"')[-2]
23 | num_index = int(ckpt_index.split('-')[-1])
24 | print(ckpt_index)
25 | if num_index > start_step:
26 | # Point the state file at this single checkpoint, then evaluate it.
27 | with open(ckpt_list_path, 'w') as f_ckpt:
28 | f_ckpt.write("model_checkpoint_path: \"" + ckpt_index + "\"")
29 | print("start eval: " + ckpt_index)
30 | output = os.popen('bash eval_gct_resnet50.sh').read()
31 | print(output)
32 | else:
33 | print('skip')
34 | line = f.readline()
35 |
36 | shutil.move(backup_path, ckpt_list_path)
--------------------------------------------------------------------------------
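For context: the `checkpoint` file this script rewrites is TensorFlow's checkpoint-state file, a small text proto maintained by `tf.train.Saver`. It looks roughly like the following (step numbers are hypothetical). The script iterates over the `all_model_checkpoint_paths` entries of the backup and points `model_checkpoint_path` at one checkpoint at a time before each evaluation run:

```
model_checkpoint_path: "model.ckpt-112590"
all_model_checkpoint_paths: "model.ckpt-28147"
all_model_checkpoint_paths: "model.ckpt-56294"
all_model_checkpoint_paths: "model.ckpt-112590"
```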
/TensorFlow/eval_gct_resnet50.sh:
--------------------------------------------------------------------------------
1 | DATA_DIR="/path/to/imagenet"
2 | CKPT_DIR="results/gct_resnet50"
3 | echo ${CKPT_DIR}
4 | python tf_cnn_benchmarks.py --data_format=NCHW --batch_size=50 \
5 | --model=resnet50 --optimizer=momentum --variable_update=replicated \
6 | --nodistortions --gradient_repacking=8 --num_gpus=4 \
7 | --num_epochs=1 --weight_decay=1e-4 --data_dir=${DATA_DIR} \
8 | --train_dir=${CKPT_DIR} --print_training_accuracy --xla \
9 | --summary_verbosity=1 --save_summaries_steps=50 --eval_dir=${CKPT_DIR}/eval \
10 | --eval
--------------------------------------------------------------------------------
/TensorFlow/flags.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains functions to define flags and params.
16 |
17 | Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_specs
18 | dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
19 | a command-line flag for every ParamSpec defined by a DEFINE_* function.
20 |
21 | The reason we don't use absl flags directly is that we want to be able to use
22 | tf_cnn_benchmarks as a library. When using it as a library, we don't want to
23 | define any flags, but instead pass parameters to the BenchmarkCNN constructor.
24 | """
25 |
26 | from collections import namedtuple
27 |
28 | from absl import flags as absl_flags
29 | import six
30 |
31 |
32 | FLAGS = absl_flags.FLAGS
33 |
34 |
35 | # ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
36 | ParamSpec = namedtuple('_ParamSpec',
37 | ['flag_type', 'default_value', 'description',
38 | 'kwargs'])
39 |
40 |
41 | # Maps from parameter name to its ParamSpec.
42 | param_specs = {}
43 |
44 |
45 | def DEFINE_string(name, default, help): # pylint: disable=invalid-name,redefined-builtin
46 | param_specs[name] = ParamSpec('string', default, help, {})
47 |
48 |
49 | def DEFINE_boolean(name, default, help): # pylint: disable=invalid-name,redefined-builtin
50 | param_specs[name] = ParamSpec('boolean', default, help, {})
51 |
52 |
53 | def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None): # pylint: disable=invalid-name,redefined-builtin
54 | kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
55 | param_specs[name] = ParamSpec('integer', default, help, kwargs)
56 |
57 |
58 | def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None): # pylint: disable=invalid-name,redefined-builtin
59 | kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
60 | param_specs[name] = ParamSpec('float', default, help, kwargs)
61 |
62 |
63 | def DEFINE_enum(name, default, enum_values, help): # pylint: disable=invalid-name,redefined-builtin
64 | kwargs = {'enum_values': enum_values}
65 | param_specs[name] = ParamSpec('enum', default, help, kwargs)
66 |
67 |
68 | def DEFINE_list(name, default, help): # pylint: disable=invalid-name,redefined-builtin
69 | param_specs[name] = ParamSpec('list', default, help, {})
70 |
71 |
72 | def define_flags(specs=None):
73 | """Define a command line flag for each ParamSpec in flags.param_specs."""
74 | specs = specs or param_specs
75 | define_flag = {
76 | 'boolean': absl_flags.DEFINE_boolean,
77 | 'float': absl_flags.DEFINE_float,
78 | 'integer': absl_flags.DEFINE_integer,
79 | 'string': absl_flags.DEFINE_string,
80 | 'enum': absl_flags.DEFINE_enum,
81 | 'list': absl_flags.DEFINE_list
82 | }
83 | for name, param_spec in six.iteritems(specs):
84 | if param_spec.flag_type not in define_flag:
85 | raise ValueError('Unknown flag_type %s' % param_spec.flag_type)
86 | else:
87 | define_flag[param_spec.flag_type](name, param_spec.default_value,
88 | help=param_spec.description,
89 | **param_spec.kwargs)
90 |
--------------------------------------------------------------------------------
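A minimal sketch of the flow described in the module docstring; the parameter name `my_new_param` is made up for illustration:

```python
from absl import flags as absl_flags

import flags

flags.DEFINE_integer('my_new_param', 10, 'An example parameter.',
                     lower_bound=1)
flags.define_flags()  # registers every ParamSpec as a real absl flag
absl_flags.FLAGS(['program', '--my_new_param=32'])  # parse argv
print(absl_flags.FLAGS.my_new_param)  # 32
```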
/TensorFlow/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/models/__init__.py
--------------------------------------------------------------------------------
/TensorFlow/models/alexnet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Alexnet model configuration.
17 |
18 | References:
19 | Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
20 | ImageNet Classification with Deep Convolutional Neural Networks
21 | Advances in Neural Information Processing Systems. 2012
22 | """
23 |
24 | import tensorflow as tf
25 | from models import model
26 |
27 |
28 | class AlexnetModel(model.CNNModel):
29 | """Alexnet cnn model."""
30 |
31 | def __init__(self):
32 | super(AlexnetModel, self).__init__('alexnet', 224 + 3, 512, 0.005)
33 |
34 | def add_inference(self, cnn):
35 | # Note: VALID requires padding the images by 3 in width and height
36 | cnn.conv(64, 11, 11, 4, 4, 'VALID')
37 | cnn.mpool(3, 3, 2, 2)
38 | cnn.conv(192, 5, 5)
39 | cnn.mpool(3, 3, 2, 2)
40 | cnn.conv(384, 3, 3)
41 | cnn.conv(384, 3, 3)
42 | cnn.conv(256, 3, 3)
43 | cnn.mpool(3, 3, 2, 2)
44 | cnn.reshape([-1, 256 * 6 * 6])
45 | cnn.affine(4096)
46 | cnn.dropout()
47 | cnn.affine(4096)
48 | cnn.dropout()
49 |
50 |
51 | class AlexnetCifar10Model(model.CNNModel):
52 | """Alexnet cnn model for cifar datasets.
53 |
54 | The model architecture follows the one defined in the tensorflow tutorial
55 | model.
56 |
57 | Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
58 | Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
59 | """
60 |
61 | def __init__(self):
62 | super(AlexnetCifar10Model, self).__init__('alexnet', 32, 128, 0.1)
63 |
64 | def add_inference(self, cnn):
65 | cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
66 | cnn.mpool(3, 3, 2, 2, mode='SAME')
67 | cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
68 | cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
69 | cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
70 | cnn.mpool(3, 3, 2, 2, mode='SAME')
71 | shape = cnn.top_layer.get_shape().as_list()
72 | flat_dim = shape[1] * shape[2] * shape[3]
73 | cnn.reshape([-1, flat_dim])
74 | cnn.affine(384, stddev=0.04, bias=0.1)
75 | cnn.affine(192, stddev=0.04, bias=0.1)
76 |
77 | def get_learning_rate(self, global_step, batch_size):
78 | num_examples_per_epoch = 50000
79 | num_epochs_per_decay = 100
80 | decay_steps = int(num_epochs_per_decay * num_examples_per_epoch /
81 | batch_size)
82 | decay_factor = 0.1
83 | return tf.train.exponential_decay(
84 | self.learning_rate, global_step, decay_steps, decay_factor,
85 | staircase=True)
86 |
--------------------------------------------------------------------------------
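With `staircase=True`, `tf.train.exponential_decay` in `get_learning_rate` above reduces to a simple closed form. A plain-Python restatement for the Cifar-10 defaults (batch size 128, decay by 0.1 every 100 epochs):

```python
def staircase_lr(base_lr, global_step, decay_steps, decay_factor=0.1):
  # lr = base_lr * decay_factor ** floor(global_step / decay_steps)
  return base_lr * decay_factor ** (global_step // decay_steps)

decay_steps = int(100 * 50000 / 128)          # 39062 steps between decays
print(staircase_lr(0.1, 0, decay_steps))      # 0.1
print(staircase_lr(0.1, 40000, decay_steps))  # ~0.01 (after one decay)
```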
/TensorFlow/models/densenet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Densenet model configuration.
17 |
18 | References:
19 | "Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
20 | """
21 | import numpy as np
22 | from six.moves import xrange # pylint: disable=redefined-builtin
23 | import tensorflow as tf
24 | from models import model as model_lib
25 |
26 |
27 | class DensenetCifar10Model(model_lib.CNNModel):
28 | """Densenet cnn network configuration."""
29 |
30 | def __init__(self, model, layer_counts, growth_rate):
31 | self.growth_rate = growth_rate
32 | super(DensenetCifar10Model, self).__init__(model, 32, 64, 0.1,
33 | layer_counts=layer_counts)
34 | self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
35 |
36 | def dense_block(self, cnn, growth_rate):
37 | input_layer = cnn.top_layer
38 | c = cnn.batch_norm(input_layer, **self.batch_norm_config)
39 | c = tf.nn.relu(c)
40 | c = cnn.conv(growth_rate, 3, 3, 1, 1, stddev=np.sqrt(2.0/9/growth_rate),
41 | activation=None, input_layer=c)
42 | channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
43 | cnn.top_layer = tf.concat([input_layer, c], channel_index)
44 | cnn.top_size += growth_rate
45 |
46 | def transition_layer(self, cnn):
47 | in_size = cnn.top_size
48 | cnn.batch_norm(**self.batch_norm_config)
49 | cnn.top_layer = tf.nn.relu(cnn.top_layer)
50 | cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0/9/in_size))
51 | cnn.apool(2, 2, 2, 2)
52 |
53 | def add_inference(self, cnn):
54 | if self.layer_counts is None:
55 | raise ValueError('Layer counts not specified for %s' % self.get_model())
56 | if self.growth_rate is None:
57 | raise ValueError('Growth rate not specified for %s' % self.get_model())
58 |
59 | cnn.conv(16, 3, 3, 1, 1, activation=None)
60 | # Block 1
61 | for _ in xrange(self.layer_counts[0]):
62 | self.dense_block(cnn, self.growth_rate)
63 | self.transition_layer(cnn)
64 | # Block 2
65 | for _ in xrange(self.layer_counts[1]):
66 | self.dense_block(cnn, self.growth_rate)
67 | self.transition_layer(cnn)
68 | # Block 3
69 | for _ in xrange(self.layer_counts[2]):
70 | self.dense_block(cnn, self.growth_rate)
71 | cnn.batch_norm(**self.batch_norm_config)
72 | cnn.top_layer = tf.nn.relu(cnn.top_layer)
73 | channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
74 | cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
75 | cnn.spatial_mean()
76 |
77 | def get_learning_rate(self, global_step, batch_size):
78 | num_batches_per_epoch = int(50000 / batch_size)
79 | boundaries = num_batches_per_epoch * np.array([150, 225, 300],
80 | dtype=np.int64)
81 | boundaries = [x for x in boundaries]  # numpy array -> plain list for piecewise_constant
82 | values = [0.1, 0.01, 0.001, 0.0001]
83 | return tf.train.piecewise_constant(global_step, boundaries, values)
84 |
85 |
86 | def create_densenet40_k12_model():
87 | return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12)
88 |
89 |
90 | def create_densenet100_k12_model():
91 | return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12)
92 |
93 |
94 | def create_densenet100_k24_model():
95 | return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24)
96 |
--------------------------------------------------------------------------------
/TensorFlow/models/googlenet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Googlenet model configuration.
17 |
18 | References:
19 | Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
20 | Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
21 | Going deeper with convolutions
22 | arXiv preprint arXiv:1409.4842 (2014)
23 | """
24 |
25 | from models import model
26 |
27 |
28 | class GooglenetModel(model.CNNModel):
29 | """GoogLeNet."""
30 |
31 | def __init__(self):
32 | super(GooglenetModel, self).__init__('googlenet', 224, 32, 0.005)
33 |
34 | def add_inference(self, cnn):
35 | def inception_v1(cnn, k, l, m, n, p, q):
36 | cols = [[('conv', k, 1, 1)], [('conv', l, 1, 1), ('conv', m, 3, 3)],
37 | [('conv', n, 1, 1), ('conv', p, 5, 5)],
38 | [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
39 | cnn.inception_module('incept_v1', cols)
40 |
41 | cnn.conv(64, 7, 7, 2, 2)
42 | cnn.mpool(3, 3, 2, 2, mode='SAME')
43 | cnn.conv(64, 1, 1)
44 | cnn.conv(192, 3, 3)
45 | cnn.mpool(3, 3, 2, 2, mode='SAME')
46 | inception_v1(cnn, 64, 96, 128, 16, 32, 32)
47 | inception_v1(cnn, 128, 128, 192, 32, 96, 64)
48 | cnn.mpool(3, 3, 2, 2, mode='SAME')
49 | inception_v1(cnn, 192, 96, 208, 16, 48, 64)
50 | inception_v1(cnn, 160, 112, 224, 24, 64, 64)
51 | inception_v1(cnn, 128, 128, 256, 24, 64, 64)
52 | inception_v1(cnn, 112, 144, 288, 32, 64, 64)
53 | inception_v1(cnn, 256, 160, 320, 32, 128, 128)
54 | cnn.mpool(3, 3, 2, 2, mode='SAME')
55 | inception_v1(cnn, 256, 160, 320, 32, 128, 128)
56 | inception_v1(cnn, 384, 192, 384, 48, 128, 128)
57 | cnn.apool(7, 7, 1, 1, mode='VALID')
58 | cnn.reshape([-1, 1024])
59 |
--------------------------------------------------------------------------------
/TensorFlow/models/inception_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Inception model configuration.
17 |
18 | Includes multiple models: inception3, inception4, inception-resnet2.
19 |
20 | References:
21 | Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
22 | Inception-v4, Inception-ResNet and the Impact of Residual Connections on
23 | Learning
24 |
25 | Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
26 | Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
27 | Going Deeper with Convolutions
28 | http://arxiv.org/pdf/1409.4842v1.pdf
29 |
30 | Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
31 | Zbigniew Wojna
32 | Rethinking the Inception Architecture for Computer Vision
33 | arXiv preprint arXiv:1512.00567 (2015)
34 |
35 | Inception v3 model: http://arxiv.org/abs/1512.00567
36 |
37 | Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
38 | """
39 |
40 | from six.moves import xrange # pylint: disable=redefined-builtin
41 | from models import model
42 | import tensorflow as tf
43 | import datasets
44 |
45 |
46 | class Inceptionv3Model(model.CNNModel):
47 | """InceptionV3."""
48 |
49 | def __init__(self, auxiliary=False):
50 | self._auxiliary = auxiliary
51 | super(Inceptionv3Model, self).__init__('inception3', 224, 256, 0.1)
52 |
53 | def add_inference(self, cnn):
54 |
55 | def inception_v3_a(cnn, n):
56 | cols = [[('conv', 64, 1, 1)], [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
57 | [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
58 | [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
59 | cnn.inception_module('incept_v3_a', cols)
60 |
61 |
62 | def inception_v3_b(cnn):
63 | cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
64 | [('conv', 64, 1, 1),
65 | ('conv', 96, 3, 3),
66 | ('conv', 96, 3, 3, 2, 2, 'VALID')],
67 | [('mpool', 3, 3, 2, 2, 'VALID')]]
68 | cnn.inception_module('incept_v3_b', cols)
69 |
70 |
71 | def inception_v3_c(cnn, n):
72 | cols = [[('conv', 192, 1, 1)],
73 | [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
74 | [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
75 | ('conv', n, 7, 1), ('conv', 192, 1, 7)],
76 | [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
77 | cnn.inception_module('incept_v3_c', cols)
78 |
79 |
80 | def inception_v3_d(cnn):
81 | cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
82 | [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
83 | ('conv', 192, 3, 3, 2, 2, 'VALID')],
84 | [('mpool', 3, 3, 2, 2, 'VALID')]]
85 | cnn.inception_module('incept_v3_d', cols)
86 |
87 |
88 | def inception_v3_e(cnn, pooltype):
89 | cols = [[('conv', 320, 1, 1)], [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
90 | [('share',), ('conv', 384, 3, 1)],
91 | [('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
92 | [('share',), ('share',), ('conv', 384, 3, 1)],
93 | [('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1, 'SAME'),
94 | ('conv', 192, 1, 1)]]
95 | cnn.inception_module('incept_v3_e', cols)
96 |
97 |
98 | def incept_v3_aux(cnn):
99 | assert cnn.aux_top_layer is None
100 | cnn.aux_top_layer = cnn.top_layer
101 | cnn.aux_top_size = cnn.top_size
102 | with cnn.switch_to_aux_top_layer():
103 | cnn.apool(5, 5, 3, 3, mode='VALID')
104 | cnn.conv(128, 1, 1, mode='SAME')
105 | cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
106 | cnn.reshape([-1, 768])
107 |
108 |
109 | cnn.use_batch_norm = True
110 | cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
111 | cnn.conv(32, 3, 3, 2, 2, mode='VALID') # 299 x 299 x 3
112 | cnn.conv(32, 3, 3, 1, 1, mode='VALID') # 149 x 149 x 32
113 | cnn.conv(64, 3, 3, 1, 1, mode='SAME') # 147 x 147 x 64
114 | cnn.mpool(3, 3, 2, 2, mode='VALID') # 147 x 147 x 64
115 | cnn.conv(80, 1, 1, 1, 1, mode='VALID') # 73 x 73 x 80
116 | cnn.conv(192, 3, 3, 1, 1, mode='VALID') # 71 x 71 x 192
117 | cnn.mpool(3, 3, 2, 2, 'VALID') # 35 x 35 x 192
118 | inception_v3_a(cnn, 32) # 35 x 35 x 256 mixed.
119 | inception_v3_a(cnn, 64) # 35 x 35 x 288 mixed_1.
120 | inception_v3_a(cnn, 64) # 35 x 35 x 288 mixed_2
121 | inception_v3_b(cnn) # 17 x 17 x 768 mixed_3
122 | inception_v3_c(cnn, 128) # 17 x 17 x 768 mixed_4
123 | inception_v3_c(cnn, 160) # 17 x 17 x 768 mixed_5
124 | inception_v3_c(cnn, 160) # 17 x 17 x 768 mixed_6
125 | inception_v3_c(cnn, 192) # 17 x 17 x 768 mixed_7
126 | if self._auxiliary:
127 | incept_v3_aux(cnn) # Auxiliary head logits
128 | inception_v3_d(cnn) # 17 x 17 x 1280 mixed_8
129 | inception_v3_e(cnn, 'avg') # 8 x 8 x 2048 mixed_9
130 | inception_v3_e(cnn, 'max') # 8 x 8 x 2048 mixed_10
131 | cnn.spatial_mean() # 8 x 8 x 2048
132 | cnn.reshape([-1, 2048]) # 1 x 1 x 2048
133 |
134 | def get_learning_rate(self, global_step, batch_size):
135 | num_batches_per_epoch = (
136 | float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
137 | boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 90, 100]]
138 |
139 | rescaled_lr = self.learning_rate / self.default_batch_size * batch_size
140 | print('Init LR: ', rescaled_lr)
141 | rescaled_lr = rescaled_lr / (batch_size / self.batch_size)
142 | print('GPU Num: ', batch_size / self.batch_size)
143 | print('Batch size: ', batch_size)
144 | values = [1, 0.1, 0.01, 0.001, 0.0001]
145 | values = [rescaled_lr * v for v in values]
146 | lr = tf.train.piecewise_constant(global_step, boundaries, values)
147 |
148 | warmup_steps = int(num_batches_per_epoch)
149 |
150 | warmup_lr = lr * 0.1
151 |
152 | return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
153 |
154 |
155 | # Stem functions
156 | def inception_v4_sa(cnn):
157 | cols = [[('mpool', 3, 3, 2, 2, 'VALID')], [
158 | ('conv', 96, 3, 3, 2, 2, 'VALID')]]
159 | cnn.inception_module('incept_v4_sa', cols)
160 |
161 |
162 | def inception_v4_sb(cnn):
163 | cols = [[('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')],
164 | [('conv', 64, 1, 1), ('conv', 64, 7, 1), ('conv', 64, 1, 7),
165 | ('conv', 96, 3, 3, 1, 1, 'VALID')]]
166 | cnn.inception_module('incept_v4_sb', cols)
167 |
168 |
169 | def inception_v4_sc(cnn):
170 | cols = [[('conv', 192, 3, 3, 2, 2, 'VALID')],
171 | [('mpool', 3, 3, 2, 2, 'VALID')]]
172 | cnn.inception_module('incept_v4_sc', cols)
173 |
174 |
175 | # Reduction functions
176 | def inception_v4_ra(cnn, k, l, m, n):
177 | cols = [
178 | [('mpool', 3, 3, 2, 2, 'VALID')], [('conv', n, 3, 3, 2, 2, 'VALID')],
179 | [('conv', k, 1, 1), ('conv', l, 3, 3), ('conv', m, 3, 3, 2, 2, 'VALID')]
180 | ]
181 | cnn.inception_module('incept_v4_ra', cols)
182 |
183 |
184 | def inception_v4_rb(cnn):
185 | cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
186 | [('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')],
187 | [('conv', 256, 1, 1), ('conv', 256, 1, 7), ('conv', 320, 7, 1),
188 | ('conv', 320, 3, 3, 2, 2, 'VALID')]]
189 | cnn.inception_module('incept_v4_rb', cols)
190 |
191 |
192 | class Inceptionv4Model(model.CNNModel):
193 | """Inceptionv4."""
194 |
195 | def __init__(self):
196 | super(Inceptionv4Model, self).__init__('inception4', 224, 64, 0.004)
197 |
198 | def add_inference(self, cnn):
199 | cnn.use_batch_norm = True
200 | cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
201 |
202 | def inception_v4_a(cnn):
203 | cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
204 | [('conv', 96, 1, 1)], [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
205 | [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
206 | cnn.inception_module('incept_v4_a', cols)
207 |
208 | def inception_v4_b(cnn):
209 | cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
210 | [('conv', 384, 1, 1)],
211 | [('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
212 | [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
213 | ('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
214 | cnn.inception_module('incept_v4_b', cols)
215 |
216 | def inception_v4_c(cnn):
217 | cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
218 | [('conv', 256, 1, 1)], [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
219 | [('share',), ('conv', 256, 3, 1)],
220 | [('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
221 | ('conv', 256, 3, 1)], [('share',), ('share',), ('share',),
222 | ('conv', 256, 1, 3)]]
223 | cnn.inception_module('incept_v4_c', cols)
224 |
225 | cnn.use_batch_norm = True
226 | cnn.conv(32, 3, 3, 2, 2, mode='VALID')
227 | cnn.conv(32, 3, 3, 1, 1, mode='VALID')
228 | cnn.conv(64, 3, 3)
229 | inception_v4_sa(cnn)
230 | inception_v4_sb(cnn)
231 | inception_v4_sc(cnn)
232 | for _ in xrange(4):
233 | inception_v4_a(cnn)
234 | inception_v4_ra(cnn, 192, 224, 256, 384)
235 | for _ in xrange(7):
236 | inception_v4_b(cnn)
237 | inception_v4_rb(cnn)
238 | for _ in xrange(3):
239 | inception_v4_c(cnn)
240 | cnn.spatial_mean()
241 | cnn.dropout(0.8)
242 |
243 | def get_learning_rate(self, global_step, batch_size):
244 | num_batches_per_epoch = (
245 | float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
246 | boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
247 | rescaled_lr = self.learning_rate / self.default_batch_size * batch_size
248 | values = [1, 0.1, 0.01, 0.001, 0.0001]
249 | values = [rescaled_lr * v for v in values]
250 | lr = tf.train.piecewise_constant(global_step, boundaries, values)
251 | warmup_steps = int(num_batches_per_epoch * 5)
252 | warmup_lr = (
253 | rescaled_lr * tf.cast(global_step, tf.float32) / tf.cast(
254 | warmup_steps, tf.float32))
255 | return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
256 |
--------------------------------------------------------------------------------
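The warmup-plus-piecewise schedule in `Inceptionv4Model.get_learning_rate` above is easier to see outside the graph. A plain-Python sketch, assuming `batch_size` equals the model's default so that `rescaled_lr` is just the base 0.004:

```python
def inception_v4_lr(step, steps_per_epoch, base_lr=0.004):
  boundaries = [int(steps_per_epoch * e) for e in (30, 60, 80, 90)]
  values = [base_lr * v for v in (1, 0.1, 0.01, 0.001, 0.0001)]
  warmup_steps = int(steps_per_epoch * 5)
  if step < warmup_steps:
    return base_lr * step / float(warmup_steps)  # linear warmup over 5 epochs
  for boundary, value in zip(boundaries, values):
    if step <= boundary:
      return value
  return values[-1]  # after the last boundary
```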
/TensorFlow/models/lenet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Lenet model configuration.
17 |
18 | References:
19 | LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
20 | Gradient-based learning applied to document recognition
21 | Proceedings of the IEEE (1998)
22 | """
23 |
24 | from models import model
25 |
26 |
27 | class Lenet5Model(model.CNNModel):
28 | """Lenet5."""
29 |
30 | def __init__(self):
31 | super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005)
32 |
33 | def add_inference(self, cnn):
34 | # Note: This matches TF's MNIST tutorial model
35 | cnn.conv(32, 5, 5)
36 | cnn.mpool(2, 2)
37 | cnn.conv(64, 5, 5)
38 | cnn.mpool(2, 2)
39 | cnn.reshape([-1, 64 * 7 * 7])
40 | cnn.affine(512)
41 |
--------------------------------------------------------------------------------
/TensorFlow/models/mobilenet_conv_blocks.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Convolution blocks for mobilenet."""
16 | import contextlib
17 | import functools
18 |
19 | import tensorflow as tf
20 |
21 | slim = tf.contrib.slim
22 |
23 |
24 | def get_variable(name, shape, dtype, cast_dtype, *args, **kwargs):
25 | # TODO(reedwm): Currently variables and gradients are transferred to other
26 | # devices and machines as type `dtype`, not `cast_dtype`. In particular,
27 | # this means in fp16 mode, variables are transferred as fp32 values, not
28 | # fp16 values, which uses extra bandwidth.
29 | var = tf.get_variable(name, shape, dtype, *args, **kwargs)
30 | return tf.cast(var, cast_dtype)
31 |
32 |
33 | def adaption(input_layer):
34 | epsilon = 1e-5
35 | variable_dtype = tf.float32
36 | dtype = tf.float16
37 |
38 | num_channels = input_layer.get_shape().as_list()[3]
39 | squeeze = [1, 2]
40 | with tf.variable_scope(None, default_name='adaption'):
41 | beta = get_variable('beta', [1, 1, 1, num_channels],
42 | variable_dtype, dtype,
43 | initializer=tf.constant_initializer(0.))
44 | alpha = get_variable('alpha', [1, 1, 1, num_channels],
45 | variable_dtype, dtype,
46 | initializer=tf.constant_initializer(1.))
47 | gamma = get_variable('gamma', [1, 1, 1, num_channels],
48 | variable_dtype, dtype,
49 | initializer=tf.constant_initializer(1.))
50 | theta = get_variable('theta', [1, 1, 1, num_channels],
51 | variable_dtype, dtype,
52 | initializer=tf.constant_initializer(0.))
53 |
54 | X = input_layer
55 | alpha_2 = tf.square(alpha)
56 | alpha_2 = alpha_2 / tf.reduce_mean(alpha_2) + epsilon  # normalize: mean squared scale is ~1
57 | alpha = tf.sqrt(alpha_2)
58 | A = alpha_2 * tf.reduce_mean(tf.square(X), squeeze, keepdims=True) - (
59 | 2. * alpha * beta) * tf.reduce_mean(X, squeeze, keepdims=True)
60 | A = tf.reduce_mean(  # per-sample mean of (alpha * X - beta)^2, plus epsilon
61 | A, [1, 2, 3], keepdims=True) + (tf.reduce_mean(tf.square(beta)) + epsilon)
62 | # B = tf.reduce_sum(alpha_2)
63 | B = 1.
64 | l2 = tf.sqrt(B / A)  # inverse RMS of the scaled, shifted activation
65 | adaptor = tf.pow(l2, gamma + theta * l2)  # gate: a learned power of the inverse RMS
66 | trans_back = X * adaptor + (beta / alpha) * (1. - adaptor)  # == (adaptor * (alpha * X - beta) + beta) / alpha
67 |
68 | return trans_back
69 |
70 | def adaption_conv2d(inputs, *args, **kwargs):
71 | # Apply the adaption transform to the inputs of the convolution.
72 | inputs = adaption(inputs)
73 | return slim.conv2d(inputs, *args, **kwargs)
74 |
75 |
76 | def _fixed_padding(inputs, kernel_size, rate=1):
77 | """Pads the input along the spatial dimensions independently of input size.
78 |
79 | Pads the input such that if it was used in a convolution with 'VALID' padding,
80 | the output would have the same dimensions as if the unpadded input was used
81 | in a convolution with 'SAME' padding.
82 |
83 | Args:
84 | inputs: A tensor of size [batch, height_in, width_in, channels].
85 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
86 | rate: An integer, rate for atrous convolution.
87 |
88 | Returns:
89 | output: A tensor of size [batch, height_out, width_out, channels] with the
90 | input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
91 | """
92 | kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
93 | kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
94 | pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
95 | pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
96 | pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
97 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
98 | [pad_beg[1], pad_end[1]], [0, 0]])
99 | return padded_inputs
100 |
101 |
102 | def _make_divisible(v, divisor, min_value=None):
103 | if min_value is None:
104 | min_value = divisor
105 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
106 | # Make sure that round down does not go down by more than 10%.
107 | if new_v < 0.9 * v:
108 | new_v += divisor
109 | return new_v
110 |
111 |
112 | def _split_divisible(num, num_ways, divisible_by=8):
113 | """Evenly splits num, num_ways so each piece is a multiple of divisible_by."""
114 | assert num % divisible_by == 0
115 | assert num / num_ways >= divisible_by
116 | # Note: we round down first, then adjust each split upward to match the total.
117 | base = num // num_ways // divisible_by * divisible_by
118 | result = []
119 | accumulated = 0
120 | for i in range(num_ways):
121 | r = base
122 | while accumulated + r < num * (i + 1) / num_ways:
123 | r += divisible_by
124 | result.append(r)
125 | accumulated += r
126 | assert accumulated == num
127 | return result
128 |
129 |
130 | @contextlib.contextmanager
131 | def _v1_compatible_scope_naming(scope):
132 | if scope is None: # Create uniquified separable blocks.
133 | with tf.variable_scope(None, default_name='separable') as s, \
134 | tf.name_scope(s.original_name_scope):
135 | yield ''
136 | else:
137 | # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts,
138 | # which provide numbered scopes.
139 | scope += '_'
140 | yield scope
141 |
142 |
143 | @slim.add_arg_scope
144 | def split_separable_conv2d(input_tensor,
145 | num_outputs,
146 | scope=None,
147 | normalizer_fn=None,
148 | stride=1,
149 | rate=1,
150 | endpoints=None,
151 | use_explicit_padding=False):
152 | """Separable mobilenet V1 style convolution.
153 |
154 | Depthwise convolution, with default non-linearity,
155 | followed by a 1x1 pointwise convolution. This is similar to
156 | slim.separable_conv2d, but differs in that it applies batch
157 | normalization and non-linearity to the depthwise convolution. This matches
158 | the basic building block of the MobileNet paper
159 | (https://arxiv.org/abs/1704.04861)
160 |
161 | Args:
162 | input_tensor: input
163 | num_outputs: number of outputs
164 | scope: optional name of the scope. Note if provided it will use
165 | scope_depthwise for depthwise, and scope_pointwise for pointwise.
166 | normalizer_fn: which normalizer function to use for depthwise/pointwise
167 | stride: stride
168 | rate: output rate (also known as dilation rate)
169 | endpoints: optional, if provided, will export additional tensors to it.
170 | use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
171 | inputs so that the output dimensions are the same as if 'SAME' padding
172 | were used.
173 |
174 | Returns:
175 | output tensor
176 | """
177 |
178 | with _v1_compatible_scope_naming(scope) as scope:
179 | dw_scope = scope + 'depthwise'
180 | endpoints = endpoints if endpoints is not None else {}
181 | kernel_size = [3, 3]
182 | padding = 'SAME'
183 | if use_explicit_padding:
184 | padding = 'VALID'
185 | input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
186 | net = slim.separable_conv2d(
187 | input_tensor,
188 | None,
189 | kernel_size,
190 | depth_multiplier=1,
191 | stride=stride,
192 | rate=rate,
193 | normalizer_fn=normalizer_fn,
194 | padding=padding,
195 | scope=dw_scope)
196 |
197 | endpoints[dw_scope] = net
198 |
199 | pw_scope = scope + 'pointwise'
200 | net = adaption_conv2d(
201 | net,
202 | num_outputs, [1, 1],
203 | stride=1,
204 | normalizer_fn=normalizer_fn,
205 | scope=pw_scope)
206 | endpoints[pw_scope] = net
207 | return net
208 |
209 |
210 | def expand_input_by_factor(n, divisible_by=8):
211 | return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
212 |
213 |
214 | @slim.add_arg_scope
215 | def expanded_conv(input_tensor,
216 | num_outputs,
217 | expansion_size=expand_input_by_factor(6),
218 | stride=1,
219 | rate=1,
220 | kernel_size=(3, 3),
221 | residual=True,
222 | normalizer_fn=None,
223 | split_projection=1,
224 | split_expansion=1,
225 | expansion_transform=None,
226 | depthwise_location='expansion',
227 | depthwise_channel_multiplier=1,
228 | endpoints=None,
229 | use_explicit_padding=False,
230 | padding='SAME',
231 | scope=None):
232 | """Depthwise Convolution Block with expansion.
233 |
234 | Builds a composite convolution that has the following structure
235 | expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
236 |
237 | Args:
238 | input_tensor: input
239 | num_outputs: number of outputs in the final layer.
240 | expansion_size: the size of expansion, could be a constant or a callable.
241 | If latter it will be provided 'num_inputs' as an input. For forward
242 | compatibility it should accept arbitrary keyword arguments.
243 | Default will expand the input by factor of 6.
244 | stride: depthwise stride
245 | rate: depthwise rate
246 | kernel_size: depthwise kernel
247 | residual: whether to include residual connection between input
248 | and output.
249 | normalizer_fn: batchnorm or otherwise
250 | split_projection: how many ways to split projection operator
251 | (that is conv expansion->bottleneck)
252 | split_expansion: how many ways to split expansion op
253 | (that is conv bottleneck->expansion) ops will keep depth divisible
254 | by this value.
255 | expansion_transform: Optional function that takes expansion
256 | as a single input and returns output.
257 | depthwise_location: where to put the depthwise convolutions. Supported
258 | values: None, 'input', 'output', 'expansion'.
259 | depthwise_channel_multiplier: depthwise channel multiplier:
260 | each input channel will be replicated (with different filters)
261 | that many times. So if the input had c channels,
262 | the output will have c x depthwise_channel_multiplier channels.
263 | endpoints: An optional dictionary into which intermediate endpoints are
264 | placed. The keys "expansion_output", "depthwise_output",
265 | "projection_output" and "expansion_transform" are always populated, even
266 | if the corresponding functions are not invoked.
267 | use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
268 | inputs so that the output dimensions are the same as if 'SAME' padding
269 | were used.
270 | padding: Padding type to use if `use_explicit_padding` is not set.
271 | scope: optional scope.
272 |
273 | Returns:
274 | Tensor of depth num_outputs
275 |
276 | Raises:
277 | TypeError: on invalid input (e.g. an unknown depthwise_location).
278 | """
279 | with tf.variable_scope(scope, default_name='expanded_conv') as s, \
280 | tf.name_scope(s.original_name_scope):
281 | prev_depth = input_tensor.get_shape().as_list()[3]
282 | if depthwise_location not in [None, 'input', 'output', 'expansion']:
283 | raise TypeError('%r is unknown value for depthwise_location' %
284 | depthwise_location)
285 | if use_explicit_padding:
286 | if padding != 'SAME':
287 | raise TypeError('`use_explicit_padding` should only be used with '
288 | '"SAME" padding.')
289 | padding = 'VALID'
290 | depthwise_func = functools.partial(
291 | slim.separable_conv2d,
292 | num_outputs=None,
293 | kernel_size=kernel_size,
294 | depth_multiplier=depthwise_channel_multiplier,
295 | stride=stride,
296 | rate=rate,
297 | normalizer_fn=normalizer_fn,
298 | padding=padding,
299 | scope='depthwise')
300 | # b1 -> b2 * r -> b2
301 | # i -> (o * r) (bottleneck) -> o
302 | input_tensor = tf.identity(input_tensor, 'input')
303 | net = input_tensor
304 |
305 | if depthwise_location == 'input':
306 | if use_explicit_padding:
307 | net = _fixed_padding(net, kernel_size, rate)
308 | net = depthwise_func(net, activation_fn=None)
309 |
310 | if callable(expansion_size):
311 | inner_size = expansion_size(num_inputs=prev_depth)
312 | else:
313 | inner_size = expansion_size
314 |
315 | if inner_size > net.shape[3]:
316 | net = split_conv(
317 | net,
318 | inner_size,
319 | num_ways=split_expansion,
320 | scope='expand',
321 | stride=1,
322 | normalizer_fn=normalizer_fn)
323 | net = tf.identity(net, 'expansion_output')
324 | if endpoints is not None:
325 | endpoints['expansion_output'] = net
326 |
327 | if depthwise_location == 'expansion':
328 | if use_explicit_padding:
329 | net = _fixed_padding(net, kernel_size, rate)
330 | net = depthwise_func(net)
331 |
332 | net = tf.identity(net, name='depthwise_output')
333 | if endpoints is not None:
334 | endpoints['depthwise_output'] = net
335 | if expansion_transform:
336 | net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
337 | # Note in contrast with expansion, we always have
338 | # projection to produce the desired output size.
339 | net = split_conv(
340 | net,
341 | num_outputs,
342 | num_ways=split_projection,
343 | stride=1,
344 | scope='project',
345 | normalizer_fn=normalizer_fn,
346 | activation_fn=tf.identity)
347 | if endpoints is not None:
348 | endpoints['projection_output'] = net
349 | if depthwise_location == 'output':
350 | if use_explicit_padding:
351 | net = _fixed_padding(net, kernel_size, rate)
352 | net = depthwise_func(net, activation_fn=None)
353 |
354 | if callable(residual): # custom residual
355 | net = residual(input_tensor=input_tensor, output_tensor=net)
356 | elif (residual and
357 | # stride check enforces that we don't add residuals when spatial
358 | # dimensions are None
359 | stride == 1 and
360 | # Depth matches
361 | net.get_shape().as_list()[3] ==
362 | input_tensor.get_shape().as_list()[3]):
363 | net += input_tensor
364 | return tf.identity(net, name='output')
365 |
366 |
367 | def split_conv(input_tensor,
368 | num_outputs,
369 | num_ways,
370 | scope,
371 | divisible_by=8,
372 | **kwargs):
373 | """Creates a split convolution.
374 |
375 | Split convolution splits the input and output into
376 | 'num_ways' blocks of approximately the same size each,
377 | and only connects the $i$-th input to the $i$-th output.
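| For example, with num_ways=2, a 32-channel input and num_outputs=48, this
| builds two parallel 1x1 convolutions (16 -> 24 each) whose outputs are
| concatenated along the channel axis.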
378 |
379 | Args:
380 | input_tensor: input tensor
381 | num_outputs: number of output filters
382 | num_ways: num blocks to split by.
383 | scope: scope for all the operators.
384 | divisible_by: make sure that every part is divisible by this.
385 | **kwargs: will be passed directly into conv2d operator
386 | Returns:
387 | tensor
388 | """
389 | b = input_tensor.get_shape().as_list()[3]
390 |
391 | if num_ways == 1 or min(b // num_ways,
392 | num_outputs // num_ways) < divisible_by:
393 | # Don't do any splitting if either side would end up with fewer than
394 | # `divisible_by` filters per block.
395 | return adaption_conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
396 |
397 | outs = []
398 | input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
399 | output_splits = _split_divisible(
400 | num_outputs, num_ways, divisible_by=divisible_by)
401 | inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
402 | base = scope
403 | for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
404 | scope = base + '_part_%d' % (i,)
405 | n = adaption_conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
406 | n = tf.identity(n, scope + '_output')
407 | outs.append(n)
408 | return tf.concat(outs, 3, name=scope + '_concat')
409 |
--------------------------------------------------------------------------------
/TensorFlow/models/mobilenet_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for mobilenet_v2, branched from slim for fp16 performance study."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import copy
22 |
23 | import tensorflow as tf
24 |
25 | from models import mobilenet
26 | from models import mobilenet_conv_blocks as ops
27 | from models import mobilenet_v2
28 |
29 |
30 | slim = tf.contrib.slim
31 |
32 |
33 | def find_ops(optype):
34 | """Find ops of a given type in graphdef or a graph.
35 |
36 | Args:
37 | optype: operation type (e.g. Conv2D)
38 | Returns:
39 | List of operations.
40 | """
41 | gd = tf.get_default_graph()
42 | return [var for var in gd.get_operations() if var.type == optype]
43 |
44 |
45 | class MobilenetV2Test(tf.test.TestCase):
46 |
47 | def setUp(self):
48 | tf.reset_default_graph()
49 |
50 | def testCreation(self):
51 | spec = dict(mobilenet_v2.V2_DEF)
52 | _, ep = mobilenet.mobilenet(
53 | tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
54 | num_convs = len(find_ops('Conv2D'))
55 |
56 | # This is mostly a sanity test. No deep reason for these particular
57 | # constants.
58 | #
59 | # All layers but the first two and the last one have two convolutions,
60 | # and there is one extra conv that is not in the spec (the logits).
61 | self.assertEqual(num_convs, len(spec['spec']) * 2 - 2)
62 | # Check that depthwise are exposed.
63 | for i in range(2, 17):
64 | self.assertIn('layer_%d/depthwise_output' % i, ep)
65 |
66 | def testCreationNoClasses(self):
67 | spec = copy.deepcopy(mobilenet_v2.V2_DEF)
68 | net, ep = mobilenet.mobilenet(
69 | tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec,
70 | num_classes=None)
71 | self.assertIs(net, ep['global_pool'])
72 |
73 | def testImageSizes(self):
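| # The default network has an overall stride of 32, so each input size
| # should produce a final feature map of input_size / 32.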
74 | for input_size, output_size in [(224, 7), (192, 6), (160, 5),
75 | (128, 4), (96, 3)]:
76 | tf.reset_default_graph()
77 | _, ep = mobilenet_v2.mobilenet(
78 | tf.placeholder(tf.float32, (10, input_size, input_size, 3)))
79 |
80 | self.assertEqual(ep['layer_18/output'].get_shape().as_list()[1:3],
81 | [output_size] * 2)
82 |
83 | def testWithSplits(self):
84 | spec = copy.deepcopy(mobilenet_v2.V2_DEF)
85 | spec['overrides'] = {
86 | (ops.expanded_conv,): dict(split_expansion=2),
87 | }
88 | _, _ = mobilenet.mobilenet(
89 | tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
90 | num_convs = len(find_ops('Conv2D'))
91 | # All but 3 ops have 3 conv operators, the remaining 3 have one,
92 | # and there is one unaccounted for.
93 | self.assertEqual(num_convs, len(spec['spec']) * 3 - 5)
94 |
95 | def testWithOutputStride8(self):
96 | out, _ = mobilenet.mobilenet_base(
97 | tf.placeholder(tf.float32, (10, 224, 224, 16)),
98 | conv_defs=mobilenet_v2.V2_DEF,
99 | output_stride=8,
100 | scope='MobilenetV2')
101 | self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
102 |
103 | def testDivisibleBy(self):
104 | tf.reset_default_graph()
105 | mobilenet_v2.mobilenet(
106 | tf.placeholder(tf.float32, (10, 224, 224, 16)),
107 | conv_defs=mobilenet_v2.V2_DEF,
108 | divisible_by=16,
109 | min_depth=32)
110 | s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
111 | s = set(s)
112 | self.assertSameElements([32, 64, 96, 160, 192, 320, 384, 576, 960, 1280,
113 | 1001], s)
114 |
115 | def testDivisibleByWithArgScope(self):
116 | tf.reset_default_graph()
117 | # Verifies that depth_multiplier arg scope actually works
118 | # if no default min_depth is provided.
119 | with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
120 | mobilenet_v2.mobilenet(
121 | tf.placeholder(tf.float32, (10, 224, 224, 2)),
122 | conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
123 | s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
124 | s = set(s)
125 | self.assertSameElements(s, [32, 192, 128, 1001])
126 |
127 | def testFineGrained(self):
128 | tf.reset_default_graph()
129 | # Verifies that depth_multiplier arg scope actually works
130 | # if no default min_depth is provided.
131 |
132 | mobilenet_v2.mobilenet(
133 | tf.placeholder(tf.float32, (10, 224, 224, 2)),
134 | conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.01,
135 | finegrain_classification_mode=True)
136 | s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
137 | s = set(s)
138 | # All convolutions will be 8->48, except for the last one.
139 | self.assertSameElements(s, [8, 48, 1001, 1280])
140 |
141 | def testMobilenetBase(self):
142 | tf.reset_default_graph()
143 | # Verifies that mobilenet_base returns pre-pooling layer.
144 | with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
145 | net, _ = mobilenet_v2.mobilenet_base(
146 | tf.placeholder(tf.float32, (10, 224, 224, 16)),
147 | conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
148 | self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128])
149 |
150 | def testWithOutputStride16(self):
151 | tf.reset_default_graph()
152 | out, _ = mobilenet.mobilenet_base(
153 | tf.placeholder(tf.float32, (10, 224, 224, 16)),
154 | conv_defs=mobilenet_v2.V2_DEF,
155 | output_stride=16)
156 | self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
157 |
158 | def testWithOutputStride8AndExplicitPadding(self):
159 | tf.reset_default_graph()
160 | out, _ = mobilenet.mobilenet_base(
161 | tf.placeholder(tf.float32, (10, 224, 224, 16)),
162 | conv_defs=mobilenet_v2.V2_DEF,
163 | output_stride=8,
164 | use_explicit_padding=True,
165 | scope='MobilenetV2')
166 | self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
167 |
168 | def testWithOutputStride16AndExplicitPadding(self):
169 | tf.reset_default_graph()
170 | out, _ = mobilenet.mobilenet_base(
171 | tf.placeholder(tf.float32, (10, 224, 224, 16)),
172 | conv_defs=mobilenet_v2.V2_DEF,
173 | output_stride=16,
174 | use_explicit_padding=True)
175 | self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
176 |
177 | def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self):
178 | sc = mobilenet.training_scope(is_training=None)
179 | self.assertNotIn('is_training', sc[slim.arg_scope_func_key(
180 | slim.batch_norm)])
181 |
182 | def testBatchNormScopeHasIsTrainingWhenItsNotNone(self):
183 | sc = mobilenet.training_scope(is_training=False)
184 | self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
185 | sc = mobilenet.training_scope(is_training=True)
186 | self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
187 | sc = mobilenet.training_scope()
188 | self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
189 |
190 |
191 | if __name__ == '__main__':
192 | tf.test.main()
193 |
--------------------------------------------------------------------------------
/TensorFlow/models/mobilenet_v2.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Mobilenet V2 model, branched from slim models for fp16 performance study.
16 |
17 | Architecture: https://arxiv.org/abs/1801.04381
18 |
19 | The base model gives 72.2% accuracy on ImageNet, with 300M multiply-adds
20 | and 3.4M parameters.
21 | """
22 |
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 |
27 | import copy
28 |
29 | import tensorflow as tf
30 |
31 | from models import mobilenet as lib
32 | from models import mobilenet_conv_blocks as ops
33 | from models import model
34 |
35 | slim = tf.contrib.slim
36 | op = lib.op
37 |
38 | expand_input = ops.expand_input_by_factor
39 |
40 |
41 | def get_variable(name, shape, dtype, cast_dtype, *args, **kwargs):
42 | # TODO(reedwm): Currently variables and gradients are transferred to other
43 | # devices and machines as type `dtype`, not `cast_dtype`. In particular,
44 | # this means in fp16 mode, variables are transferred as fp32 values, not
45 | # fp16 values, which uses extra bandwidth.
46 | var = tf.get_variable(name, shape, dtype, *args, **kwargs)
47 | return tf.cast(var, cast_dtype)
48 |
49 |
50 | def adaption(input_layer):
51 | epsilon = 1e-5
52 | variable_dtype = tf.float32
53 | dtype = tf.float16
54 |
55 | num_channels = input_layer.get_shape().as_list()[3]
56 | squeeze = [1, 2]
57 | with tf.variable_scope(None, default_name='adaption'):
58 | beta = get_variable('beta', [1, 1, 1, num_channels],
59 | variable_dtype, dtype,
60 | initializer=tf.constant_initializer(0.))
61 | alpha = get_variable('alpha', [1, 1, 1, num_channels],
62 | variable_dtype, dtype,
63 | initializer=tf.constant_initializer(1.))
64 | gamma = get_variable('gamma', [1, 1, 1, num_channels],
65 | variable_dtype, dtype,
66 | initializer=tf.constant_initializer(1.))
67 | theta = get_variable('theta', [1, 1, 1, num_channels],
68 | variable_dtype, dtype,
69 | initializer=tf.constant_initializer(0.))
70 |
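| # Descriptive note: A approximates the mean of (alpha * X - beta)^2 over
| # space and channels, so l2 = sqrt(B / A) acts as an inverse-RMS of the
| # affine-transformed response. The adaptor l2^(gamma + theta * l2) then
| # gates X, interpolating it toward the fixed point beta / alpha.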
71 | X = input_layer
72 | alpha_2 = tf.square(alpha)
73 | alpha_2 = alpha_2 / tf.reduce_mean(alpha_2) + epsilon
74 | alpha = tf.sqrt(alpha_2)
75 | A = alpha_2 * tf.reduce_mean(tf.square(X), squeeze, keepdims=True) - (
76 | 2. * alpha * beta) * tf.reduce_mean(X, squeeze, keepdims=True)
77 | A = tf.reduce_mean(
78 | A, [1, 2, 3], keepdims=True) + (tf.reduce_mean(tf.square(beta)) + epsilon)
79 | # B = tf.reduce_sum(alpha_2)
80 | B = 1.
81 | l2 = tf.sqrt(B / A)
82 | adaptor = tf.pow(l2, gamma + theta * l2)
83 | trans_back = X * adaptor + (beta / alpha) * (1. - adaptor)
84 |
85 | return trans_back
86 |
87 |
88 | def adaption_conv2d(inputs, *args, **kwargs):
89 | # Apply the GCT-style adaption gate before the convolution.
90 | inputs = adaption(inputs)
91 | return slim.conv2d(inputs, *args, **kwargs)
92 |
93 |
94 | # pyformat: disable
95 | # Architecture: https://arxiv.org/abs/1801.04381
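| # Note: the two adaption_conv2d entries below wrap the stem and head
| # convolutions with the GCT-style adaption gate defined above.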
96 | V2_DEF = dict(
97 | defaults={
98 | # Note: these parameters of batch norm affect the architecture
99 | # that's why they are here and not in training_scope.
100 | (slim.batch_norm,): {'center': True, 'scale': True},
101 | (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
102 | 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
103 | },
104 | (ops.expanded_conv,): {
105 | 'expansion_size': expand_input(6),
106 | 'split_expansion': 1,
107 | 'normalizer_fn': slim.batch_norm,
108 | 'residual': True
109 | },
110 | (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
111 | },
112 | spec=[
113 | op(adaption_conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
114 | op(ops.expanded_conv,
115 | expansion_size=expand_input(1, divisible_by=1),
116 | num_outputs=16),
117 | op(ops.expanded_conv, stride=2, num_outputs=24),
118 | op(ops.expanded_conv, stride=1, num_outputs=24),
119 | op(ops.expanded_conv, stride=2, num_outputs=32),
120 | op(ops.expanded_conv, stride=1, num_outputs=32),
121 | op(ops.expanded_conv, stride=1, num_outputs=32),
122 | op(ops.expanded_conv, stride=2, num_outputs=64),
123 | op(ops.expanded_conv, stride=1, num_outputs=64),
124 | op(ops.expanded_conv, stride=1, num_outputs=64),
125 | op(ops.expanded_conv, stride=1, num_outputs=64),
126 | op(ops.expanded_conv, stride=1, num_outputs=96),
127 | op(ops.expanded_conv, stride=1, num_outputs=96),
128 | op(ops.expanded_conv, stride=1, num_outputs=96),
129 | op(ops.expanded_conv, stride=2, num_outputs=160),
130 | op(ops.expanded_conv, stride=1, num_outputs=160),
131 | op(ops.expanded_conv, stride=1, num_outputs=160),
132 | op(ops.expanded_conv, stride=1, num_outputs=320),
133 | op(adaption_conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280)
134 | ],
135 | )
136 | # pyformat: enable
137 |
138 |
139 | @slim.add_arg_scope
140 | def mobilenet(input_tensor,
141 | num_classes=1001,
142 | depth_multiplier=1.0,
143 | scope='MobilenetV2',
144 | conv_defs=None,
145 | finegrain_classification_mode=False,
146 | min_depth=None,
147 | divisible_by=None,
148 | **kwargs):
149 | """Creates mobilenet V2 network.
150 |
151 | Inference mode is created by default. To create a training graph, use
152 | training_scope below.
153 |
154 | with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
155 | logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
156 |
157 | Args:
158 | input_tensor: The input tensor
159 | num_classes: number of classes
160 | depth_multiplier: The multiplier applied to scale number of
161 | channels in each layer. Note: this is called depth multiplier in the
162 | paper but the name is kept for consistency with slim's model builder.
163 | scope: Scope of the operator
164 | conv_defs: Allows overriding the default conv defs.
165 | finegrain_classification_mode: When set to True, the model
166 | will keep the last layer large even for small multipliers. Following
167 | https://arxiv.org/abs/1801.04381
168 | suggests that it improves performance for ImageNet-type of problems.
169 | *Note* ignored if final_endpoint makes the builder exit earlier.
170 | min_depth: If provided, will ensure that all layers will have that
171 | many channels after application of depth multiplier.
172 | divisible_by: If provided will ensure that the number of channels in
173 | all layers is divisible by this number.
174 | **kwargs: passed directly to mobilenet.mobilenet:
175 | prediction_fn- what prediction function to use.
176 | reuse- whether to reuse variables (if reuse is set to true, scope
177 | must be given).
178 | Returns:
179 | logits/endpoints pair
180 |
181 | Raises:
182 | ValueError: On invalid arguments
183 | """
184 | if conv_defs is None:
185 | conv_defs = V2_DEF
186 | if 'multiplier' in kwargs:
187 | raise ValueError('mobilenetv2 doesn\'t support the generic '
188 | '"multiplier" parameter; use "depth_multiplier" instead.')
189 | if finegrain_classification_mode:
190 | conv_defs = copy.deepcopy(conv_defs)
191 | if depth_multiplier < 1:
192 | conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier
193 |
194 | depth_args = {}
195 | # NB: only set depth_args when values are provided, to avoid overriding
196 | # whatever default depth_multiplier might have picked up from arg_scope.
197 | if min_depth is not None:
198 | depth_args['min_depth'] = min_depth
199 | if divisible_by is not None:
200 | depth_args['divisible_by'] = divisible_by
201 |
202 | with slim.arg_scope((lib.depth_multiplier,), **depth_args):
203 | return lib.mobilenet(
204 | input_tensor,
205 | num_classes=num_classes,
206 | conv_defs=conv_defs,
207 | scope=scope,
208 | multiplier=depth_multiplier,
209 | **kwargs)
210 |
211 |
212 | @slim.add_arg_scope
213 | def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
214 | """Creates base of the mobilenet (no pooling and no logits) ."""
215 | return mobilenet(input_tensor,
216 | depth_multiplier=depth_multiplier,
217 | base_only=True, **kwargs)
218 |
219 |
220 | def training_scope(**kwargs):
221 | """Defines MobilenetV2 training scope.
222 |
223 | Usage:
224 | with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
225 | logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
226 |
229 | Args:
230 | **kwargs: Passed to mobilenet.training_scope. The following parameters
231 | are supported:
232 | weight_decay- The weight decay to use for regularizing the model.
233 | stddev- Standard deviation for initialization, if negative uses xavier.
234 | dropout_keep_prob- dropout keep probability
235 | bn_decay- decay for the batch norm moving averages.
236 |
237 | Returns:
238 | An `arg_scope` to use for the mobilenet v2 model.
239 | """
240 | return lib.training_scope(**kwargs)
241 |
242 |
243 | class MobilenetModel(model.CNNModel):
244 | """Mobilenet model configuration."""
245 |
246 | def __init__(self):
247 | super(MobilenetModel, self).__init__('mobilenet', 224, 32, 0.005)
248 |
249 | def add_inference(self, cnn):
250 | with tf.contrib.slim.arg_scope(training_scope(is_training=cnn.phase_train)):
251 | cnn.top_layer, _ = mobilenet(cnn.top_layer, is_training=cnn.phase_train)
252 | cnn.top_size = cnn.top_layer.shape[-1].value
253 |
--------------------------------------------------------------------------------
/TensorFlow/models/model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Base model configuration for CNN benchmarks."""
16 | import tensorflow as tf
17 |
18 | import convnet_builder
19 |
20 |
21 | class Model(object):
22 | """Base model config for DNN benchmarks."""
23 |
24 | def __init__(self, model_name, batch_size, learning_rate, fp16_loss_scale):
25 | self.model = model_name
26 | self.batch_size = batch_size
27 | self.default_batch_size = batch_size
28 | self.learning_rate = learning_rate
29 | # TODO(reedwm) Set custom loss scales for each model instead of using the
30 | # default of 128.
31 | self.fp16_loss_scale = fp16_loss_scale
32 |
33 | def get_model(self):
34 | return self.model
35 |
36 | def get_batch_size(self):
37 | return self.batch_size
38 |
39 | def set_batch_size(self, batch_size):
40 | self.batch_size = batch_size
41 |
42 | def get_default_batch_size(self):
43 | return self.default_batch_size
44 |
45 | def get_fp16_loss_scale(self):
46 | return self.fp16_loss_scale
47 |
48 | def get_learning_rate(self, global_step, batch_size):
49 | del global_step
50 | del batch_size
51 | return self.learning_rate
52 |
53 | def add_inference(self, unused_cnn):
54 | raise ValueError('Must be implemented in derived classes')
55 |
56 | def build_network(self, inputs, **kwargs):
57 | del inputs
58 | del kwargs
59 | raise ValueError('Must be implemented in derived classes')
60 |
61 |
62 | class CNNModel(Model):
63 | """Base model configuration for CNN benchmarks."""
64 |
65 | def __init__(self,
66 | model,
67 | image_size,
68 | batch_size,
69 | learning_rate,
70 | layer_counts=None,
71 | fp16_loss_scale=128):
72 | super(CNNModel, self).__init__(model, batch_size, learning_rate,
73 | fp16_loss_scale)
74 | self.image_size = image_size
75 | self.layer_counts = layer_counts
76 |
77 | def get_image_size(self):
78 | return self.image_size
79 |
80 | def get_layer_counts(self):
81 | return self.layer_counts
82 |
83 | def skip_final_affine_layer(self):
84 | """Returns if the caller of this class should skip the final affine layer.
85 |
86 | Normally, this class adds a final affine layer to the model after calling
87 | self.add_inference(), to generate the logits. If a subclass overrides this
88 | method to return True, the caller should not add the final affine layer.
89 |
90 | This is useful for tests.
91 | """
92 | return False
93 |
94 | def build_network(self, images, phase_train=True, nclass=1001, image_depth=3,
95 | data_type=tf.float32, data_format='NCHW',
96 | use_tf_layers=True, fp16_vars=False):
97 | """Returns logits and aux_logits from images."""
98 | if data_format == 'NCHW':
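| # Inputs arrive NHWC from the input pipeline; convert to NCHW, which is
| # generally faster for cuDNN convolutions.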
99 | images = tf.transpose(images, [0, 3, 1, 2])
100 | var_type = tf.float32
101 | if data_type == tf.float16 and fp16_vars:
102 | var_type = tf.float16
103 | network = convnet_builder.ConvNetBuilder(
104 | images, image_depth, phase_train, use_tf_layers,
105 | data_format, data_type, var_type)
106 | with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
107 | self.add_inference(network)
108 | # Add the final fully-connected class layer
109 | logits = (network.affine(nclass, activation='linear')
110 | if not self.skip_final_affine_layer()
111 | else network.top_layer)
112 | aux_logits = None
113 | if network.aux_top_layer is not None:
114 | with network.switch_to_aux_top_layer():
115 | aux_logits = network.affine(
116 | nclass, activation='linear', stddev=0.001)
117 | if data_type == tf.float16:
118 | # TODO(reedwm): Determine if we should do this cast here.
119 | logits = tf.cast(logits, tf.float32)
120 | if aux_logits is not None:
121 | aux_logits = tf.cast(aux_logits, tf.float32)
122 | return logits, aux_logits
123 |
124 | # Subclasses can override this to define their own loss function. By default,
125 | # benchmark_cnn.py defines its own loss function. If overridden, it must have
126 | # the same signature as benchmark_cnn.loss_function.
127 | loss_function = None
128 |
--------------------------------------------------------------------------------
/TensorFlow/models/model_config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Model configurations for CNN benchmarks.
17 | """
18 |
19 | from models import alexnet_model
20 | from models import densenet_model
21 | from models import googlenet_model
22 | from models import inception_model
23 | from models import lenet_model
24 | from models import mobilenet_v2
25 | from models import nasnet_model
26 | from models import official_resnet_model
27 | from models import overfeat_model
28 | from models import resnet_model
29 | from models import trivial_model
30 | from models import vgg_model
31 |
32 |
33 | _model_name_to_imagenet_model = {
34 | 'vgg11': vgg_model.Vgg11Model,
35 | 'vgg16': vgg_model.Vgg16Model,
36 | 'vgg19': vgg_model.Vgg19Model,
37 | 'lenet': lenet_model.Lenet5Model,
38 | 'googlenet': googlenet_model.GooglenetModel,
39 | 'overfeat': overfeat_model.OverfeatModel,
40 | 'alexnet': alexnet_model.AlexnetModel,
41 | 'trivial': trivial_model.TrivialModel,
42 | 'inception3': inception_model.Inceptionv3Model,
43 | 'inception4': inception_model.Inceptionv4Model,
44 | 'official_resnet18_v2':
45 | lambda: official_resnet_model.ImagenetResnetModel(18),
46 | 'official_resnet34_v2':
47 | lambda: official_resnet_model.ImagenetResnetModel(34),
48 | 'official_resnet50_v2':
49 | lambda: official_resnet_model.ImagenetResnetModel(50),
50 | 'official_resnet101_v2':
51 | lambda: official_resnet_model.ImagenetResnetModel(101),
52 | 'official_resnet152_v2':
53 | lambda: official_resnet_model.ImagenetResnetModel(152),
54 | 'official_resnet200_v2':
55 | lambda: official_resnet_model.ImagenetResnetModel(200),
56 | 'official_resnet18':
57 | lambda: official_resnet_model.ImagenetResnetModel(18, version=1),
58 | 'official_resnet34':
59 | lambda: official_resnet_model.ImagenetResnetModel(34, version=1),
60 | 'official_resnet50':
61 | lambda: official_resnet_model.ImagenetResnetModel(50, version=1),
62 | 'official_resnet101':
63 | lambda: official_resnet_model.ImagenetResnetModel(101, version=1),
64 | 'official_resnet152':
65 | lambda: official_resnet_model.ImagenetResnetModel(152, version=1),
66 | 'official_resnet200':
67 | lambda: official_resnet_model.ImagenetResnetModel(200, version=1),
68 | 'resnet50': resnet_model.create_resnet50_model,
69 | 'resnet50_v1.5': resnet_model.create_resnet50_v1_5_model,
70 | 'resnet50_v2': resnet_model.create_resnet50_v2_model,
71 | 'resnet101': resnet_model.create_resnet101_model,
72 | 'resnet101_v2': resnet_model.create_resnet101_v2_model,
73 | 'resnet152': resnet_model.create_resnet152_model,
74 | 'resnet152_v2': resnet_model.create_resnet152_v2_model,
75 | 'nasnet': nasnet_model.NasnetModel,
76 | 'nasnetlarge': nasnet_model.NasnetLargeModel,
77 | 'mobilenet': mobilenet_v2.MobilenetModel,
78 |
79 | }
80 |
81 |
82 | _model_name_to_cifar_model = {
83 | 'alexnet': alexnet_model.AlexnetCifar10Model,
84 | 'resnet20': resnet_model.create_resnet20_cifar_model,
85 | 'resnet20_v2': resnet_model.create_resnet20_v2_cifar_model,
86 | 'resnet32': resnet_model.create_resnet32_cifar_model,
87 | 'resnet32_v2': resnet_model.create_resnet32_v2_cifar_model,
88 | 'resnet44': resnet_model.create_resnet44_cifar_model,
89 | 'resnet44_v2': resnet_model.create_resnet44_v2_cifar_model,
90 | 'resnet56': resnet_model.create_resnet56_cifar_model,
91 | 'resnet56_v2': resnet_model.create_resnet56_v2_cifar_model,
92 | 'resnet110': resnet_model.create_resnet110_cifar_model,
93 | 'resnet110_v2': resnet_model.create_resnet110_v2_cifar_model,
94 | 'trivial': trivial_model.TrivialCifar10Model,
95 | 'densenet40_k12': densenet_model.create_densenet40_k12_model,
96 | 'densenet100_k12': densenet_model.create_densenet100_k12_model,
97 | 'densenet100_k24': densenet_model.create_densenet100_k24_model,
98 | 'nasnet': nasnet_model.NasnetCifarModel,
99 | }
100 |
101 |
102 | def _get_model_map(dataset_name):
103 | if 'cifar10' == dataset_name:
104 | return _model_name_to_cifar_model
105 | elif dataset_name in ('imagenet', 'synthetic'):
106 | return _model_name_to_imagenet_model
107 | else:
108 | raise ValueError('Invalid dataset name: %s' % dataset_name)
109 |
110 |
111 | def get_model_config(model_name, dataset):
112 | """Map model name to model network configuration."""
113 | model_map = _get_model_map(dataset.name)
114 | if model_name not in model_map:
115 | raise ValueError('Invalid model name \'%s\' for dataset \'%s\'' %
116 | (model_name, dataset.name))
117 | else:
118 | return model_map[model_name]()
119 |
120 |
121 | def register_model(model_name, dataset_name, model_func):
122 | """Register a new model that can be obtained with `get_model_config`."""
123 | model_map = _get_model_map(dataset_name)
124 | if model_name in model_map:
125 | raise ValueError('Model "%s" is already registered for dataset "%s"' %
126 | (model_name, dataset_name))
127 | model_map[model_name] = model_func
128 |
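| # Example (hypothetical names): a custom factory registered this way can
| # later be constructed via get_model_config:
| #
| # register_model('my_model', 'imagenet', lambda: MyCNNModel())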
--------------------------------------------------------------------------------
/TensorFlow/models/nasnet_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for nasnet."""
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import tensorflow as tf
21 |
22 | from models import nasnet_model as nasnet
23 |
24 | slim = tf.contrib.slim
25 |
26 |
27 | class NASNetTest(tf.test.TestCase):
28 |
29 | def testBuildLogitsCifarModel(self):
30 | batch_size = 5
31 | height, width = 32, 32
32 | num_classes = 10
33 | inputs = tf.random_uniform((batch_size, height, width, 3))
34 | tf.train.create_global_step()
35 | with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
36 | logits, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
37 | auxlogits = end_points['AuxLogits']
38 | predictions = end_points['Predictions']
39 | self.assertListEqual(auxlogits.get_shape().as_list(),
40 | [batch_size, num_classes])
41 | self.assertListEqual(logits.get_shape().as_list(),
42 | [batch_size, num_classes])
43 | self.assertListEqual(predictions.get_shape().as_list(),
44 | [batch_size, num_classes])
45 |
46 | def testBuildLogitsMobileModel(self):
47 | batch_size = 5
48 | height, width = 224, 224
49 | num_classes = 1000
50 | inputs = tf.random_uniform((batch_size, height, width, 3))
51 | tf.train.create_global_step()
52 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
53 | logits, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
54 | auxlogits = end_points['AuxLogits']
55 | predictions = end_points['Predictions']
56 | self.assertListEqual(auxlogits.get_shape().as_list(),
57 | [batch_size, num_classes])
58 | self.assertListEqual(logits.get_shape().as_list(),
59 | [batch_size, num_classes])
60 | self.assertListEqual(predictions.get_shape().as_list(),
61 | [batch_size, num_classes])
62 |
63 | def testBuildLogitsLargeModel(self):
64 | batch_size = 5
65 | height, width = 331, 331
66 | num_classes = 1000
67 | inputs = tf.random_uniform((batch_size, height, width, 3))
68 | tf.train.create_global_step()
69 | with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
70 | logits, end_points = nasnet.build_nasnet_large(inputs, num_classes)
71 | auxlogits = end_points['AuxLogits']
72 | predictions = end_points['Predictions']
73 | self.assertListEqual(auxlogits.get_shape().as_list(),
74 | [batch_size, num_classes])
75 | self.assertListEqual(logits.get_shape().as_list(),
76 | [batch_size, num_classes])
77 | self.assertListEqual(predictions.get_shape().as_list(),
78 | [batch_size, num_classes])
79 |
80 | def testBuildPreLogitsCifarModel(self):
81 | batch_size = 5
82 | height, width = 32, 32
83 | num_classes = None
84 | inputs = tf.random_uniform((batch_size, height, width, 3))
85 | tf.train.create_global_step()
86 | with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
87 | net, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
88 | self.assertFalse('AuxLogits' in end_points)
89 | self.assertFalse('Predictions' in end_points)
90 | self.assertTrue(net.op.name.startswith('final_layer/Mean'))
91 | self.assertListEqual(net.get_shape().as_list(), [batch_size, 768])
92 |
93 | def testBuildPreLogitsMobileModel(self):
94 | batch_size = 5
95 | height, width = 224, 224
96 | num_classes = None
97 | inputs = tf.random_uniform((batch_size, height, width, 3))
98 | tf.train.create_global_step()
99 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
100 | net, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
101 | self.assertFalse('AuxLogits' in end_points)
102 | self.assertFalse('Predictions' in end_points)
103 | self.assertTrue(net.op.name.startswith('final_layer/Mean'))
104 | self.assertListEqual(net.get_shape().as_list(), [batch_size, 1056])
105 |
106 | def testBuildPreLogitsLargeModel(self):
107 | batch_size = 5
108 | height, width = 331, 331
109 | num_classes = None
110 | inputs = tf.random_uniform((batch_size, height, width, 3))
111 | tf.train.create_global_step()
112 | with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
113 | net, end_points = nasnet.build_nasnet_large(inputs, num_classes)
114 | self.assertFalse('AuxLogits' in end_points)
115 | self.assertFalse('Predictions' in end_points)
116 | self.assertTrue(net.op.name.startswith('final_layer/Mean'))
117 | self.assertListEqual(net.get_shape().as_list(), [batch_size, 4032])
118 |
119 | def testAllEndPointsShapesCifarModel(self):
120 | batch_size = 5
121 | height, width = 32, 32
122 | num_classes = 10
123 | inputs = tf.random_uniform((batch_size, height, width, 3))
124 | tf.train.create_global_step()
125 | with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
126 | _, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
127 | endpoints_shapes = {'Stem': [batch_size, 32, 32, 96],
128 | 'Cell_0': [batch_size, 32, 32, 192],
129 | 'Cell_1': [batch_size, 32, 32, 192],
130 | 'Cell_2': [batch_size, 32, 32, 192],
131 | 'Cell_3': [batch_size, 32, 32, 192],
132 | 'Cell_4': [batch_size, 32, 32, 192],
133 | 'Cell_5': [batch_size, 32, 32, 192],
134 | 'Cell_6': [batch_size, 16, 16, 384],
135 | 'Cell_7': [batch_size, 16, 16, 384],
136 | 'Cell_8': [batch_size, 16, 16, 384],
137 | 'Cell_9': [batch_size, 16, 16, 384],
138 | 'Cell_10': [batch_size, 16, 16, 384],
139 | 'Cell_11': [batch_size, 16, 16, 384],
140 | 'Cell_12': [batch_size, 8, 8, 768],
141 | 'Cell_13': [batch_size, 8, 8, 768],
142 | 'Cell_14': [batch_size, 8, 8, 768],
143 | 'Cell_15': [batch_size, 8, 8, 768],
144 | 'Cell_16': [batch_size, 8, 8, 768],
145 | 'Cell_17': [batch_size, 8, 8, 768],
146 | 'Reduction_Cell_0': [batch_size, 16, 16, 256],
147 | 'Reduction_Cell_1': [batch_size, 8, 8, 512],
148 | 'global_pool': [batch_size, 768],
149 | # Logits and predictions
150 | 'AuxLogits': [batch_size, num_classes],
151 | 'Logits': [batch_size, num_classes],
152 | 'Predictions': [batch_size, num_classes]}
153 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
154 | for endpoint_name in endpoints_shapes:
155 | tf.logging.info('Endpoint name: {}'.format(endpoint_name))
156 | expected_shape = endpoints_shapes[endpoint_name]
157 | self.assertTrue(endpoint_name in end_points)
158 | self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
159 | expected_shape)
160 |
161 | def testAllEndPointsShapesMobileModel(self):
162 | batch_size = 5
163 | height, width = 224, 224
164 | num_classes = 1000
165 | inputs = tf.random_uniform((batch_size, height, width, 3))
166 | tf.train.create_global_step()
167 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
168 | _, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
169 | endpoints_shapes = {'Stem': [batch_size, 28, 28, 88],
170 | 'Cell_0': [batch_size, 28, 28, 264],
171 | 'Cell_1': [batch_size, 28, 28, 264],
172 | 'Cell_2': [batch_size, 28, 28, 264],
173 | 'Cell_3': [batch_size, 28, 28, 264],
174 | 'Cell_4': [batch_size, 14, 14, 528],
175 | 'Cell_5': [batch_size, 14, 14, 528],
176 | 'Cell_6': [batch_size, 14, 14, 528],
177 | 'Cell_7': [batch_size, 14, 14, 528],
178 | 'Cell_8': [batch_size, 7, 7, 1056],
179 | 'Cell_9': [batch_size, 7, 7, 1056],
180 | 'Cell_10': [batch_size, 7, 7, 1056],
181 | 'Cell_11': [batch_size, 7, 7, 1056],
182 | 'Reduction_Cell_0': [batch_size, 14, 14, 352],
183 | 'Reduction_Cell_1': [batch_size, 7, 7, 704],
184 | 'global_pool': [batch_size, 1056],
185 | # Logits and predictions
186 | 'AuxLogits': [batch_size, num_classes],
187 | 'Logits': [batch_size, num_classes],
188 | 'Predictions': [batch_size, num_classes]}
189 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
190 | for endpoint_name in endpoints_shapes:
191 | tf.logging.info('Endpoint name: {}'.format(endpoint_name))
192 | expected_shape = endpoints_shapes[endpoint_name]
193 | self.assertTrue(endpoint_name in end_points)
194 | self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
195 | expected_shape)
196 |
197 | def testAllEndPointsShapesLargeModel(self):
198 | batch_size = 5
199 | height, width = 331, 331
200 | num_classes = 1000
201 | inputs = tf.random_uniform((batch_size, height, width, 3))
202 | tf.train.create_global_step()
203 | with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
204 | _, end_points = nasnet.build_nasnet_large(inputs, num_classes)
205 | endpoints_shapes = {'Stem': [batch_size, 42, 42, 336],
206 | 'Cell_0': [batch_size, 42, 42, 1008],
207 | 'Cell_1': [batch_size, 42, 42, 1008],
208 | 'Cell_2': [batch_size, 42, 42, 1008],
209 | 'Cell_3': [batch_size, 42, 42, 1008],
210 | 'Cell_4': [batch_size, 42, 42, 1008],
211 | 'Cell_5': [batch_size, 42, 42, 1008],
212 | 'Cell_6': [batch_size, 21, 21, 2016],
213 | 'Cell_7': [batch_size, 21, 21, 2016],
214 | 'Cell_8': [batch_size, 21, 21, 2016],
215 | 'Cell_9': [batch_size, 21, 21, 2016],
216 | 'Cell_10': [batch_size, 21, 21, 2016],
217 | 'Cell_11': [batch_size, 21, 21, 2016],
218 | 'Cell_12': [batch_size, 11, 11, 4032],
219 | 'Cell_13': [batch_size, 11, 11, 4032],
220 | 'Cell_14': [batch_size, 11, 11, 4032],
221 | 'Cell_15': [batch_size, 11, 11, 4032],
222 | 'Cell_16': [batch_size, 11, 11, 4032],
223 | 'Cell_17': [batch_size, 11, 11, 4032],
224 | 'Reduction_Cell_0': [batch_size, 21, 21, 1344],
225 | 'Reduction_Cell_1': [batch_size, 11, 11, 2688],
226 | 'global_pool': [batch_size, 4032],
227 | # Logits and predictions
228 | 'AuxLogits': [batch_size, num_classes],
229 | 'Logits': [batch_size, num_classes],
230 | 'Predictions': [batch_size, num_classes]}
231 | self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
232 | for endpoint_name in endpoints_shapes:
233 | tf.logging.info('Endpoint name: {}'.format(endpoint_name))
234 | expected_shape = endpoints_shapes[endpoint_name]
235 | self.assertTrue(endpoint_name in end_points)
236 | self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
237 | expected_shape)
238 |
239 | def testVariablesSetDeviceMobileModel(self):
240 | batch_size = 5
241 | height, width = 224, 224
242 | num_classes = 1000
243 | inputs = tf.random_uniform((batch_size, height, width, 3))
244 | tf.train.create_global_step()
245 | # Force all Variables to reside on the device.
246 | with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
247 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
248 | nasnet.build_nasnet_mobile(inputs, num_classes)
249 | with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
250 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
251 | nasnet.build_nasnet_mobile(inputs, num_classes)
252 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'):
253 | self.assertDeviceEqual(v.device, '/cpu:0')
254 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'):
255 | self.assertDeviceEqual(v.device, '/gpu:0')
256 |
257 | def testUnknownBatchSizeMobileModel(self):
258 | batch_size = 1
259 | height, width = 224, 224
260 | num_classes = 1000
261 | with self.test_session() as sess:
262 | inputs = tf.placeholder(tf.float32, (None, height, width, 3))
263 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
264 | logits, _ = nasnet.build_nasnet_mobile(inputs, num_classes)
265 | self.assertListEqual(logits.get_shape().as_list(),
266 | [None, num_classes])
267 | images = tf.random_uniform((batch_size, height, width, 3))
268 | sess.run(tf.global_variables_initializer())
269 | output = sess.run(logits, {inputs: images.eval()})
270 | self.assertEqual(output.shape, (batch_size, num_classes))
271 |
272 | def testEvaluationMobileModel(self):
273 | batch_size = 2
274 | height, width = 224, 224
275 | num_classes = 1000
276 | with self.test_session() as sess:
277 | eval_inputs = tf.random_uniform((batch_size, height, width, 3))
278 | with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
279 | logits, _ = nasnet.build_nasnet_mobile(eval_inputs,
280 | num_classes,
281 | is_training=False)
282 | predictions = tf.argmax(logits, 1)
283 | sess.run(tf.global_variables_initializer())
284 | output = sess.run(predictions)
285 | self.assertEqual(output.shape, (batch_size,))
286 |
287 |
288 | if __name__ == '__main__':
289 | tf.test.main()
290 |
--------------------------------------------------------------------------------
/TensorFlow/models/official_resnet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Import official resnet models."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 | import datasets
23 | from models import model as model_lib
24 |
25 |
26 | class ImagenetResnetModel(model_lib.CNNModel):
27 | """Official resnet models."""
28 |
29 | def __init__(self, resnet_size, version=2):
30 | """These are the parameters that work for Imagenet data.
31 |
32 | Args:
33 | resnet_size: The number of convolutional layers needed in the model.
34 | version: 1 or 2 for v1 or v2, respectively.
35 | """
36 | default_batch_sizes = {
37 | 50: 128,
38 | 101: 32,
39 | 152: 32
40 | }
41 | batch_size = default_batch_sizes.get(resnet_size, 32)
42 | default_learning_rate = 0.0125 * batch_size / 32
43 | model_name = 'official_resnet_{}_v{}'.format(resnet_size, version)
44 | super(ImagenetResnetModel, self).__init__(
45 | model_name, 224, batch_size, default_learning_rate)
46 | self.resnet_size = resnet_size
47 | self.version = version
48 |
49 | def get_learning_rate(self, global_step, batch_size):
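| # Piecewise-constant schedule: the base learning rate (scaled linearly
| # with the actual batch size) decays 10x at epochs 30, 60, 80 and 90.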
50 | num_batches_per_epoch = (
51 | float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
52 | boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
53 | values = [1, 0.1, 0.01, 0.001, 0.0001]
54 | adjusted_learning_rate = (
55 | self.learning_rate / self.default_batch_size * batch_size)
56 | values = [v * adjusted_learning_rate for v in values]
57 | return tf.train.piecewise_constant(global_step, boundaries, values)
58 |
59 | def build_network(self, images, phase_train=True, nclass=1001, image_depth=3,
60 | data_type=tf.float32, data_format='NCHW',
61 | use_tf_layers=True, fp16_vars=False):
62 | del image_depth
63 | del data_format
64 | del use_tf_layers
65 | # pylint: disable=g-import-not-at-top
66 | try:
67 | from official.resnet.imagenet_main import ImagenetModel
68 | except ImportError:
69 | tf.logging.fatal('Please add tensorflow/models to the PYTHONPATH.')
70 | raise
71 | images = tf.cast(images, data_type)
72 | model_class = ImagenetModel(resnet_size=self.resnet_size,
73 | resnet_version=self.version,
74 | # The official model dtype seems to be ignored,
75 | # as the dtype it uses is the dtype of the input
76 | # images. Doesn't hurt to set it though.
77 | dtype=data_type)
78 | logits = model_class(images, phase_train)
79 | logits = tf.cast(logits, tf.float32)
80 | return logits, None
81 |
--------------------------------------------------------------------------------
/TensorFlow/models/overfeat_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Overfeat model configuration.
17 |
18 | References:
19 | OverFeat: Integrated Recognition, Localization and Detection using
20 | Convolutional Networks
21 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus,
22 | Yann LeCun, 2014
23 | http://arxiv.org/abs/1312.6229
24 | """
25 |
26 | from models import model
27 |
28 |
29 | class OverfeatModel(model.CNNModel):
30 | """OverfeatModel."""
31 |
32 | def __init__(self):
33 | super(OverfeatModel, self).__init__('overfeat', 231, 32, 0.005)
34 |
35 | def add_inference(self, cnn):
36 | # Note: VALID requires padding the images by 3 in width and height
37 | cnn.conv(96, 11, 11, 4, 4, mode='VALID')
38 | cnn.mpool(2, 2)
39 | cnn.conv(256, 5, 5, 1, 1, mode='VALID')
40 | cnn.mpool(2, 2)
41 | cnn.conv(512, 3, 3)
42 | cnn.conv(1024, 3, 3)
43 | cnn.conv(1024, 3, 3)
44 | cnn.mpool(2, 2)
45 | cnn.reshape([-1, 1024 * 6 * 6])
46 | cnn.affine(3072)
47 | cnn.dropout()
48 | cnn.affine(4096)
49 | cnn.dropout()
50 |
--------------------------------------------------------------------------------
/TensorFlow/models/resnet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Resnet model configuration.
17 |
18 | References:
19 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
20 | Deep Residual Learning for Image Recognition
21 | arXiv:1512.03385 (2015)
22 |
23 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
24 | Identity Mappings in Deep Residual Networks
25 | arXiv:1603.05027 (2016)
26 |
27 | Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy,
28 | Alan L. Yuille
29 | DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
30 | Atrous Convolution, and Fully Connected CRFs
31 | arXiv:1606.00915 (2016)
32 | """
33 |
34 | import numpy as np
35 | from six.moves import xrange # pylint: disable=redefined-builtin
36 | import tensorflow as tf
37 | import datasets
38 | from models import model as model_lib
39 |
40 |
41 | def bottleneck_block_v1(cnn, depth, depth_bottleneck, stride):
42 | """Bottleneck block with identity short-cut for ResNet v1.
43 |
44 | Args:
45 | cnn: the network to append bottleneck blocks.
46 | depth: the number of output filters for this bottleneck block.
47 | depth_bottleneck: the number of bottleneck filters for this block.
48 | stride: Stride used in the first layer of the bottleneck block.
49 | """
50 | input_layer = cnn.top_layer
51 | in_size = cnn.top_size
52 | name_key = 'resnet_v1'
53 | name = name_key + str(cnn.counts[name_key])
54 | cnn.counts[name_key] += 1
55 |
56 | with tf.variable_scope(name):
57 | if depth == in_size:
58 | if stride == 1:
59 | shortcut = input_layer
60 | else:
61 | shortcut = cnn.apool(
62 | 1, 1, stride, stride, input_layer=input_layer,
63 | num_channels_in=in_size)
64 | else:
65 | shortcut = cnn.conv(
66 | depth, 1, 1, stride, stride, activation=None,
67 | use_batch_norm=True, input_layer=input_layer,
68 | num_channels_in=in_size, bias=None)
69 | cnn.conv(depth_bottleneck, 1, 1, stride, stride,
70 | input_layer=input_layer, num_channels_in=in_size,
71 | use_batch_norm=True, bias=None)
72 | cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
73 | use_batch_norm=True, bias=None)
74 | res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
75 | use_batch_norm=True, bias=None)
76 | # Optional squeeze-and-excitation (SE) branch, disabled. If enabled, it
77 | # would rescale `res` by a learned per-channel sigmoid gate:
78 | # cn = cnn.top_size
79 | # se = cnn.spatial_mean()
80 | # se = cnn.affine(cn / 16)
81 | # se = cnn.affine(cn, activation=None)
82 | # se = tf.sigmoid(se)
83 | # if cnn.data_format == 'NCHW':
84 | #   se = tf.reshape(se, [-1, cn, 1, 1])
85 | # else:
86 | #   se = tf.reshape(se, [-1, 1, 1, cn])
87 | # res = res * se
89 |
90 | output = tf.nn.relu(shortcut + res)
91 | cnn.top_layer = output
92 | cnn.top_size = depth
93 |
94 |
95 | def bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride):
96 | """Bottleneck block with identity short-cut for ResNet v1.5.
97 |
98 | ResNet v1.5 is the informal name for ResNet v1 where stride 2 is used in the
99 | first 3x3 convolution of each block instead of the first 1x1 convolution.
100 |
101 | First seen at https://github.com/facebook/fb.resnet.torch. Used in the paper
102 | "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"
103 | (arXiv:1706.02677v2) and by fast.ai to train to accuracy in 45 epochs using
104 | multiple image sizes.
105 |
106 | Args:
107 | cnn: the network to append bottleneck blocks.
108 | depth: the number of output filters for this bottleneck block.
109 | depth_bottleneck: the number of bottleneck filters for this block.
110 | stride: Stride used in the first layer of the bottleneck block.
111 | """
112 | input_layer = cnn.top_layer
113 | in_size = cnn.top_size
114 | name_key = 'resnet_v1.5'
115 | name = name_key + str(cnn.counts[name_key])
116 | cnn.counts[name_key] += 1
117 |
118 | with tf.variable_scope(name):
119 | if depth == in_size:
120 | if stride == 1:
121 | shortcut = input_layer
122 | else:
123 | shortcut = cnn.apool(
124 | 1, 1, stride, stride, input_layer=input_layer,
125 | num_channels_in=in_size)
126 | else:
127 | shortcut = cnn.conv(
128 | depth, 1, 1, stride, stride, activation=None,
129 | use_batch_norm=True, input_layer=input_layer,
130 | num_channels_in=in_size, bias=None)
131 | cnn.conv(depth_bottleneck, 1, 1, 1, 1,
132 | input_layer=input_layer, num_channels_in=in_size,
133 | use_batch_norm=True, bias=None)
134 | cnn.conv(depth_bottleneck, 3, 3, stride, stride, mode='SAME_RESNET',
135 | use_batch_norm=True, bias=None)
136 | res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
137 | use_batch_norm=True, bias=None)
138 | output = tf.nn.relu(shortcut + res)
139 | cnn.top_layer = output
140 | cnn.top_size = depth
141 |
142 |
143 | def bottleneck_block_v2(cnn, depth, depth_bottleneck, stride):
144 | """Bottleneck block with identity short-cut for ResNet v2.
145 |
146 | The main difference from v1 is that a batch norm and relu are done at the
147 | start of the block, instead of the end. This initial batch norm and relu is
148 | collectively called a pre-activation.
149 |
150 | Args:
151 | cnn: the network to append bottleneck blocks.
152 | depth: the number of output filters for this bottleneck block.
153 | depth_bottleneck: the number of bottleneck filters for this block.
154 | stride: Stride used in the first layer of the bottleneck block.
155 | """
156 | input_layer = cnn.top_layer
157 | in_size = cnn.top_size
158 | name_key = 'resnet_v2'
159 | name = name_key + str(cnn.counts[name_key])
160 | cnn.counts[name_key] += 1
161 |
162 | preact = cnn.batch_norm()
163 | preact = tf.nn.relu(preact)
164 | with tf.variable_scope(name):
165 | if depth == in_size:
166 | if stride == 1:
167 | shortcut = input_layer
168 | else:
169 | shortcut = cnn.apool(
170 | 1, 1, stride, stride, input_layer=input_layer,
171 | num_channels_in=in_size)
172 | else:
173 | shortcut = cnn.conv(
174 | depth, 1, 1, stride, stride, activation=None, use_batch_norm=False,
175 | input_layer=preact, num_channels_in=in_size, bias=None)
176 | cnn.conv(depth_bottleneck, 1, 1, stride, stride,
177 | input_layer=preact, num_channels_in=in_size,
178 | use_batch_norm=True, bias=None)
179 | cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
180 | use_batch_norm=True, bias=None)
181 | res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
182 | use_batch_norm=False, bias=None)
183 |
184 | output = shortcut + res
185 | cnn.top_layer = output
186 | cnn.top_size = depth
187 |
188 |
189 | def bottleneck_block(cnn, depth, depth_bottleneck, stride, version):
190 | """Bottleneck block with identity short-cut.
191 |
192 | Args:
193 | cnn: the network to append bottleneck blocks.
194 | depth: the number of output filters for this bottleneck block.
195 | depth_bottleneck: the number of bottleneck filters for this block.
196 | stride: Stride used in the first layer of the bottleneck block.
197 | version: version of ResNet to build.
198 | """
199 | if version == 'v2':
200 | bottleneck_block_v2(cnn, depth, depth_bottleneck, stride)
201 | elif version == 'v1.5':
202 | bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride)
203 | else:
204 | bottleneck_block_v1(cnn, depth, depth_bottleneck, stride)
205 |
206 |
207 | def residual_block(cnn, depth, stride, version):
208 | """Residual block with identity short-cut.
209 |
210 | Args:
211 | cnn: the network to append residual blocks.
212 | depth: the number of output filters for this residual block.
213 | stride: Stride used in the first layer of the residual block.
214 | version: version of ResNet to build.
215 | """
216 | pre_activation = (version == 'v2')
217 | input_layer = cnn.top_layer
218 | in_size = cnn.top_size
219 | if in_size != depth:
220 | # Plan A of shortcut.
221 | shortcut = cnn.apool(1, 1, stride, stride,
222 | input_layer=input_layer,
223 | num_channels_in=in_size)
224 | padding = (depth - in_size) // 2
225 | if cnn.channel_pos == 'channels_last':
226 | shortcut = tf.pad(
227 | shortcut, [[0, 0], [0, 0], [0, 0], [padding, padding]])
228 | else:
229 | shortcut = tf.pad(
230 | shortcut, [[0, 0], [padding, padding], [0, 0], [0, 0]])
231 | else:
232 | shortcut = input_layer
233 | if pre_activation:
234 | res = cnn.batch_norm(input_layer)
235 | res = tf.nn.relu(res)
236 | else:
237 | res = input_layer
238 | cnn.conv(depth, 3, 3, stride, stride,
239 | input_layer=res, num_channels_in=in_size,
240 | use_batch_norm=True, bias=None)
241 | if pre_activation:
242 | res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
243 | use_batch_norm=False, bias=None)
244 | output = shortcut + res
245 | else:
246 | res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
247 | use_batch_norm=True, bias=None)
248 | output = tf.nn.relu(shortcut + res)
249 | cnn.top_layer = output
250 | cnn.top_size = depth
251 |
252 |
253 | class ResnetModel(model_lib.CNNModel):
254 | """Resnet cnn network configuration."""
255 |
256 | def __init__(self, model, layer_counts):
257 | default_batch_sizes = {
258 | 'resnet50': 64,
259 | 'resnet101': 32,
260 | 'resnet152': 32,
261 | 'resnet50_v2': 64,
262 | 'resnet101_v2': 32,
263 | 'resnet152_v2': 32,
264 | }
265 | batch_size = 256  # Overrides the per-model defaults above.
266 | super(ResnetModel, self).__init__(model, 224, batch_size, 0.1,
267 | layer_counts)
268 | if 'v2' in model:
269 | self.version = 'v2'
270 | elif 'v1.5' in model:
271 | self.version = 'v1.5'
272 | else:
273 | self.version = 'v1'
274 |
275 | def add_inference(self, cnn):
276 | if self.layer_counts is None:
277 | raise ValueError('Layer counts not specified for %s' % self.get_model())
278 | cnn.use_batch_norm = True
279 | cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
280 | cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
281 | cnn.mpool(3, 3, 2, 2, mode='SAME')
282 | for _ in xrange(self.layer_counts[0]):
283 | bottleneck_block(cnn, 256, 64, 1, self.version)
284 | for i in xrange(self.layer_counts[1]):
285 | stride = 2 if i == 0 else 1
286 | bottleneck_block(cnn, 512, 128, stride, self.version)
287 | for i in xrange(self.layer_counts[2]):
288 | stride = 2 if i == 0 else 1
289 | bottleneck_block(cnn, 1024, 256, stride, self.version)
290 | for i in xrange(self.layer_counts[3]):
291 | stride = 2 if i == 0 else 1
292 | bottleneck_block(cnn, 2048, 512, stride, self.version)
293 | if self.version == 'v2':
294 | cnn.batch_norm()
295 | cnn.top_layer = tf.nn.relu(cnn.top_layer)
296 | cnn.spatial_mean()
297 |
298 | def get_learning_rate(self, global_step, batch_size):
299 | num_batches_per_epoch = (
300 | float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
301 | boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 90, 100]]
302 |
303 | rescaled_lr = self.learning_rate / self.default_batch_size * batch_size
304 | print('Init LR: ', rescaled_lr)
305 | rescaled_lr = rescaled_lr / (batch_size / self.batch_size)
306 | print('GPU Num: ', batch_size / self.batch_size)
307 | print('Batch size: ', batch_size)
308 | values = [1, 0.1, 0.01, 0.001, 0.0001]
309 | values = [rescaled_lr * v for v in values]
310 | lr = tf.train.piecewise_constant(global_step, boundaries, values)
311 |
312 | warmup_steps = int(num_batches_per_epoch)
313 |
314 | warmup_lr = lr * 0.1
315 |
316 | return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
317 |
318 |
319 | def create_resnet50_model():
320 | return ResnetModel('resnet50', (3, 4, 6, 3))
321 |
322 |
323 | def create_resnet50_v1_5_model():
324 | return ResnetModel('resnet50_v1.5', (3, 4, 6, 3))
325 |
326 |
327 | def create_resnet50_v2_model():
328 | return ResnetModel('resnet50_v2', (3, 4, 6, 3))
329 |
330 |
331 | def create_resnet101_model():
332 | return ResnetModel('resnet101', (3, 4, 23, 3))
333 |
334 |
335 | def create_resnet101_v2_model():
336 | return ResnetModel('resnet101_v2', (3, 4, 23, 3))
337 |
338 |
339 | def create_resnet152_model():
340 | return ResnetModel('resnet152', (3, 8, 36, 3))
341 |
342 |
343 | def create_resnet152_v2_model():
344 | return ResnetModel('resnet152_v2', (3, 8, 36, 3))
345 |
346 |
347 | class ResnetCifar10Model(model_lib.CNNModel):
348 | """Resnet cnn network configuration for Cifar 10 dataset.
349 |
350 | V1 model architecture follows the one defined in the paper:
351 | https://arxiv.org/pdf/1512.03385.pdf.
352 |
353 | V2 model architecture follows the one defined in the paper:
354 | https://arxiv.org/pdf/1603.05027.pdf.
355 | """
356 |
357 | def __init__(self, model, layer_counts):
358 | if 'v2' in model:
359 | self.version = 'v2'
360 | else:
361 | self.version = 'v1'
362 | super(ResnetCifar10Model, self).__init__(
363 | model, 32, 128, 0.1, layer_counts)
364 |
365 | def add_inference(self, cnn):
366 | if self.layer_counts is None:
367 | raise ValueError('Layer counts not specified for %s' % self.get_model())
368 |
369 | cnn.use_batch_norm = True
370 | cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
371 | if self.version == 'v2':
372 | cnn.conv(16, 3, 3, 1, 1, use_batch_norm=True)
373 | else:
374 | cnn.conv(16, 3, 3, 1, 1, activation=None, use_batch_norm=True)
375 | for i in xrange(self.layer_counts[0]):
376 | # reshape to batch_size x 16 x 32 x 32
377 | residual_block(cnn, 16, 1, self.version)
378 | for i in xrange(self.layer_counts[1]):
379 | # Subsampling is performed at the first convolution with a stride of 2
380 | stride = 2 if i == 0 else 1
381 | # reshape to batch_size x 32 x 16 x 16
382 | residual_block(cnn, 32, stride, self.version)
383 | for i in xrange(self.layer_counts[2]):
384 | stride = 2 if i == 0 else 1
385 | # reshape to batch_size x 64 x 8 x 8
386 | residual_block(cnn, 64, stride, self.version)
387 | if self.version == 'v2':
388 | cnn.batch_norm()
389 | cnn.top_layer = tf.nn.relu(cnn.top_layer)
390 | cnn.spatial_mean()
391 |
392 | def get_learning_rate(self, global_step, batch_size):
393 | num_batches_per_epoch = int(50000 / batch_size)
394 | boundaries = num_batches_per_epoch * np.array([82, 123, 300],
395 | dtype=np.int64)
396 | boundaries = [int(x) for x in boundaries]
397 | values = [0.1, 0.01, 0.001, 0.0002]
398 | return tf.train.piecewise_constant(global_step, boundaries, values)
399 |
400 |
401 | def create_resnet20_cifar_model():
402 | return ResnetCifar10Model('resnet20', (3, 3, 3))
403 |
404 |
405 | def create_resnet20_v2_cifar_model():
406 | return ResnetCifar10Model('resnet20_v2', (3, 3, 3))
407 |
408 |
409 | def create_resnet32_cifar_model():
410 | return ResnetCifar10Model('resnet32', (5, 5, 5))
411 |
412 |
413 | def create_resnet32_v2_cifar_model():
414 | return ResnetCifar10Model('resnet32_v2', (5, 5, 5))
415 |
416 |
417 | def create_resnet44_cifar_model():
418 | return ResnetCifar10Model('resnet44', (7, 7, 7))
419 |
420 |
421 | def create_resnet44_v2_cifar_model():
422 | return ResnetCifar10Model('resnet44_v2', (7, 7, 7))
423 |
424 |
425 | def create_resnet56_cifar_model():
426 | return ResnetCifar10Model('resnet56', (9, 9, 9))
427 |
428 |
429 | def create_resnet56_v2_cifar_model():
430 | return ResnetCifar10Model('resnet56_v2', (9, 9, 9))
431 |
432 |
433 | def create_resnet110_cifar_model():
434 | return ResnetCifar10Model('resnet110', (18, 18, 18))
435 |
436 |
437 | def create_resnet110_v2_cifar_model():
438 | return ResnetCifar10Model('resnet110_v2', (18, 18, 18))
439 |
--------------------------------------------------------------------------------
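A note on ResnetModel.get_learning_rate above: it combines the linear
batch-size scaling rule from the "Training ImageNet in 1 Hour" paper with step
decays at epochs 30/60/90/100 and one epoch of warmup at a tenth of the
scheduled rate. Below is a framework-free sketch of that arithmetic, assuming
(as the prints suggest) that the incoming batch_size is the total batch across
GPUs while self.batch_size is the per-GPU batch; the function and its names are
illustrative, not benchmark code.

    IMAGENET_NUM_TRAIN_IMAGES = 1281167  # value of datasets.IMAGENET_NUM_TRAIN_IMAGES

    def sketch_resnet_lr(global_step, total_batch, per_gpu_batch,
                         base_lr=0.1, default_batch=256):
        batches_per_epoch = IMAGENET_NUM_TRAIN_IMAGES / total_batch
        boundaries = [int(batches_per_epoch * e) for e in (30, 60, 90, 100)]
        # Linear scaling rule, then renormalized by the inferred GPU count.
        lr0 = base_lr / default_batch * total_batch
        lr0 /= total_batch / per_gpu_batch
        values = [lr0 * f for f in (1, 0.1, 0.01, 0.001, 0.0001)]
        lr = values[sum(global_step > b for b in boundaries)]
        # One epoch of constant warmup at a tenth of the scheduled rate.
        return lr * 0.1 if global_step < batches_per_epoch else lr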
/TensorFlow/models/trivial_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Trivial model configuration."""
16 |
17 | from models import model
18 |
19 |
20 | class TrivialModel(model.CNNModel):
21 | """Trivial model configuration."""
22 |
23 | def __init__(self):
24 | super(TrivialModel, self).__init__('trivial', 224 + 3, 32, 0.005)  # image size 227, matching the reshape below
25 |
26 | def add_inference(self, cnn):
27 | cnn.reshape([-1, 227 * 227 * 3])
28 | cnn.affine(1)
29 | cnn.affine(4096)
30 |
31 |
32 | class TrivialCifar10Model(model.CNNModel):
33 | """Trivial cifar10 model configuration."""
34 |
35 | def __init__(self):
36 | super(TrivialCifar10Model, self).__init__('trivial', 32, 32, 0.005)
37 |
38 | def add_inference(self, cnn):
39 | cnn.reshape([-1, 32 * 32 * 3])
40 | cnn.affine(1)
41 | cnn.affine(4096)
42 |
--------------------------------------------------------------------------------
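The trivial models above are throughput baselines: with the network reduced to
a flatten and two small affine layers, runtime is dominated by the input
pipeline and variable handling, which is what they are useful for measuring.
The parameter arithmetic for TrivialModel, as a quick illustration (not
benchmark code):

    flat = 227 * 227 * 3         # inputs after cnn.reshape -> 154,587
    params = flat * 1 + 1        # affine(1): weight matrix plus bias
    params += 1 * 4096 + 4096    # affine(4096): weight matrix plus bias
    print(params)                # 162,780 parameters in total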
/TensorFlow/models/vgg_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Vgg model configuration.
17 |
18 | Includes multiple models: vgg11, vgg16, vgg19, corresponding to
19 | model A, D, and E in Table 1 of [1].
20 |
21 | References:
22 | [1] Simonyan, Karen, Andrew Zisserman
23 | Very Deep Convolutional Networks for Large-Scale Image Recognition
24 | arXiv:1409.1556 (2014)
25 | """
26 |
27 | from six.moves import xrange # pylint: disable=redefined-builtin
28 | import tensorflow as tf
29 | from models import model
30 | import datasets
31 |
32 |
33 |
34 |
35 | def _construct_vgg(cnn, num_conv_layers):
36 | """Build vgg architecture from blocks."""
37 | assert len(num_conv_layers) == 5
38 | for _ in xrange(num_conv_layers[0]):
39 | cnn.conv(64, 3, 3, use_batch_norm=True, bias=None)
40 |
41 | cnn.mpool(2, 2)
42 | for _ in xrange(num_conv_layers[1]):
43 | cnn.conv(128, 3, 3, use_batch_norm=True, bias=None)
44 |
45 | cnn.mpool(2, 2)
46 | for _ in xrange(num_conv_layers[2]):
47 | cnn.conv(256, 3, 3, use_batch_norm=True, bias=None)
48 |
49 | cnn.mpool(2, 2)
50 | for _ in xrange(num_conv_layers[3]):
51 | cnn.conv(512, 3, 3, use_batch_norm=True, bias=None)
52 |
53 | cnn.mpool(2, 2)
54 | for _ in xrange(num_conv_layers[4]):
55 | cnn.conv(512, 3, 3, use_batch_norm=True, bias=None)
56 |
57 | cnn.mpool(2, 2)
58 | cnn.reshape([-1, 512 * 7 * 7])
59 | cnn.affine(4096)
60 | cnn.dropout()
61 | cnn.affine(4096)
62 | cnn.dropout()
63 |
64 |
65 | class Vgg11Model(model.CNNModel):
66 |
67 | def __init__(self):
68 | super(Vgg11Model, self).__init__('vgg11', 224, 64, 0.004)
69 |
70 | def add_inference(self, cnn):
71 | _construct_vgg(cnn, [1, 1, 2, 2, 2])
72 |
73 |
74 | class Vgg16Model(model.CNNModel):
75 |
76 | def __init__(self):
77 | super(Vgg16Model, self).__init__('vgg16', 224, 256, 0.1)
78 |
79 | def add_inference(self, cnn):
80 | cnn.use_batch_norm = True
81 | cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
82 | _construct_vgg(cnn, [2, 2, 3, 3, 3])
83 |
84 | def get_learning_rate(self, global_step, batch_size):
85 | num_batches_per_epoch = (
86 | float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
87 | boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 90, 100]]
88 |
89 | rescaled_lr = self.learning_rate / self.default_batch_size * batch_size
90 | print('Init LR: ', rescaled_lr)
91 | rescaled_lr = rescaled_lr / (batch_size / self.batch_size)
92 | print('GPU Num: ', batch_size / self.batch_size)
93 | print('Batch size: ', batch_size)
94 | values = [1, 0.1, 0.01, 0.001, 0.0001]
95 | values = [rescaled_lr * v for v in values]
96 | lr = tf.train.piecewise_constant(global_step, boundaries, values)
97 |
98 | warmup_steps = int(num_batches_per_epoch)
99 |
100 | warmup_lr = lr * 0.1
101 |
102 | return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
103 |
104 |
105 | class Vgg19Model(model.CNNModel):
106 |
107 | def __init__(self):
108 | super(Vgg19Model, self).__init__('vgg19', 224, 64, 0.004)
109 |
110 | def add_inference(self, cnn):
111 | _construct_vgg(cnn, [2, 2, 4, 4, 4])
112 |
--------------------------------------------------------------------------------
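For reference, the conv counts that the three classes above pass to
_construct_vgg line up with Table 1 of the VGG paper; a quick self-check
(illustrative summary, not benchmark code):

    VGG_CONFIGS = {
        'vgg11': [1, 1, 2, 2, 2],  # model A: 8 convs + 3 affine layers
        'vgg16': [2, 2, 3, 3, 3],  # model D: 13 convs + 3 affine layers
        'vgg19': [2, 2, 4, 4, 4],  # model E: 16 convs + 3 affine layers
    }
    # The depth in each name counts weight layers: the convs plus the three
    # affine layers added at the end of _construct_vgg.
    assert all(sum(convs) + 3 == int(name[3:])
               for name, convs in VGG_CONFIGS.items())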
/TensorFlow/platforms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/platforms/__init__.py
--------------------------------------------------------------------------------
/TensorFlow/platforms/default/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/platforms/default/__init__.py
--------------------------------------------------------------------------------
/TensorFlow/platforms/default/util.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utility code for the default platform."""
17 |
18 | import os
19 | import sys
20 | import tempfile
21 |
22 | import cnn_util
23 |
24 |
25 | _ROOT_PROJECT_DIR = os.path.dirname(cnn_util.__file__)
26 |
27 |
28 | def define_platform_params():
29 | """Defines platform-specific parameters.
30 |
31 | Currently there are no platform-specific parameters to be defined.
32 | """
33 | pass
34 |
35 |
36 | def get_cluster_manager(params, config_proto):
37 | """Returns the cluster manager to be used."""
38 | return cnn_util.GrpcClusterManager(params, config_proto)
39 |
40 |
41 | def get_command_to_run_python_module(module):
42 | """Returns a command to run a Python module."""
43 | python_interpreter = sys.executable
44 | if not python_interpreter:
45 | raise ValueError('Could not find Python interpreter')
46 | return [python_interpreter,
47 | os.path.join(_ROOT_PROJECT_DIR, module + '.py')]
48 |
49 |
50 | def get_test_output_dir():
51 | """Returns a directory where test outputs should be placed."""
52 | base_dir = os.environ.get('TEST_OUTPUTS_DIR',
53 | '/tmp/tf_cnn_benchmarks_test_outputs')
54 | if not os.path.exists(base_dir):
55 | os.mkdir(base_dir)
56 | return tempfile.mkdtemp(dir=base_dir)
57 |
58 |
59 | def get_test_data_dir():
60 | """Returns the path to the test_data directory."""
61 | return os.path.join(_ROOT_PROJECT_DIR, 'test_data')
62 |
63 |
64 | def _initialize(params, config_proto):
65 | # Currently, no platform initialization needs to be done.
66 | del params, config_proto
67 |
68 |
69 | _is_initialized = False
70 |
71 |
72 | def initialize(params, config_proto):
73 | global _is_initialized
74 | if _is_initialized:
75 | return
76 | _is_initialized = True
77 | _initialize(params, config_proto)
78 |
--------------------------------------------------------------------------------
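A hypothetical caller of the helpers above, spawning the benchmark in a child
process much as the distributed tests do (the subprocess wiring and the flag
are illustrative, not taken from the test code):

    import subprocess

    from platforms import util

    # Build ['/path/to/python', '<project>/tf_cnn_benchmarks.py'], create a
    # scratch output directory, and launch the child process.
    cmd = util.get_command_to_run_python_module('tf_cnn_benchmarks')
    scratch_dir = util.get_test_output_dir()
    proc = subprocess.Popen(cmd + ['--train_dir=' + scratch_dir])
    proc.wait()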
/TensorFlow/platforms/util.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utility code for a certain platform.
17 |
18 | This file simply imports everything from the default platform. To switch to a
19 | different platform, the import statement can be changed to point to a new
20 | platform.
21 |
22 | Creating a custom platform can be useful to, e.g., run some initialization code
23 | required by the platform or register a platform-specific model.
24 | """
25 |
26 | from platforms.default.util import * # pylint: disable=unused-import,wildcard-import
27 |
--------------------------------------------------------------------------------
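Following the docstring above, switching platforms is a one-line change. A
sketch with a hypothetical in-house package ('mycorp' is an illustrative name,
not part of the repository):

    # platforms/util.py, repointed at a custom platform:
    from platforms.mycorp.util import *  # pylint: disable=unused-import,wildcard-import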
/TensorFlow/run_tests.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Runs the tf_cnn_benchmarks tests."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import sys
22 | import unittest
23 |
24 | from absl import app
25 | from absl import flags as absl_flags
26 |
27 | import all_reduce_benchmark_test
28 | import allreduce_test
29 | import benchmark_cnn_distributed_test
30 | import benchmark_cnn_test
31 | import cnn_util_test
32 | import variable_mgr_util_test
33 | from models import nasnet_test
34 |
35 |
36 | # Ideally, we wouldn't need this option, and would run both the distributed and
37 | # the non-distributed tests. But TensorFlow allocates all the GPU memory by
38 | # default, so the non-distributed tests allocate all the GPU memory. The
39 | # distributed tests spawn processes that run TensorFlow, and cannot run if all
40 | # the GPU memory is already allocated. So if a non-distributed test is run and a
41 | # distributed test is then run in the same process, the distributed test fails
42 | # because there is no more GPU memory for the spawned processes to allocate.
43 | absl_flags.DEFINE_boolean('run_distributed_tests', False,
44 | 'If True, run the distributed tests. If False, run '
45 | 'the non-distributed tests.')
46 |
47 | absl_flags.DEFINE_boolean('full_tests', False,
48 | 'If True, all distributed or non-distributed tests '
49 | 'are run, which can take hours. If False, only a '
50 | 'subset of tests will be run. This subset runs much '
51 | 'faster and covers almost all of the functionality '
52 | 'of the full set of tests, so it is recommended to '
53 | 'keep this option set to False.')
54 |
55 | FLAGS = absl_flags.FLAGS
56 |
57 |
58 | def main(_):
59 | loader = unittest.defaultTestLoader
60 | if FLAGS.full_tests:
61 | suite = unittest.TestSuite([
62 | loader.loadTestsFromModule(allreduce_test),
63 | loader.loadTestsFromModule(cnn_util_test),
64 | loader.loadTestsFromModule(variable_mgr_util_test),
65 | loader.loadTestsFromModule(benchmark_cnn_test),
66 | loader.loadTestsFromModule(all_reduce_benchmark_test),
67 | loader.loadTestsFromModule(nasnet_test),
68 | ])
69 | dist_suite = unittest.TestSuite([
70 | loader.loadTestsFromModule(benchmark_cnn_distributed_test),
71 | ])
72 | else:
73 | suite = unittest.TestSuite([
74 | loader.loadTestsFromModule(allreduce_test),
75 | loader.loadTestsFromModule(cnn_util_test),
76 | loader.loadTestsFromModule(all_reduce_benchmark_test),
77 | loader.loadTestsFromModule(variable_mgr_util_test),
78 | loader.loadTestsFromTestCase(benchmark_cnn_test.TestAlexnetModel),
79 | loader.loadTestsFromTestCase(benchmark_cnn_test.TfCnnBenchmarksTest),
80 | loader.loadTestsFromTestCase(benchmark_cnn_test.VariableUpdateTest),
81 | loader.loadTestsFromTestCase(
82 | benchmark_cnn_test.VariableMgrLocalReplicatedTest),
83 | ])
84 | dist_suite = unittest.TestSuite([
85 | loader.loadTestsFromNames([
86 | 'benchmark_cnn_distributed_test.DistributedVariableUpdateTest'
87 | '.testVarUpdateDefault',
88 |
89 | 'benchmark_cnn_distributed_test.TfCnnBenchmarksDistributedTest'
90 | '.testParameterServer',
91 | ]),
92 | ])
93 |
94 | if FLAGS.run_distributed_tests:
95 | print('Running distributed tests')
96 | result = unittest.TextTestRunner(verbosity=2).run(dist_suite)
97 | else:
98 | print('Running non-distributed tests')
99 | result = unittest.TextTestRunner(verbosity=2).run(suite)
100 | sys.exit(not result.wasSuccessful())
101 |
102 |
103 | if __name__ == '__main__':
104 | app.run(main)
105 |
--------------------------------------------------------------------------------
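Given the flags defined above: python run_tests.py runs the fast
non-distributed subset, adding --full_tests runs the complete non-distributed
suite, and --run_distributed_tests selects the distributed tests instead; the
distributed run should be a separate invocation so that its spawned worker
processes can still allocate GPU memory.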
/TensorFlow/test_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/__init__.py
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00000-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00000-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00001-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00001-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00002-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00002-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00003-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00003-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00004-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00004-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00005-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00005-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00006-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00006-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/train-00007-of-00008:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/train-00007-of-00008
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/validation-00000-of-00002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/validation-00000-of-00002
--------------------------------------------------------------------------------
/TensorFlow/test_data/fake_tf_record_data/validation-00001-of-00002:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/fake_tf_record_data/validation-00001-of-00002
--------------------------------------------------------------------------------
/TensorFlow/test_data/images/black_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/images/black_image.jpg
--------------------------------------------------------------------------------
/TensorFlow/test_data/images/white_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/TensorFlow/test_data/images/white_image.jpg
--------------------------------------------------------------------------------
/TensorFlow/tf_cnn_benchmarks.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Benchmark script for TensorFlow.
17 |
18 | See the README for more information.
19 | """
20 |
21 | from __future__ import print_function
22 |
23 | from absl import app
24 | from absl import flags as absl_flags
25 | import tensorflow as tf
26 |
27 | import benchmark_cnn
28 | import cnn_util
29 | import flags
30 | from cnn_util import log_fn
31 |
32 |
33 | flags.define_flags()
34 | for name in flags.param_specs.keys():
35 | absl_flags.declare_key_flag(name)
36 |
37 |
38 | def main(positional_arguments):
39 | # Command-line arguments like '--distortions False' are equivalent to
40 | # '--distortions=True False', where False is a positional argument. To prevent
41 | # the benchmark from silently running with distortions, we do not allow
42 | # positional arguments.
43 | assert len(positional_arguments) >= 1
44 | if len(positional_arguments) > 1:
45 | raise ValueError('Received unknown positional arguments: %s'
46 | % positional_arguments[1:])
47 |
48 | params = benchmark_cnn.make_params_from_flags()
49 | params = benchmark_cnn.setup(params)
50 | bench = benchmark_cnn.BenchmarkCNN(params)
51 |
52 | tfversion = cnn_util.tensorflow_version_tuple()
53 | log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
54 |
55 | bench.print_info()
56 | bench.run()
57 |
58 |
59 | if __name__ == '__main__':
60 | app.run(main) # Raises error on invalid flags, unlike tf.app.run()
61 |
--------------------------------------------------------------------------------
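Because of the positional-argument guard in main above, boolean flags must be
passed in --flag / --noflag or --flag=value form; for example --nodistortions
(as in the training script below) rather than '--distortions False', which
would be parsed as enabling distortions plus a stray positional argument.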
/TensorFlow/train_gct_resnet50.sh:
--------------------------------------------------------------------------------
1 | DATA_DIR="/path/to/imagenet"
2 | CKPT_DIR="results/gct_resnet50"
3 | echo ${CKPT_DIR}
4 | python tf_cnn_benchmarks.py --data_format=NCHW --batch_size=64 \
5 | --model=resnet50 --optimizer=momentum --variable_update=replicated \
6 | --nodistortions --gradient_repacking=8 --num_gpus=4 \
7 | --num_epochs=100 --weight_decay=1e-4 --data_dir=${DATA_DIR} \
8 | --train_dir=${CKPT_DIR} --print_training_accuracy --xla --save_model_secs=3600 \
9 | --summary_verbosity=1 --save_summaries_steps=200 --eval_dir=${CKPT_DIR}/eval
--------------------------------------------------------------------------------
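With --batch_size=64 per GPU and --num_gpus=4, this script trains GCT-ResNet50
at a total batch size of 256 for 100 epochs with momentum SGD, 1e-4 weight
decay, and XLA enabled; checkpoints are written to ${CKPT_DIR} every 3600
seconds and summaries every 200 steps, with eval output under ${CKPT_DIR}/eval.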
/TensorFlow/variable_mgr_util_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Tests for variable_mgr_util."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 | import variable_mgr_util
23 |
24 |
25 | class VariableMgrUtilTest(tf.test.TestCase):
26 |
27 | def testGetLossScaleUpdateOpTruePath(self):
28 | loss_scale = tf.Variable(4)
29 | # loss_scale_normal_steps >= inc_loss_scale_every_n
30 | loss_scale_normal_steps = tf.Variable(10)
31 | inc_loss_scale_every_n = 10
32 | update_op = variable_mgr_util.get_loss_scale_update_op(
33 | loss_scale, loss_scale_normal_steps, inc_loss_scale_every_n)
34 |
35 | with self.test_session() as sess:
36 | sess.run(tf.global_variables_initializer())
37 | sess.run(update_op)
38 |
39 | self.assertEqual(sess.run(loss_scale), 8)
40 | self.assertEqual(sess.run(loss_scale_normal_steps), 0)
41 |
42 | def testGetLossScaleUpdateOpFalsePath(self):
43 | loss_scale = tf.Variable(4)
44 | # loss_scale_normal_steps < inc_loss_scale_every_n
45 | loss_scale_normal_steps = tf.Variable(9)
46 | inc_loss_scale_every_n = 10
47 | update_op = variable_mgr_util.get_loss_scale_update_op(
48 | loss_scale, loss_scale_normal_steps, inc_loss_scale_every_n)
49 |
50 | with self.test_session() as sess:
51 | sess.run(tf.global_variables_initializer())
52 | sess.run(update_op)
53 |
54 | self.assertEqual(sess.run(loss_scale), 4)
55 | self.assertEqual(sess.run(loss_scale_normal_steps), 10)
56 |
57 | def testAppendGradientsWithLossScaleWithAutoScaleDisabled(self):
58 | v = tf.Variable(0)
59 | training_ops = []
60 | get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
61 | loss_scale_params = variable_mgr_util.AutoLossScaleParams(
62 | enable_auto_loss_scale=False, # no auto loss scale.
63 | loss_scale=tf.Variable(4),
64 | loss_scale_normal_steps=tf.Variable(10),
65 | inc_loss_scale_every_n=10,
66 | is_chief=True)
67 | variable_mgr_util.append_gradients_with_loss_scale(
68 | training_ops,
69 | get_apply_gradients_ops_func,
70 | loss_scale_params,
71 | grad_has_inf_nan=True)
72 |
73 | with self.test_session() as sess:
74 | sess.run(tf.global_variables_initializer())
75 | sess.run(training_ops)
76 | self.assertEqual(sess.run(v), 1)
77 | self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
78 | self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 10)
79 |
80 | def testAppendGradientsWithLossScaleForNonChiefWorker(self):
81 | v = tf.Variable(0)
82 | training_ops = []
83 | get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
84 | loss_scale_params = variable_mgr_util.AutoLossScaleParams(
85 | enable_auto_loss_scale=True,
86 | loss_scale=tf.Variable(4),
87 | loss_scale_normal_steps=tf.Variable(10),
88 | inc_loss_scale_every_n=10,
89 | is_chief=False) # Non-chief
90 | variable_mgr_util.append_gradients_with_loss_scale(
91 | training_ops,
92 | get_apply_gradients_ops_func,
93 | loss_scale_params,
94 | grad_has_inf_nan=False)
95 |
96 | with self.test_session() as sess:
97 | sess.run(tf.global_variables_initializer())
98 | sess.run(training_ops)
99 | self.assertEqual(sess.run(v), 1)
100 | self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
101 | self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 10)
102 |
103 | def testAppendGradientsWithLossScaleWithoutNan(self):
104 | v = tf.Variable(0)
105 | training_ops = []
106 | get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
107 | loss_scale_params = variable_mgr_util.AutoLossScaleParams(
108 | enable_auto_loss_scale=True,
109 | loss_scale=tf.Variable(4, dtype=tf.float32),
110 | loss_scale_normal_steps=tf.Variable(10),
111 | inc_loss_scale_every_n=10,
112 | is_chief=True)
113 | variable_mgr_util.append_gradients_with_loss_scale(
114 | training_ops,
115 | get_apply_gradients_ops_func,
116 | loss_scale_params,
117 | grad_has_inf_nan=tf.constant(False))
118 |
119 | with self.test_session() as sess:
120 | sess.run(tf.global_variables_initializer())
121 | sess.run(training_ops)
122 | self.assertEqual(sess.run(v), 1)
123 | self.assertEqual(sess.run(loss_scale_params.loss_scale), 8)
124 | self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)
125 |
126 | def testAppendGradientsWithLossScaleWithNan(self):
127 | v = tf.Variable(0)
128 | training_ops = []
129 | get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
130 | loss_scale_params = variable_mgr_util.AutoLossScaleParams(
131 | enable_auto_loss_scale=True,
132 | loss_scale=tf.Variable(4, dtype=tf.float32),
133 | loss_scale_normal_steps=tf.Variable(10),
134 | inc_loss_scale_every_n=10,
135 | is_chief=True)
136 | variable_mgr_util.append_gradients_with_loss_scale(
137 | training_ops,
138 | get_apply_gradients_ops_func,
139 | loss_scale_params,
140 | grad_has_inf_nan=tf.constant(True))
141 |
142 | with self.test_session() as sess:
143 | sess.run(tf.global_variables_initializer())
144 | sess.run(training_ops)
145 | self.assertEqual(sess.run(v), 0) # Skip updating for v.
146 | # Halve loss_scale and reset loss_scale_normal_steps.
147 | self.assertEqual(sess.run(loss_scale_params.loss_scale), 2)
148 | self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)
149 |
150 |
151 | if __name__ == '__main__':
152 | tf.test.main()
153 |
--------------------------------------------------------------------------------
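The tests above pin down the automatic loss-scaling policy: when a gradient
contains an inf/NaN the update is skipped and the scale is halved, and after
inc_loss_scale_every_n clean steps the scale is doubled. A plain-Python
restatement of the update rule, assuming the semantics implied by the expected
values (the real op lives in variable_mgr_util.py, which is not shown here):

    def update_loss_scale(loss_scale, normal_steps, inc_every_n, has_inf_nan):
        """Sketch of the policy asserted by the tests above."""
        if has_inf_nan:
            return loss_scale / 2, 0          # halve on overflow, reset counter
        if normal_steps >= inc_every_n:
            return loss_scale * 2, 0          # double after n clean steps
        return loss_scale, normal_steps + 1   # otherwise just count the step

    assert update_loss_scale(4, 10, 10, False) == (8, 0)  # ...UpdateOpTruePath
    assert update_loss_scale(4, 9, 10, False) == (4, 10)  # ...UpdateOpFalsePath
    assert update_loss_scale(4, 10, 10, True) == (2, 0)   # ...WithNan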
/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z-x-yang/GCT/68983edd87f8cfbe709b1b51214c69eb9c81abd7/overview.png
--------------------------------------------------------------------------------