├── README.md
├── affine_transforms.py
├── cifar10
    └── resnext
    │   ├── LICENSE
    │   ├── logger.py
    │   ├── main.py
    │   ├── models
    │       ├── __init__.py
    │       ├── caffe_cifar.py
    │       ├── densenet.py
    │       ├── imagenet_resnet.py
    │       ├── preresnet.py
    │       ├── res_utils.py
    │       ├── resnet.py
    │       ├── resnext.py
    │       └── wide_resnet.py
    │   ├── test.py
    │   └── utils.py
├── helpers.py
├── load_data.py
├── lr_scheduler.py
└── plots.py


/README.md:
--------------------------------------------------------------------------------
 1 | This repository contains a PyTorch implementation for reproducing the results in our paper:
 2 | 
 3 | **[Generalization in Machine Learning via Analytical Learning Theory](https://arxiv.org/pdf/1802.07426.pdf)** \
 4 | *Kenji Kawaguchi, Yoshua Bengio, Vikas Verma, and Leslie Pack Kaelbling*
 5 | 
 6 | 
 7 | #### Test error (\%) with WideResNet28_10 and different regularization methods
 8 | |    Regularization Method    | CIFAR-10 |  CIFAR-100 |  SVHN  |
 9 | |:----------:|:--------------:|:--------------:|:------:|
10 | | Standard   | 3.79  ±  0.07  |  19.85  ±  0.14   |  2.47 ± 0.04|
11 | | Single-cutout  | 3.19 ± 0.09 | 18.13 ± 0.28  | 2.23  ± 0.03 |
12 | | Dual-cutout  |  2.61 ± 0.04 |  17.54    ±  0.09    | 2.06  ± 0.06|
13 | 
14 | * Dual-cutout is proposed in our paper based on a new learning theory.
15 | 
16 | 
17 | 
18 | 
19 | ### How to run DualCutout
20 | ```
21 | python cifar10/resnext/main.py --dualcutout --dataset cifar10 --arch wrn28_10 \
22 | --epochs 300 --batch_size 64 --learning_rate 0.1 --data_aug 1 --decay 0.0005 --schedule 150 225 \
23 | --gammas 0.1 0.1 --alpha 0.1 --cutsize 16
24 | ```
25 | Add --temp_dir and --home_dir as appropriate to the commands in this README. For CIFAR-10 and CIFAR-100 we used --cutsize 16; for SVHN we used --cutsize 20.
26 | 
27 | ### How to run Single Cutout
28 | ```
29 | python cifar10/resnext/main.py --singlecutout --dataset cifar10 --arch wrn28_10 \
30 | --epochs 300 --batch_size 64 --learning_rate 0.1 --data_aug 1 --decay 0.0005 --schedule 150 225 \
31 | --gammas 0.1 0.1 --alpha 0.1 --cutsize 16
32 | ```
33 | ### How to run baseline
34 | ```
35 | python cifar10/resnext/main.py --dataset cifar10 --arch wrn28_10 \
36 | --epochs 300 --batch_size 64 --learning_rate 0.1 --data_aug 1 --decay 0.0005 --schedule 150 225 \
37 | --gammas 0.1 0.1
38 | ```
39 | 
40 | This code has been tested with  
41 | python 2.7.9  
42 | torch 0.3.1  
43 | torchvision 0.2.0
44 | 


--------------------------------------------------------------------------------
/affine_transforms.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Affine transforms implemented on torch tensors, and
  3 | only requiring one interpolation
  4 | 
  5 | Included:
  6 | - Affine()
  7 | - AffineCompose()
  8 | - Rotation()
  9 | - Translation()
 10 | - Shear()
 11 | - Zoom()
 12 | - Flip()
 13 | 
 14 | """
 15 | 
 16 | import math
 17 | import random
 18 | import torch
 19 | 
 20 | # necessary now, but should eventually not be
 21 | import scipy.ndimage as ndi
 22 | import numpy as np
 23 | 
 24 | 
 25 | def transform_matrix_offset_center(matrix, x, y):
 26 |     """Apply offset to a transform matrix so that the image is
 27 |     transformed about the center of the image. 
 28 | 
 29 |     NOTE: This is a fairly simple operaion, so can easily be
 30 |     moved to full torch.
 31 | 
 32 |     Arguments
 33 |     ---------
 34 |     matrix : 3x3 matrix/array
 35 | 
 36 |     x : integer
 37 |         height dimension of image to be transformed
 38 | 
 39 |     y : integer
 40 |         width dimension of image to be transformed
 41 |     """
 42 |     o_x = float(x) / 2 + 0.5
 43 |     o_y = float(y) / 2 + 0.5
 44 |     offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
 45 |     reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
 46 |     transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
 47 |     return transform_matrix
 48 | 
 49 | def apply_transform(x, transform, fill_mode='nearest', fill_value=0.):
 50 |     """Applies an affine transform to a 2D array, or to each channel of a 3D array.
 51 | 
 52 |     NOTE: this can and certainly should be moved to full torch operations.
 53 | 
 54 |     Arguments
 55 |     ---------
 56 |     x : np.ndarray
 57 |         array to transform. NOTE: array should be ordered CHW
 58 |     
 59 |     transform : 3x3 affine transform matrix
 60 |         matrix to apply
 61 |     """
 62 |     x = x.astype('float32')
 63 |     transform = transform_matrix_offset_center(transform, x.shape[1], x.shape[2])
 64 |     final_affine_matrix = transform[:2, :2]
 65 |     final_offset = transform[:2, 2]
 66 |     channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix,
 67 |             final_offset, order=0, mode=fill_mode, cval=fill_value) for x_channel in x]
 68 |     x = np.stack(channel_images, axis=0)
 69 |     return x
 70 | 
 71 | class Affine(object):
 72 | 
 73 |     def __init__(self, 
 74 |                  rotation_range=None, 
 75 |                  translation_range=None,
 76 |                  shear_range=None, 
 77 |                  zoom_range=None, 
 78 |                  fill_mode='constant',
 79 |                  fill_value=0., 
 80 |                  target_fill_mode='nearest', 
 81 |                  target_fill_value=0.):
 82 |         """Perform an affine transforms with various sub-transforms, using
 83 |         only one interpolation and without having to instantiate each
 84 |         sub-transform individually.
 85 | 
 86 |         Arguments
 87 |         ---------
 88 |         rotation_range : one integer or float
 89 |             image will be rotated between (-degrees, degrees) degrees
 90 | 
 91 |         translation_range : a float or a tuple/list w/ 2 floats between [0, 1)
 92 |             first value:
 93 |                 image will be horizontally shifted between 
 94 |                 (-height_range * height_dimension, height_range * height_dimension)
 95 |             second value:
 96 |                 Image will be vertically shifted between 
 97 |                 (-width_range * width_dimension, width_range * width_dimension)
 98 | 
 99 |         shear_range : float
100 |             radian bounds on the shear transform
101 | 
102 |         zoom_range : list/tuple with two floats between [0, infinity).
103 |             first float should be less than the second
104 |             lower and upper bounds on percent zoom. 
105 |             Anything less than 1.0 will zoom in on the image, 
106 |             anything greater than 1.0 will zoom out on the image.
107 |             e.g. (0.7, 1.0) will only zoom in, 
108 |                  (1.0, 1.4) will only zoom out,
109 |                  (0.7, 1.4) will randomly zoom in or out
110 | 
111 |         fill_mode : string in {'constant', 'nearest'}
112 |             how to fill the empty space caused by the transform
113 |             ProTip : use 'nearest' for discrete images (e.g. segmentations)
114 |                     and use 'constant' for continuous images
115 | 
116 |         fill_value : float
117 |             the value to fill the empty space with if fill_mode='constant'
118 | 
119 |         target_fill_mode : same as fill_mode, but for target image
120 | 
121 |         target_fill_value : same as fill_value, but for target image
122 | 
123 |         """
124 |         self.transforms = []
125 |         if rotation_range:
126 |             rotation_tform = Rotation(rotation_range, lazy=True)
127 |             self.transforms.append(rotation_tform)
128 | 
129 |         if translation_range:
130 |             translation_tform = Translation(translation_range, lazy=True)
131 |             self.transforms.append(translation_tform)
132 | 
133 |         if shear_range:
134 |             shear_tform = Shear(shear_range, lazy=True)
135 |             self.transforms.append(shear_tform) 
136 | 
137 |         if zoom_range:
138 |             zoom_tform = Translation(zoom_range, lazy=True)
139 |             self.transforms.append(zoom_tform)
140 | 
141 |         self.fill_mode = fill_mode
142 |         self.fill_value = fill_value
143 |         self.target_fill_mode = target_fill_mode
144 |         self.target_fill_value = target_fill_value
145 | 
146 |     def __call__(self, x, y=None):
147 |         # collect all of the lazily returned tform matrices
148 |         tform_matrix = self.transforms[0](x)
149 |         for tform in self.transforms[1:]:
150 |             tform_matrix = np.dot(tform_matrix, tform(x)) 
151 | 
152 |         x = torch.from_numpy(apply_transform(x.numpy(), tform_matrix,
153 |             fill_mode=self.fill_mode, fill_value=self.fill_value))
154 | 
155 |         if y:
156 |             y = torch.from_numpy(apply_transform(y.numpy(), tform_matrix,
157 |                 fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
158 |             return x, y
159 |         else:
160 |             return x
161 | 
class AffineCompose(object):

    def __init__(self,
                 transforms,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.):
        """Apply a collection of explicit affine transforms to an input image,
        and to a target image if necessary

        Arguments
        ---------
        transforms : list or tuple
            each element in the list/tuple should be an affine transform.
            currently supported transforms:
                - Rotation()
                - Translation()
                - Shear()
                - Zoom()
            NOTE: each element's `lazy` attribute is set to True in place.

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        """
        self.transforms = transforms
        # set transforms to lazy so they only return the tform matrix
        for t in self.transforms:
            t.lazy = True
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value

    def __call__(self, x, y=None):
        """Transform x (and optionally the target y) with one shared matrix.

        Returns the transformed x, or the tuple (x, y) when y is given.
        When the transform collection is empty the inputs are returned
        unchanged.
        """
        if not self.transforms:
            # Nothing to apply; avoid indexing an empty sequence.
            return x if y is None else (x, y)

        # collect all of the lazily returned tform matrices
        tform_matrix = self.transforms[0](x)
        for tform in self.transforms[1:]:
            tform_matrix = np.dot(tform_matrix, tform(x))

        x = torch.from_numpy(apply_transform(x.numpy(), tform_matrix,
            fill_mode=self.fill_mode, fill_value=self.fill_value))

        # Compare against None explicitly: the truth value of a
        # multi-element tensor is ambiguous.
        if y is not None:
            y = torch.from_numpy(apply_transform(y.numpy(), tform_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x, y
        return x
214 | 
215 | 
class Rotation(object):

    def __init__(self,
                 rotation_range,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly rotate an image between (-degrees, degrees). If the image
        has multiple channels, the same rotation will be applied to each channel.

        Arguments
        ---------
        rotation_range : integer or float
            image will be rotated between (-degrees, degrees) degrees

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy    : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        self.rotation_range = rotation_range
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw a random angle and return the 3x3 matrix (lazy) or the
        rotated x, or (x, y) when a target y is given."""
        degree = random.uniform(-self.rotation_range, self.rotation_range)
        theta = math.pi / 180 * degree
        rotation_matrix = np.array([[math.cos(theta), -math.sin(theta), 0],
                                    [math.sin(theta), math.cos(theta), 0],
                                    [0, 0, 1]])
        if self.lazy:
            return rotation_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(), rotation_matrix,
            fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Compare against None explicitly: the truth value of a
        # multi-element tensor is ambiguous.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), rotation_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed
267 | 
268 | 
class Translation(object):

    def __init__(self,
                 translation_range,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly translate an image some fraction of total height and/or
        some fraction of total width. If the image has multiple channels,
        the same translation will be applied to each channel.

        Arguments
        ---------
        translation_range : a number, or two floats between [0, 1)
            a single number is used for both values.
            first value:
                fractional bounds of total height to shift image
                the shift is drawn from
                (-height_range * height_dimension, height_range * height_dimension)
            second value:
                fractional bounds of total width to shift image
                the shift is drawn from
                (-width_range * width_dimension, width_range * width_dimension)

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy    : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        # Accept ints as well as floats for the scalar form; an int scalar
        # would otherwise fail on the indexing below.
        if isinstance(translation_range, (int, float)):
            translation_range = (translation_range, translation_range)
        self.height_range = translation_range[0]
        self.width_range = translation_range[1]
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw random shifts and return the 3x3 matrix (lazy) or the
        translated x, or (x, y) when a target y is given.

        x.size(1) and x.size(2) are used as the height and width.
        """
        # height shift
        if self.height_range > 0:
            tx = random.uniform(-self.height_range, self.height_range) * x.size(1)
        else:
            tx = 0
        # width shift
        if self.width_range > 0:
            ty = random.uniform(-self.width_range, self.width_range) * x.size(2)
        else:
            ty = 0

        translation_matrix = np.array([[1, 0, tx],
                                       [0, 1, ty],
                                       [0, 0, 1]])
        if self.lazy:
            return translation_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(),
            translation_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Compare against None explicitly: the truth value of a
        # multi-element tensor is ambiguous.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), translation_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed
340 | 
341 | 
class Shear(object):

    def __init__(self,
                 shear_range,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly shear an image with radians (-shear_range, shear_range)

        Arguments
        ---------
        shear_range : float
            radian bounds on the shear transform

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy    : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        self.shear_range = shear_range
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw a random shear angle and return the 3x3 matrix (lazy) or
        the sheared x, or (x, y) when a target y is given."""
        shear = random.uniform(-self.shear_range, self.shear_range)
        shear_matrix = np.array([[1, -math.sin(shear), 0],
                                 [0, math.cos(shear), 0],
                                 [0, 0, 1]])
        if self.lazy:
            return shear_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(),
            shear_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Compare against None explicitly: the truth value of a
        # multi-element tensor is ambiguous.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), shear_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed
391 |       
392 | 
class Zoom(object):

    def __init__(self,
                 zoom_range,
                 fill_mode='constant',
                 fill_value=0,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly zoom in and/or out on an image

        Arguments
        ---------
        zoom_range : tuple or list with 2 values, both between (0, infinity)
            lower and upper bounds on percent zoom.
            Anything less than 1.0 will zoom in on the image,
            anything greater than 1.0 will zoom out on the image.
            e.g. (0.7, 1.0) will only zoom in,
                 (1.0, 1.4) will only zoom out,
                 (0.7, 1.4) will randomly zoom in or out

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy    : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor

        Raises
        ------
        ValueError
            if zoom_range is not a tuple/list holding exactly 2 values
        """
        # Validate the length as well as the type, so a malformed range
        # fails here instead of with an IndexError on the first call.
        if not isinstance(zoom_range, (list, tuple)) or len(zoom_range) != 2:
            raise ValueError('zoom_range must be tuple or list with 2 values')
        self.zoom_range = zoom_range
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw independent zoom factors for the two axes and return the
        3x3 matrix (lazy) or the zoomed x, or (x, y) when y is given."""
        zx = random.uniform(self.zoom_range[0], self.zoom_range[1])
        zy = random.uniform(self.zoom_range[0], self.zoom_range[1])
        zoom_matrix = np.array([[zx, 0, 0],
                                [0, zy, 0],
                                [0, 0, 1]])
        if self.lazy:
            return zoom_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(),
            zoom_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Compare against None explicitly: the truth value of a
        # multi-element tensor is ambiguous.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), zoom_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed
450 | 
451 | 
452 | 


--------------------------------------------------------------------------------
/cifar10/resnext/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Xuanyi Dong
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/cifar10/resnext/logger.py:
--------------------------------------------------------------------------------
 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
 2 | import tensorflow as tf
 3 | import numpy as np
 4 | import scipy.misc 
 5 | import sys
 6 | if sys.version[0] == '2':
 7 |   from StringIO import StringIO  # Python 2.x
 8 | elif sys.version[0] == '3':
 9 |   from io import BytesIO       # Python 3.x
10 | 
11 | 
class Logger(object):
  """Writes scalar, image and histogram summaries through a
  tf.summary.FileWriter."""

  def __init__(self, log_dir):
    """Create a summary writer logging to log_dir."""
    self.writer = tf.summary.FileWriter(log_dir)

  def scalar_summary(self, tag, value, step):
    """Log a scalar variable."""
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
    self.writer.add_summary(summary, step)

  def image_summary(self, tag, images, step):
    """Log a list of images.

    Each image is PNG-encoded and written under the tag '<tag>/<index>'.
    """
    # PNG data is binary. io.BytesIO is available on both Python 2 and 3,
    # so use it directly rather than probing for StringIO with a bare
    # `except:` that would also hide unrelated errors.
    from io import BytesIO

    img_summaries = []
    for i, img in enumerate(images):
      # Write the image to an in-memory buffer
      s = BytesIO()
      # NOTE(review): scipy.misc.toimage is not available in recent SciPy
      # releases -- confirm the SciPy version this project pins.
      scipy.misc.toimage(img).save(s, format="png")

      # Create an Image object
      img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                 height=img.shape[0],
                                 width=img.shape[1])
      # Create a Summary value
      img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

    # Create and write Summary
    summary = tf.Summary(value=img_summaries)
    self.writer.add_summary(summary, step)

  def histo_summary(self, tag, values, step, bins=1000):
    """Log a histogram of the tensor of values."""

    # Create a histogram using numpy
    counts, bin_edges = np.histogram(values, bins=bins)

    # Fill the fields of the histogram proto
    hist = tf.HistogramProto()
    hist.min = float(np.min(values))
    hist.max = float(np.max(values))
    hist.num = int(np.prod(values.shape))
    hist.sum = float(np.sum(values))
    hist.sum_squares = float(np.sum(values**2))

    # Drop the start of the first bin
    bin_edges = bin_edges[1:]

    # Add bin edges and counts
    for edge in bin_edges:
      hist.bucket_limit.append(edge)
    for c in counts:
      hist.bucket.append(c)

    # Create and write Summary
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
    self.writer.add_summary(summary, step)
    self.writer.flush()
73 | 


--------------------------------------------------------------------------------
/cifar10/resnext/main.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on Aug 15, 2018
  3 | 
  4 | @author: vermavik
  5 | '''
  6 | from __future__ import division
  7 | 
  8 | import os, sys, shutil, time, random
  9 | import argparse
 10 | from distutils.dir_util import copy_tree
 11 | from shutil import rmtree
 12 | import torch
 13 | import torch.backends.cudnn as cudnn
 14 | import torch.nn as nn
 15 | import torch.nn.functional as F
 16 | from torch.autograd import Variable
 17 | import torchvision.datasets as dset
 18 | import torchvision.transforms as transforms
 19 | from utils import AverageMeter, RecorderMeter, time_string, convert_secs2time
 20 | import models
 21 | 
 22 | import sys
 23 | if sys.version_info[0] < 3:
 24 |     import cPickle as pickle
 25 | else:
 26 |  import _pickle as pickle
 27 | from collections import OrderedDict
 28 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 29 | from load_data  import *
 30 | from helpers import *
 31 | from plots import *
 32 | 
 33 | model_names = sorted(name for name in models.__dict__
 34 |   if name.islower() and not name.startswith("__")
 35 |   and callable(models.__dict__[name]))
 36 | print (model_names)
 37 | 
 38 | parser = argparse.ArgumentParser(description='Trains ResNeXt on CIFAR or ImageNet', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 39 | parser.add_argument('--dataset', type=str, default='cifar10', choices=['cifar10', 'cifar100', 'imagenet', 'svhn', 'stl10'], help='Choose between Cifar10/100 and ImageNet.')
 40 | parser.add_argument('--arch', metavar='ARCH', default='resnext29_8_64', choices=model_names, help='model architecture: ' + ' | '.join(model_names) + ' (default: resnext29_8_64)')
 41 | # Optimization options
 42 | parser.add_argument('--epochs', type=int, default=300, help='Number of epochs to train.')
 43 | parser.add_argument('--singlecutout', action='store_true', default=False,
 44 |                     help='whether to use singlecutout')
 45 | parser.add_argument('--dualcutout', action='store_true', default=False,
 46 |                     help='whether to use dualcutout')
 47 | parser.add_argument('--cutsize', type=int, default=16, help='cutout size.')
 48 | parser.add_argument('--dropout', action='store_true', default=False,
 49 |                     help='whether to use dropout or not in final layer')
 50 | #parser.add_argument('--batch_size', type=int, default=128, help='Batch size.')
 51 | parser.add_argument('--batch_size', type=int, default=64, help='Batch size.')
 52 | parser.add_argument('--learning_rate', type=float, default=0.05, help='The Learning Rate.')
 53 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum.')
 54 | parser.add_argument('--alpha', type=float, default=0.01, help='the coefficient that controls the difference between the outputs from two cutouts')
 55 | parser.add_argument('--data_aug', type=int, default=0)
 56 | parser.add_argument('--add_name', type=str, default='')
 57 | #parser.add_argument('--decay', type=float, default=0.0005, help='Weight decay (L2 penalty).')
 58 | parser.add_argument('--decay', type=float, default=0.0000, help='Weight decay (L2 penalty).')
 59 | parser.add_argument('--schedule', type=int, nargs='+', default=[150, 225], help='Decrease learning rate at these epochs.')
 60 | parser.add_argument('--gammas', type=float, nargs='+', default=[0.1, 0.1], help='LR is multiplied by gamma on schedule, number of gammas should be equal to schedule')
 61 | # Checkpoints
 62 | parser.add_argument('--print_freq', default=1000, type=int, metavar='N', help='print frequency (default: 200)')
 63 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
 64 | parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)')
 65 | parser.add_argument('--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set')
 66 | # Acceleration
 67 | parser.add_argument('--ngpu', type=int, default=1, help='0 = CPU.')
 68 | parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)')
 69 | # random seed
 70 | parser.add_argument('--manualSeed', type=int, help='manual seed')
 71 | parser.add_argument('--job_id', type=str, default='')
 72 | parser.add_argument('--temp_dir', type = str, default = '/Tmp/vermavik/',
 73 |                         help='folder on local node where data is stored temporarily')
 74 | parser.add_argument('--home_dir', type = str, default = '/data/milatmp1/vermavik/',
 75 |                         help='file where results are to be written')
 76 | 
 77 | 
 78 | 
 79 | args = parser.parse_args()
 80 | args.use_cuda = args.ngpu>0 and torch.cuda.is_available()
 81 | 
 82 | out_str = str(args)
 83 | print(out_str)
 84 | 
 85 | if args.manualSeed is None:
 86 |     args.manualSeed = random.randint(1, 10000)
 87 | random.seed(args.manualSeed)
 88 | torch.manual_seed(args.manualSeed)
 89 | 
 90 | if args.use_cuda:
 91 |     torch.cuda.manual_seed_all(args.manualSeed)
 92 | cudnn.benchmark = True
 93 | 
 94 | 
 95 | def experiment_name(arch='',
 96 |                     epochs=400,
 97 |                     dropout=True,
 98 |                     batch_size=64,
 99 |                     lr=0.01,
100 |                     momentum=0.5,
101 |                     alpha= 0.01,
102 |                     decay=0.0005,
103 |                     data_aug=1,
104 |                     dualcutout= False,
105 |                     singlecutout= False,
106 |                     cutsize = 16,
107 |                     manualSeed=None,
108 |                     job_id=None,
109 |                     add_name=''):
110 | 
111 |     exp_name= str(arch)
112 |     exp_name += '_epochs_'+str(epochs)
113 |     if dropout:
114 |         exp_name+='_dropout_'+'true'
115 |     else:
116 |         exp_name+='_dropout_'+'False'
117 |     if dualcutout:
118 |         exp_name+='_dualcutout_'+'true'
119 |         exp_name +='_cut_size_'+str(cutsize)
120 |     elif singlecutout:
121 |         exp_name+='_singlecutout_'+'true'
122 |         exp_name +='_cut_size_'+str(cutsize)
123 |     else:
124 |         exp_name+='_nocutout_'+'true'
125 | 
126 |     exp_name +='_batch_size_'+str(batch_size)
127 |     exp_name += '_lr_'+str(lr)
128 |     exp_name += '_momentum_'+str(momentum)
129 |     exp_name += '_alpha_'+str(alpha)
130 |     exp_name +='_decay_'+str(decay)
131 |     exp_name += '_data_aug_'+str(data_aug)
132 |     if job_id!=None:
133 |         exp_name += '_job_id_'+str(job_id)
134 |     if manualSeed!=None:
135 |         exp_name += '_manuael_seed_'+str(manualSeed)
136 |     if add_name!='':
137 |         exp_name += '_add_name_'+str(add_name)
138 | 
139 |     # exp_name += strftime("_%Y-%m-%d_%H:%M:%S", gmtime())
140 |     print('experiement name: ' + exp_name)
141 |     return exp_name
142 | 
143 | 
def print_log(print_string, log):
    """Echo a message to stdout and append it, newline-terminated, to ``log``.

    ``log`` is any object with ``write`` and ``flush``; it is flushed after
    every message.
    """
    message = "{}".format(print_string)
    print(message)
    log.write(message + '\n')
    log.flush()
148 | 
def save_checkpoint(state, is_best, save_path, filename):
    """Serialize ``state`` to ``save_path/filename`` with ``torch.save``.

    When ``is_best`` is truthy the file just written is also copied to
    ``save_path/model_best.pth.tar``.
    """
    checkpoint_path = os.path.join(save_path, filename)
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    best_path = os.path.join(save_path, 'model_best.pth.tar')
    shutil.copyfile(checkpoint_path, best_path)
155 | 
def adjust_learning_rate(optimizer, epoch, gammas, schedule):
    """Apply the step schedule for ``epoch`` and return the resulting LR.

    Starting from ``args.learning_rate``, the rate is multiplied by each
    gamma whose paired schedule epoch has been reached (``epoch >= step``);
    scanning stops at the first step not yet reached. The result is written
    into every parameter group of ``optimizer``.

    ``gammas`` and ``schedule`` must have the same length (asserted).
    """
    new_lr = args.learning_rate
    assert len(gammas) == len(schedule), "length of gammas and schedule should be equal"
    for factor, milestone in zip(gammas, schedule):
        if epoch < milestone:
            break
        new_lr = new_lr * factor
    for group in optimizer.param_groups:
        group['lr'] = new_lr
    return new_lr
168 | 
def accuracy(output, target, topk=(1,)):
    """Compute the precision@k (in percent) for each k in ``topk``.

    Args:
        output: 2-D score tensor; the top-k is taken along dimension 1.
        target: 1-D tensor of class indices; its length is the batch size.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per requested k, holding the
        percentage of samples whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` derives from a transposed tensor, so a k>1 row slice may
        # be non-contiguous and cannot be flattened with view() alone.
        # keepdim=True guarantees a one-element tensor, so callers may index
        # the result with [0] on every PyTorch version.
        correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
183 | 
184 | 
185 | # train function (forward, backward, update)
186 | def train(train_loader, model, criterion, cutout,  optimizer, epoch, log):
187 |     batch_time = AverageMeter()
188 |     data_time = AverageMeter()
189 |     losses = AverageMeter()
190 |     top1 = AverageMeter()
191 |     top5 = AverageMeter()
192 |     # switch to train mode
193 |     model.train()
194 | 
195 |     end = time.time()
196 |     for i, (input, target) in enumerate(train_loader):
197 |         # measure data loading time
198 |         data_time.update(time.time() - end)
199 |         if  args.dualcutout == True or args.singlecutout == True :
200 |             cutout1 = cutout.apply(input)
201 |             cutout2 = cutout.apply(input)
202 |             if args.use_cuda:
203 |                 target = target.cuda(async=True)
204 |                 input = input.cuda()
205 |                 cutout1 = cutout1.cuda()
206 |                 cutout2 = cutout2.cuda()
207 | 
208 |             input_var = Variable(input)
209 |             cutout1_var = Variable(cutout1)
210 |             cutout2_var = Variable(cutout2)
211 |             target_var = Variable(target)
212 | 
213 |             # compute output
214 |             output1 = model(cutout1_var)
215 |             if args.dualcutout:
216 |                 output2 = model(cutout2_var)
217 |             if args.dualcutout:
218 |                 loss = (criterion(output1, target_var)+criterion(output2, target_var))*0.5 + args.alpha*F.mse_loss(output1, output2)
219 |             else:
220 |                 loss = criterion(output1, target_var)
221 | 
222 |             total_loss = loss
223 |         # measure accuracy and record loss
224 | 
225 |         else:
226 |             if args.use_cuda:
227 |                 target = target.cuda(async=True)
228 |                 input = input.cuda()
229 | 
230 |             input_var = Variable(input)
231 |             target_var = Variable(target)
232 | 
233 |             # compute output
234 |             output1 = model(input_var)
235 |             loss = criterion(output1, target_var)
236 | 
237 |             total_loss = loss
238 | 
239 | 
240 | 
241 |         # compute gradient and do SGD step
242 |         optimizer.zero_grad()
243 |         total_loss.backward()
244 |         optimizer.step()
245 | 
246 |         # measure elapsed time
247 |         batch_time.update(time.time() - end)
248 |         end = time.time()
249 | 
250 | 
251 |         if args.dualcutout:
252 |             prec1, prec5 = accuracy((output1.data+output2.data)*0.5, target, topk=(1, 5))
253 |         else:
254 |             prec1, prec5 = accuracy(output1.data, target, topk=(1, 5))
255 |         losses.update(loss.data[0], input.size(0))
256 |         top1.update(prec1[0], input.size(0))
257 |         top5.update(prec5[0], input.size(0))
258 |         # measure elapsed time
259 |         batch_time.update(time.time() - end)
260 |         end = time.time()
261 | 
262 |         if i % args.print_freq == 0:
263 |             print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
264 |                 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
265 |                 'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
266 |                 'Loss {loss.val:.4f} ({loss.avg:.4f})   '
267 |                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
268 |                 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
269 |                 epoch, i, len(train_loader), batch_time=batch_time,
270 |                 data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)
271 | 
272 | 
273 |     print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)
274 |     return top1.avg, losses.avg
275 | 
276 | def validate(val_loader, model, criterion, log):
277 |     losses = AverageMeter()
278 |     top1 = AverageMeter()
279 |     top5 = AverageMeter()
280 | 
281 |     # switch to evaluate mode
282 |     model.eval()
283 | 
284 |     for i, (input, target) in enumerate(val_loader):
285 |         if args.use_cuda:
286 |             target = target.cuda(async=True)
287 |             input = input.cuda()
288 |         input_var = torch.autograd.Variable(input, volatile=True)
289 |         target_var = torch.autograd.Variable(target, volatile=True)
290 | 
291 |         # compute output
292 |         output = model(input_var)
293 |         loss = criterion(output, target_var)
294 | 
295 |         # measure accuracy and record loss
296 |         prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
297 |         losses.update(loss.data[0], input.size(0))
298 |         top1.update(prec1[0], input.size(0))
299 |         top5.update(prec5[0], input.size(0))
300 | 
301 |     print_log('  **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)
302 | 
303 |     return top1.avg, losses.avg
304 | 
# Best validation accuracy seen so far; updated by main().
best_acc = 0
def main():
    """Run the whole experiment described by the global ``args``.

    Steps, in order:
      1. copy the dataset directory from ``args.home_dir`` to ``args.temp_dir``;
      2. create temporary model/result directories named after the
         experiment and open ``log.txt`` there;
      3. load the data, build the network, loss, optimizer and cutout object;
      4. optionally restore a checkpoint (``args.resume``);
      5. if ``args.evaluate`` is set, run one validation pass and return;
      6. otherwise train/validate for each epoch, checkpointing every epoch;
      7. dump the loss/accuracy curves to ``log.pkl``, copy the temporary
         directories to their ``home_dir`` counterparts and delete the
         temporary ones.
    """
    global best_acc

    ### transfer data from source to current node#####
    print("Copying the dataset to the current node's  dir...")

    tmp = args.temp_dir
    home = args.home_dir


    dataset=args.dataset
    data_source_dir = os.path.join(home,'data',dataset)
    if not os.path.exists(data_source_dir):
        os.makedirs(data_source_dir)
    data_target_dir = os.path.join(tmp,'data',dataset)
    copy_tree(data_source_dir, data_target_dir)

    ### set up the experiment directories########
    exp_name=experiment_name(arch=args.arch,
                    epochs=args.epochs,
                    dropout=args.dropout,
                    batch_size=args.batch_size,
                    lr=args.learning_rate,
                    momentum=args.momentum,
                    alpha = args.alpha,
                    decay= args.decay,
                    data_aug=args.data_aug,
                    dualcutout=args.dualcutout,
                    singlecutout = args.singlecutout,
                    cutsize = args.cutsize,
                    manualSeed=args.manualSeed,
                    job_id=args.job_id,
                    add_name=args.add_name)
    temp_model_dir = os.path.join(tmp,'experiments/DualCutout/'+dataset+'/model/'+ exp_name)
    temp_result_dir = os.path.join(tmp, 'experiments/DualCutout/'+dataset+'/results/'+ exp_name)
    model_dir = os.path.join(home, 'experiments/DualCutout/'+dataset+'/model/'+ exp_name)
    result_dir = os.path.join(home, 'experiments/DualCutout/'+dataset+'/results/'+ exp_name)


    if not os.path.exists(temp_model_dir):
        os.makedirs(temp_model_dir)

    if not os.path.exists(temp_result_dir):
        os.makedirs(temp_result_dir)

    copy_script_to_folder(os.path.abspath(__file__), temp_result_dir)

    result_png_path = os.path.join(temp_result_dir, 'results.png')

    # The file name has no placeholder, so the former .format() call was a no-op.
    log = open(os.path.join(temp_result_dir, 'log.txt'), 'w')
    print_log('save path : {}'.format(temp_result_dir), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch  version : {}".format(torch.__version__), log)
    print_log("cudnn  version : {}".format(torch.backends.cudnn.version()), log)


    train_loader, test_loader,num_classes=load_data(args.data_aug, args.batch_size,args.workers,args.dataset, data_target_dir)

    print_log("=> creating model '{}'".format(args.arch), log)
    # Init model, criterion, and optimizer

    # Every model factory is called positionally with (num_classes, dropout).
    net = models.__dict__[args.arch](num_classes,args.dropout)
    print_log("=> network :\n {}".format(net), log)

    #net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'],
                weight_decay=state['decay'], nesterov=True)


    cutout = Cutout(1, args.cutsize)
    if args.use_cuda:
        # Move the loss together with the network, only when CUDA is in use.
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            # SECURITY NOTE: torch.load unpickles the file; only resume from
            # checkpoints that come from a trusted source.
            checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_acc = recorder.max_accuracy(False)
            print_log("=> loaded checkpoint '{}' accuracy={} (epoch {})" .format(args.resume, best_acc, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        # Do not leave the log file open on the evaluation-only path.
        log.close()
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    train_loss = []
    train_acc=[]
    test_loss=[]
    test_acc=[]
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

        # Remaining-time estimate from the average epoch duration so far.
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs-epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

        # train for one epoch
        tr_acc, tr_los = train(train_loader, net, criterion, cutout, optimizer, epoch, log)

        # evaluate on validation set
        val_acc,   val_los   = validate(test_loader, net, criterion, log)
        train_loss.append(tr_los)
        train_acc.append(tr_acc)
        test_loss.append(val_los)
        test_acc.append(val_acc)
        recorder.update(epoch, tr_los, tr_acc, val_los, val_acc)

        is_best = False
        if val_acc > best_acc:
            is_best = True
            best_acc = val_acc

        save_checkpoint({
          'epoch': epoch + 1,
          'arch': args.arch,
          'state_dict': net.state_dict(),
          'recorder': recorder,
          'optimizer' : optimizer.state_dict(),
        }, is_best, temp_model_dir, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(result_png_path)

    train_log = OrderedDict()
    train_log['train_loss'] = train_loss
    train_log['train_acc']=train_acc
    train_log['test_loss']=test_loss
    train_log['test_acc']=test_acc

    # Use a context manager so the pickle file is flushed and closed before
    # the directory is copied.
    with open(os.path.join(temp_result_dir,'log.pkl'), 'wb') as log_pkl:
        pickle.dump(train_log, log_pkl)
    plotting(temp_result_dir)

    # Close the log before its directory is copied and removed, so no open
    # handle points into the directory being deleted.
    log.close()

    copy_tree(temp_model_dir, model_dir)
    copy_tree(temp_result_dir, result_dir)

    rmtree(temp_model_dir)
    rmtree(temp_result_dir)


if __name__ == '__main__':
    main()
476 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/__init__.py:
--------------------------------------------------------------------------------
 1 | """The models subpackage contains definitions for the following model
 2 | architectures:
 3 | -  `ResNeXt` for CIFAR10 CIFAR100
 4 | You can construct a model with random weights by calling its constructor:
 5 | .. code:: python
 6 |     import models
    resnext29_16_64 = models.resnext29_16_64(num_classes)
    resnext29_8_64 = models.resnext29_8_64(num_classes)
    resnet18 = models.resnet18(num_classes)
    resnet34 = models.resnet34(num_classes)
11 | 
12 | 
13 | .. ResNext: https://arxiv.org/abs/1611.05431
14 | """
15 | 
16 | from .resnext import resnext29_8_64, resnext29_16_64
17 | #from .resnet import resnet20, resnet32, resnet44, resnet56, resnet110
18 | from .resnet import resnet18, resnet34, resnet50, resnet101, resnet152
19 | from .preresnet import preactresnet18, preactresnet34, preactresnet50, preactresnet101, preactresnet152
20 | from .caffe_cifar import caffe_cifar
21 | from .densenet import densenet100_12,densenet100_24
22 | from .wide_resnet import wrn28_10, wrn28_2
23 | 
24 | #from .imagenet_resnet import resnet18, resnet34, resnet50, resnet101, resnet152
25 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/caffe_cifar.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | from torch.nn import init
 7 | import math
 8 | 
 9 | ## http://torch.ch/blog/2015/07/30/cifar.html
class CifarCaffeNet(nn.Module):
  """Small three-block convolutional classifier for 3-channel images.

  Each block is a stack of 3x3 convolutions followed by ReLU, a stride-2
  pooling layer and batch normalisation (block 1 pools before the ReLU).
  The classifier expects ``128*9`` features, i.e. a 3x3 map after the three
  poolings, which is what a 32x32 input produces.

  Args:
    num_classes: number of output scores produced by the final linear layer.
  """
  def __init__(self, num_classes):
    super(CifarCaffeNet, self).__init__()

    self.num_classes = num_classes

    self.block_1 = nn.Sequential(
      nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
      nn.MaxPool2d(kernel_size=3, stride=2),
      nn.ReLU(),
      nn.BatchNorm2d(32))

    self.block_2 = nn.Sequential(
      nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
      nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.AvgPool2d(kernel_size=3, stride=2),
      nn.BatchNorm2d(64))

    self.block_3 = nn.Sequential(
      nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
      nn.Conv2d(64,128, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.AvgPool2d(kernel_size=3, stride=2),
      nn.BatchNorm2d(128))

    self.classifier = nn.Linear(128*9, self.num_classes)

    # Newer PyTorch names the in-place initialiser `kaiming_normal_` and
    # deprecates the old spelling; fall back to the old name when the new
    # one does not exist.
    kaiming_init = getattr(init, 'kaiming_normal_', None) or init.kaiming_normal

    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        # Normal init with std = sqrt(2 / (kernel area * out_channels)).
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
      elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()
      elif isinstance(m, nn.Linear):
        kaiming_init(m.weight)
        m.bias.data.zero_()

  def forward(self, x):
    """Return the class scores for the image batch ``x``."""
    # Call the sub-modules themselves rather than their .forward() so that
    # registered hooks run.
    x = self.block_1(x)
    x = self.block_2(x)
    x = self.block_3(x)
    # Flatten everything except the batch dimension.
    x = x.view(x.size(0), -1)
    return self.classifier(x)
56 | 
def caffe_cifar(num_classes=10):
  """Return a freshly initialised ``CifarCaffeNet`` with ``num_classes`` outputs."""
  return CifarCaffeNet(num_classes)
60 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/densenet.py:
--------------------------------------------------------------------------------
  1 | import math, torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | class Bottleneck(nn.Module):
  6 |   def __init__(self, nChannels, growthRate):
  7 |     super(Bottleneck, self).__init__()
  8 |     interChannels = 4*growthRate
  9 |     self.bn1 = nn.BatchNorm2d(nChannels)
 10 |     self.conv1 = nn.Conv2d(nChannels, interChannels, kernel_size=1, bias=False)
 11 |     self.bn2 = nn.BatchNorm2d(interChannels)
 12 |     self.conv2 = nn.Conv2d(interChannels, growthRate, kernel_size=3, padding=1, bias=False)
 13 | 
 14 |   def forward(self, x):
 15 |     out = self.conv1(F.relu(self.bn1(x)))
 16 |     out = self.conv2(F.relu(self.bn2(out)))
 17 |     out = torch.cat((x, out), 1)
 18 |     return out
 19 | 
 20 | class SingleLayer(nn.Module):
 21 |   def __init__(self, nChannels, growthRate):
 22 |     super(SingleLayer, self).__init__()
 23 |     self.bn1 = nn.BatchNorm2d(nChannels)
 24 |     self.conv1 = nn.Conv2d(nChannels, growthRate, kernel_size=3, padding=1, bias=False)
 25 | 
 26 |   def forward(self, x):
 27 |     out = self.conv1(F.relu(self.bn1(x)))
 28 |     out = torch.cat((x, out), 1)
 29 |     return out
 30 | 
 31 | class Transition(nn.Module):
 32 |   def __init__(self, nChannels, nOutChannels):
 33 |     super(Transition, self).__init__()
 34 |     self.bn1 = nn.BatchNorm2d(nChannels)
 35 |     self.conv1 = nn.Conv2d(nChannels, nOutChannels, kernel_size=1, bias=False)
 36 | 
 37 |   def forward(self, x):
 38 |     out = self.conv1(F.relu(self.bn1(x)))
 39 |     out = F.avg_pool2d(out, 2)
 40 |     return out
 41 | 
 42 | class DenseNet(nn.Module):
 43 |   def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
 44 |     super(DenseNet, self).__init__()
 45 | 
 46 |     if bottleneck:  nDenseBlocks = int( (depth-4) / 6 )
 47 |     else         :  nDenseBlocks = int( (depth-4) / 3 )
 48 | 
 49 |     nChannels = 2*growthRate
 50 |     self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, bias=False)
 51 | 
 52 |     self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
 53 |     nChannels += nDenseBlocks*growthRate
 54 |     nOutChannels = int(math.floor(nChannels*reduction))
 55 |     self.trans1 = Transition(nChannels, nOutChannels)
 56 | 
 57 |     nChannels = nOutChannels
 58 |     self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
 59 |     nChannels += nDenseBlocks*growthRate
 60 |     nOutChannels = int(math.floor(nChannels*reduction))
 61 |     self.trans2 = Transition(nChannels, nOutChannels)
 62 | 
 63 |     nChannels = nOutChannels
 64 |     self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
 65 |     nChannels += nDenseBlocks*growthRate
 66 | 
 67 |     self.bn1 = nn.BatchNorm2d(nChannels)
 68 |     self.fc = nn.Linear(nChannels, nClasses)
 69 | 
 70 |     for m in self.modules():
 71 |       if isinstance(m, nn.Conv2d):
 72 |         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 73 |         m.weight.data.normal_(0, math.sqrt(2. / n))
 74 |       elif isinstance(m, nn.BatchNorm2d):
 75 |         m.weight.data.fill_(1)
 76 |         m.bias.data.zero_()
 77 |       elif isinstance(m, nn.Linear):
 78 |         m.bias.data.zero_()
 79 | 
 80 |   def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
 81 |     layers = []
 82 |     for i in range(int(nDenseBlocks)):
 83 |       if bottleneck:
 84 |         layers.append(Bottleneck(nChannels, growthRate))
 85 |       else:
 86 |         layers.append(SingleLayer(nChannels, growthRate))
 87 |       nChannels += growthRate
 88 |     return nn.Sequential(*layers)
 89 | 
 90 |   def forward(self, x):
 91 |     out = self.conv1(x)
 92 |     out = self.trans1(self.dense1(out))
 93 |     out = self.trans2(self.dense2(out))
 94 |     out = self.dense3(out)
 95 |     out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8))
 96 |     out = F.log_softmax(self.fc(out))
 97 |     return out
 98 | 
 99 | def densenet100_12(num_classes=10):
100 |   model = DenseNet(12, 100, 0.5, num_classes, False)
101 |   return model
102 | 
103 | 
def densenet100_24(num_classes=10):
  """Return a depth-100 DenseNet with growth rate 24, reduction 0.5 and no bottleneck layers."""
  return DenseNet(24, 100, 0.5, num_classes, False)
107 |   
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/imagenet_resnet.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import math
  3 | import torch.utils.model_zoo as model_zoo
  4 | 
  5 | def conv3x3(in_planes, out_planes, stride=1):
  6 |     "3x3 convolution with padding"
  7 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
  8 |                      padding=1, bias=False)
  9 | 
 10 | 
 11 | class BasicBlock(nn.Module):
 12 |     expansion = 1
 13 | 
 14 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 15 |         super(BasicBlock, self).__init__()
 16 |         self.conv1 = conv3x3(inplanes, planes, stride)
 17 |         self.bn1 = nn.BatchNorm2d(planes)
 18 |         self.relu = nn.ReLU(inplace=True)
 19 |         self.conv2 = conv3x3(planes, planes)
 20 |         self.bn2 = nn.BatchNorm2d(planes)
 21 |         self.downsample = downsample
 22 |         self.stride = stride
 23 | 
 24 |     def forward(self, x):
 25 |         residual = x
 26 | 
 27 |         out = self.conv1(x)
 28 |         out = self.bn1(out)
 29 |         out = self.relu(out)
 30 | 
 31 |         out = self.conv2(out)
 32 |         out = self.bn2(out)
 33 | 
 34 |         if self.downsample is not None:
 35 |             residual = self.downsample(x)
 36 | 
 37 |         out += residual
 38 |         out = self.relu(out)
 39 | 
 40 |         return out
 41 | 
 42 | 
 43 | class Bottleneck(nn.Module):
 44 |     expansion = 4
 45 | 
 46 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 47 |         super(Bottleneck, self).__init__()
 48 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
 49 |         self.bn1 = nn.BatchNorm2d(planes)
 50 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
 51 |                                padding=1, bias=False)
 52 |         self.bn2 = nn.BatchNorm2d(planes)
 53 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
 54 |         self.bn3 = nn.BatchNorm2d(planes * 4)
 55 |         self.relu = nn.ReLU(inplace=True)
 56 |         self.downsample = downsample
 57 |         self.stride = stride
 58 | 
 59 |     def forward(self, x):
 60 |         residual = x
 61 | 
 62 |         out = self.conv1(x)
 63 |         out = self.bn1(out)
 64 |         out = self.relu(out)
 65 | 
 66 |         out = self.conv2(out)
 67 |         out = self.bn2(out)
 68 |         out = self.relu(out)
 69 | 
 70 |         out = self.conv3(out)
 71 |         out = self.bn3(out)
 72 | 
 73 |         if self.downsample is not None:
 74 |             residual = self.downsample(x)
 75 | 
 76 |         out += residual
 77 |         out = self.relu(out)
 78 | 
 79 |         return out
 80 | 
 81 | 
 82 | class ResNet(nn.Module):
 83 | 
 84 |     def __init__(self, block, layers, num_classes=1000):
 85 |         self.inplanes = 64
 86 |         super(ResNet, self).__init__()
 87 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
 88 |                                bias=False)
 89 |         self.bn1 = nn.BatchNorm2d(64)
 90 |         self.relu = nn.ReLU(inplace=True)
 91 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 92 |         self.layer1 = self._make_layer(block, 64, layers[0])
 93 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
 94 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
 95 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
 96 |         self.avgpool = nn.AvgPool2d(7)
 97 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
 98 | 
 99 |         for m in self.modules():
100 |             if isinstance(m, nn.Conv2d):
101 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
102 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
103 |             elif isinstance(m, nn.BatchNorm2d):
104 |                 m.weight.data.fill_(1)
105 |                 m.bias.data.zero_()
106 | 
107 |     def _make_layer(self, block, planes, blocks, stride=1):
108 |         downsample = None
109 |         if stride != 1 or self.inplanes != planes * block.expansion:
110 |             downsample = nn.Sequential(
111 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
112 |                           kernel_size=1, stride=stride, bias=False),
113 |                 nn.BatchNorm2d(planes * block.expansion),
114 |             )
115 | 
116 |         layers = []
117 |         layers.append(block(self.inplanes, planes, stride, downsample))
118 |         self.inplanes = planes * block.expansion
119 |         for i in range(1, blocks):
120 |             layers.append(block(self.inplanes, planes))
121 | 
122 |         return nn.Sequential(*layers)
123 | 
124 |     def forward(self, x):
125 |         x = self.conv1(x)
126 |         x = self.bn1(x)
127 |         x = self.relu(x)
128 |         x = self.maxpool(x)
129 | 
130 |         x = self.layer1(x)
131 |         x = self.layer2(x)
132 |         x = self.layer3(x)
133 |         x = self.layer4(x)
134 | 
135 |         x = self.avgpool(x)
136 |         x = x.view(x.size(0), -1)
137 |         x = self.fc(x)
138 | 
139 |         return x
140 | 
141 | 
def resnet18(num_classes=1000):
    """Constructs a ResNet-18 model.

    Args:
        num_classes (int): size of the final classification layer
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)
150 | 
151 | 
def resnet34(num_classes=1000):
    """Constructs a ResNet-34 model.

    Args:
        num_classes (int): size of the final classification layer
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes)
160 | 
161 | 
def resnet50(num_classes=1000):
    """Constructs a ResNet-50 model.

    Args:
        num_classes (int): size of the final classification layer
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes)
170 | 
171 | 
def resnet101(num_classes=1000):
    """Constructs a ResNet-101 model.

    Args:
        num_classes (int): size of the final classification layer
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
180 | 
181 | 
def resnet152(num_classes=1000):
    """Constructs a ResNet-152 model.

    Args:
        num_classes (int): size of the final classification layer
    """
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes)
190 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/preresnet.py:
--------------------------------------------------------------------------------
  1 | '''Pre-activation ResNet in PyTorch.
  2 | Reference:
  3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  4 |     Identity Mappings in Deep Residual Networks. arXiv:1603.05027
  5 | '''
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | 
 10 | 
 11 | class PreActBlock(nn.Module):
 12 |     '''Pre-activation version of the BasicBlock.'''
 13 |     expansion = 1
 14 | 
 15 |     def __init__(self, in_planes, planes, stride=1):
 16 |         super(PreActBlock, self).__init__()
 17 |         self.bn1 = nn.BatchNorm2d(in_planes)
 18 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 19 |         self.bn2 = nn.BatchNorm2d(planes)
 20 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 21 | 
 22 |         if stride != 1 or in_planes != self.expansion*planes:
 23 |             self.shortcut = nn.Sequential(
 24 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
 25 |             )
 26 | 
 27 |     def forward(self, x):
 28 |         out = F.relu(self.bn1(x))
 29 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 30 |         out = self.conv1(out)
 31 |         out = self.conv2(F.relu(self.bn2(out)))
 32 |         out += shortcut
 33 |         return out
 34 | 
 35 | 
 36 | class PreActBottleneck(nn.Module):
 37 |     '''Pre-activation version of the original Bottleneck module.'''
 38 |     expansion = 4
 39 | 
 40 |     def __init__(self, in_planes, planes, stride=1):
 41 |         super(PreActBottleneck, self).__init__()
 42 |         self.bn1 = nn.BatchNorm2d(in_planes)
 43 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 44 |         self.bn2 = nn.BatchNorm2d(planes)
 45 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 46 |         self.bn3 = nn.BatchNorm2d(planes)
 47 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 48 | 
 49 |         if stride != 1 or in_planes != self.expansion*planes:
 50 |             self.shortcut = nn.Sequential(
 51 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
 52 |             )
 53 | 
 54 |     def forward(self, x):
 55 |         out = F.relu(self.bn1(x))
 56 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 57 |         out = self.conv1(out)
 58 |         out = self.conv2(F.relu(self.bn2(out)))
 59 |         out = self.conv3(F.relu(self.bn3(out)))
 60 |         out += shortcut
 61 |         return out
 62 | 
 63 | 
 64 | class PreActResNet(nn.Module):
 65 |     def __init__(self, block, num_blocks, num_classes=10, dropout= False):
 66 |         super(PreActResNet, self).__init__()
 67 |         self.in_planes = 64
 68 |         self.dropout = dropout
 69 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 70 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 71 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 72 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 73 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 74 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 75 | 
 76 |     def _make_layer(self, block, planes, num_blocks, stride):
 77 |         strides = [stride] + [1]*(num_blocks-1)
 78 |         layers = []
 79 |         for stride in strides:
 80 |             layers.append(block(self.in_planes, planes, stride))
 81 |             self.in_planes = planes * block.expansion
 82 |         return nn.Sequential(*layers)
 83 | 
 84 |     def forward(self, x):
 85 |         out = self.conv1(x)
 86 |         out = self.layer1(out)
 87 |         if self.dropout:
 88 |             x = F.dropout(x, p=0.5, training=self.training)
 89 |         out = self.layer2(out)
 90 |         if self.dropout:
 91 |             x = F.dropout(x, p=0.5, training=self.training)
 92 |         out = self.layer3(out)
 93 |         if self.dropout:
 94 |             x = F.dropout(x, p=0.5, training=self.training)
 95 |         out = self.layer4(out)
 96 |         if self.dropout:
 97 |             x = F.dropout(x, p=0.5, training=self.training)
 98 |         out = F.avg_pool2d(out, 4)
 99 |         out = out.view(out.size(0), -1)
100 |         out = self.linear(out)
101 |         return out
102 | 
103 | 
def preactresnet18(num_classes=10, dropout=False):
    """Build a PreActResNet from PreActBlock with [2, 2, 2, 2] blocks per stage."""
    stage_depths = [2, 2, 2, 2]
    return PreActResNet(PreActBlock, stage_depths, num_classes, dropout)
106 | 
def preactresnet34(num_classes=10, dropout=False):
    """Build a PreActResNet from PreActBlock with [3, 4, 6, 3] blocks per stage."""
    stage_depths = [3, 4, 6, 3]
    return PreActResNet(PreActBlock, stage_depths, num_classes, dropout)
109 | 
def preactresnet50(num_classes=10, dropout=False):
    """Build a PreActResNet from PreActBottleneck with [3, 4, 6, 3] blocks per stage."""
    stage_depths = [3, 4, 6, 3]
    return PreActResNet(PreActBottleneck, stage_depths, num_classes, dropout)
112 | 
def preactresnet101(num_classes=10, dropout=False):
    """Build a PreActResNet from PreActBottleneck with [3, 4, 23, 3] blocks per stage."""
    stage_depths = [3, 4, 23, 3]
    return PreActResNet(PreActBottleneck, stage_depths, num_classes, dropout)
115 | 
def preactresnet152(num_classes=10, dropout=False):
    """Build a PreActResNet from PreActBottleneck with [3, 8, 36, 3] blocks per stage."""
    stage_depths = [3, 8, 36, 3]
    return PreActResNet(PreActBottleneck, stage_depths, num_classes, dropout)
118 | 
119 | 
def test():
    """Smoke test: push one random 3x32x32 image through preactresnet18.

    Prints the size of the resulting output tensor.
    """
    # The original called `PreActResNet18()`, a name this module never
    # defines; the factory here is spelled `preactresnet18`.
    net = preactresnet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())
124 | 
125 | # test()


--------------------------------------------------------------------------------
/cifar10/resnext/models/res_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | class DownsampleA(nn.Module):  
 5 | 
 6 |   def __init__(self, nIn, nOut, stride):
 7 |     super(DownsampleA, self).__init__() 
 8 |     assert stride == 2    
 9 |     self.avg = nn.AvgPool2d(kernel_size=1, stride=stride)   
10 | 
11 |   def forward(self, x):   
12 |     x = self.avg(x)  
13 |     return torch.cat((x, x.mul(0)), 1)  
14 | 
class DownsampleC(nn.Module):
  """Shortcut implemented as a single bias-free 1x1 convolution.

  Construction asserts that the layer is actually needed, i.e. that the
  stride is not 1 or that the channel count changes.
  """

  def __init__(self, nIn, nOut, stride):
    super(DownsampleC, self).__init__()
    assert stride != 1 or nIn != nOut
    self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=stride, padding=0, bias=False)

  def forward(self, x):
    """Return the 1x1-convolved input."""
    return self.conv(x)
25 | 
class DownsampleD(nn.Module):
  """Shortcut made of a bias-free 2x2 stride-2 convolution plus BatchNorm.

  Only ``stride == 2`` is allowed.
  """

  def __init__(self, nIn, nOut, stride):
    super(DownsampleD, self).__init__()
    assert stride == 2
    self.conv = nn.Conv2d(nIn, nOut, kernel_size=2, stride=stride, padding=0, bias=False)
    self.bn   = nn.BatchNorm2d(nOut)

  def forward(self, x):
    """Return ``bn(conv(x))``."""
    return self.bn(self.conv(x))
38 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/resnet.py:
--------------------------------------------------------------------------------
  1 | ## https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
  2 | '''ResNet in PyTorch.
For Pre-activation ResNet, see 'preresnet.py'.
  4 | Reference:
  5 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  6 |     Deep Residual Learning for Image Recognition. arXiv:1512.03385
  7 | '''
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.nn.functional as F
 11 | 
 12 | 
 13 | class BasicBlock(nn.Module):
 14 |     expansion = 1
 15 | 
 16 |     def __init__(self, in_planes, planes, stride=1):
 17 |         super(BasicBlock, self).__init__()
 18 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 19 |         self.bn1 = nn.BatchNorm2d(planes)
 20 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 21 |         self.bn2 = nn.BatchNorm2d(planes)
 22 | 
 23 |         self.shortcut = nn.Sequential()
 24 |         if stride != 1 or in_planes != self.expansion*planes:
 25 |             self.shortcut = nn.Sequential(
 26 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 27 |                 nn.BatchNorm2d(self.expansion*planes)
 28 |             )
 29 | 
 30 |     def forward(self, x):
 31 |         out = F.relu(self.bn1(self.conv1(x)))
 32 |         out = self.bn2(self.conv2(out))
 33 |         out += self.shortcut(x)
 34 |         out = F.relu(out)
 35 |         return out
 36 | 
 37 | 
 38 | class Bottleneck(nn.Module):
 39 |     expansion = 4
 40 | 
 41 |     def __init__(self, in_planes, planes, stride=1):
 42 |         super(Bottleneck, self).__init__()
 43 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 44 |         self.bn1 = nn.BatchNorm2d(planes)
 45 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 46 |         self.bn2 = nn.BatchNorm2d(planes)
 47 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 48 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 49 | 
 50 |         self.shortcut = nn.Sequential()
 51 |         if stride != 1 or in_planes != self.expansion*planes:
 52 |             self.shortcut = nn.Sequential(
 53 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 54 |                 nn.BatchNorm2d(self.expansion*planes)
 55 |             )
 56 | 
 57 |     def forward(self, x):
 58 |         out = F.relu(self.bn1(self.conv1(x)))
 59 |         out = F.relu(self.bn2(self.conv2(out)))
 60 |         out = self.bn3(self.conv3(out))
 61 |         out += self.shortcut(x)
 62 |         out = F.relu(out)
 63 |         return out
 64 | 
 65 | 
 66 | class ResNet(nn.Module):
 67 |     def __init__(self, block, num_blocks, num_classes=10):
 68 |         super(ResNet, self).__init__()
 69 |         self.in_planes = 64
 70 | 
 71 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 72 |         self.bn1 = nn.BatchNorm2d(64)
 73 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 74 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 75 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 76 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 77 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 78 | 
 79 |     def _make_layer(self, block, planes, num_blocks, stride):
 80 |         strides = [stride] + [1]*(num_blocks-1)
 81 |         layers = []
 82 |         for stride in strides:
 83 |             layers.append(block(self.in_planes, planes, stride))
 84 |             self.in_planes = planes * block.expansion
 85 |         return nn.Sequential(*layers)
 86 | 
 87 |     def forward(self, x):
 88 |         out = F.relu(self.bn1(self.conv1(x)))
 89 |         out = self.layer1(out)
 90 |         out = self.layer2(out)
 91 |         out = self.layer3(out)
 92 |         out = self.layer4(out)
 93 |         out = F.avg_pool2d(out, 4)
 94 |         out = out.view(out.size(0), -1)
 95 |         out = self.linear(out)
 96 |         return out
 97 | 
 98 | 
 99 | 
def resnet18(num_classes=10, dropout=False):
    """Build a ResNet from BasicBlock with [2, 2, 2, 2] blocks per stage.

    Args:
        num_classes: number of classifier outputs, forwarded to ``ResNet``.
        dropout: accepted but not used by this function.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)
107 | 
108 | 
def resnet34(num_classes=10, dropout=False):
    """Build a ResNet from BasicBlock with [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: number of classifier outputs, forwarded to ``ResNet``.
        dropout: accepted but not used by this function.
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes)
116 | 
117 | 
def resnet50(num_classes=10, dropout=False):
    """Build a ResNet from Bottleneck with [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: number of classifier outputs, forwarded to ``ResNet``.
        dropout: accepted but not used by this function.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes)
125 | 
126 | 
def resnet101(num_classes=10, dropout=False):
    """Build a ResNet from Bottleneck with [3, 4, 23, 3] blocks per stage.

    Args:
        num_classes: number of classifier outputs, forwarded to ``ResNet``.
        dropout: accepted but not used by this function.
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
134 | 
135 | 
def resnet152(num_classes=10, dropout=False):
    """Build a ResNet from Bottleneck with [3, 8, 36, 3] blocks per stage.

    Args:
        num_classes: number of classifier outputs, forwarded to ``ResNet``.
        dropout: accepted but not used by this function.
    """
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes)


--------------------------------------------------------------------------------
/cifar10/resnext/models/resnext.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.nn.functional as F
  3 | from torch.nn import init
  4 | import math
  5 | 
  6 | class ResNeXtBottleneck(nn.Module):
  7 |   expansion = 4
  8 |   """
  9 |   RexNeXt bottleneck type C (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua)
 10 |   """
 11 |   def __init__(self, inplanes, planes, cardinality, base_width, stride=1, downsample=None):
 12 |     super(ResNeXtBottleneck, self).__init__()
 13 | 
 14 |     D = int(math.floor(planes * (base_width/64.0)))
 15 |     C = cardinality
 16 | 
 17 |     self.conv_reduce = nn.Conv2d(inplanes, D*C, kernel_size=1, stride=1, padding=0, bias=False)
 18 |     self.bn_reduce = nn.BatchNorm2d(D*C)
 19 | 
 20 |     self.conv_conv = nn.Conv2d(D*C, D*C, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
 21 |     self.bn = nn.BatchNorm2d(D*C)
 22 | 
 23 |     self.conv_expand = nn.Conv2d(D*C, planes*4, kernel_size=1, stride=1, padding=0, bias=False)
 24 |     self.bn_expand = nn.BatchNorm2d(planes*4)
 25 | 
 26 |     self.downsample = downsample
 27 | 
 28 |   def forward(self, x):
 29 |     residual = x
 30 | 
 31 |     bottleneck = self.conv_reduce(x)
 32 |     bottleneck = F.relu(self.bn_reduce(bottleneck), inplace=True)
 33 | 
 34 |     bottleneck = self.conv_conv(bottleneck)
 35 |     bottleneck = F.relu(self.bn(bottleneck), inplace=True)
 36 | 
 37 |     bottleneck = self.conv_expand(bottleneck)
 38 |     bottleneck = self.bn_expand(bottleneck)
 39 | 
 40 |     if self.downsample is not None:
 41 |       residual = self.downsample(x)
 42 |     
 43 |     return F.relu(residual + bottleneck, inplace=True)
 44 | 
 45 | 
 46 | class CifarResNeXt(nn.Module):
 47 |   """
 48 |   ResNext optimized for the Cifar dataset, as specified in
 49 |   https://arxiv.org/pdf/1611.05431.pdf
 50 |   """
 51 |   def __init__(self, block, depth, cardinality, base_width, num_classes, dropout):
 52 |     super(CifarResNeXt, self).__init__()
 53 | 
 54 |     #Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
 55 |     assert (depth - 2) % 9 == 0, 'depth should be one of 29, 38, 47, 56, 101'
 56 |     layer_blocks = (depth - 2) // 9
 57 | 
 58 |     self.cardinality = cardinality
 59 |     self.base_width = base_width
 60 |     self.num_classes = num_classes
 61 |     self.dropout=dropout
 62 |     self.conv_1_3x3 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
 63 |     self.bn_1 = nn.BatchNorm2d(64)
 64 | 
 65 |     self.inplanes = 64
 66 |     self.stage_1 = self._make_layer(block, 64 , layer_blocks, 1)
 67 |     self.stage_2 = self._make_layer(block, 128, layer_blocks, 2)
 68 |     self.stage_3 = self._make_layer(block, 256, layer_blocks, 2)
 69 |     self.avgpool = nn.AvgPool2d(8)
 70 |     self.classifier = nn.Linear(256*block.expansion, num_classes)
 71 | 
 72 |     for m in self.modules():
 73 |       if isinstance(m, nn.Conv2d):
 74 |         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 75 |         m.weight.data.normal_(0, math.sqrt(2. / n))
 76 |       elif isinstance(m, nn.BatchNorm2d):
 77 |         m.weight.data.fill_(1)
 78 |         m.bias.data.zero_()
 79 |       elif isinstance(m, nn.Linear):
 80 |         init.kaiming_normal(m.weight)
 81 |         m.bias.data.zero_()
 82 | 
 83 |   def _make_layer(self, block, planes, blocks, stride=1):
 84 |     downsample = None
 85 |     if stride != 1 or self.inplanes != planes * block.expansion:
 86 |       downsample = nn.Sequential(
 87 |         nn.Conv2d(self.inplanes, planes * block.expansion,
 88 |               kernel_size=1, stride=stride, bias=False),
 89 |         nn.BatchNorm2d(planes * block.expansion),
 90 |       )
 91 | 
 92 |     layers = []
 93 |     layers.append(block(self.inplanes, planes, self.cardinality, self.base_width, stride, downsample))
 94 |     self.inplanes = planes * block.expansion
 95 |     for i in range(1, blocks):
 96 |       layers.append(block(self.inplanes, planes, self.cardinality, self.base_width))
 97 | 
 98 |     return nn.Sequential(*layers)
 99 | 
100 |   def forward(self, x):
101 |     x = self.conv_1_3x3(x)
102 |     x = F.relu(self.bn_1(x), inplace=True)
103 |     x = self.stage_1(x)
104 |     x = self.stage_2(x)
105 |     x = self.stage_3(x)
106 |     x = self.avgpool(x)
107 |     x = x.view(x.size(0), -1)
108 |     if self.dropout:
109 |             x = F.dropout(x, p=0.5, training=self.training)
110 |     return self.classifier(x)
111 | 
def resnext29_16_64(num_classes=10,dropout=True):
  """Constructs a ResNeXt-29, 16*64d model for CIFAR-10 (by default)

  Args:
    num_classes (uint): number of classes
    dropout (bool): forwarded to CifarResNeXt as its dropout flag
  """
  depth, cardinality, base_width = 29, 16, 64
  return CifarResNeXt(ResNeXtBottleneck, depth, cardinality, base_width, num_classes, dropout)
120 | 
def resnext29_8_64(num_classes=10, dropout=True):
  """Constructs a ResNeXt-29, 8*64d model for CIFAR-10 (by default)

  Args:
    num_classes (uint): number of classes
    dropout (bool): forwarded to CifarResNeXt as its dropout flag
  """
  depth, cardinality, base_width = 29, 8, 64
  return CifarResNeXt(ResNeXtBottleneck, depth, cardinality, base_width, num_classes, dropout)
129 | 


--------------------------------------------------------------------------------
/cifar10/resnext/models/wide_resnet.py:
--------------------------------------------------------------------------------
  1 | ### dropout has been removed in this code. original code had dropout#####
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.init as init
  5 | import torch.nn.functional as F
  6 | from torch.autograd import Variable
  7 | 
  8 | import sys
  9 | import numpy as np
 10 | 
 11 | act = torch.nn.LeakyReLU()
 12 | 
 13 | 
 14 | def conv3x3(in_planes, out_planes, stride=1):
 15 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True)
 16 | 
 17 | def conv_init(m):
 18 |     classname = m.__class__.__name__
 19 |     if classname.find('Conv') != -1:
 20 |         init.xavier_uniform(m.weight, gain=np.sqrt(2))
 21 |         init.constant(m.bias, 0)
 22 |     elif classname.find('BatchNorm') != -1:
 23 |         init.constant(m.weight, 1)
 24 |         init.constant(m.bias, 0)
 25 | 
 26 | class wide_basic(nn.Module):
 27 |     def __init__(self, in_planes, planes, stride=1):
 28 |         super(wide_basic, self).__init__()
 29 |         self.bn1 = nn.BatchNorm2d(in_planes)
 30 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True)
 31 |         self.bn2 = nn.BatchNorm2d(planes)
 32 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True)
 33 | 
 34 |         self.shortcut = nn.Sequential()
 35 |         if stride != 1 or in_planes != planes:
 36 |             self.shortcut = nn.Sequential(
 37 |                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True),
 38 |             )
 39 | 
 40 |     def forward(self, x):
 41 |         out = self.conv1(act(self.bn1(x)))
 42 |         out = self.conv2(act(self.bn2(out)))
 43 |         out += self.shortcut(x)
 44 | 
 45 |         return out
 46 | 
 47 | class Wide_ResNet(nn.Module):
 48 |     
 49 |     def __init__(self, depth, widen_factor, num_classes):
 50 |         super(Wide_ResNet, self).__init__()
 51 |         self.in_planes = 16
 52 | 
 53 |         assert ((depth-4)%6 ==0), 'Wide-resnet_v2 depth should be 6n+4'
 54 |         n = int((depth-4)/6)
 55 |         k = widen_factor
 56 | 
 57 |         print('| Wide-Resnet %dx%d' %(depth, k))
 58 |         nStages = [16, 16*k, 32*k, 64*k]
 59 | 
 60 |         self.conv1 = conv3x3(3,nStages[0])
 61 |         self.layer1 = self._wide_layer(wide_basic, nStages[1], n, stride=1)
 62 |         self.layer2 = self._wide_layer(wide_basic, nStages[2], n, stride=2)
 63 |         self.layer3 = self._wide_layer(wide_basic, nStages[3], n, stride=2)
 64 |         self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9)
 65 |         self.linear = nn.Linear(nStages[3], num_classes)
 66 | 
 67 |     def _wide_layer(self, block, planes, num_blocks, stride):
 68 |         strides = [stride] + [1]*(num_blocks-1)
 69 |         layers = []
 70 | 
 71 |         for stride in strides:
 72 |             layers.append(block(self.in_planes, planes, stride))
 73 |             self.in_planes = planes
 74 | 
 75 |         return nn.Sequential(*layers)
 76 |     
 77 |     """
 78 |     ## Modified WRN architecture###
 79 |     def __init__(self, depth, widen_factor, dropout_rate, num_classes):
 80 |         super(Wide_ResNet, self).__init__()
 81 |         self.in_planes = 16
 82 | 
 83 |         assert ((depth-4)%6 ==0), 'Wide-resnet_v2 depth should be 6n+4'
 84 |         n = (depth-4)/6
 85 |         k = widen_factor
 86 |         #self.mixup_hidden = mixup_hidden
 87 | 
 88 |         print('| Wide-Resnet %dx%d' %(depth, k))
 89 |         nStages = [16, 16*k, 32*k, 64*k]
 90 | 
 91 |         self.conv1 = conv3x3(3,nStages[0])
 92 |         self.bn1 = nn.BatchNorm2d(nStages[0])
 93 |         self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1)
 94 |         self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2)
 95 |         self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2)
 96 |         #self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9)
 97 |         self.linear = nn.Linear(nStages[3], num_classes)
 98 | 
 99 |     def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride):
100 |         strides = [stride] + [1]*(num_blocks-1)
101 |         layers = []
102 | 
103 |         for stride in strides:
104 |             layers.append(block(self.in_planes, planes, dropout_rate, stride))
105 |             self.in_planes = planes
106 | 
107 |         return nn.Sequential(*layers)
108 |     """
109 |     def forward(self, x):
110 |         #print x.shape
111 |         
112 |         out = self.conv1(x)
113 |         out = self.layer1(out)
114 |         out = self.layer2(out)
115 |         out = self.layer3(out)
116 |         out = act(self.bn1(out))
117 |         out = F.avg_pool2d(out, 8)
118 |         out = out.view(out.size(0), -1)
119 |         out = self.linear(out)
120 |         """
121 |         ## modified WRN arch
122 |         out = x
123 |         out = F.leaky_relu(self.bn1(self.conv1(out)))
124 |         out = self.layer1(out)
125 |         out = self.layer2(out)
126 |         out = self.layer3(out)
127 |         #out = F.relu(self.bn1(out))
128 |         out = F.avg_pool2d(out, 8)
129 |         out = out.view(out.size(0), -1)
130 |         out = self.linear(out)
131 |         """
132 |         return out
133 |     
def wrn28_10(num_classes=10, dropout=False):
    """Build a Wide_ResNet with depth 28 and widen factor 10.

    ``dropout`` is accepted but not used by this function.
    """
    return Wide_ResNet(depth=28, widen_factor=10, num_classes=num_classes)
138 | 
def wrn28_2(num_classes=10, dropout=False):
    """Build a Wide_ResNet with depth 28 and widen factor 2.

    ``dropout`` is accepted but not used by this function.
    """
    return Wide_ResNet(depth=28, widen_factor=2, num_classes=num_classes)
143 | 
144 | 
145 | 
if __name__ == '__main__':
    # Smoke test: run one random 3x32x32 image through a WRN-28-10.
    # Wide_ResNet.__init__ takes (depth, widen_factor, num_classes); the
    # previous call also passed a dropout rate (0.3) and raised TypeError.
    net = Wide_ResNet(28, 10, 10)
    y = net(Variable(torch.randn(1, 3, 32, 32)))

    print(y.size())
151 | 


--------------------------------------------------------------------------------
/cifar10/resnext/test.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on 31 Dec 2017
 3 | 
 4 | @author: vermav1
 5 | '''
 6 | import numpy as np
 7 | 
 8 | x=np.asarray((0.2,0.3,0.5))
 9 | y=np.asarray((0.9,0.01,0.1))
10 | 
11 | ce=0
12 | for i in xrange(x.shape[0]):
13 |     ce+=-(x[i]*np.log(y[i]))
14 | 
15 | print ce


--------------------------------------------------------------------------------
/cifar10/resnext/utils.py:
--------------------------------------------------------------------------------
  1 | import os, sys, time
  2 | import numpy as np
  3 | import matplotlib
  4 | matplotlib.use('agg')
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | class AverageMeter(object):
  8 |   """Computes and stores the average and current value"""
  9 |   def __init__(self):
 10 |     self.reset()
 11 | 
 12 |   def reset(self):
 13 |     self.val = 0
 14 |     self.avg = 0
 15 |     self.sum = 0
 16 |     self.count = 0
 17 | 
 18 |   def update(self, val, n=1):
 19 |     self.val = val
 20 |     self.sum += val * n
 21 |     self.count += n
 22 |     self.avg = self.sum / self.count
 23 | 
 24 | 
 25 | class RecorderMeter(object):
 26 |   """Computes and stores the minimum loss value and its epoch index"""
 27 |   def __init__(self, total_epoch):
 28 |     self.reset(total_epoch)
 29 | 
 30 |   def reset(self, total_epoch):
 31 |     assert total_epoch > 0
 32 |     self.total_epoch   = total_epoch
 33 |     self.current_epoch = 0
 34 |     self.epoch_losses  = np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
 35 |     self.epoch_losses  = self.epoch_losses - 1
 36 | 
 37 |     self.epoch_accuracy= np.zeros((self.total_epoch, 2), dtype=np.float32) # [epoch, train/val]
 38 |     self.epoch_accuracy= self.epoch_accuracy
 39 | 
 40 |   def update(self, idx, train_loss, train_acc, val_loss, val_acc):
 41 |     assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx)
 42 |     self.epoch_losses  [idx, 0] = train_loss
 43 |     self.epoch_losses  [idx, 1] = val_loss
 44 |     self.epoch_accuracy[idx, 0] = train_acc
 45 |     self.epoch_accuracy[idx, 1] = val_acc
 46 |     self.current_epoch = idx + 1
 47 |     return self.max_accuracy(False) == val_acc
 48 | 
 49 |   def max_accuracy(self, istrain):
 50 |     if self.current_epoch <= 0: return 0
 51 |     if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max()
 52 |     else:       return self.epoch_accuracy[:self.current_epoch, 1].max()
 53 |   
 54 |   def plot_curve(self, save_path):
 55 |     title = 'the accuracy/loss curve of train/val'
 56 |     dpi = 80  
 57 |     width, height = 1200, 800
 58 |     legend_fontsize = 10
 59 |     scale_distance = 48.8
 60 |     figsize = width / float(dpi), height / float(dpi)
 61 | 
 62 |     fig = plt.figure(figsize=figsize)
 63 |     x_axis = np.array([i for i in range(self.total_epoch)]) # epochs
 64 |     y_axis = np.zeros(self.total_epoch)
 65 | 
 66 |     plt.xlim(0, self.total_epoch)
 67 |     plt.ylim(0, 100)
 68 |     interval_y = 5
 69 |     interval_x = 5
 70 |     plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
 71 |     plt.yticks(np.arange(0, 100 + interval_y, interval_y))
 72 |     plt.grid()
 73 |     plt.title(title, fontsize=20)
 74 |     plt.xlabel('the training epoch', fontsize=16)
 75 |     plt.ylabel('accuracy', fontsize=16)
 76 |   
 77 |     y_axis[:] = self.epoch_accuracy[:, 0]
 78 |     plt.plot(x_axis, y_axis, color='g', linestyle='-', label='train-accuracy', lw=2)
 79 |     plt.legend(loc=4, fontsize=legend_fontsize)
 80 | 
 81 |     y_axis[:] = self.epoch_accuracy[:, 1]
 82 |     plt.plot(x_axis, y_axis, color='y', linestyle='-', label='valid-accuracy', lw=2)
 83 |     plt.legend(loc=4, fontsize=legend_fontsize)
 84 | 
 85 |     
 86 |     y_axis[:] = self.epoch_losses[:, 0]
 87 |     plt.plot(x_axis, y_axis*50, color='g', linestyle=':', label='train-loss-x50', lw=2)
 88 |     plt.legend(loc=4, fontsize=legend_fontsize)
 89 | 
 90 |     y_axis[:] = self.epoch_losses[:, 1]
 91 |     plt.plot(x_axis, y_axis*50, color='y', linestyle=':', label='valid-loss-x50', lw=2)
 92 |     plt.legend(loc=4, fontsize=legend_fontsize)
 93 | 
 94 |     if save_path is not None:
 95 |       fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
 96 |       print ('---- save figure {} into {}'.format(title, save_path))
 97 |     plt.close(fig)
 98 |     
 99 | 
def time_string():
  """Return the current UTC time formatted as '[%Y-%m-%d %X]'."""
  fmt = '%Y-%m-%d %X'
  now_utc = time.gmtime(time.time())
  return '[{}]'.format(time.strftime(fmt, now_utc))
104 | 
def convert_secs2time(epoch_time):
  """Split a duration in seconds into truncated (hours, minutes, seconds)."""
  need_hour = int(epoch_time / 3600)
  remainder = epoch_time - 3600*need_hour
  need_mins = int(remainder / 60)
  need_secs = int(remainder - 60*need_mins)
  return need_hour, need_mins, need_secs
110 | 
def time_file_str():
  """Return '<UTC date as %Y-%m-%d>-<random int in 1..10000>'."""
  # `random` is not imported at module level in this file, so the original
  # raised NameError; import it locally to keep this function self-contained.
  import random

  ISOTIMEFORMAT='%Y-%m-%d'
  string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) ))
  return string + '-{}'.format(random.randint(1, 10000))
115 | 


--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on 16 Nov 2017
  3 | 
  4 | @author: vermav1
  5 | '''
  6 | from time import gmtime, strftime
  7 | import torch
  8 | import numpy as np
  9 | import pandas as pd
 10 | import os
 11 | import shutil
 12 | import matplotlib.pyplot as plt
 13 | 
 14 | 
 15 | class Cutout(object):
 16 |     """Randomly mask out one or more patches from an image.
 17 |     Args:
 18 |         n_holes (int): Number of patches to cut out of each image.
 19 |         length (int): The length (in pixels) of each square patch.
 20 |     """
 21 |     def __init__(self, n_holes, length):
 22 |         self.n_holes = n_holes
 23 |         self.length = length
 24 | 
 25 |     def apply(self, img):
 26 |         """
 27 |         Args:
 28 |             img (Tensor): Tensor image of size (C, H, W).
 29 |         Returns:
 30 |             Tensor: Image with n_holes of dimension length x length cut out of it.
 31 |         """
 32 |         h = img.size(2)
 33 |         w = img.size(3)
 34 | 
 35 |         mask = np.ones((h, w), np.float32)
 36 | 
 37 |         for n in range(self.n_holes):
 38 |             y = np.random.randint(h)
 39 |             x = np.random.randint(w)
 40 | 
 41 |             y1 = int(np.clip(y - self.length / 2, 0, h))
 42 |             y2 = int(np.clip(y + self.length / 2, 0, h))
 43 |             x1 = int(np.clip(x - self.length / 2, 0, w))
 44 |             x2 = int(np.clip(x + self.length / 2, 0, w))
 45 | 
 46 |             mask[y1: y2, x1: x2] = 0.
 47 | 
 48 |         mask = torch.from_numpy(mask)
 49 |         mask = mask.expand_as(img)
 50 |         img = img * mask
 51 | 
 52 |         return img
 53 | 
 54 | 
 55 | def experiment_name(cod=True,
 56 |                     cod_trainable=False,
 57 |                     aux_nets=2,
 58 |                     opt='sgd',
 59 |                     epochs=400,
 60 |                     batch_size=64,
 61 |                     test_batch_size=1000,
 62 |                     lr=0.01,
 63 |                     momentum=0.5, 
 64 |                     data_aug=1,
 65 |                     manualSeed=None,
 66 |                     job_id=None,
 67 |                     add_name=''):
 68 |     if cod:
 69 |         exp_name = 'cod_true'
 70 |         if cod_trainable:
 71 |             exp_name+='_trainable_true'
 72 |         else:
 73 |             exp_name+='_trainable_false'
 74 |     else:
 75 |         exp_name = 'cod_false'
 76 |     exp_name+='_auxnets_'+str(aux_nets)
 77 |     exp_name+='_opt_'+str(opt)
 78 |     exp_name+='_epochs_'+str(epochs)
 79 |     exp_name +='_batch_size_'+str(batch_size)
 80 |     exp_name+='_test_batch_size_'+str(test_batch_size)
 81 |     exp_name += '_lr_'+str(lr)
 82 |     exp_name += '_momentum_'+str(momentum)
 83 |     exp_name += '_data_aug_'+str(data_aug)
 84 |     if manualSeed!=None:
 85 |         exp_name += '_manuael_seed_'+str(manualSeed)
 86 |     if job_id!=None:
 87 |         exp_name += '_job_id_'+str(job_id)
 88 |     if add_name!='':
 89 |         exp_name += '_add_name_'+str(add_name)
 90 |     
 91 |     # exp_name += strftime("_%Y-%m-%d_%H:%M:%S", gmtime())
 92 |     print('experiement name: ' + exp_name)
 93 |     return exp_name
 94 | 
 95 | 
 96 | def experiment_name_non_mnist(arch='',
 97 |                     aux_nets=2,
 98 |                     epochs=400,
 99 |                     dropout=True,
100 |                     batch_size=64,
101 |                     lr=0.01,
102 |                     momentum=0.5, 
103 |                     data_aug=1,
104 |                     manualSeed=None,
105 |                     job_id=None,
106 |                     add_name=''):
107 |     
108 |     exp_name= str(arch)
109 |     exp_name+='_auxnets_'+str(aux_nets)
110 |     exp_name += '_epochs_'+str(epochs)
111 |     if dropout:
112 |         exp_name+='_dropout_'+'true'
113 |     else:
114 |         exp_name+='_dropout_'+'False'
115 |     exp_name +='_batch_size_'+str(batch_size)
116 |     exp_name += '_lr_'+str(lr)
117 |     exp_name += '_momentum_'+str(momentum)
118 |     exp_name += '_data_aug_'+str(data_aug)
119 |     if manualSeed!=None:
120 |         exp_name += '_manuael_seed_'+str(manualSeed)
121 |     if job_id!=None:
122 |         exp_name += '_job_id_'+str(job_id)
123 |     if add_name!='':
124 |         exp_name += '_add_name_'+str(add_name)
125 |     
126 |     # exp_name += strftime("_%Y-%m-%d_%H:%M:%S", gmtime())
127 |     print('experiement name: ' + exp_name)
128 |     return exp_name
129 | 
def copy_script_to_folder(caller_path, folder):
    """Copy the script located at ``caller_path`` into ``folder``.

    The copy keeps the script's own file name.

    Args:
        caller_path: path of the file to copy.
        folder: destination directory; it must already exist.
    """
    # os.path.basename understands the platform's path separators, whereas
    # splitting on '/' leaves a Windows-style path untouched and would make
    # the destination collapse back onto the source file.
    script_filename = os.path.basename(caller_path)
    script_relative_path = os.path.join(folder, script_filename)
    shutil.copy(caller_path, script_relative_path)
135 | 
def cyclic_lr(initial_lr,step,total_steps,num_cycles):
    """Cosine learning-rate schedule that restarts ``num_cycles`` times.

    ``total_steps`` is split into cycles of ``ceil(total_steps / num_cycles)``
    steps. Inside a cycle the rate follows half a cosine wave: it equals
    ``initial_lr`` on the first step of the cycle and decays towards 0, then
    jumps back to ``initial_lr`` when the next cycle begins.

    Args:
        initial_lr: learning rate at the start of every cycle.
        step: current step; the first step of the schedule is 1.
        total_steps: number of steps the whole schedule spans.
        num_cycles: number of cosine cycles over ``total_steps``.

    Returns:
        The learning rate for ``step``.
    """
    cycle_length = np.ceil(float(total_steps) / num_cycles)
    # Position inside the current cycle (0-based, hence ``step - 1``).
    position = np.mod(step - 1, cycle_length)
    angle = np.pi * position / cycle_length
    half_lr = initial_lr / 2
    return half_lr * (np.cos(angle) + 1)
140 | 
if __name__ == '__main__':
    # Visual sanity check: plot the first 1000 steps of a cyclic schedule
    # defined over 1100 steps with 3 cycles and an initial rate of 0.1.
    # ``range`` is used instead of ``xrange`` so the demo also runs on Python 3.
    lr_list = [cyclic_lr(0.1, i + 1, 1100, 3) for i in range(1000)]
    plt.plot(np.asarray(lr_list))
    plt.show()
148 |         


--------------------------------------------------------------------------------
/load_data.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on 21 Nov 2017
  3 | 
  4 | @author: vermav1
  5 | '''
  6 | import torch
  7 | from torchvision import datasets, transforms
  8 | from affine_transforms import Rotation, Zoom
  9 | 
 10 | 
 11 | 
 12 | def load_mnist(data_aug, batch_size, test_batch_size,cuda, data_target_dir):
 13 | 
 14 |     if data_aug == 1:
 15 |         hw_size = 24
 16 |         transform_train = transforms.Compose([
 17 |                             transforms.RandomCrop(hw_size),                
 18 |                             transforms.ToTensor(),
 19 |                             Rotation(15),                                            
 20 |                             Zoom((0.85, 1.15)),       
 21 |                             transforms.Normalize((0.1307,), (0.3081,))
 22 |                        ])
 23 |         transform_test = transforms.Compose([
 24 |                             transforms.CenterCrop(hw_size),                       
 25 |                             transforms.ToTensor(),
 26 |                             transforms.Normalize((0.1307,), (0.3081,))
 27 |                        ])
 28 |     else:
 29 |         hw_size = 28
 30 |         transform_train = transforms.Compose([
 31 |                             transforms.ToTensor(),       
 32 |                             transforms.Normalize((0.1307,), (0.3081,))
 33 |                        ])
 34 |         transform_test = transforms.Compose([
 35 |                             transforms.ToTensor(),
 36 |                             transforms.Normalize((0.1307,), (0.3081,))
 37 |                        ])
 38 |     
 39 |     
 40 |     kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {}       
 41 |     
 42 |     
 43 |                 
 44 |     train_loader = torch.utils.data.DataLoader(
 45 |         datasets.MNIST(data_target_dir, train=True, download=True, transform=transform_train),
 46 |         batch_size=batch_size, shuffle=True, **kwargs)
 47 |     test_loader = torch.utils.data.DataLoader(
 48 |         datasets.MNIST(data_target_dir, train=False, transform=transform_test),
 49 |         batch_size=test_batch_size, shuffle=True, **kwargs)
 50 |     
 51 |     return train_loader, test_loader    
 52 | 
 53 | 
 54 | def load_data(data_aug, batch_size,workers,dataset, data_target_dir):
 55 |     
 56 |     if dataset == 'cifar10':
 57 |         mean = [x / 255 for x in [125.3, 123.0, 113.9]]
 58 |         std = [x / 255 for x in [63.0, 62.1, 66.7]]
 59 |     elif dataset == 'cifar100':
 60 |         mean = [x / 255 for x in [129.3, 124.1, 112.4]]
 61 |         std = [x / 255 for x in [68.2, 65.4, 70.4]]
 62 |         
 63 |     elif dataset == 'svhn':
 64 |         mean = [x / 255 for x in [127.5, 127.5, 127.5]]
 65 |         std = [x / 255 for x in [127.5, 127.5, 127.5]]
 66 |     else:
 67 |         assert False, "Unknow dataset : {}".format(dataset)
 68 |     
 69 |     if data_aug==1:
 70 |         if dataset == 'svhn':
 71 |             train_transform = transforms.Compose(
 72 |                                              [ transforms.RandomCrop(32, padding=2), transforms.ToTensor(),
 73 |                                               transforms.Normalize(mean, std)])
 74 |             test_transform = transforms.Compose(
 75 |                                             [transforms.ToTensor(), transforms.Normalize(mean, std)])
 76 |         else:
 77 |             train_transform = transforms.Compose(
 78 |                                                  [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
 79 |                                                   transforms.Normalize(mean, std)])
 80 |             test_transform = transforms.Compose(
 81 |                                                 [transforms.ToTensor(), transforms.Normalize(mean, std)])
 82 |     else:
 83 |         train_transform = transforms.Compose(
 84 |                                              [ transforms.ToTensor(),
 85 |                                               transforms.Normalize(mean, std)])
 86 |         test_transform = transforms.Compose(
 87 |                                             [transforms.ToTensor(), transforms.Normalize(mean, std)])
 88 |     if dataset == 'cifar10':
 89 |         train_data = datasets.CIFAR10(data_target_dir, train=True, transform=train_transform, download=True)
 90 |         test_data = datasets.CIFAR10(data_target_dir, train=False, transform=test_transform, download=True)
 91 |         num_classes = 10
 92 |     elif dataset == 'cifar100':
 93 |         train_data = datasets.CIFAR100(data_target_dir, train=True, transform=train_transform, download=True)
 94 |         test_data = datasets.CIFAR100(data_target_dir, train=False, transform=test_transform, download=True)
 95 |         num_classes = 100
 96 |     elif dataset == 'svhn':
 97 |         train_data = datasets.SVHN(data_target_dir, split='train', transform=train_transform, download=True)
 98 |         test_data = datasets.SVHN(data_target_dir, split='test', transform=test_transform, download=True)
 99 |         num_classes = 10
100 |     elif dataset == 'stl10':
101 |         train_data = datasets.STL10(data_target_dir, split='train', transform=train_transform, download=True)
102 |         test_data = datasets.STL10(data_target_dir, split='test', transform=test_transform, download=True)
103 |         num_classes = 10
104 |     elif dataset == 'imagenet':
105 |         assert False, 'Do not finish imagenet code'
106 |     else:
107 |         assert False, 'Do not support dataset : {}'.format(dataset)
108 | 
109 |     train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True,
110 |                          num_workers=workers, pin_memory=True)
111 |     test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False,
112 |                         num_workers=workers, pin_memory=True)
113 |     
114 |     return train_loader, test_loader, num_classes


--------------------------------------------------------------------------------
/lr_scheduler.py:
--------------------------------------------------------------------------------
  1 | #copied from https://github.com/Jiaming-Liu/pytorch-lr-scheduler/blob/master/lr_scheduler.py
  2 | 
  3 | import numpy as np
  4 | import warnings
  5 | from torch.optim.optimizer import Optimizer
  6 | 
  7 | 
  8 | class ReduceLROnPlateau(object):
  9 |     """Reduce learning rate when a metric has stopped improving.
 10 |     Models often benefit from reducing the learning rate by a factor
 11 |     of 2-10 once learning stagnates. This scheduler reads a metrics
 12 |     quantity and if no improvement is seen for a 'patience' number
 13 |     of epochs, the learning rate is reduced.
 14 |     
 15 |     Args:
 16 |         factor: factor by which the learning rate will
 17 |             be reduced. new_lr = lr * factor
 18 |         patience: number of epochs with no improvement
 19 |             after which learning rate will be reduced.
 20 |         verbose: int. 0: quiet, 1: update messages.
 21 |         mode: one of {min, max}. In `min` mode,
 22 |             lr will be reduced when the quantity
 23 |             monitored has stopped decreasing; in `max`
 24 |             mode it will be reduced when the quantity
 25 |             monitored has stopped increasing.
 26 |         epsilon: threshold for measuring the new optimum,
 27 |             to only focus on significant changes.
 28 |         cooldown: number of epochs to wait before resuming
 29 |             normal operation after lr has been reduced.
 30 |         min_lr: lower bound on the learning rate.
 31 |         
 32 |         
 33 |     Example:
 34 |         >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
 35 |         >>> scheduler = ReduceLROnPlateau(optimizer, 'min')
 36 |         >>> for epoch in range(10):
 37 |         >>>     train(...)
 38 |         >>>     val_acc, val_loss = validate(...)
 39 |         >>>     scheduler.step(val_loss, epoch)
 40 |     """
 41 | 
 42 |     def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
 43 |                  verbose=0, epsilon=1e-4, cooldown=0, min_lr=0):
 44 |         super(ReduceLROnPlateau, self).__init__()
 45 | 
 46 |         if factor >= 1.0:
 47 |             raise ValueError('ReduceLROnPlateau '
 48 |                              'does not support a factor >= 1.0.')
 49 |         self.factor = factor
 50 |         self.min_lr = min_lr
 51 |         self.epsilon = epsilon
 52 |         self.patience = patience
 53 |         self.verbose = verbose
 54 |         self.cooldown = cooldown
 55 |         self.cooldown_counter = 0  # Cooldown counter.
 56 |         self.monitor_op = None
 57 |         self.wait = 0
 58 |         self.best = 0
 59 |         self.mode = mode
 60 |         assert isinstance(optimizer, Optimizer)
 61 |         self.optimizer = optimizer
 62 |         self._reset()
 63 | 
 64 |     def _reset(self):
 65 |         """Resets wait counter and cooldown counter.
 66 |         """
 67 |         if self.mode not in ['min', 'max']:
 68 |             raise RuntimeError('Learning Rate Plateau Reducing mode %s is unknown!')
 69 |         if self.mode == 'min' :
 70 |             self.monitor_op = lambda a, b: np.less(a, b - self.epsilon)
 71 |             self.best = np.Inf
 72 |         else:
 73 |             self.monitor_op = lambda a, b: np.greater(a, b + self.epsilon)
 74 |             self.best = -np.Inf
 75 |         self.cooldown_counter = 0
 76 |         self.wait = 0
 77 |         self.lr_epsilon = self.min_lr * 1e-4
 78 | 
 79 |     def reset(self):
 80 |         self._reset()
 81 | 
 82 |     def step(self, metrics, epoch):
 83 |         current = metrics
 84 |         if current is None:
 85 |             warnings.warn('Learning Rate Plateau Reducing requires metrics available!', RuntimeWarning)
 86 |         else:
 87 |             if self.in_cooldown():
 88 |                 self.cooldown_counter -= 1
 89 |                 self.wait = 0
 90 | 
 91 |             if self.monitor_op(current, self.best):
 92 |                 self.best = current
 93 |                 self.wait = 0
 94 |             elif not self.in_cooldown():
 95 |                 if self.wait >= self.patience:
 96 |                     for param_group in self.optimizer.param_groups:
 97 |                         old_lr = float(param_group['lr'])
 98 |                         if old_lr > self.min_lr + self.lr_epsilon:
 99 |                             new_lr = old_lr * self.factor
100 |                             new_lr = max(new_lr, self.min_lr)
101 |                             param_group['lr'] = new_lr
102 |                             if self.verbose > 0:
103 |                                 print('\nEpoch %05d: reducing learning rate to %s.' % (epoch, new_lr))
104 |                             self.cooldown_counter = self.cooldown
105 |                             self.wait = 0
106 |                 self.wait += 1
107 | 
108 |     def in_cooldown(self):
109 |         return self.cooldown_counter > 0
110 | 


--------------------------------------------------------------------------------
/plots.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on 19 Oct 2017
 3 | 
 4 | @author: vermav1
 5 | '''
 6 | import argparse
 7 | import sys
 8 | if sys.version_info[0] < 3:
 9 |     import cPickle as pickle
10 | else:
11 |  import _pickle as pickle
12 | import os
13 | import matplotlib.pyplot as plt
14 | import numpy as np
15 | from scipy.stats import norm
16 | import seaborn as sns
17 | sns.set(color_codes=True)
18 | 
19 | plot_from_index=-10000
20 | 
21 | 
def plotting(exp_dir):
    """Plot the logged train/test loss and accuracy curves of an experiment.

    Reads ``log.pkl`` from ``exp_dir`` and writes one PNG per metric into
    the same directory: ``train_loss.png``, ``test_loss.png``,
    ``train_acc.png`` and ``test_acc.png``.

    Args:
        exp_dir: experiment directory containing ``log.pkl``. The pickled
            object is indexed with the keys ``'train_loss'``,
            ``'test_loss'``, ``'train_acc'`` and ``'test_acc'``.

    Raises:
        KeyError: if one of the four metrics is missing from the log
            (plots for the preceding metrics have been written by then).
    """
    # Load the training log dictionary. The context manager closes the file
    # handle deterministically.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point this at logs produced by trusted runs.
    with open(os.path.join(exp_dir, 'log.pkl'), 'rb') as log_file:
        train_dict = pickle.load(log_file)

    # One figure per metric, each saved as '<metric>.png' and then cleared
    # so the next curve starts from an empty figure.
    for metric in ('train_loss', 'test_loss', 'train_acc', 'test_acc'):
        plt.plot(np.asarray(train_dict[metric]), label=metric)
        plt.xlabel('evaluation step')
        plt.ylabel('metrics')
        plt.tight_layout()
        plt.legend(loc='upper right')
        plt.savefig(os.path.join(exp_dir, metric + '.png'))
        plt.clf()
72 |    
73 |     
74 |         
75 | 
76 |         
77 |     
78 | 
79 |    
if __name__ == '__main__':
    # The experiment directory can be given on the command line; without an
    # argument the previously hard-coded directory is used.
    parser = argparse.ArgumentParser(
        description='Plot train/test loss and accuracy curves from log.pkl.')
    parser.add_argument(
        'exp_dir', nargs='?',
        default='experiments/PB_cnn_mse_pretrained_ne_pretrain100000_ne_posttrain100000_real_data_size1_N10_P2000_',
        help='experiment directory containing log.pkl')
    plotting(parser.parse_args().exp_dir)


--------------------------------------------------------------------------------