├── README.md ├── affine_transforms.py ├── cifar10 └── resnext │ ├── LICENSE │ ├── logger.py │ ├── main.py │ ├── models │ ├── __init__.py │ ├── caffe_cifar.py │ ├── densenet.py │ ├── imagenet_resnet.py │ ├── preresnet.py │ ├── res_utils.py │ ├── resnet.py │ ├── resnext.py │ └── wide_resnet.py │ ├── test.py │ └── utils.py ├── helpers.py ├── load_data.py ├── lr_scheduler.py └── plots.py /README.md: -------------------------------------------------------------------------------- 1 | This repository contains PyTorch code for reproducing the results in our paper: 2 | 3 | **[Generalization in Machine Learning via Analytical Learning Theory](https://arxiv.org/pdf/1802.07426.pdf)** \ 4 | *Kenji Kawaguchi, Yoshua Bengio, Vikas Verma, and Leslie Pack Kaelbling* 5 | 6 | 7 | #### Test error (\%) with WideResNet28_10 and different regularization methods 8 | | Regularization Method | CIFAR-10 | CIFAR-100 | SVHN | 9 | |:----------:|:--------------:|:--------------:|:------:| 10 | | Standard | 3.79 ± 0.07 | 19.85 ± 0.14 | 2.47 ± 0.04| 11 | | Single-cutout | 3.19 ± 0.09 | 18.13 ± 0.28 | 2.23 ± 0.03 | 12 | | Dual-cutout | 2.61 ± 0.04 | 17.54 ± 0.09 | 2.06 ± 0.06| 13 | 14 | * Dual-cutout is the method proposed in our paper, derived from a new learning theory. 15 | 16 | 17 | 18 | 19 | ### How to run DualCutout 20 | ``` 21 | python cifar10/resnext/main.py --dualcutout --dataset cifar10 --arch wrn28_10 \ 22 | --epochs 300 --batch_size 64 --learning_rate 0.1 --data_aug 1 --decay 0.0005 --schedule 150 225 \ 23 | --gammas 0.1 0.1 --alpha 0.1 --cutsize 16 24 | ``` 25 | Add `--temp_dir` and `--home_dir` to the above command as appropriate for your machine. For CIFAR-10 and CIFAR-100 we used `--cutsize 16`; for SVHN we used `--cutsize 20`. 
26 | 27 | ### How to run Single Cutout 28 | ``` 29 | python cifar10/resnext/main.py --singlecutout --dataset cifar10 --arch wrn28_10 \ 30 | --epochs 300 --batch_size 64 --learning_rate 0.1 --data_aug 1 --decay 0.0005 --schedule 150 225 \ 31 | --gammas 0.1 0.1 --alpha 0.1 --cutsize 16 32 | ``` 33 | ### How to run baseline 34 | ``` 35 | python cifar10/resnext/main.py --dataset cifar10 --arch wrn28_10 \ 36 | --epochs 300 --batch_size 64 --learning_rate 0.1 --data_aug 1 --decay 0.0005 --schedule 150 225 \ 37 | --gammas 0.1 0.1 38 | ``` 39 | 40 | This code has been tested with: 41 | - python 2.7.9 42 | - torch 0.3.1 43 | - torchvision 0.2.0 44 | -------------------------------------------------------------------------------- /affine_transforms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Affine transforms implemented on torch tensors, and 3 | only requiring one interpolation 4 | 5 | Included: 6 | - Affine() 7 | - AffineCompose() 8 | - Rotation() 9 | - Translation() 10 | - Shear() 11 | - Zoom() 12 | - Flip() 13 | 14 | """ 15 | 16 | import math 17 | import random 18 | import torch 19 | 20 | # necessary now, but should eventually not be 21 | import scipy.ndimage as ndi 22 | import numpy as np 23 | 24 | 25 | def transform_matrix_offset_center(matrix, x, y): 26 | """Apply offset to a transform matrix so that the image is 27 | transformed about the center of the image. 28 | 29 | NOTE: This is a fairly simple operation, so can easily be 30 | moved to full torch. 
def transform_matrix_offset_center(matrix, x, y):
    """Apply offset to a transform matrix so that the image is
    transformed about the center of the image.

    NOTE: This is a fairly simple operation, so can easily be
    moved to full torch.

    Arguments
    ---------
    matrix : 3x3 matrix/array
        homogeneous affine transform to re-centre

    x : integer
        height dimension of image to be transformed

    y : integer
        width dimension of image to be transformed

    Returns
    -------
    3x3 np.ndarray equal to ``offset . matrix . reset``, where ``offset``
    translates by (x/2 + 0.5, y/2 + 0.5) and ``reset`` translates back.
    """
    o_x = float(x) / 2 + 0.5
    o_y = float(y) / 2 + 0.5
    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
    transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
    return transform_matrix


def apply_transform(x, transform, fill_mode='nearest', fill_value=0.):
    """Applies an affine transform to each channel of a 3D array.

    NOTE: this can and certainly should be moved to full torch operations.

    Arguments
    ---------
    x : np.ndarray
        array to transform. NOTE: array should be ordered CHW

    transform : 3x3 affine transform matrix
        matrix to apply (it is re-centred on the image before use)

    fill_mode : string
        passed to scipy as ``mode``; how samples outside the image are filled

    fill_value : float
        passed to scipy as ``cval``; used when fill_mode='constant'

    Returns
    -------
    float32 np.ndarray with the same CHW shape as ``x``.
    """
    x = x.astype('float32')
    transform = transform_matrix_offset_center(transform, x.shape[1], x.shape[2])
    final_affine_matrix = transform[:2, :2]
    final_offset = transform[:2, 2]
    # ``ndi.affine_transform`` is the public name of the function formerly
    # reached through the deprecated ``ndi.interpolation`` namespace.
    # order=0 means nearest-neighbour sampling (no new pixel values appear).
    channel_images = [ndi.affine_transform(x_channel, final_affine_matrix,
                                           final_offset, order=0,
                                           mode=fill_mode, cval=fill_value)
                      for x_channel in x]
    return np.stack(channel_images, axis=0)


class Affine(object):

    def __init__(self,
                 rotation_range=None,
                 translation_range=None,
                 shear_range=None,
                 zoom_range=None,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.):
        """Perform an affine transform built from several random
        sub-transforms, using only one interpolation and without having to
        instantiate each sub-transform individually.

        Arguments
        ---------
        rotation_range : one integer or float
            image will be rotated between (-degrees, degrees) degrees

        translation_range : a float or a tuple/list w/ 2 floats between [0, 1)
            first value:
                bound on the shift along the height axis, as a fraction of
                the image height
            second value:
                bound on the shift along the width axis, as a fraction of
                the image width

        shear_range : float
            radian bounds on the shear transform

        zoom_range : list/tuple with two floats between [0, infinity).
            first float should be less than the second
            lower and upper bounds on percent zoom.
            Anything less than 1.0 will zoom in on the image,
            anything greater than 1.0 will zoom out on the image.
            e.g. (0.7, 1.0) will only zoom in,
                 (1.0, 1.4) will only zoom out,
                 (0.7, 1.4) will randomly zoom in or out

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform
            ProTip : use 'nearest' for discrete images (e.g. segmentations)
                    and use 'constant' for continuous images

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        """
        self.transforms = []
        if rotation_range:
            rotation_tform = Rotation(rotation_range, lazy=True)
            self.transforms.append(rotation_tform)

        if translation_range:
            translation_tform = Translation(translation_range, lazy=True)
            self.transforms.append(translation_tform)

        if shear_range:
            shear_tform = Shear(shear_range, lazy=True)
            self.transforms.append(shear_tform)

        if zoom_range:
            # Fixed: this used to build a Translation from zoom_range, so a
            # requested zoom silently became a second random shift.
            zoom_tform = Zoom(zoom_range, lazy=True)
            self.transforms.append(zoom_tform)

        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value

    def __call__(self, x, y=None):
        """Transform ``x`` (and ``y`` when given) with one shared random matrix.

        Returns the transformed ``x``, or the pair ``(x, y)`` when a target
        is supplied.
        """
        # Collect the lazily returned matrices. Starting from the identity
        # keeps the product unchanged and makes "no sub-transform configured"
        # a plain identity resample instead of an IndexError.
        tform_matrix = np.eye(3)
        for tform in self.transforms:
            tform_matrix = np.dot(tform_matrix, tform(x))

        x = torch.from_numpy(apply_transform(x.numpy(), tform_matrix,
            fill_mode=self.fill_mode, fill_value=self.fill_value))

        # ``is not None`` rather than truthiness: bool() of a multi-element
        # tensor is ambiguous and raises.
        if y is not None:
            y = torch.from_numpy(apply_transform(y.numpy(), tform_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x, y
        return x
class AffineCompose(object):

    def __init__(self,
                 transforms,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.):
        """Apply a collection of explicit affine transforms to an input image,
        and to a target image if necessary

        Arguments
        ---------
        transforms : list or tuple
            each element in the list/tuple should be an affine transform.
            currently supported transforms:
                - Rotation()
                - Translation()
                - Shear()
                - Zoom()
            NOTE: every element has its ``lazy`` attribute set to True.

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        """
        self.transforms = transforms
        # set transforms to lazy so they only return the tform matrix
        for t in self.transforms:
            t.lazy = True
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value

    def __call__(self, x, y=None):
        """Transform ``x`` (and ``y`` when given) with the composed matrix.

        Returns the transformed ``x``, or ``(x, y)`` when a target is given.
        """
        # Multiply the lazily returned matrices in order, starting from the
        # identity so an empty collection is an identity resample rather
        # than an IndexError.
        tform_matrix = np.eye(3)
        for tform in self.transforms:
            tform_matrix = np.dot(tform_matrix, tform(x))

        x = torch.from_numpy(apply_transform(x.numpy(), tform_matrix,
            fill_mode=self.fill_mode, fill_value=self.fill_value))

        # Truthiness of a multi-element tensor is ambiguous; test for None.
        if y is not None:
            y = torch.from_numpy(apply_transform(y.numpy(), tform_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x, y
        return x


class Rotation(object):

    def __init__(self,
                 rotation_range,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly rotate an image between (-degrees, degrees). If the image
        has multiple channels, the same rotation will be applied to each channel.

        Arguments
        ---------
        rotation_range : integer or float
            image will be rotated between (-degrees, degrees) degrees

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        self.rotation_range = rotation_range
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw a random angle and return the matrix (lazy) or the rotated
        tensor(s) (not lazy)."""
        degree = random.uniform(-self.rotation_range, self.rotation_range)
        theta = math.pi / 180 * degree
        rotation_matrix = np.array([[math.cos(theta), -math.sin(theta), 0],
                                    [math.sin(theta), math.cos(theta), 0],
                                    [0, 0, 1]])
        if self.lazy:
            return rotation_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(), rotation_matrix,
            fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Truthiness of a multi-element tensor is ambiguous; test for None.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), rotation_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed
class Translation(object):

    def __init__(self,
                 translation_range,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly translate an image some fraction of total height and/or
        some fraction of total width. If the image has multiple channels,
        the same translation will be applied to each channel.

        Arguments
        ---------
        translation_range : a number, or two numbers between [0, 1)
            a single number is used for both axes.
            first value:
                fractional bound of total height to shift image; the shift is
                drawn from
                (-height_range * height_dimension, height_range * height_dimension)
            second value:
                fractional bound of total width to shift image; the shift is
                drawn from
                (-width_range * width_dimension, width_range * width_dimension)

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        # Accept ints as well as floats for the scalar form; previously an
        # int fell through and failed on indexing below.
        if isinstance(translation_range, (int, float)):
            translation_range = (translation_range, translation_range)
        self.height_range = translation_range[0]
        self.width_range = translation_range[1]
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw random shifts and return the matrix (lazy) or the shifted
        tensor(s) (not lazy). ``x`` is only inspected for its sizes along
        dims 1 and 2."""
        # height shift
        if self.height_range > 0:
            tx = random.uniform(-self.height_range, self.height_range) * x.size(1)
        else:
            tx = 0
        # width shift
        if self.width_range > 0:
            ty = random.uniform(-self.width_range, self.width_range) * x.size(2)
        else:
            ty = 0

        translation_matrix = np.array([[1, 0, tx],
                                       [0, 1, ty],
                                       [0, 0, 1]])
        if self.lazy:
            return translation_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(),
            translation_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Truthiness of a multi-element tensor is ambiguous; test for None.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), translation_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed


class Shear(object):

    def __init__(self,
                 shear_range,
                 fill_mode='constant',
                 fill_value=0.,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly shear an image with radians (-shear_range, shear_range)

        Arguments
        ---------
        shear_range : float
            radian bounds on the shear transform

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        self.shear_range = shear_range
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw a random shear angle and return the matrix (lazy) or the
        sheared tensor(s) (not lazy)."""
        shear = random.uniform(-self.shear_range, self.shear_range)
        shear_matrix = np.array([[1, -math.sin(shear), 0],
                                 [0, math.cos(shear), 0],
                                 [0, 0, 1]])
        if self.lazy:
            return shear_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(),
            shear_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Truthiness of a multi-element tensor is ambiguous; test for None.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), shear_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed


class Zoom(object):

    def __init__(self,
                 zoom_range,
                 fill_mode='constant',
                 fill_value=0,
                 target_fill_mode='nearest',
                 target_fill_value=0.,
                 lazy=False):
        """Randomly zoom in and/or out on an image

        Arguments
        ---------
        zoom_range : tuple or list with 2 values, both between (0, infinity)
            lower and upper bounds on percent zoom.
            Anything less than 1.0 will zoom in on the image,
            anything greater than 1.0 will zoom out on the image.
            e.g. (0.7, 1.0) will only zoom in,
                 (1.0, 1.4) will only zoom out,
                 (0.7, 1.4) will randomly zoom in or out

        fill_mode : string in {'constant', 'nearest'}
            how to fill the empty space caused by the transform

        fill_value : float
            the value to fill the empty space with if fill_mode='constant'

        target_fill_mode : same as fill_mode, but for target image

        target_fill_value : same as fill_value, but for target image

        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor

        Raises
        ------
        ValueError if zoom_range is not a list or tuple.
        """
        if not isinstance(zoom_range, (list, tuple)):
            raise ValueError('zoom_range must be tuple or list with 2 values')
        self.zoom_range = zoom_range
        self.fill_mode = fill_mode
        self.fill_value = fill_value
        self.target_fill_mode = target_fill_mode
        self.target_fill_value = target_fill_value
        self.lazy = lazy

    def __call__(self, x, y=None):
        """Draw independent zoom factors for the two axes and return the
        matrix (lazy) or the zoomed tensor(s) (not lazy)."""
        zx = random.uniform(self.zoom_range[0], self.zoom_range[1])
        zy = random.uniform(self.zoom_range[0], self.zoom_range[1])
        zoom_matrix = np.array([[zx, 0, 0],
                                [0, zy, 0],
                                [0, 0, 1]])
        if self.lazy:
            return zoom_matrix

        x_transformed = torch.from_numpy(apply_transform(x.numpy(),
            zoom_matrix, fill_mode=self.fill_mode, fill_value=self.fill_value))
        # Truthiness of a multi-element tensor is ambiguous; test for None.
        if y is not None:
            y_transformed = torch.from_numpy(apply_transform(y.numpy(), zoom_matrix,
                fill_mode=self.target_fill_mode, fill_value=self.target_fill_value))
            return x_transformed, y_transformed
        return x_transformed
| 451 | 452 | -------------------------------------------------------------------------------- /cifar10/resnext/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xuanyi Dong 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
import tensorflow as tf
import numpy as np
import scipy.misc
import sys
if sys.version_info[0] == 2:
    from StringIO import StringIO  # Python 2.x
else:
    from io import BytesIO  # Python 3.x


class Logger(object):
    """Thin wrapper that writes scalar, image and histogram summaries
    through a ``tf.summary.FileWriter``."""

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Each image is PNG-encoded and stored under the tag ``'<tag>/<index>'``.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # Pick the in-memory buffer explicitly by interpreter version.
            # The previous bare ``except`` relied on a NameError to fall back
            # to BytesIO and would also have hidden any unrelated failure.
            if sys.version_info[0] == 2:
                s = StringIO()
            else:
                s = BytesIO()
            # Write the image to the buffer
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values and flush the writer."""
        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin: np.histogram returns bins+1 edges,
        # the proto stores one upper limit per bucket.
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
formatter_class=argparse.ArgumentDefaultsHelpFormatter) 39 | parser.add_argument('--dataset', type=str, default='cifar10', choices=['cifar10', 'cifar100', 'imagenet', 'svhn', 'stl10'], help='Choose between Cifar10/100 and ImageNet.') 40 | parser.add_argument('--arch', metavar='ARCH', default='resnext29_8_64', choices=model_names, help='model architecture: ' + ' | '.join(model_names) + ' (default: resnext29_8_64)') 41 | # Optimization options 42 | parser.add_argument('--epochs', type=int, default=300, help='Number of epochs to train.') 43 | parser.add_argument('--singlecutout', action='store_true', default=False, 44 | help='whether to use singlecutout') 45 | parser.add_argument('--dualcutout', action='store_true', default=False, 46 | help='whether to use dualcutout') 47 | parser.add_argument('--cutsize', type=int, default=16, help='cutout size.') 48 | parser.add_argument('--dropout', action='store_true', default=False, 49 | help='whether to use dropout or not in final layer') 50 | #parser.add_argument('--batch_size', type=int, default=128, help='Batch size.') 51 | parser.add_argument('--batch_size', type=int, default=64, help='Batch size.') 52 | parser.add_argument('--learning_rate', type=float, default=0.05, help='The Learning Rate.') 53 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum.') 54 | parser.add_argument('--alpha', type=float, default=0.01, help='the coefficient that controls the difference between the outputs from two cutouts') 55 | parser.add_argument('--data_aug', type=int, default=0) 56 | parser.add_argument('--add_name', type=str, default='') 57 | #parser.add_argument('--decay', type=float, default=0.0005, help='Weight decay (L2 penalty).') 58 | parser.add_argument('--decay', type=float, default=0.0000, help='Weight decay (L2 penalty).') 59 | parser.add_argument('--schedule', type=int, nargs='+', default=[150, 225], help='Decrease learning rate at these epochs.') 60 | parser.add_argument('--gammas', type=float, nargs='+', 
default=[0.1, 0.1], help='LR is multiplied by gamma on schedule, number of gammas should be equal to schedule') 61 | # Checkpoints 62 | parser.add_argument('--print_freq', default=1000, type=int, metavar='N', help='print frequency (default: 200)') 63 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') 64 | parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)') 65 | parser.add_argument('--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set') 66 | # Acceleration 67 | parser.add_argument('--ngpu', type=int, default=1, help='0 = CPU.') 68 | parser.add_argument('--workers', type=int, default=2, help='number of data loading workers (default: 2)') 69 | # random seed 70 | parser.add_argument('--manualSeed', type=int, help='manual seed') 71 | parser.add_argument('--job_id', type=str, default='') 72 | parser.add_argument('--temp_dir', type = str, default = '/Tmp/vermavik/', 73 | help='folder on local node where data is stored temporarily') 74 | parser.add_argument('--home_dir', type = str, default = '/data/milatmp1/vermavik/', 75 | help='file where results are to be written') 76 | 77 | 78 | 79 | args = parser.parse_args() 80 | args.use_cuda = args.ngpu>0 and torch.cuda.is_available() 81 | 82 | out_str = str(args) 83 | print(out_str) 84 | 85 | if args.manualSeed is None: 86 | args.manualSeed = random.randint(1, 10000) 87 | random.seed(args.manualSeed) 88 | torch.manual_seed(args.manualSeed) 89 | 90 | if args.use_cuda: 91 | torch.cuda.manual_seed_all(args.manualSeed) 92 | cudnn.benchmark = True 93 | 94 | 95 | def experiment_name(arch='', 96 | epochs=400, 97 | dropout=True, 98 | batch_size=64, 99 | lr=0.01, 100 | momentum=0.5, 101 | alpha= 0.01, 102 | decay=0.0005, 103 | data_aug=1, 104 | dualcutout= False, 105 | singlecutout= False, 106 | cutsize = 16, 107 | manualSeed=None, 108 | job_id=None, 109 | 
def experiment_name(arch='',
                    epochs=400,
                    dropout=True,
                    batch_size=64,
                    lr=0.01,
                    momentum=0.5,
                    alpha= 0.01,
                    decay=0.0005,
                    data_aug=1,
                    dualcutout= False,
                    singlecutout= False,
                    cutsize = 16,
                    manualSeed=None,
                    job_id=None,
                    add_name=''):
    """Build (and print) the directory-name string that identifies a run.

    The name is the concatenation of the hyper-parameters in a fixed order.
    ``cutsize`` is only included when one of the cutout modes is on;
    ``job_id`` and ``manualSeed`` are only included when they are not None,
    and ``add_name`` only when it is non-empty.
    """
    exp_name = str(arch)
    exp_name += '_epochs_'+str(epochs)
    if dropout:
        exp_name+='_dropout_'+'true'
    else:
        exp_name+='_dropout_'+'False'
    if dualcutout:
        exp_name+='_dualcutout_'+'true'
        exp_name +='_cut_size_'+str(cutsize)
    elif singlecutout:
        exp_name+='_singlecutout_'+'true'
        exp_name +='_cut_size_'+str(cutsize)
    else:
        exp_name+='_nocutout_'+'true'

    exp_name +='_batch_size_'+str(batch_size)
    exp_name += '_lr_'+str(lr)
    exp_name += '_momentum_'+str(momentum)
    exp_name += '_alpha_'+str(alpha)
    exp_name +='_decay_'+str(decay)
    exp_name += '_data_aug_'+str(data_aug)
    if job_id is not None:
        exp_name += '_job_id_'+str(job_id)
    if manualSeed is not None:
        # The misspelt key is kept on purpose: it is part of existing
        # experiment directory names.
        exp_name += '_manuael_seed_'+str(manualSeed)
    if add_name!='':
        exp_name += '_add_name_'+str(add_name)

    print('experiement name: ' + exp_name)
    return exp_name


def print_log(print_string, log):
    """Print ``print_string`` and append it, newline-terminated, to the open
    file-like object ``log``, flushing immediately."""
    print("{}".format(print_string))
    log.write('{}\n'.format(print_string))
    log.flush()


def save_checkpoint(state, is_best, save_path, filename):
    """Serialize ``state`` to ``save_path/filename`` with ``torch.save``.

    When ``is_best`` is true the file is also copied to
    ``save_path/model_best.pth.tar``.
    """
    filename = os.path.join(save_path, filename)
    torch.save(state, filename)
    if is_best:
        bestname = os.path.join(save_path, 'model_best.pth.tar')
        shutil.copyfile(filename, bestname)


def adjust_learning_rate(optimizer, epoch, gammas, schedule, base_lr=None):
    """Set the step-decayed learning rate for ``epoch`` on every param group.

    Starting from the base rate, the rate is multiplied by ``gammas[i]`` for
    every milestone ``schedule[i]`` that ``epoch`` has reached. The scan stops
    at the first milestone not yet reached, so ``schedule`` is expected to be
    in increasing order.

    Arguments
    ---------
    optimizer : object exposing ``param_groups`` (a list of dicts)
    epoch : current epoch number
    gammas : multiplicative factors, one per milestone
    schedule : milestone epochs
    base_lr : initial learning rate; defaults to the command-line
        ``args.learning_rate`` when omitted

    Returns the learning rate that was written to the optimizer.
    """
    lr = args.learning_rate if base_lr is None else base_lr
    assert len(gammas) == len(schedule), "length of gammas and schedule should be equal"
    for (gamma, step) in zip(gammas, schedule):
        if (epoch >= step):
            lr = lr * gamma
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
def _cuda_async(tensor):
    """Move ``tensor`` to the GPU with an asynchronous copy.

    ``async`` became a reserved word in Python 3.7, so the literal call
    ``tensor.cuda(async=True)`` no longer parses. Newer torch releases spell
    the flag ``non_blocking``; older ones only know ``async``, which is passed
    through a dict so this file stays importable everywhere.
    """
    try:
        return tensor.cuda(non_blocking=True)
    except TypeError:
        return tensor.cuda(**{'async': True})


def _scalar(value):
    """Return the single number held by a one-element tensor.

    Uses ``.item()`` where the tensor type provides it and falls back to
    indexing for tensor types that do not.
    """
    return value.item() if hasattr(value, 'item') else value[0]


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k

    Arguments
    ---------
    output : 2D tensor of class scores, one row per sample
    target : 1D tensor of class indices, one per sample
    topk : iterable of k values

    Returns a list with one one-element tensor per k holding the percentage
    of samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` derives from a transposed tensor, so the slice may not
        # be contiguous and ``view`` would refuse it; keepdim keeps the result
        # a one-element tensor so callers can keep indexing it with [0].
        correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


# train function (forward, backward, update)
def train(train_loader, model, criterion, cutout, optimizer, epoch, log):
    """Run one training epoch and return ``(top-1 average, loss average)``.

    Depending on the command-line flags the model is trained on the raw
    batch, on one cutout view, or on two cutout views with an additional
    ``args.alpha``-weighted MSE term between the two outputs.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        use_cutout = args.dualcutout or args.singlecutout
        if use_cutout:
            # Kept as in the original: both views are drawn in either cutout
            # mode, even though single-cutout only consumes the first.
            cutout1 = cutout.apply(images)
            cutout2 = cutout.apply(images)

        if args.use_cuda:
            target = _cuda_async(target)
            images = images.cuda()
            if use_cutout:
                cutout1 = cutout1.cuda()
                cutout2 = cutout2.cuda()

        target_var = Variable(target)

        # compute output
        if use_cutout:
            output1 = model(Variable(cutout1))
            if args.dualcutout:
                output2 = model(Variable(cutout2))
                loss = (criterion(output1, target_var)+criterion(output2, target_var))*0.5 + args.alpha*F.mse_loss(output1, output2)
            else:
                loss = criterion(output1, target_var)
        else:
            output1 = model(Variable(images))
            loss = criterion(output1, target_var)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        if args.dualcutout:
            prec1, prec5 = accuracy((output1.data+output2.data)*0.5, target, topk=(1, 5))
        else:
            prec1, prec5 = accuracy(output1.data, target, topk=(1, 5))
        losses.update(_scalar(loss.data), images.size(0))
        top1.update(_scalar(prec1), images.size(0))
        top5.update(_scalar(prec5), images.size(0))

        # measure elapsed time: exactly one sample per iteration (the meter
        # used to be updated twice, which skewed its average)
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log(' Epoch: [{:03d}][{:03d}/{:03d}] '
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f}) '.format(
                        epoch, i, len(train_loader), batch_time=batch_time,
                        data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)

    print_log(' **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)
    return top1.avg, losses.avg


def validate(val_loader, model, criterion, log):
    """Evaluate ``model`` on ``val_loader`` and return
    ``(top-1 average, loss average)``."""
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    for i, (images, target) in enumerate(val_loader):
        if args.use_cuda:
            target = _cuda_async(target)
            images = images.cuda()
        # NOTE(review): ``volatile`` is the inference flag of the torch
        # release this code was tested with; newer releases use
        # ``torch.no_grad()`` for the same purpose - confirm before upgrading.
        input_var = torch.autograd.Variable(images, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(_scalar(loss.data), images.size(0))
        top1.update(_scalar(prec1), images.size(0))
        top5.update(_scalar(prec5), images.size(0))

    print_log(' **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)

    return top1.avg, losses.avg
def main():
    """Run one complete experiment configured by the module-level ``args``.

    The function
      1. copies the dataset from ``args.home_dir`` to ``args.temp_dir``,
      2. creates the temporary model/result directories and opens the log,
      3. builds the data loaders, network, loss and SGD optimizer,
      4. optionally restores a checkpoint from ``args.resume``,
      5. either evaluates once (``args.evaluate``) and returns, or trains for
         ``args.epochs`` epochs, saving a checkpoint after every epoch,
      6. after training, copies the temporary model/result directories to
         their counterparts under ``args.home_dir`` and removes them.

    Side effects: updates the module-level ``best_acc``.
    """
    global best_acc

    # ---- transfer data from source to current node ----
    print("Copying the dataset to the current node's dir...")

    tmp = args.temp_dir
    home = args.home_dir
    dataset = args.dataset

    data_source_dir = os.path.join(home, 'data', dataset)
    if not os.path.exists(data_source_dir):
        os.makedirs(data_source_dir)
    data_target_dir = os.path.join(tmp, 'data', dataset)
    copy_tree(data_source_dir, data_target_dir)

    # ---- set up the experiment directories ----
    exp_name = experiment_name(arch=args.arch,
                               epochs=args.epochs,
                               dropout=args.dropout,
                               batch_size=args.batch_size,
                               lr=args.learning_rate,
                               momentum=args.momentum,
                               alpha=args.alpha,
                               decay=args.decay,
                               data_aug=args.data_aug,
                               dualcutout=args.dualcutout,
                               singlecutout=args.singlecutout,
                               cutsize=args.cutsize,
                               manualSeed=args.manualSeed,
                               job_id=args.job_id,
                               add_name=args.add_name)
    temp_model_dir = os.path.join(tmp, 'experiments/DualCutout/' + dataset + '/model/' + exp_name)
    temp_result_dir = os.path.join(tmp, 'experiments/DualCutout/' + dataset + '/results/' + exp_name)
    model_dir = os.path.join(home, 'experiments/DualCutout/' + dataset + '/model/' + exp_name)
    result_dir = os.path.join(home, 'experiments/DualCutout/' + dataset + '/results/' + exp_name)

    if not os.path.exists(temp_model_dir):
        os.makedirs(temp_model_dir)

    if not os.path.exists(temp_result_dir):
        os.makedirs(temp_result_dir)

    copy_script_to_folder(os.path.abspath(__file__), temp_result_dir)

    result_png_path = os.path.join(temp_result_dir, 'results.png')

    # The log handle is closed in the ``finally`` below so that it is released
    # on the ``--evaluate`` early return and on errors, and so that the file is
    # fully written before the result directory is copied away.
    log = open(os.path.join(temp_result_dir, 'log.txt'), 'w')
    try:
        print_log('save path : {}'.format(temp_result_dir), log)
        state = dict(args._get_kwargs())
        print_log(state, log)
        print_log("Random Seed: {}".format(args.manualSeed), log)
        print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
        print_log("torch version : {}".format(torch.__version__), log)
        print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

        train_loader, test_loader, num_classes = load_data(
            args.data_aug, args.batch_size, args.workers, args.dataset, data_target_dir)

        print_log("=> creating model '{}'".format(args.arch), log)
        # Init model, criterion, and optimizer
        net = models.__dict__[args.arch](num_classes, args.dropout)
        print_log("=> network :\n {}".format(net), log)

        # define loss function (criterion) and optimizer
        criterion = torch.nn.CrossEntropyLoss()

        optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'],
                                    momentum=state['momentum'],
                                    weight_decay=state['decay'], nesterov=True)

        cutout = Cutout(1, args.cutsize)
        if args.use_cuda:
            net.cuda()
            criterion.cuda()

        recorder = RecorderMeter(args.epochs)
        # optionally resume from a checkpoint
        if args.resume:
            if os.path.isfile(args.resume):
                print_log("=> loading checkpoint '{}'".format(args.resume), log)
                checkpoint = torch.load(args.resume)
                recorder = checkpoint['recorder']
                args.start_epoch = checkpoint['epoch']
                net.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                best_acc = recorder.max_accuracy(False)
                print_log("=> loaded checkpoint '{}' accuracy={} (epoch {})" .format(args.resume, best_acc, checkpoint['epoch']), log)
            else:
                print_log("=> no checkpoint found at '{}'".format(args.resume), log)
        else:
            print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

        if args.evaluate:
            # Evaluation-only run: nothing is copied back to the home dir.
            validate(test_loader, net, criterion, log)
            return

        # Main loop
        start_time = time.time()
        epoch_time = AverageMeter()
        train_loss = []
        train_acc = []
        test_loss = []
        test_acc = []
        for epoch in range(args.start_epoch, args.epochs):
            current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

            need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - epoch))
            need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

            print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate)
                      + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

            # train for one epoch
            tr_acc, tr_los = train(train_loader, net, criterion, cutout, optimizer, epoch, log)

            # evaluate on validation set
            val_acc, val_los = validate(test_loader, net, criterion, log)
            train_loss.append(tr_los)
            train_acc.append(tr_acc)
            test_loss.append(val_los)
            test_acc.append(val_acc)
            recorder.update(epoch, tr_los, tr_acc, val_los, val_acc)

            is_best = val_acc > best_acc
            if is_best:
                best_acc = val_acc

            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }, is_best, temp_model_dir, 'checkpoint.pth.tar')

            # measure elapsed time
            epoch_time.update(time.time() - start_time)
            start_time = time.time()
            recorder.plot_curve(result_png_path)

            # NOTE(review): the per-epoch placement of the log dump and plot is
            # assumed; the nesting is not recoverable from the flattened
            # source -- confirm against the original file.
            train_log = OrderedDict()
            train_log['train_loss'] = train_loss
            train_log['train_acc'] = train_acc
            train_log['test_loss'] = test_loss
            train_log['test_acc'] = test_acc

            # Close the pickle file deterministically instead of leaking it.
            with open(os.path.join(temp_result_dir, 'log.pkl'), 'wb') as log_pickle:
                pickle.dump(train_log, log_pickle)
            plotting(temp_result_dir)
    finally:
        log.close()

    # Persist the artefacts to the home directory, then free the scratch space.
    copy_tree(temp_model_dir, model_dir)
    copy_tree(temp_result_dir, result_dir)

    rmtree(temp_model_dir)
    rmtree(temp_result_dir)
class CifarCaffeNet(nn.Module):
    """Small three-block convolutional network for 3-channel images.

    Each block is conv(s) -> pooling/ReLU -> batch norm; the flattened
    result is fed to a single linear classifier with ``128 * 9`` inputs, so
    the third block must produce a 3x3 spatial map (true for 32x32 inputs).

    Args:
        num_classes: number of output logits.
    """

    def __init__(self, num_classes):
        super(CifarCaffeNet, self).__init__()

        self.num_classes = num_classes

        self.block_1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(),
            nn.BatchNorm2d(32))

        self.block_2 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64))

        self.block_3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(128))

        self.classifier = nn.Linear(128 * 9, self.num_classes)

        # Prefer the in-place initializer name when the installed torch has
        # it; fall back to the older alias otherwise.
        kaiming_normal = getattr(init, 'kaiming_normal_', None) or init.kaiming_normal

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He-style init using fan-out (kernel area * output channels).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                kaiming_normal(m.weight)
                m.bias.data.zero_()

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        # Call the sub-modules rather than their ``forward`` methods so that
        # hooks registered on the blocks are honoured.
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)
class DenseNet(nn.Module):
    """DenseNet with three dense stages separated by two transition layers.

    Args:
        growthRate: channels added by every dense layer.
        depth: nominal depth; each stage gets ``(depth - 4) / 6`` layers when
            ``bottleneck`` is true and ``(depth - 4) / 3`` otherwise.
        reduction: channel compression factor applied at each transition.
        nClasses: number of output classes.
        bottleneck: build stages from ``Bottleneck`` instead of
            ``SingleLayer`` units.

    ``forward`` returns log-probabilities of shape ``(batch, nClasses)``.
    """

    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
        super(DenseNet, self).__init__()

        if bottleneck:
            nDenseBlocks = int((depth - 4) / 6)
        else:
            nDenseBlocks = int((depth - 4) / 3)

        nChannels = 2 * growthRate
        self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1, bias=False)

        self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        self.trans1 = Transition(nChannels, nOutChannels)

        nChannels = nOutChannels
        self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        self.trans2 = Transition(nChannels, nOutChannels)

        nChannels = nOutChannels
        self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
        nChannels += nDenseBlocks * growthRate

        self.bn1 = nn.BatchNorm2d(nChannels)
        self.fc = nn.Linear(nChannels, nClasses)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He-style init using fan-out (kernel area * output channels).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
        """Stack ``nDenseBlocks`` dense layers, growing the channel count."""
        layers = []
        for _ in range(int(nDenseBlocks)):
            if bottleneck:
                layers.append(Bottleneck(nChannels, growthRate))
            else:
                layers.append(SingleLayer(nChannels, growthRate))
            nChannels += growthRate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class log-probabilities with shape ``(batch, nClasses)``."""
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        out = F.avg_pool2d(F.relu(self.bn1(out)), 8)
        # Flatten explicitly: ``torch.squeeze`` would also drop the batch
        # dimension when the batch holds a single sample.
        out = out.view(out.size(0), -1)
        # Normalise over the class dimension explicitly.
        out = F.log_softmax(self.fc(out), dim=1)
        return out
class ResNet(nn.Module):
    """ImageNet-style residual network assembled from a given block type.

    Args:
        block: residual unit class exposing an ``expansion`` attribute and
            accepting ``(inplanes, planes, stride, downsample)``.
        layers: four integers, the number of units in each stage.
        num_classes: size of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        # Running input-channel count consumed and updated by _make_layer.
        self.inplanes = 64
        super(ResNet, self).__init__()

        # Stem: 7x7/2 convolution followed by 3x3/2 max pooling.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; all but the first halve the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` units; the first may downsample."""
        out_planes = planes * block.expansion

        if stride == 1 and self.inplanes == out_planes:
            downsample = None
        else:
            # Projection shortcut to match the new resolution/channel count.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
def resnet50(num_classes=1000):
    """Constructs a ResNet-50 model (``Bottleneck`` units, stages 3-4-6-3).

    No pretrained weights are loaded; the network is freshly initialised.

    Args:
        num_classes (int): number of output classes passed on to ``ResNet``.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes)
    return model


def resnet101(num_classes=1000):
    """Constructs a ResNet-101 model (``Bottleneck`` units, stages 3-4-23-3).

    No pretrained weights are loaded; the network is freshly initialised.

    Args:
        num_classes (int): number of output classes passed on to ``ResNet``.
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
    return model


def resnet152(num_classes=1000):
    """Constructs a ResNet-152 model (``Bottleneck`` units, stages 3-8-36-3).

    No pretrained weights are loaded; the network is freshly initialised.

    Args:
        num_classes (int): number of output classes passed on to ``ResNet``.
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes)
    return model
class PreActBlock(nn.Module):
    '''Basic residual block in pre-activation (BN -> ReLU -> conv) order.

    A 1x1 projection ``shortcut`` is created only when the stride or the
    channel count changes; otherwise the raw input is used as the skip path.
    '''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)

        shape_is_preserved = (stride == 1 and in_planes == out_planes)
        if not shape_is_preserved:
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                                   stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection)

    def forward(self, x):
        activated = F.relu(self.bn1(x))

        # The projection (when present) sees the pre-activated input, while
        # the identity path reuses the untouched ``x``.
        if hasattr(self, 'shortcut'):
            skip = self.shortcut(activated)
        else:
            skip = x

        branch = self.conv1(activated)
        branch = self.conv2(F.relu(self.bn2(branch)))
        branch += skip
        return branch
class PreActResNet(nn.Module):
    """Pre-activation ResNet for 32x32 inputs built from a given block type.

    Args:
        block: residual unit class exposing ``expansion`` and accepting
            ``(in_planes, planes, stride)``.
        num_blocks: four integers, the number of units in each stage.
        num_classes: size of the final linear layer.
        dropout: when true, dropout with ``p=0.5`` is applied to the feature
            map after every stage while the module is in training mode.
    """

    def __init__(self, block, num_blocks, num_classes=10, dropout=False):
        super(PreActResNet, self).__init__()
        self.in_planes = 64
        self.dropout = dropout
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first unit uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for unit_stride in strides:
            layers.append(block(self.in_planes, planes, unit_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        out = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
            if self.dropout:
                # Dropout must act on the running feature map ``out``;
                # applying it to the network input ``x`` has no effect on
                # the result.
                out = F.dropout(out, p=0.5, training=self.training)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
class DownsampleA(nn.Module):
    """Parameter-free shortcut: subsample spatially, then double the channels
    by concatenating a zeroed copy of the subsampled tensor."""

    def __init__(self, nIn, nOut, stride):
        super(DownsampleA, self).__init__()
        assert stride == 2
        # A 1x1 average pool with stride 2 simply picks every second pixel.
        self.avg = nn.AvgPool2d(kernel_size=1, stride=stride)

    def forward(self, x):
        subsampled = self.avg(x)
        padding = subsampled.mul(0)
        return torch.cat((subsampled, padding), 1)


class DownsampleC(nn.Module):
    """Shortcut implemented as a strided 1x1 convolution without bias."""

    def __init__(self, nIn, nOut, stride):
        super(DownsampleC, self).__init__()
        assert stride != 1 or nIn != nOut
        self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=stride,
                              padding=0, bias=False)

    def forward(self, x):
        return self.conv(x)


class DownsampleD(nn.Module):
    """Shortcut implemented as a strided 2x2 convolution plus batch norm."""

    def __init__(self, nIn, nOut, stride):
        super(DownsampleD, self).__init__()
        assert stride == 2
        self.conv = nn.Conv2d(nIn, nOut, kernel_size=2, stride=stride,
                              padding=0, bias=False)
        self.bn = nn.BatchNorm2d(nOut)

    def forward(self, x):
        return self.bn(self.conv(x))
class BasicBlock(nn.Module):
    """Two-convolution residual block (conv-BN-ReLU, conv-BN, add, ReLU).

    The shortcut is an empty ``nn.Sequential`` (identity) unless the stride
    or channel count changes, in which case it is a 1x1 conv plus batch norm.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        projection = []
        if stride != 1 or in_planes != out_planes:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        residual += self.shortcut(x)
        return F.relu(residual)
class ResNet(nn.Module):
    """CIFAR-style residual network (3x3 stem, four stages, 4x4 avg pool).

    Args:
        block: residual unit class exposing ``expansion`` and accepting
            ``(in_planes, planes, stride)``.
        num_blocks: four integers, the number of units in each stage.
        num_classes: size of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running input-channel count consumed and updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        first, second, third, fourth = (num_blocks[i] for i in range(4))
        self.layer1 = self._make_layer(block, 64, first, stride=1)
        self.layer2 = self._make_layer(block, 128, second, stride=2)
        self.layer3 = self._make_layer(block, 256, third, stride=2)
        self.layer4 = self._make_layer(block, 512, fourth, stride=2)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first unit uses ``stride``."""
        units = []
        for unit_stride in [stride] + [1] * (num_blocks - 1):
            units.append(block(self.in_planes, planes, unit_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*units)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
def resnet50(num_classes=10, dropout = False):
    """Constructs a ResNet-50 model (``Bottleneck`` units, stages 3-4-6-3).

    Args:
        num_classes (int): number of output classes passed on to ``ResNet``.
        dropout: accepted but not forwarded to ``ResNet``; it has no effect
            on the returned model.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes)
    return model


def resnet101(num_classes=10, dropout = False):
    """Constructs a ResNet-101 model (``Bottleneck`` units, stages 3-4-23-3).

    Args:
        num_classes (int): number of output classes passed on to ``ResNet``.
        dropout: accepted but not forwarded to ``ResNet``; it has no effect
            on the returned model.
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
    return model


def resnet152(num_classes=10, dropout = False):
    """Constructs a ResNet-152 model (``Bottleneck`` units, stages 3-8-36-3).

    Args:
        num_classes (int): number of output classes passed on to ``ResNet``.
        dropout: accepted but not forwarded to ``ResNet``; it has no effect
            on the returned model.
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes)
    return model
self.bn = nn.BatchNorm2d(D*C) 22 | 23 | self.conv_expand = nn.Conv2d(D*C, planes*4, kernel_size=1, stride=1, padding=0, bias=False) 24 | self.bn_expand = nn.BatchNorm2d(planes*4) 25 | 26 | self.downsample = downsample 27 | 28 | def forward(self, x): 29 | residual = x 30 | 31 | bottleneck = self.conv_reduce(x) 32 | bottleneck = F.relu(self.bn_reduce(bottleneck), inplace=True) 33 | 34 | bottleneck = self.conv_conv(bottleneck) 35 | bottleneck = F.relu(self.bn(bottleneck), inplace=True) 36 | 37 | bottleneck = self.conv_expand(bottleneck) 38 | bottleneck = self.bn_expand(bottleneck) 39 | 40 | if self.downsample is not None: 41 | residual = self.downsample(x) 42 | 43 | return F.relu(residual + bottleneck, inplace=True) 44 | 45 | 46 | class CifarResNeXt(nn.Module): 47 | """ 48 | ResNext optimized for the Cifar dataset, as specified in 49 | https://arxiv.org/pdf/1611.05431.pdf 50 | """ 51 | def __init__(self, block, depth, cardinality, base_width, num_classes, dropout): 52 | super(CifarResNeXt, self).__init__() 53 | 54 | #Model type specifies number of layers for CIFAR-10 and CIFAR-100 model 55 | assert (depth - 2) % 9 == 0, 'depth should be one of 29, 38, 47, 56, 101' 56 | layer_blocks = (depth - 2) // 9 57 | 58 | self.cardinality = cardinality 59 | self.base_width = base_width 60 | self.num_classes = num_classes 61 | self.dropout=dropout 62 | self.conv_1_3x3 = nn.Conv2d(3, 64, 3, 1, 1, bias=False) 63 | self.bn_1 = nn.BatchNorm2d(64) 64 | 65 | self.inplanes = 64 66 | self.stage_1 = self._make_layer(block, 64 , layer_blocks, 1) 67 | self.stage_2 = self._make_layer(block, 128, layer_blocks, 2) 68 | self.stage_3 = self._make_layer(block, 256, layer_blocks, 2) 69 | self.avgpool = nn.AvgPool2d(8) 70 | self.classifier = nn.Linear(256*block.expansion, num_classes) 71 | 72 | for m in self.modules(): 73 | if isinstance(m, nn.Conv2d): 74 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 75 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 76 | elif isinstance(m, nn.BatchNorm2d): 77 | m.weight.data.fill_(1) 78 | m.bias.data.zero_() 79 | elif isinstance(m, nn.Linear): 80 | init.kaiming_normal(m.weight) 81 | m.bias.data.zero_() 82 | 83 | def _make_layer(self, block, planes, blocks, stride=1): 84 | downsample = None 85 | if stride != 1 or self.inplanes != planes * block.expansion: 86 | downsample = nn.Sequential( 87 | nn.Conv2d(self.inplanes, planes * block.expansion, 88 | kernel_size=1, stride=stride, bias=False), 89 | nn.BatchNorm2d(planes * block.expansion), 90 | ) 91 | 92 | layers = [] 93 | layers.append(block(self.inplanes, planes, self.cardinality, self.base_width, stride, downsample)) 94 | self.inplanes = planes * block.expansion 95 | for i in range(1, blocks): 96 | layers.append(block(self.inplanes, planes, self.cardinality, self.base_width)) 97 | 98 | return nn.Sequential(*layers) 99 | 100 | def forward(self, x): 101 | x = self.conv_1_3x3(x) 102 | x = F.relu(self.bn_1(x), inplace=True) 103 | x = self.stage_1(x) 104 | x = self.stage_2(x) 105 | x = self.stage_3(x) 106 | x = self.avgpool(x) 107 | x = x.view(x.size(0), -1) 108 | if self.dropout: 109 | x = F.dropout(x, p=0.5, training=self.training) 110 | return self.classifier(x) 111 | 112 | def resnext29_16_64(num_classes=10,dropout=True): 113 | """Constructs a ResNeXt-29, 16*64d model for CIFAR-10 (by default) 114 | 115 | Args: 116 | num_classes (uint): number of classes 117 | """ 118 | model = CifarResNeXt(ResNeXtBottleneck, 29, 16, 64, num_classes, dropout) 119 | return model 120 | 121 | def resnext29_8_64(num_classes=10, dropout=True): 122 | """Constructs a ResNeXt-29, 8*64d model for CIFAR-10 (by default) 123 | 124 | Args: 125 | num_classes (uint): number of classes 126 | """ 127 | model = CifarResNeXt(ResNeXtBottleneck, 29, 8, 64, num_classes, dropout) 128 | return model 129 | -------------------------------------------------------------------------------- /cifar10/resnext/models/wide_resnet.py: 
def conv_init(m):
    """Initialise a single module in place; meant for ``model.apply(conv_init)``.

    Modules whose class name contains ``'Conv'`` get Xavier-uniform weights
    with gain ``sqrt(2)`` and a zero bias. Modules whose class name contains
    ``'BatchNorm'`` get unit weight and zero bias. Every other module is left
    untouched.

    Parameters that are absent (a convolution built with ``bias=False``, a
    batch norm built with ``affine=False``) are skipped instead of raising.
    """
    # Prefer the in-place names where the installed torch provides them and
    # fall back to the original spellings on releases that predate them.
    xavier_uniform = getattr(init, 'xavier_uniform_', None) or getattr(init, 'xavier_uniform')
    constant = getattr(init, 'constant_', None) or getattr(init, 'constant')

    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            xavier_uniform(weight, gain=np.sqrt(2))
        if bias is not None:
            constant(bias, 0)
    elif 'BatchNorm' in classname:
        if weight is not None:
            constant(weight, 1)
        if bias is not None:
            constant(bias, 0)
self.conv1 = conv3x3(3,nStages[0]) 61 | self.layer1 = self._wide_layer(wide_basic, nStages[1], n, stride=1) 62 | self.layer2 = self._wide_layer(wide_basic, nStages[2], n, stride=2) 63 | self.layer3 = self._wide_layer(wide_basic, nStages[3], n, stride=2) 64 | self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9) 65 | self.linear = nn.Linear(nStages[3], num_classes) 66 | 67 | def _wide_layer(self, block, planes, num_blocks, stride): 68 | strides = [stride] + [1]*(num_blocks-1) 69 | layers = [] 70 | 71 | for stride in strides: 72 | layers.append(block(self.in_planes, planes, stride)) 73 | self.in_planes = planes 74 | 75 | return nn.Sequential(*layers) 76 | 77 | """ 78 | ## Modified WRN architecture### 79 | def __init__(self, depth, widen_factor, dropout_rate, num_classes): 80 | super(Wide_ResNet, self).__init__() 81 | self.in_planes = 16 82 | 83 | assert ((depth-4)%6 ==0), 'Wide-resnet_v2 depth should be 6n+4' 84 | n = (depth-4)/6 85 | k = widen_factor 86 | #self.mixup_hidden = mixup_hidden 87 | 88 | print('| Wide-Resnet %dx%d' %(depth, k)) 89 | nStages = [16, 16*k, 32*k, 64*k] 90 | 91 | self.conv1 = conv3x3(3,nStages[0]) 92 | self.bn1 = nn.BatchNorm2d(nStages[0]) 93 | self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1) 94 | self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2) 95 | self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2) 96 | #self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9) 97 | self.linear = nn.Linear(nStages[3], num_classes) 98 | 99 | def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride): 100 | strides = [stride] + [1]*(num_blocks-1) 101 | layers = [] 102 | 103 | for stride in strides: 104 | layers.append(block(self.in_planes, planes, dropout_rate, stride)) 105 | self.in_planes = planes 106 | 107 | return nn.Sequential(*layers) 108 | """ 109 | def forward(self, x): 110 | #print x.shape 111 | 112 | out = self.conv1(x) 113 | out = self.layer1(out) 
if __name__ == '__main__':
    # Smoke test: build WRN-28-10 for 10 classes and push one CIFAR-sized
    # image through it. The constructor signature is
    # (depth, widen_factor, num_classes); the dropout-rate argument the old
    # call still passed was removed together with dropout, so passing four
    # values raised a TypeError.
    net = Wide_ResNet(28, 10, 10)
    y = net(Variable(torch.randn(1, 3, 32, 32)))

    print(y.size())
class RecorderMeter(object):
    """Stores per-epoch train/val loss and accuracy and tracks the best accuracy.

    Both histories are float32 arrays of shape ``(total_epoch, 2)``; column 0
    holds the training value and column 1 the validation value.
    """
    def __init__(self, total_epoch):
        self.reset(total_epoch)

    def reset(self, total_epoch):
        """Discard all recorded values and size the histories for ``total_epoch`` epochs."""
        assert total_epoch > 0
        self.total_epoch = total_epoch
        self.current_epoch = 0
        # Losses start at -1 and accuracies at 0 until an epoch is recorded.
        self.epoch_losses = np.full((self.total_epoch, 2), -1, dtype=np.float32)  # [epoch, train/val]
        self.epoch_accuracy = np.zeros((self.total_epoch, 2), dtype=np.float32)   # [epoch, train/val]

    def update(self, idx, train_loss, train_acc, val_loss, val_acc):
        """Record the metrics of epoch ``idx``.

        Returns:
            bool: True when the validation accuracy just recorded equals the
            best validation accuracy seen in epochs ``0..idx``.
        """
        assert idx >= 0 and idx < self.total_epoch, 'total_epoch : {} , but update with the {} index'.format(self.total_epoch, idx)
        self.epoch_losses[idx, 0] = train_loss
        self.epoch_losses[idx, 1] = val_loss
        self.epoch_accuracy[idx, 0] = train_acc
        self.epoch_accuracy[idx, 1] = val_acc
        self.current_epoch = idx + 1
        # Compare against the value as stored (float32). Comparing against the
        # caller's raw float can be False for the best epoch itself, because
        # most decimals are not exactly representable in float32.
        return bool(self.max_accuracy(False) == self.epoch_accuracy[idx, 1])

    def max_accuracy(self, istrain):
        """Return the best train (``istrain`` true) or val accuracy so far, or 0 if nothing is recorded."""
        if self.current_epoch <= 0:
            return 0
        column = 0 if istrain else 1
        return self.epoch_accuracy[:self.current_epoch, column].max()

    def plot_curve(self, save_path):
        """Plot accuracy and (x50-scaled) loss curves; save to ``save_path`` unless it is None."""
        title = 'the accuracy/loss curve of train/val'
        dpi = 80
        width, height = 1200, 800
        legend_fontsize = 10
        figsize = width / float(dpi), height / float(dpi)

        fig = plt.figure(figsize=figsize)
        x_axis = np.arange(self.total_epoch)  # epochs

        plt.xlim(0, self.total_epoch)
        plt.ylim(0, 100)
        interval_y = 5
        interval_x = 5
        plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x))
        plt.yticks(np.arange(0, 100 + interval_y, interval_y))
        plt.grid()
        plt.title(title, fontsize=20)
        plt.xlabel('the training epoch', fontsize=16)
        plt.ylabel('accuracy', fontsize=16)

        # (values, scale, color, linestyle, label); losses are scaled by 50 so
        # they are visible on the 0-100 accuracy axis.
        curves = [
            (self.epoch_accuracy[:, 0], 1, 'g', '-', 'train-accuracy'),
            (self.epoch_accuracy[:, 1], 1, 'y', '-', 'valid-accuracy'),
            (self.epoch_losses[:, 0], 50, 'g', ':', 'train-loss-x50'),
            (self.epoch_losses[:, 1], 50, 'y', ':', 'valid-loss-x50'),
        ]
        for values, scale, color, linestyle, label in curves:
            # Each curve gets its own float64 array rather than sharing one buffer.
            y_axis = values.astype(np.float64) * scale
            plt.plot(x_axis, y_axis, color=color, linestyle=linestyle, label=label, lw=2)
            plt.legend(loc=4, fontsize=legend_fontsize)

        if save_path is not None:
            fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
            print ('---- save figure {} into {}'.format(title, save_path))
        plt.close(fig)
class Cutout(object):
    """Randomly mask out one or more square patches from an image tensor.

    Args:
        n_holes (int): Number of patches to cut out.
        length (int): The side length (in pixels) of each square patch.
    """
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def apply(self, img):
        """Zero out ``n_holes`` patches of ``img``.

        Args:
            img (Tensor): A single image of size (C, H, W) or a batch of size
                (N, C, H, W). The same mask is applied to every channel and,
                for a batch, to every image.
        Returns:
            Tensor: A new tensor of the same size with the patches set to 0.
            Patches are clipped at the border, so they may be smaller than
            length x length.
        """
        # The spatial dimensions are the last two for both layouts, so index
        # from the end instead of assuming a 4-D batch.
        h = img.size(-2)
        w = img.size(-1)

        mask = np.ones((h, w), np.float32)
        half = self.length / 2

        for _ in range(self.n_holes):
            # Patch centre; row is drawn before column.
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = int(np.clip(y - half, 0, h))
            y2 = int(np.clip(y + half, 0, h))
            x1 = int(np.clip(x - half, 0, w))
            x2 = int(np.clip(x + half, 0, w))

            mask[y1: y2, x1: x2] = 0.

        # Cast the mask to the image's tensor type so the multiplication also
        # works for images that are not CPU float32 tensors.
        mask = torch.from_numpy(mask).type_as(img)
        mask = mask.expand_as(img)
        return img * mask
def copy_script_to_folder(caller_path, folder):
    """Copy the file at ``caller_path`` into ``folder`` under the same file name.

    Args:
        caller_path (str): path of the script to copy.
        folder (str): existing destination directory.
    """
    # os.path.basename honours the platform's path separators; splitting on
    # '/' returned the whole path for backslash-separated Windows paths.
    script_filename = os.path.basename(caller_path)
    script_relative_path = os.path.join(folder, script_filename)
    # Copying script
    shutil.copy(caller_path, script_relative_path)
36 | transforms.Normalize((0.1307,), (0.3081,)) 37 | ]) 38 | 39 | 40 | kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} 41 | 42 | 43 | 44 | train_loader = torch.utils.data.DataLoader( 45 | datasets.MNIST(data_target_dir, train=True, download=True, transform=transform_train), 46 | batch_size=batch_size, shuffle=True, **kwargs) 47 | test_loader = torch.utils.data.DataLoader( 48 | datasets.MNIST(data_target_dir, train=False, transform=transform_test), 49 | batch_size=test_batch_size, shuffle=True, **kwargs) 50 | 51 | return train_loader, test_loader 52 | 53 | 54 | def load_data(data_aug, batch_size,workers,dataset, data_target_dir): 55 | 56 | if dataset == 'cifar10': 57 | mean = [x / 255 for x in [125.3, 123.0, 113.9]] 58 | std = [x / 255 for x in [63.0, 62.1, 66.7]] 59 | elif dataset == 'cifar100': 60 | mean = [x / 255 for x in [129.3, 124.1, 112.4]] 61 | std = [x / 255 for x in [68.2, 65.4, 70.4]] 62 | 63 | elif dataset == 'svhn': 64 | mean = [x / 255 for x in [127.5, 127.5, 127.5]] 65 | std = [x / 255 for x in [127.5, 127.5, 127.5]] 66 | else: 67 | assert False, "Unknow dataset : {}".format(dataset) 68 | 69 | if data_aug==1: 70 | if dataset == 'svhn': 71 | train_transform = transforms.Compose( 72 | [ transforms.RandomCrop(32, padding=2), transforms.ToTensor(), 73 | transforms.Normalize(mean, std)]) 74 | test_transform = transforms.Compose( 75 | [transforms.ToTensor(), transforms.Normalize(mean, std)]) 76 | else: 77 | train_transform = transforms.Compose( 78 | [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(), 79 | transforms.Normalize(mean, std)]) 80 | test_transform = transforms.Compose( 81 | [transforms.ToTensor(), transforms.Normalize(mean, std)]) 82 | else: 83 | train_transform = transforms.Compose( 84 | [ transforms.ToTensor(), 85 | transforms.Normalize(mean, std)]) 86 | test_transform = transforms.Compose( 87 | [transforms.ToTensor(), transforms.Normalize(mean, std)]) 88 | if dataset == 'cifar10': 
class ReduceLROnPlateau(object):
    """Reduce learning rate when a metric has stopped improving.
    Models often benefit from reducing the learning rate by a factor
    of 2-10 once learning stagnates. This scheduler reads a metrics
    quantity and if no improvement is seen for a 'patience' number
    of epochs, the learning rate is reduced.

    Args:
        optimizer: the wrapped ``torch.optim`` optimizer whose parameter
            groups have their ``'lr'`` entry updated.
        factor: factor by which the learning rate will
            be reduced. new_lr = lr * factor
        patience: number of epochs with no improvement
            after which learning rate will be reduced.
        verbose: int. 0: quiet, 1: update messages.
        mode: one of {min, max}. In `min` mode,
            lr will be reduced when the quantity
            monitored has stopped decreasing; in `max`
            mode it will be reduced when the quantity
            monitored has stopped increasing.
        epsilon: threshold for measuring the new optimum,
            to only focus on significant changes.
        cooldown: number of epochs to wait before resuming
            normal operation after lr has been reduced.
        min_lr: lower bound on the learning rate.

    Raises:
        ValueError: if ``factor`` is >= 1.0.
        RuntimeError: if ``mode`` is neither ``'min'`` nor ``'max'``.

    Example:
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> scheduler = ReduceLROnPlateau(optimizer, 'min')
        >>> for epoch in range(10):
        >>>     train(...)
        >>>     val_acc, val_loss = validate(...)
        >>>     scheduler.step(val_loss, epoch)
    """

    def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
                 verbose=0, epsilon=1e-4, cooldown=0, min_lr=0):
        super(ReduceLROnPlateau, self).__init__()

        if factor >= 1.0:
            raise ValueError('ReduceLROnPlateau '
                             'does not support a factor >= 1.0.')
        self.factor = factor
        self.min_lr = min_lr
        self.epsilon = epsilon
        self.patience = patience
        self.verbose = verbose
        self.cooldown = cooldown
        self.cooldown_counter = 0  # Cooldown counter.
        self.monitor_op = None
        self.wait = 0
        self.best = 0
        self.mode = mode
        assert isinstance(optimizer, Optimizer)
        self.optimizer = optimizer
        self._reset()

    def _reset(self):
        """Resets wait counter and cooldown counter.
        """
        if self.mode not in ['min', 'max']:
            # The message used to contain a bare '%s' with no argument.
            raise RuntimeError('Learning Rate Plateau Reducing mode %s is unknown!' % self.mode)
        if self.mode == 'min':
            self.monitor_op = lambda a, b: np.less(a, b - self.epsilon)
            self.best = np.inf  # np.Inf was removed in NumPy 2.0
        else:
            self.monitor_op = lambda a, b: np.greater(a, b + self.epsilon)
            self.best = -np.inf
        self.cooldown_counter = 0
        self.wait = 0
        # Tolerance used when deciding whether lr is still above min_lr.
        self.lr_epsilon = self.min_lr * 1e-4

    def reset(self):
        """Public alias for ``_reset``."""
        self._reset()

    def step(self, metrics, epoch):
        """Feed the monitored value of one epoch and reduce the lr if it has plateaued.

        Args:
            metrics: value of the monitored quantity; ``None`` only triggers
                a ``RuntimeWarning`` and leaves all state unchanged.
            epoch: epoch number, used only in the verbose message.
        """
        current = metrics
        if current is None:
            warnings.warn('Learning Rate Plateau Reducing requires metrics available!', RuntimeWarning)
            return

        if self.in_cooldown():
            self.cooldown_counter -= 1
            self.wait = 0

        if self.monitor_op(current, self.best):
            self.best = current
            self.wait = 0
        elif not self.in_cooldown():
            if self.wait >= self.patience:
                for param_group in self.optimizer.param_groups:
                    old_lr = float(param_group['lr'])
                    if old_lr > self.min_lr + self.lr_epsilon:
                        new_lr = old_lr * self.factor
                        new_lr = max(new_lr, self.min_lr)
                        param_group['lr'] = new_lr
                        if self.verbose > 0:
                            print('\nEpoch %05d: reducing learning rate to %s.' % (epoch, new_lr))
                        self.cooldown_counter = self.cooldown
                        self.wait = 0
            # Counts epochs without improvement while not in cooldown.
            self.wait += 1

    def in_cooldown(self):
        """Return True while the post-reduction cooldown is still running."""
        return self.cooldown_counter > 0
'train_acc.png' )) 60 | plt.clf() 61 | 62 | 63 | plt.plot(np.asarray(train_dict['test_acc']), label='test_acc') 64 | 65 | #plt.ylim(0,100) 66 | plt.xlabel('evaluation step') 67 | plt.ylabel('metrics') 68 | plt.tight_layout() 69 | plt.legend(loc='upper right') 70 | plt.savefig(os.path.join(exp_dir, 'test_acc.png' )) 71 | plt.clf() 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | if __name__ == '__main__': 81 | plotting('experiments/PB_cnn_mse_pretrained_ne_pretrain100000_ne_posttrain100000_real_data_size1_N10_P2000_') 82 | #plotting_separate_theta('model', 'temp.pkl',3) --------------------------------------------------------------------------------