├── README.md ├── prequential ├── cifar10.py ├── mnist.py └── switch.py └── variational └── var_cifar10.py /README.md: -------------------------------------------------------------------------------- 1 | # The Description Length of Deep Learning Models 2 | 3 | This repo contains the code for the experiments in "The Description Length of Deep Learning Models": https://arxiv.org/pdf/1802.07044.pdf 4 | 5 | For the prequential experiments, we uses Keras. 6 | 7 | For the variational experiments, we uses pytorch and the library PyVarInf: https://github.com/ctallec/pyvarinf 8 | 9 | 10 | -------------------------------------------------------------------------------- /prequential/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | import keras 5 | from keras.datasets import cifar10 6 | from keras.models import Sequential, Model 7 | from keras.layers import ( 8 | Dense, Dropout, Flatten, SpatialDropout2D, BatchNormalization, Input, 9 | Conv2D, MaxPooling2D, ZeroPadding2D, Activation) 10 | from keras import backend as K 11 | from keras.optimizers import RMSprop, Adam 12 | from keras.callbacks import EarlyStopping 13 | from keras.preprocessing.image import ImageDataGenerator 14 | 15 | import matplotlib 16 | matplotlib.use('Agg') 17 | import matplotlib.pyplot as plt 18 | import matplotlib.gridspec as gridspec 19 | from matplotlib.patches import Ellipse 20 | 21 | import pickle as pkl 22 | import pdb 23 | 24 | import os 25 | 26 | batch_size = 32 27 | num_classes = 10 28 | epochs = 10000000 29 | 30 | # input image dimensions 31 | img_rows, img_cols = 32, 32 32 | 33 | # the data, shuffled and split between train and test sets 34 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 35 | 36 | if K.image_data_format() == 'channels_first': 37 | x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols) 38 | x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols) 39 | input_shape = (1, img_rows, img_cols) 40 | else: 41 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3) 42 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3) 43 | input_shape = (img_rows, img_cols, 3) 44 | 45 | class MyNormalisation(): 46 | 47 | def __init__(self): 48 | pass 49 | 50 | def tooltransform(self, imagedata): 51 | imagedatanormed = imagedata / 255 52 | yuv_from_rgb = np.array([[ 0.299 , 0.587 , 0.114 ], 53 | [-0.14714119, -0.28886916, 0.43601035 ], 54 | [ 0.61497538, -0.51496512, -0.10001026 ]]) 55 | 56 | imagedatanormed = np.moveaxis(imagedatanormed, 3, 2) 57 | imagedatanormed = np.dot(yuv_from_rgb, imagedatanormed) 58 | imagedatanormed = np.moveaxis(imagedatanormed, 0, 3) 59 | 60 | return imagedatanormed 61 | 62 | 63 | def fit_transform(self, imagedata): 64 | imagedatanormed = self.tooltransform(imagedata) 65 | 66 | self.mean = imagedatanormed.mean() 67 | imagedatanormed -= self.mean 68 | 69 | self.std = imagedatanormed.std() 70 | imagedatanormed /= self.std 71 | 72 | return imagedatanormed 73 | 74 | def transform(self, imagedata): 75 | imagedatanormed = self.tooltransform(imagedata) 76 | imagedatanormed -= self.mean 77 | imagedatanormed /= self.std 78 | return imagedatanormed 79 | 80 | 81 | x_train = x_train.astype('float32') 82 | x_test = x_test.astype('float32') 83 | mynormalisation = MyNormalisation() 84 | x_train = mynormalisation.fit_transform(x_train) 85 | x_test = mynormalisation.transform(x_test) 86 | 87 | print('x_train shape:', x_train.shape) 88 | 
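# Note on MyNormalisation above: pixel values are rescaled to [0, 1], converted
# from RGB to YUV with the standard conversion matrix, then standardised with a
# single global mean and standard deviation fitted on the training set and
# reused unchanged on the test set.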
print(x_train.shape[0], 'train samples') 89 | print(x_test.shape[0], 'test samples') 90 | 91 | # convert class vectors to binary class matrices 92 | y_train = keras.utils.to_categorical(y_train, num_classes) 93 | y_test = keras.utils.to_categorical(y_test, num_classes) 94 | 95 | def make_model_vgg(): 96 | input_ = Input(shape=(32,32,3)) 97 | x = input_ 98 | 99 | def convbnrelu(nfilters): 100 | def fun(input_): 101 | x = ZeroPadding2D((1, 1))(input_) 102 | x = Conv2D(nfilters, kernel_size=(3,3))(x) 103 | x = BatchNormalization()(x) 104 | x = Activation("relu")(x) 105 | return x 106 | return fun 107 | 108 | def vgglayer(nlayers, nfilters, dropout=0.4): 109 | def fun(input_): 110 | x = input_ 111 | for _ in range(nlayers - 1): 112 | x = convbnrelu(nfilters)(x) 113 | x = SpatialDropout2D(dropout)(x) 114 | x = convbnrelu(nfilters)(x) 115 | x = MaxPooling2D(pool_size=(2,2))(x) 116 | return x 117 | return fun 118 | 119 | x = vgglayer(2, 64, dropout=0.3)(x) 120 | x = vgglayer(2, 128)(x) 121 | x = vgglayer(3, 256)(x) 122 | x = vgglayer(3, 512)(x) 123 | x = vgglayer(3, 512)(x) 124 | 125 | x = Flatten()(x) 126 | x = Dropout(0.5)(x) 127 | x = Dense(512)(x) 128 | x = BatchNormalization()(x) 129 | x = Dropout(0.5)(x) 130 | output = Dense(num_classes, activation='softmax')(x) 131 | optim = Adam(lr=0.0001) 132 | 133 | model = Model(inputs=[input_], output=[output]) 134 | 135 | model.compile(loss=keras.losses.categorical_crossentropy, 136 | optimizer=optim, 137 | metrics=['accuracy']) 138 | return model 139 | 140 | 141 | def make_model_tinycnn(): 142 | model = Sequential() 143 | model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) 144 | model.add(Conv2D(32, kernel_size=(3, 3), 145 | activation='relu')) 146 | model.add(SpatialDropout2D(0.3)) 147 | model.add(ZeroPadding2D((1, 1))) 148 | model.add(Conv2D(32, (3, 3), activation='relu')) 149 | model.add(MaxPooling2D(pool_size=(2, 2))) 150 | 151 | model.add(ZeroPadding2D((1, 1))) 152 | model.add(Conv2D(32, (3, 3), activation='relu')) 153 | model.add(SpatialDropout2D(0.4)) 154 | model.add(ZeroPadding2D((1, 1))) 155 | model.add(Conv2D(32, (3, 3), activation='relu')) 156 | model.add(MaxPooling2D(pool_size=(2, 2))) 157 | 158 | model.add(Flatten()) 159 | 160 | model.add(Dropout(0.5)) 161 | model.add(Dense(256, activation='relu')) 162 | 163 | 164 | model.add(Dense(256, activation='relu')) 165 | model.add(Dropout(0.5)) 166 | model.add(Dense(num_classes, activation='softmax')) 167 | optim = Adam(lr=0.0001) 168 | 169 | model.compile(loss=keras.losses.categorical_crossentropy, 170 | optimizer=optim, 171 | metrics=['accuracy']) 172 | return model 173 | 174 | def make_model_mlp(): 175 | model = Sequential() 176 | model.add(Flatten(input_shape=input_shape)) 177 | model.add(Dense(512, activation='relu')) 178 | model.add(Dropout(0.2)) 179 | model.add(Dense(512, activation='relu')) 180 | model.add(Dropout(0.2)) 181 | model.add(Dense(10, activation='softmax')) 182 | 183 | 184 | model.compile(loss='categorical_crossentropy', 185 | optimizer=optim, 186 | metrics=['accuracy']) 187 | return model 188 | 189 | 190 | def make_model_shallow(): 191 | model = Sequential() 192 | model.add(Flatten(input_shape=input_shape)) 193 | model.add(Dense(5000, activation='relu')) 194 | #model.add(Dropout(0.2)) 195 | model.add(Dense(10, activation='softmax')) 196 | 197 | optim = Adam(lr=0.00001) 198 | model.compile(loss='categorical_crossentropy', 199 | optimizer=optim, 200 | metrics=['accuracy']) 201 | return model 202 | 203 | 204 | 205 | 206 | loss_train = [] 207 | loss_test = [] 208 | 
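# Note: make_model_mlp above compiles with an `optim` name that is never defined
# in that function and would raise a NameError if called. A corrected sketch,
# assuming an Adam optimiser as in the other model constructors (the learning
# rate is illustrative):
def make_model_mlp_fixed():
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.0001),
                  metrics=['accuracy'])
    return model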
acc_train = [] 209 | acc_test = [] 210 | histlist = [] 211 | 212 | modelsscoreslist = [] 213 | 214 | print("Already computed models : ", [m["shortdescription"] for m in modelsscoreslist]) 215 | v = 0 216 | 217 | 218 | cb1 = EarlyStopping(monitor='loss', min_delta=0.005, patience=500, verbose=1, mode='auto') 219 | cb2 = EarlyStopping(monitor='loss', min_delta=0.005, patience=50, verbose=1, mode='auto') 220 | 221 | datagen = ImageDataGenerator( 222 | width_shift_range=0.2, 223 | height_shift_range=0.2, 224 | horizontal_flip=True) 225 | 226 | class MyImageGenerator(): 227 | def __init__(self, datagen, imgs, labels, batch_size): 228 | self.datagen = datagen.flow(imgs, labels, batch_size=batch_size) 229 | self.labels = labels 230 | self.batch_size = batch_size 231 | 232 | def __iter__(self): 233 | return self 234 | def __next__(self): 235 | return self.next() 236 | 237 | def next(self): 238 | x, y = next(self.datagen) 239 | #x = x[2:-2,2:-2] 240 | return (x,y) 241 | 242 | datagen.fit(x_train) 243 | 244 | 245 | def computescores(modelgenerator, description, shortdescription, 246 | x_train, indexes, **kwargs): 247 | model = modelgenerator() 248 | model.summary() 249 | v = 0 250 | validation_data = None 251 | #loss_test = [] 252 | #acc_test = [] 253 | #ltrain = [] 254 | #atrain = [] 255 | modelsscoreslist.append(None) 256 | 257 | 258 | for k, idx in enumerate(indexes): 259 | print("===> Training with %d training samples."%(idx)) 260 | model = modelgenerator() 261 | x_reduced_train = x_train[:idx] 262 | y_reduced_train = y_train[:idx] 263 | 264 | mygen = MyImageGenerator(datagen, x_reduced_train, 265 | y_reduced_train, batch_size) 266 | 267 | 268 | if k == len(indexes) - 1: 269 | x_valid = x_train[idx:] 270 | y_valid = y_train[idx:] 271 | else: 272 | x_valid = x_train[idx:indexes[k+1]] 273 | y_valid = y_train[idx:indexes[k+1]] 274 | 275 | #v=1 276 | if idx > 10000: 277 | v = 1 278 | cb = cb2 279 | else: 280 | v=2 281 | cb = cb1 282 | 283 | validation_data = (x_valid, y_valid) 284 | 285 | 286 | 287 | steps_per_epoch = int(np.ceil(idx/batch_size )) 288 | hist = model.fit_generator(mygen, steps_per_epoch, 289 | validation_data=validation_data, 290 | verbose=1, callbacks=[cb], 291 | **kwargs) 292 | 293 | histlist.append(hist.history) 294 | 295 | score = model.evaluate(x_valid, y_valid, verbose=0) 296 | 297 | loss_test.append(score[0]) 298 | acc_test.append(score[1]) 299 | ltrain = hist.history["loss"][-1] 300 | atrain = hist.history["acc"][-1] 301 | 302 | acc_train.append(atrain) 303 | loss_train.append(ltrain) 304 | 305 | print("Loss : %.3f Accuracy : %.2f Loss train : %.3f Accuracy train %.2f" % (loss_test[-1], acc_test[-1], loss_train[-1], acc_train[-1])) 306 | 307 | rdict = {"description":description, "shortdescription":shortdescription, 308 | "indexes":indexes, "histories":histlist} 309 | 310 | modelsscoreslist[-1] = rdict 311 | with open("metrics.pkl", "wb") as f: 312 | pkl.dump(modelsscoreslist, f) 313 | 314 | return rdict 315 | 316 | 317 | 318 | minidx = num_classes 319 | geomparam = 2 320 | maxk = int(np.floor( (np.log(x_train.shape[0]) - np.log(num_classes)) / np.log(geomparam))) 321 | indexes = [int(np.floor(num_classes * geomparam ** k)) for k in range(maxk + 1 )] 322 | #indexes = indexes[-1:] 323 | 324 | modelscores = computescores(make_model_shallow, 325 | "Shallow1 : Shallow network with width 5000", 326 | "Shallow1", x_train, indexes, epochs=epochs) 327 | 328 | 329 | 330 | modelsscoreslist.append(modelscores) 331 | 332 | 333 | 
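# A minimal sketch (not part of the original script) of how the per-chunk
# validation losses gathered above turn into a prequential description length:
# the first indexes[0] labels are encoded with the uniform code (log2(10) bits
# each), and each subsequent chunk is encoded with the model trained on all
# preceding samples, at its cross-entropy on that chunk (Keras losses are in
# nats, hence the division by log 2). The names below are illustrative.
def prequential_codelength_bits(indexes, chunk_val_losses, n_samples, n_classes=10):
    total = indexes[0] * np.log2(n_classes)
    for k, start in enumerate(indexes):
        end = indexes[k + 1] if k + 1 < len(indexes) else n_samples
        total += (end - start) * chunk_val_losses[k] / np.log(2)
    return total
# Compression ratio relative to the uniform code, assuming chunk_val_losses
# holds the final val_loss of each chunk:
# prequential_codelength_bits(indexes, chunk_val_losses, x_train.shape[0]) \
#     / (x_train.shape[0] * np.log2(10))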
-------------------------------------------------------------------------------- /prequential/mnist.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 6 | ''' 7 | from __future__ import print_function 8 | import numpy as np 9 | 10 | import keras 11 | from keras.datasets import mnist 12 | from keras.models import Sequential, Model 13 | from keras.layers import ( 14 | Dense, Dropout, Flatten, SpatialDropout2D, BatchNormalization, Input, 15 | Conv2D, MaxPooling2D, ZeroPadding2D, Activation) 16 | from keras import backend as K 17 | from keras.optimizers import RMSprop, Adam 18 | from keras.callbacks import EarlyStopping 19 | from keras.preprocessing.image import ImageDataGenerator 20 | 21 | import matplotlib 22 | matplotlib.use('Agg') 23 | import matplotlib.pyplot as plt 24 | import matplotlib.gridspec as gridspec 25 | from matplotlib.patches import Ellipse 26 | 27 | import pickle as pkl 28 | import pdb 29 | 30 | import os 31 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 32 | 33 | batch_size = 32 34 | num_classes = 10 35 | epochs = 10000000 36 | 37 | # input image dimensions 38 | img_rows, img_cols = 28, 28 39 | 40 | # the data, shuffled and split between train and test sets 41 | 42 | def customload_data(path): 43 | f = np.load(path) 44 | x_train, y_train = f['x_train'], f['y_train'] 45 | x_test, y_test = f['x_test'], f['y_test'] 46 | f.close() 47 | return (x_train, y_train), (x_test, y_test) 48 | (x_train, y_train), (x_test, y_test) = customload_data("mnist.npz") 49 | 50 | if K.image_data_format() == 'channels_first': 51 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 52 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 53 | input_shape = (1, img_rows, img_cols) 54 | else: 55 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 56 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 57 | input_shape = (img_rows, img_cols, 1) 58 | 59 | class MyNormalisation(): 60 | 61 | def __init__(self): 62 | pass 63 | 64 | def tooltransform(self, imagedata): 65 | imagedatanormed = imagedata / 255 66 | yuv_from_rgb = np.array([[ 0.299 , 0.587 , 0.114 ], 67 | [-0.14714119, -0.28886916, 0.43601035 ], 68 | [ 0.61497538, -0.51496512, -0.10001026 ]]) 69 | 70 | #imagedatanormed = np.moveaxis(imagedatanormed, 3, 2) 71 | #imagedatanormed = np.dot(yuv_from_rgb, imagedatanormed) 72 | #imagedatanormed = np.moveaxis(imagedatanormed, 0, 3) 73 | 74 | return imagedatanormed 75 | 76 | 77 | def fit_transform(self, imagedata): 78 | imagedatanormed = self.tooltransform(imagedata) 79 | 80 | self.mean = imagedatanormed.mean() 81 | imagedatanormed -= self.mean 82 | 83 | self.std = imagedatanormed.std() 84 | imagedatanormed /= self.std 85 | 86 | return imagedatanormed 87 | 88 | def transform(self, imagedata): 89 | imagedatanormed = self.tooltransform(imagedata) 90 | imagedatanormed -= self.mean 91 | imagedatanormed /= self.std 92 | return imagedatanormed 93 | 94 | 95 | x_train = x_train.astype('float32') 96 | x_test = x_test.astype('float32') 97 | mynormalisation = MyNormalisation() 98 | x_train = mynormalisation.fit_transform(x_train) 99 | x_test = mynormalisation.transform(x_test) 100 | 101 | print('x_train shape:', x_train.shape) 102 | print(x_train.shape[0], 'train samples') 103 | print(x_test.shape[0], 'test samples') 104 | 
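# In this MNIST version the RGB -> YUV step of MyNormalisation is commented out
# (the images are single-channel), so the normalisation reduces to dividing by
# 255 followed by subtraction of a global mean and division by a global standard
# deviation, both fitted on the training set.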
105 | # convert class vectors to binary class matrices 106 | y_train = keras.utils.to_categorical(y_train, num_classes) 107 | y_test = keras.utils.to_categorical(y_test, num_classes) 108 | 109 | def make_model_vgg(): 110 | input_ = Input(shape=(28,28,1)) 111 | x = input_ 112 | 113 | def convbnrelu(nfilters): 114 | def fun(input_): 115 | x = ZeroPadding2D((1, 1))(input_) 116 | x = Conv2D(nfilters, kernel_size=(3,3))(x) 117 | x = BatchNormalization()(x) 118 | x = Activation("relu")(x) 119 | return x 120 | return fun 121 | 122 | def vgglayer(nlayers, nfilters, dropout=0.4): 123 | def fun(input_): 124 | x = input_ 125 | for _ in range(nlayers - 1): 126 | x = convbnrelu(nfilters)(x) 127 | x = SpatialDropout2D(dropout)(x) 128 | x = convbnrelu(nfilters)(x) 129 | x = MaxPooling2D(pool_size=(2,2))(x) 130 | return x 131 | return fun 132 | 133 | x = vgglayer(2, 32, dropout=0.3)(x) 134 | x = vgglayer(2, 64)(x) 135 | x = vgglayer(2, 128)(x) 136 | x = vgglayer(2, 256)(x) 137 | #x = vgglayer(3, 256)(x) 138 | 139 | x = Flatten()(x) 140 | x = Dropout(0.5)(x) 141 | x = Dense(256)(x) 142 | x = Dropout(0.5)(x) 143 | x = Dense(256)(x) 144 | x = Dropout(0.5)(x) 145 | output = Dense(num_classes, activation='softmax')(x) 146 | optim = Adam(lr=0.001) 147 | 148 | model = Model(inputs=[input_], output=[output]) 149 | 150 | model.compile(loss=keras.losses.categorical_crossentropy, 151 | optimizer=optim, 152 | metrics=['accuracy']) 153 | return model 154 | 155 | 156 | 157 | def make_model_mlp(): 158 | model = Sequential() 159 | model.add(Flatten(input_shape=input_shape)) 160 | model.add(Dense(256, activation='relu')) 161 | model.add(Dropout(0.2)) 162 | model.add(Dense(256, activation='relu')) 163 | model.add(Dropout(0.2)) 164 | model.add(Dense(10, activation='softmax')) 165 | 166 | 167 | model.compile(loss='categorical_crossentropy', 168 | optimizer=Adam(), 169 | metrics=['accuracy']) 170 | return model 171 | 172 | 173 | 174 | loss_train = [] 175 | loss_test = [] 176 | acc_train = [] 177 | acc_test = [] 178 | histlist = [] 179 | 180 | 181 | modelsscoreslist = [] 182 | 183 | v = 0 184 | 185 | 186 | cb1 = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=500, verbose=1, mode='auto') 187 | cb2 = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=50, verbose=1, mode='auto') 188 | 189 | datagen = ImageDataGenerator( 190 | width_shift_range=0.2, 191 | height_shift_range=0.2, 192 | horizontal_flip=True) 193 | 194 | class MyImageGenerator(): 195 | def __init__(self, datagen, imgs, labels, batch_size): 196 | self.datagen = datagen.flow(imgs, labels, batch_size=batch_size) 197 | self.labels = labels 198 | self.batch_size = batch_size 199 | 200 | def __iter__(self): 201 | return self 202 | def __next__(self): 203 | return self.next() 204 | 205 | def next(self): 206 | x, y = next(self.datagen) 207 | #x = x[2:-2,2:-2] 208 | return (x,y) 209 | 210 | datagen.fit(x_train) 211 | 212 | 213 | def computescores(modelgenerator, description, shortdescription, 214 | x_train, indexes, **kwargs): 215 | model = modelgenerator() 216 | model.summary() 217 | v = 0 218 | validation_data = None 219 | #loss_test = [] 220 | #acc_test = [] 221 | #ltrain = [] 222 | #atrain = [] 223 | 224 | for k, idx in enumerate(indexes): 225 | print("===> Training with %d training samples."%(idx)) 226 | model = modelgenerator() 227 | x_reduced_train = x_train[:idx] 228 | y_reduced_train = y_train[:idx] 229 | 230 | mygen = MyImageGenerator(datagen, x_reduced_train, 231 | y_reduced_train, batch_size) 232 | 233 | 234 | if k == len(indexes) - 1: 235 | 
x_valid = x_train[idx:] 236 | y_valid = y_train[idx:] 237 | else: 238 | x_valid = x_train[idx:indexes[k+1]] 239 | y_valid = y_train[idx:indexes[k+1]] 240 | 241 | #v=1 242 | if idx > 10000: 243 | v = 1 244 | cb = cb2 245 | else: 246 | v=2 247 | cb = cb1 248 | 249 | validation_data = (x_valid, y_valid) 250 | 251 | 252 | 253 | steps_per_epoch = int(np.ceil(idx/batch_size )) 254 | hist = model.fit_generator(mygen, steps_per_epoch, 255 | validation_data=validation_data, 256 | verbose=v, callbacks=[cb], 257 | **kwargs) 258 | histlist.append(hist.history) 259 | 260 | score = model.evaluate(x_valid, y_valid, verbose=0) 261 | 262 | loss_test.append(score[0]) 263 | acc_test.append(score[1]) 264 | ltrain = hist.history["loss"][-1] 265 | atrain = hist.history["acc"][-1] 266 | 267 | acc_train.append(atrain) 268 | loss_train.append(ltrain) 269 | 270 | print("Loss : %.3f Accuracy : %.2f Loss train : %.3f Accuracy train %.2f" % (loss_test[-1], acc_test[-1], loss_train[-1], acc_train[-1])) 271 | 272 | rdict = {"description":description, "shortdescription":shortdescription, 273 | "indexes":indexes, "histories":histlist} 274 | return rdict 275 | 276 | 277 | 278 | minidx = num_classes 279 | geomparam = 2 280 | maxk = int(np.floor( (np.log(x_train.shape[0]) - np.log(num_classes)) / np.log(geomparam))) 281 | indexes = [int(np.floor(num_classes * geomparam ** k)) for k in range(maxk + 1 )] 282 | #indexes = indexes[-1:] 283 | print("Indexes : ", indexes) 284 | 285 | modelscores = computescores(make_model_vgg, 286 | "VGG : Same VGG than for CIFAR", 287 | "VGG", x_train, indexes, epochs=epochs) 288 | 289 | -------------------------------------------------------------------------------- /prequential/switch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import matplotlib.pyplot as plt 7 | import matplotlib.gridspec as gridspec 8 | from matplotlib.patches import Ellipse 9 | 10 | import pdb 11 | 12 | def plot_metrics_samples(modelsscoreslist, namefile, subplots=None, 13 | num_classes=10, datasetshape=50000): 14 | maxindexes = max(m["indexes"][-1] for m in modelsscoreslist) 15 | 16 | 17 | 18 | if subplots is None: 19 | subplots = ["val_loss", "loss", "val_acc", "acc", "cost", "comprate", "costlab"] 20 | n_subplots = len(subplots) 21 | #gs = gridspec.GridSpec(3, 1) 22 | #gs.update(left=0.05, right=0.48, wspace=0.05) 23 | #fig, axes = plt.subplots(n_subplots, 1, figsize=(8,3*n_subplots)) 24 | fig, axes = plt.subplots(n_subplots // 2, 2, figsize=(10, 5)) 25 | 26 | 27 | # Loss plot 28 | def loss_subplot(losskey, title, ax): 29 | #ax.set_title(title) 30 | for m in [m for m in modelsscoreslist if losskey in m]: 31 | mloss = m[losskey] 32 | ax.plot(m["indexes"], mloss, label=m["shortdescription"], 33 | linewidth=1., alpha=0.7, color=m.get("color"), 34 | linestyle=m.get("linestyle")) 35 | ax.set_yscale('log') 36 | ax.set_xlim([0., datasetshape]) 37 | #ax.set_xlabel('Mini-batch-number') 38 | ax.set_ylabel('Loss (log-scale)') 39 | ax.set_xlabel('Number of samples') 40 | ax.legend(loc="upper right",fontsize=8) 41 | #ax.set_xscale('log') 42 | ax.get_yaxis().set_label_coords(-0.1,0.5) 43 | 44 | 45 | 46 | def acc_subplot(acckey, title, ax): 47 | #ax.set_title(title) 48 | #ax_acc.grid(axis='y', color='k', linewidth=0.2) 49 | for m in [m for m in modelsscoreslist if acckey in m]: 50 | ax.plot(m["indexes"], m[acckey], 51 | label=m["shortdescription"], linewidth=1., alpha=0.7, 52 | 
color=m.get("color"), linestyle=m.get("linestyle")) 53 | ax.set_xlim([0., datasetshape]) 54 | ax.set_ylim([0., 1.]) 55 | #ax_acc.set_xlabel('Mini-batch-number') 56 | ax.set_ylabel('Accuracy on the next\ndata pack (%)') 57 | ax.set_xlabel('Number of samples') 58 | ax.legend(loc="lower right",fontsize=8., ncol=2) 59 | ax.get_yaxis().set_label_coords(-0.1,0.5) 60 | 61 | 62 | # Cost plot 63 | def cost_subplot(title, ax): 64 | #ax.set_title(title) 65 | 66 | ##### A SUPPRIMER 67 | _, costbase, _, _ = modelsscoreslist[0]["cost"] 68 | for m in [m for m in modelsscoreslist if "cost" in m]: 69 | indexes_cost, cost, _, _ = m["cost"] 70 | ax.plot(indexes_cost, (cost - costbase)/1000, 71 | label=m["shortdescription"], 72 | linewidth=1., alpha=0.7, 73 | color=m.get("color"), 74 | linestyle=m.get("linestyle")) 75 | print(m["shortdescription"], cost[-1]) 76 | #ax.text(indexes_cost[-1] + 100, cost[-1], 77 | # str(int(cost[-1])), fontsize=6., 78 | # #color=m.get("color"), 79 | # ) 80 | 81 | #ax.legend(loc="lower left",fontsize=8.) 82 | 83 | 84 | #ax_loss.set_xlabel('Mini-batch-number') 85 | ax.set_ylabel('Cumulative encoding cost\n(difference with uniform) (kbits)') 86 | #ax.set_xlabel('Number of samples') 87 | ax.set_xlim([0., datasetshape]) 88 | ax.get_yaxis().set_label_coords(-0.1,0.5) 89 | 90 | 91 | def costlab_subplot(title, ax): 92 | #ax.set_title(title) 93 | for m in [m for m in modelsscoreslist if "cost" in m]: 94 | indexes_cost, _, _, costlab = m["cost"] 95 | ax.plot(indexes_cost, costlab, 96 | label=m["shortdescription"], 97 | linewidth=1., alpha=0.7, 98 | color=m.get("color"), 99 | linestyle=m.get("linestyle")) 100 | #ax.legend(loc="upper right",fontsize=8.) 101 | 102 | #ax_loss.set_xlabel('Mini-batch-number') 103 | ax.set_ylabel('Encoding cost per \nsample (bits)') 104 | ax.set_ylim([0., 2*np.log2(10)]) 105 | #ax.set_xlabel('Number of samples') 106 | ax.set_xlim([0., datasetshape]) 107 | #ax.set_yscale('log') 108 | ax.get_yaxis().set_label_coords(-0.1,0.5) 109 | 110 | 111 | def compressionrate_subplot(title, ax): 112 | #ax.set_title(title) 113 | for m in [m for m in modelsscoreslist if "cost" in m]: 114 | indexes_cost, _, comprate, _ = m["cost"] 115 | ax.plot(indexes_cost, comprate, 116 | label=m["shortdescription"], 117 | linewidth=1., alpha=0.7, 118 | color=m.get("color"), 119 | linestyle=m.get("linestyle")) 120 | 121 | ax.set_xlabel('Number of samples') 122 | ax.set_ylabel('Compression ratio') 123 | ax.set_xlim([0., datasetshape]) 124 | ax.set_ylim([0., 2.]) 125 | #ax.set_yscale('log') 126 | #ax.legend(bbox_to_anchor=(0., -.6, 1., -1.6), ncol=2, mode="expand", loc=3, borderaxespad=0., fontsize=10.) 
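# The compression ratio plotted in this subplot is the cumulative prequential
# codelength of the labels divided by the codelength of the uniform code
# (log2(10) bits per label); it corresponds to the `compressionbound` array
# returned by costfun below.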
127 | ax.get_yaxis().set_label_coords(-0.1,0.5) 128 | 129 | 130 | 131 | 132 | for subp, ax in zip(subplots, axes.flat): 133 | if subp == "val_loss": 134 | loss_subplot("val_loss", "Loss (evaluated on the next pack of data)", ax) 135 | if subp == "loss": 136 | loss_subplot("loss", "Loss (train)", ax) 137 | if subp == "val_acc": 138 | acc_subplot("val_acc", "Accuracy (evaluated on the next pack of data)", ax) 139 | if subp == "acc": 140 | acc_subplot("acc", "Accuracy (train)", ax) 141 | if subp == "cost": 142 | cost_subplot("Cumulative encoding cost (difference with uniform encoding cost)", ax) 143 | if subp == "comprate": 144 | compressionrate_subplot("Compression rate", ax) 145 | if subp == "costlab": 146 | costlab_subplot("Encoding cost for each label", ax) 147 | 148 | 149 | 150 | 151 | fig.tight_layout() 152 | plt.savefig(namefile, format="eps") 153 | 154 | 155 | def costfun(indexes, loss, initial_cost, datasetshape, interpolation=False): 156 | 157 | indexes_cost = np.arange(datasetshape + 1) 158 | uniform_cost = initial_cost * indexes_cost 159 | loss_cost = np.zeros(datasetshape + 1) 160 | for k, idx in enumerate(indexes): 161 | if k == len(indexes) - 1: 162 | maxidx = datasetshape + 1 163 | loss_cost[idx:] = loss[-1] 164 | else: 165 | for t in range(idx, indexes[k+1]): 166 | if interpolation: 167 | loss_cost[t] = loss[k] + (t - idx) / (indexes[k+1] - idx) * \ 168 | (loss[k+1] - loss[k]) 169 | else: 170 | loss_cost[t] = loss[k] 171 | #loss_cost = np.zeros(len(loss) +1 ) 172 | loss_cost[:indexes[0]] = initial_cost 173 | #loss_cost[1:] = loss 174 | 175 | cost = loss_cost - initial_cost 176 | 177 | compressionbound = loss_cost.cumsum() / uniform_cost 178 | #cost[1:] = (loss_cost - initial_cost) * (indexes_cost[1:] - indexes_cost[:-1]) 179 | cost = cost.cumsum() 180 | 181 | #cost = cost - 182 | return indexes_cost, cost, compressionbound, loss_cost 183 | 184 | 185 | def switch_loss(modelsscoreslist, num_classes, datasetshape, interpolate=False): 186 | initial_cost = np.log(num_classes) / np.log(2) # HERE IN BITS 187 | maxindexes = datasetshape 188 | 189 | indexes = list(range(maxindexes)) 190 | switchloss = [initial_cost for _ in range(maxindexes)] 191 | for m in modelsscoreslist: 192 | mloss = m["val_loss"] 193 | 194 | for k, idx in enumerate(m["indexes"]): 195 | if k == len(m["indexes"]) - 1: 196 | maxrange = maxindexes 197 | else: 198 | maxrange = m["indexes"][k+1] 199 | for t in range(idx, maxrange): 200 | if k == len(m["indexes"]) - 1: 201 | switchloss[t] = min(mloss[k], switchloss[t]) 202 | else: 203 | if interpolate: 204 | interp = m["val_loss"][k] + \ 205 | (t - idx) / (m["indexes"][k+1] - idx) * \ 206 | (m["val_loss"][k+1] - m["val_loss"][k]) 207 | else: 208 | interp = mloss[k] 209 | switchloss[t] = min(interp, switchloss[t]) 210 | 211 | switchdict = {"description":"Switch", "shortdescription":"Switch", 212 | "indexes":indexes, 213 | "val_loss":switchloss, 214 | "color":"r", 215 | "linestyle":"--", 216 | #"acc_test":(1/num_classes)*np.ones(maxindexes), 217 | #"loss_train":np.log(num_classes)*np.ones(maxindexes), 218 | #"acc_train":(1/num_classes)*np.ones(maxindexes), 219 | } 220 | 221 | return switchdict 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | def makemodelscoreslist(modellist, num_classes=10, datasetshape=50000, autoswitch="none"): 231 | modelsscoreslist = [] 232 | maxindexes = max(m["indexes"][-1] for m in modellist) 233 | 234 | uniform = {"description":"Uniform random", "shortdescription":"uniform", 235 | "indexes":np.arange(maxindexes), 236 | 
"val_loss":np.log2(num_classes)*np.ones(maxindexes), 237 | "val_acc":(1/num_classes)*np.ones(maxindexes), 238 | "loss":np.log2(num_classes)*np.ones(maxindexes), 239 | "acc":(1/num_classes)*np.ones(maxindexes), 240 | "linestyle":":", 241 | "color":"k"} 242 | 243 | modelsscoreslist.append(uniform) 244 | 245 | if True: #if in bits 246 | for m in modellist: 247 | if "histories" in m: 248 | for key in ["loss", "val_loss"]: 249 | for h in m["histories"]: 250 | h[key] /= np.log(2) 251 | else: 252 | if "loss_train" in m: 253 | m["loss_train"] = m["loss_train"] / np.log(2) 254 | 255 | m["loss_test"] = m["loss_test"] / np.log(2) 256 | 257 | 258 | 259 | 260 | 261 | for m in modellist: 262 | newm = {} 263 | for key in ["indexes", "description", "shortdescription"]: 264 | newm[key] = m[key] 265 | 266 | 267 | if "histories" in m: 268 | for key in ["loss", "acc", "val_loss", "val_acc"]: 269 | newm[key] = [h[key][-1] for h in m["histories"]] 270 | if autoswitch == "none" or autoswitch == "both": 271 | modelsscoreslist.append(newm) 272 | else: 273 | if "loss_train" in m: 274 | newm["loss"] = m["loss_train"] 275 | if "acc_train" in m: 276 | newm["acc"] = m["acc_train"] 277 | newm["val_loss"] = m["loss_test"] 278 | newm["val_acc"] = m["acc_test"] 279 | modelsscoreslist.append(newm) 280 | 281 | 282 | countselfsw = 0 283 | for m in [m for m in modellist if "histories" in m]: 284 | autoswitchm = {} 285 | autoswitchm["indexes"] = m["indexes"] 286 | autoswitchm["description"] = m["description"] + " +autoswitch" 287 | autoswitchm["shortdescription"] = m["shortdescription"] + "+SelfSw" 288 | autoswitchm["linestyle"] = "--" 289 | 290 | autoswitchm['color'] = 'C'+str(countselfsw) 291 | 292 | for key in ["loss", "acc", "val_loss", "val_acc"]: 293 | autoswitchm[key] = [] 294 | 295 | tmp=0 296 | for h in m["histories"]: 297 | 298 | 299 | bestl = np.inf 300 | bestk = 0 301 | for (k, l) in enumerate(h["val_loss"]): 302 | if l < bestl: 303 | bestl = l 304 | bestk = k 305 | print(m["shortdescription"], m["indexes"][tmp], bestk) 306 | for key in ["loss", "acc", "val_loss", "val_acc"]: 307 | autoswitchm[key].append(h[key][bestk]) 308 | tmp += 1 309 | if autoswitch == "as" or autoswitch == "both": 310 | modelsscoreslist.append(autoswitchm) 311 | 312 | countselfsw += 1 313 | 314 | 315 | 316 | switchscores = switch_loss(modelsscoreslist, num_classes, datasetshape) 317 | 318 | if autoswitch == "both" or autoswitch == "sw": 319 | pass 320 | #modelsscoreslist.append(switchscores) 321 | 322 | for m in modelsscoreslist: 323 | m["cost"] = costfun(m["indexes"], m["val_loss"], 324 | np.log(num_classes)/np.log(2), datasetshape) 325 | 326 | return modelsscoreslist 327 | 328 | 329 | 330 | with open("metrics.pkl", "rb") as f: 331 | modelsscoreslist = pkl.load(f) 332 | 333 | 334 | for m in modelsscoreslist: 335 | print(m["description"]) 336 | 337 | 338 | 339 | newmodelsscoreslist = makemodelscoreslist(modelsscoreslist, autoswitch="both", datasetshape=50000) 340 | 341 | for m in newmodelsscoreslist: 342 | print(m["shortdescription"]) 343 | 344 | 345 | subplots = ["costlab", "cost", "val_acc", "comprate", ] 346 | plot_metrics_samples(newmodelsscoreslist, "cifarscores.eps", subplots, num_classes=10, datasetshape=50000) 347 | 348 | -------------------------------------------------------------------------------- /variational/var_cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import pyvarinf 4 | import torch 5 | import torch.nn as nn 6 | 
import torch.nn.functional as F 7 | import torch.optim as optim 8 | 9 | from torchvision import datasets, transforms 10 | from torch.autograd import Variable 11 | 12 | import numpy as np 13 | 14 | # Training settings 15 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 16 | parser.add_argument('--batch-size', type=int, default=32, metavar='N', 17 | help='input batch size for training (default: 64)') 18 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 19 | help='input batch size for testing (default: 1000)') 20 | parser.add_argument('--epochs', type=int, default=1000, metavar='N', 21 | help='number of epochs to train (default: 10)') 22 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 23 | help='learning rate (default: 0.01)') 24 | parser.add_argument('--momentum', type=float, default=0.0, metavar='M', 25 | help='SGD momentum (default: 0.5)') 26 | parser.add_argument('--no-cuda', action='store_true', default=False, 27 | help='disables CUDA training') 28 | parser.add_argument('--seed', type=int, default=1, metavar='S', 29 | help='random seed (default: 1)') 30 | parser.add_argument('--log-interval', type=int, default=50, metavar='N', 31 | help='how many batches to wait before logging training status') 32 | parser.add_argument('--prior', type=str, default='gaussian', metavar='P', 33 | help='prior used (default: gaussian)', 34 | choices=['gaussian', 'mixtgauss', 'conjugate', 'conjugate_known_mean']) 35 | 36 | args = parser.parse_args() 37 | args.cuda = not args.no_cuda and torch.cuda.is_available() 38 | 39 | # setting up prior parameters 40 | prior_parameters = {} 41 | if args.prior != 'gaussian': 42 | prior_parameters['n_mc_samples'] = 1 43 | 44 | if args.prior == 'mixtgauss': 45 | prior_parameters['sigma_1'] = 0.02 46 | prior_parameters['sigma_2'] = 0.2 47 | prior_parameters['pi'] = 0.5 48 | if args.prior == 'conjugate': 49 | prior_parameters['mu_0'] = 0. 50 | prior_parameters['kappa_0'] = 3. 51 | prior_parameters['alpha_0'] = .5 52 | prior_parameters['beta_0'] = .5 53 | if args.prior == 'conjugate_known_mean': 54 | prior_parameters['alpha_0'] = .5 55 | prior_parameters['beta_0'] = .5 56 | prior_parameters['mean'] = 0. 
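# For reference (not used by the script): with the parameters above, the
# 'mixtgauss' prior is presumably the two-component scale mixture of zero-mean
# Gaussians of Blundell et al., drawing scale sigma_1 with probability pi and
# sigma_2 otherwise. A minimal sketch of its density (pi_mix corresponds to
# prior_parameters['pi']):
def scale_mixture_pdf(w, pi_mix=0.5, sigma_1=0.02, sigma_2=0.2):
    gauss = lambda x, s: np.exp(-x ** 2 / (2 * s ** 2)) / (s * np.sqrt(2 * np.pi))
    return pi_mix * gauss(w, sigma_1) + (1 - pi_mix) * gauss(w, sigma_2)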
57 | 58 | torch.manual_seed(args.seed) 59 | if args.cuda: 60 | torch.cuda.manual_seed(args.seed) 61 | 62 | 63 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 64 | # train_loader = torch.utils.data.DataLoader( 65 | # datasets.MNIST('~/datasets', train=True, download=True, 66 | # transform=transforms.Compose([ 67 | # transforms.ToTensor(), 68 | # transforms.Normalize((0.1307,), (0.3081,)) 69 | # ])), 70 | # batch_size=args.batch_size, shuffle=True, **kwargs) 71 | # test_loader = torch.utils.data.DataLoader( 72 | # datasets.MNIST('~/datasets', train=False, transform=transforms.Compose([ 73 | # transforms.ToTensor(), 74 | # transforms.Normalize((0.1307,), (0.3081,)) 75 | # ])), 76 | # batch_size=args.batch_size, shuffle=True, **kwargs) 77 | 78 | transform_train = transforms.Compose([ 79 | #transforms.RandomCrop(28), 80 | transforms.ToTensor(), 81 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 82 | ]) 83 | 84 | transform_test = transforms.Compose([ 85 | #transforms.RandomCrop(28), 86 | transforms.ToTensor(), 87 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 88 | ]) 89 | 90 | trainset = datasets.CIFAR10(root='~/datasets', train=True, download=True, transform=transform_train) 91 | train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2) 92 | 93 | testset = datasets.CIFAR10(root='~/datasets', train=False, download=True, transform=transform_test) 94 | test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) 95 | 96 | 97 | 98 | 99 | class Net(nn.Module): 100 | def __init__(self): 101 | super(Net, self).__init__() 102 | self.conv11 = nn.Conv2d(3, 32, kernel_size=3)#, padding=2) 103 | self.conv12 = nn.Conv2d(32, 32, kernel_size=3)#, padding=2) 104 | 105 | self.conv21 = nn.Conv2d(32, 64, kernel_size=3)#, padding=2) 106 | self.conv22 = nn.Conv2d(64, 64, kernel_size=3)#, padding=2) 107 | #self.conv2 = nn.Conv2d(32, 64, kernel_size=3) 108 | 109 | self.fc1 = nn.Linear(5*5*64, 256) 110 | self.fc2 = nn.Linear(256, 256) 111 | self.fc3 = nn.Linear(256, 10) 112 | self.bn1 = nn.BatchNorm2d(32) 113 | self.bn2 = nn.BatchNorm2d(64) 114 | 115 | def forward(self, x): 116 | x = F.relu(self.conv11(x)) 117 | x = F.relu(self.conv12(x)) 118 | x = F.max_pool2d(x, 2) 119 | 120 | x = F.relu(self.conv21(x)) 121 | x = F.relu(self.conv22(x)) 122 | x = F.max_pool2d(x, 2) 123 | 124 | #x = F.max_pool2d(F.relu(self.conv1(x)), 2) 125 | #x = self.bn1(x) 126 | #x = F.max_pool2d(F.relu(self.conv2(x)), 2) 127 | #x = self.bn2(x) 128 | 129 | x = x.view(x.size(0),-1) 130 | x = F.relu(self.fc1(x)) 131 | x = F.relu(self.fc2(x)) 132 | return F.log_softmax(F.relu(self.fc3(x))) 133 | 134 | class MLPNet(nn.Module): 135 | def __init__(self): 136 | super(MLPNet, self).__init__() 137 | self.fc1 = nn.Linear(3*32*32, 256) 138 | #self.fc1 = nn.Linear(1*28*28, 256) 139 | self.bn1 = nn.BatchNorm1d(256) 140 | self.fc2 = nn.Linear(256, 256) 141 | self.bn2 = nn.BatchNorm1d(256) 142 | self.fc3 = nn.Linear(256, 10) 143 | 144 | def forward(self, x): 145 | x = x.view(x.size(0), -1) 146 | x = F.relu(self.fc1(x)) 147 | #x = self.bn1(x) 148 | x = F.relu(self.fc2(x)) 149 | #x = self.bn2(x) 150 | x = self.fc3(x) 151 | return F.log_softmax(x) 152 | 153 | class LeNet(nn.Module): 154 | def __init__(self): 155 | super(LeNet, self).__init__() 156 | self.conv1 = nn.Conv2d(3, 6, 5) 157 | self.conv2 = nn.Conv2d(6, 16, 5) 158 | self.fc1 = nn.Linear(16*5*5, 120) 159 | self.fc2 = nn.Linear(120, 84) 160 | self.fc3 = 
nn.Linear(84, 10) 161 | 162 | def forward(self, x): 163 | out = F.relu(self.conv1(x)) 164 | out = F.max_pool2d(out, 2) 165 | out = F.relu(self.conv2(out)) 166 | out = F.max_pool2d(out, 2) 167 | out = out.view(out.size(0), -1) 168 | out = F.relu(self.fc1(out)) 169 | out = F.relu(self.fc2(out)) 170 | out = self.fc3(out) 171 | return F.log_softmax(out) 172 | 173 | model = Net() 174 | var_model = pyvarinf.Variationalize(model) 175 | var_model.set_prior(args.prior, **prior_parameters) 176 | if args.cuda: 177 | var_model.cuda() 178 | 179 | optimizer = optim.Adam(var_model.parameters(), lr=args.lr) 180 | #optimizer = optim.SGD(var_model.parameters(), lr=args.lr) 181 | 182 | 183 | 184 | def train(epoch): 185 | var_model.train() 186 | for batch_idx, (data, target) in enumerate(train_loader): 187 | if args.cuda: 188 | data, target = data.cuda(), target.cuda() 189 | optimizer.zero_grad() 190 | output = var_model(data) 191 | loss_error = F.nll_loss(output, target) 192 | loss_prior = var_model.prior_loss() / len(train_loader.dataset) 193 | loss = loss_error + loss_prior 194 | loss.backward() 195 | optimizer.step() 196 | if batch_idx % args.log_interval == 0: 197 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLoss error: {:.6f}\tLoss weights: {:.6f}'.format( 198 | epoch, batch_idx * len(data), len(train_loader.dataset), 199 | 100. * batch_idx / len(train_loader), loss.item(), loss_error.item(), loss_prior.item())) 200 | 201 | 202 | def compressionscores(): 203 | var_model.train() 204 | loss_prior = var_model.prior_loss().item() 205 | loss_error = 0 206 | for batch_idx, (data, target) in enumerate(train_loader): 207 | if args.cuda: 208 | data, target = data.cuda(), target.cuda() 209 | output = var_model(data) 210 | loss_error += F.nll_loss(output, target, size_average=False).item() 211 | 212 | loss = loss_error + loss_prior 213 | print('Compression scores: DL: {:.0f}\tDL error: {:.0f}\tDL weights: {:.6f}\tCompRate: {:.4f}'.format( 214 | loss, loss_error, loss_prior, loss / (len(train_loader.dataset) * np.log(10)))) 215 | 216 | def test(epoch): 217 | var_model.eval() 218 | test_loss = 0 219 | correct = 0 220 | for data, target in test_loader: 221 | with torch.no_grad(): 222 | if args.cuda: 223 | data, target = data.cuda(), target.cuda() 224 | data, target = Variable(data), Variable(target) 225 | output = var_model(data) 226 | test_loss += F.nll_loss(output, target).item() 227 | pred = output.max(1)[1] # get the index of the max log-probability 228 | correct += pred.eq(target).cpu().sum().item() 229 | 230 | test_loss = test_loss 231 | test_loss /= len(test_loader) # loss function already averages over batch size 232 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format( 233 | test_loss, correct, len(test_loader.dataset), 234 | 100. * correct / len(test_loader.dataset))) 235 | 236 | compressionscores() 237 | for epoch in range(1, args.epochs + 1): 238 | train(epoch) 239 | test(epoch) 240 | compressionscores() 241 | --------------------------------------------------------------------------------
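A note on reading the output of var_cifar10.py: compressionscores() reports the variational description length as the sum of the data term (negative log-likelihood summed over the training set, in nats) and the prior term (prior_loss(), an estimate of the KL divergence between the variational posterior and the prior). The CompRate it prints divides this total by N * ln(10), i.e. by the cost of encoding the N training labels with a uniform code over the 10 classes. An example invocation (the hyperparameter values are illustrative, not taken from the paper):

    python var_cifar10.py --prior mixtgauss --lr 0.001 --epochs 100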