├── README.md ├── prequential ├── cifar10.py ├── mnist.py └── switch.py └── variational └── var_cifar10.py /README.md: -------------------------------------------------------------------------------- 1 | # The Description Length of Deep Learning Models 2 | 3 | This repo contains the code for the experiments in "The Description Length of Deep Learning Models": https://arxiv.org/pdf/1802.07044.pdf 4 | 5 | For the prequential experiments, we uses Keras. 6 | 7 | For the variational experiments, we uses pytorch and the library PyVarInf: https://github.com/ctallec/pyvarinf 8 | 9 | 10 | -------------------------------------------------------------------------------- /prequential/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | import keras 5 | from keras.datasets import cifar10 6 | from keras.models import Sequential, Model 7 | from keras.layers import ( 8 | Dense, Dropout, Flatten, SpatialDropout2D, BatchNormalization, Input, 9 | Conv2D, MaxPooling2D, ZeroPadding2D, Activation) 10 | from keras import backend as K 11 | from keras.optimizers import RMSprop, Adam 12 | from keras.callbacks import EarlyStopping 13 | from keras.preprocessing.image import ImageDataGenerator 14 | 15 | import matplotlib 16 | matplotlib.use('Agg') 17 | import matplotlib.pyplot as plt 18 | import matplotlib.gridspec as gridspec 19 | from matplotlib.patches import Ellipse 20 | 21 | import pickle as pkl 22 | import pdb 23 | 24 | import os 25 | 26 | batch_size = 32 27 | num_classes = 10 28 | epochs = 10000000 29 | 30 | # input image dimensions 31 | img_rows, img_cols = 32, 32 32 | 33 | # the data, shuffled and split between train and test sets 34 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 35 | 36 | if K.image_data_format() == 'channels_first': 37 | x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols) 38 | x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols) 39 | input_shape = (1, img_rows, img_cols) 40 | else: 41 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3) 42 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3) 43 | input_shape = (img_rows, img_cols, 3) 44 | 45 | class MyNormalisation(): 46 | 47 | def __init__(self): 48 | pass 49 | 50 | def tooltransform(self, imagedata): 51 | imagedatanormed = imagedata / 255 52 | yuv_from_rgb = np.array([[ 0.299 , 0.587 , 0.114 ], 53 | [-0.14714119, -0.28886916, 0.43601035 ], 54 | [ 0.61497538, -0.51496512, -0.10001026 ]]) 55 | 56 | imagedatanormed = np.moveaxis(imagedatanormed, 3, 2) 57 | imagedatanormed = np.dot(yuv_from_rgb, imagedatanormed) 58 | imagedatanormed = np.moveaxis(imagedatanormed, 0, 3) 59 | 60 | return imagedatanormed 61 | 62 | 63 | def fit_transform(self, imagedata): 64 | imagedatanormed = self.tooltransform(imagedata) 65 | 66 | self.mean = imagedatanormed.mean() 67 | imagedatanormed -= self.mean 68 | 69 | self.std = imagedatanormed.std() 70 | imagedatanormed /= self.std 71 | 72 | return imagedatanormed 73 | 74 | def transform(self, imagedata): 75 | imagedatanormed = self.tooltransform(imagedata) 76 | imagedatanormed -= self.mean 77 | imagedatanormed /= self.std 78 | return imagedatanormed 79 | 80 | 81 | x_train = x_train.astype('float32') 82 | x_test = x_test.astype('float32') 83 | mynormalisation = MyNormalisation() 84 | x_train = mynormalisation.fit_transform(x_train) 85 | x_test = mynormalisation.transform(x_test) 86 | 87 | print('x_train shape:', x_train.shape) 88 | 
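# Note on MyNormalisation above: pixel values are rescaled to [0, 1], converted
# from RGB to YUV with the standard conversion matrix, then standardised with a
# single global mean and standard deviation fitted on the training set and
# reused unchanged on the test set.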
print(x_train.shape[0], 'train samples') 89 | print(x_test.shape[0], 'test samples') 90 | 91 | # convert class vectors to binary class matrices 92 | y_train = keras.utils.to_categorical(y_train, num_classes) 93 | y_test = keras.utils.to_categorical(y_test, num_classes) 94 | 95 | def make_model_vgg(): 96 | input_ = Input(shape=(32,32,3)) 97 | x = input_ 98 | 99 | def convbnrelu(nfilters): 100 | def fun(input_): 101 | x = ZeroPadding2D((1, 1))(input_) 102 | x = Conv2D(nfilters, kernel_size=(3,3))(x) 103 | x = BatchNormalization()(x) 104 | x = Activation("relu")(x) 105 | return x 106 | return fun 107 | 108 | def vgglayer(nlayers, nfilters, dropout=0.4): 109 | def fun(input_): 110 | x = input_ 111 | for _ in range(nlayers - 1): 112 | x = convbnrelu(nfilters)(x) 113 | x = SpatialDropout2D(dropout)(x) 114 | x = convbnrelu(nfilters)(x) 115 | x = MaxPooling2D(pool_size=(2,2))(x) 116 | return x 117 | return fun 118 | 119 | x = vgglayer(2, 64, dropout=0.3)(x) 120 | x = vgglayer(2, 128)(x) 121 | x = vgglayer(3, 256)(x) 122 | x = vgglayer(3, 512)(x) 123 | x = vgglayer(3, 512)(x) 124 | 125 | x = Flatten()(x) 126 | x = Dropout(0.5)(x) 127 | x = Dense(512)(x) 128 | x = BatchNormalization()(x) 129 | x = Dropout(0.5)(x) 130 | output = Dense(num_classes, activation='softmax')(x) 131 | optim = Adam(lr=0.0001) 132 | 133 | model = Model(inputs=[input_], output=[output]) 134 | 135 | model.compile(loss=keras.losses.categorical_crossentropy, 136 | optimizer=optim, 137 | metrics=['accuracy']) 138 | return model 139 | 140 | 141 | def make_model_tinycnn(): 142 | model = Sequential() 143 | model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) 144 | model.add(Conv2D(32, kernel_size=(3, 3), 145 | activation='relu')) 146 | model.add(SpatialDropout2D(0.3)) 147 | model.add(ZeroPadding2D((1, 1))) 148 | model.add(Conv2D(32, (3, 3), activation='relu')) 149 | model.add(MaxPooling2D(pool_size=(2, 2))) 150 | 151 | model.add(ZeroPadding2D((1, 1))) 152 | model.add(Conv2D(32, (3, 3), activation='relu')) 153 | model.add(SpatialDropout2D(0.4)) 154 | model.add(ZeroPadding2D((1, 1))) 155 | model.add(Conv2D(32, (3, 3), activation='relu')) 156 | model.add(MaxPooling2D(pool_size=(2, 2))) 157 | 158 | model.add(Flatten()) 159 | 160 | model.add(Dropout(0.5)) 161 | model.add(Dense(256, activation='relu')) 162 | 163 | 164 | model.add(Dense(256, activation='relu')) 165 | model.add(Dropout(0.5)) 166 | model.add(Dense(num_classes, activation='softmax')) 167 | optim = Adam(lr=0.0001) 168 | 169 | model.compile(loss=keras.losses.categorical_crossentropy, 170 | optimizer=optim, 171 | metrics=['accuracy']) 172 | return model 173 | 174 | def make_model_mlp(): 175 | model = Sequential() 176 | model.add(Flatten(input_shape=input_shape)) 177 | model.add(Dense(512, activation='relu')) 178 | model.add(Dropout(0.2)) 179 | model.add(Dense(512, activation='relu')) 180 | model.add(Dropout(0.2)) 181 | model.add(Dense(10, activation='softmax')) 182 | 183 | 184 | model.compile(loss='categorical_crossentropy', 185 | optimizer=optim, 186 | metrics=['accuracy']) 187 | return model 188 | 189 | 190 | def make_model_shallow(): 191 | model = Sequential() 192 | model.add(Flatten(input_shape=input_shape)) 193 | model.add(Dense(5000, activation='relu')) 194 | #model.add(Dropout(0.2)) 195 | model.add(Dense(10, activation='softmax')) 196 | 197 | optim = Adam(lr=0.00001) 198 | model.compile(loss='categorical_crossentropy', 199 | optimizer=optim, 200 | metrics=['accuracy']) 201 | return model 202 | 203 | 204 | 205 | 206 | loss_train = [] 207 | loss_test = [] 208 | 
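# Note: make_model_mlp above compiles with an `optim` name that is never defined
# in that function and would raise a NameError if called. A corrected sketch,
# assuming an Adam optimiser as in the other model constructors (the learning
# rate is illustrative):
def make_model_mlp_fixed():
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.0001),
                  metrics=['accuracy'])
    return model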
acc_train = [] 209 | acc_test = [] 210 | histlist = [] 211 | 212 | modelsscoreslist = [] 213 | 214 | print("Already computed models : ", [m["shortdescription"] for m in modelsscoreslist]) 215 | v = 0 216 | 217 | 218 | cb1 = EarlyStopping(monitor='loss', min_delta=0.005, patience=500, verbose=1, mode='auto') 219 | cb2 = EarlyStopping(monitor='loss', min_delta=0.005, patience=50, verbose=1, mode='auto') 220 | 221 | datagen = ImageDataGenerator( 222 | width_shift_range=0.2, 223 | height_shift_range=0.2, 224 | horizontal_flip=True) 225 | 226 | class MyImageGenerator(): 227 | def __init__(self, datagen, imgs, labels, batch_size): 228 | self.datagen = datagen.flow(imgs, labels, batch_size=batch_size) 229 | self.labels = labels 230 | self.batch_size = batch_size 231 | 232 | def __iter__(self): 233 | return self 234 | def __next__(self): 235 | return self.next() 236 | 237 | def next(self): 238 | x, y = next(self.datagen) 239 | #x = x[2:-2,2:-2] 240 | return (x,y) 241 | 242 | datagen.fit(x_train) 243 | 244 | 245 | def computescores(modelgenerator, description, shortdescription, 246 | x_train, indexes, **kwargs): 247 | model = modelgenerator() 248 | model.summary() 249 | v = 0 250 | validation_data = None 251 | #loss_test = [] 252 | #acc_test = [] 253 | #ltrain = [] 254 | #atrain = [] 255 | modelsscoreslist.append(None) 256 | 257 | 258 | for k, idx in enumerate(indexes): 259 | print("===> Training with %d training samples."%(idx)) 260 | model = modelgenerator() 261 | x_reduced_train = x_train[:idx] 262 | y_reduced_train = y_train[:idx] 263 | 264 | mygen = MyImageGenerator(datagen, x_reduced_train, 265 | y_reduced_train, batch_size) 266 | 267 | 268 | if k == len(indexes) - 1: 269 | x_valid = x_train[idx:] 270 | y_valid = y_train[idx:] 271 | else: 272 | x_valid = x_train[idx:indexes[k+1]] 273 | y_valid = y_train[idx:indexes[k+1]] 274 | 275 | #v=1 276 | if idx > 10000: 277 | v = 1 278 | cb = cb2 279 | else: 280 | v=2 281 | cb = cb1 282 | 283 | validation_data = (x_valid, y_valid) 284 | 285 | 286 | 287 | steps_per_epoch = int(np.ceil(idx/batch_size )) 288 | hist = model.fit_generator(mygen, steps_per_epoch, 289 | validation_data=validation_data, 290 | verbose=1, callbacks=[cb], 291 | **kwargs) 292 | 293 | histlist.append(hist.history) 294 | 295 | score = model.evaluate(x_valid, y_valid, verbose=0) 296 | 297 | loss_test.append(score[0]) 298 | acc_test.append(score[1]) 299 | ltrain = hist.history["loss"][-1] 300 | atrain = hist.history["acc"][-1] 301 | 302 | acc_train.append(atrain) 303 | loss_train.append(ltrain) 304 | 305 | print("Loss : %.3f Accuracy : %.2f Loss train : %.3f Accuracy train %.2f" % (loss_test[-1], acc_test[-1], loss_train[-1], acc_train[-1])) 306 | 307 | rdict = {"description":description, "shortdescription":shortdescription, 308 | "indexes":indexes, "histories":histlist} 309 | 310 | modelsscoreslist[-1] = rdict 311 | with open("metrics.pkl", "wb") as f: 312 | pkl.dump(modelsscoreslist, f) 313 | 314 | return rdict 315 | 316 | 317 | 318 | minidx = num_classes 319 | geomparam = 2 320 | maxk = int(np.floor( (np.log(x_train.shape[0]) - np.log(num_classes)) / np.log(geomparam))) 321 | indexes = [int(np.floor(num_classes * geomparam ** k)) for k in range(maxk + 1 )] 322 | #indexes = indexes[-1:] 323 | 324 | modelscores = computescores(make_model_shallow, 325 | "Shallow1 : Shallow network with width 5000", 326 | "Shallow1", x_train, indexes, epochs=epochs) 327 | 328 | 329 | 330 | modelsscoreslist.append(modelscores) 331 | 332 | 333 | 
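# A minimal sketch (not part of the original script) of how the per-chunk
# validation losses gathered above turn into a prequential description length:
# the first indexes[0] labels are encoded with the uniform code (log2(10) bits
# each), and each subsequent chunk is encoded with the model trained on all
# preceding samples, at its cross-entropy on that chunk (Keras losses are in
# nats, hence the division by log 2). The names below are illustrative.
def prequential_codelength_bits(indexes, chunk_val_losses, n_samples, n_classes=10):
    total = indexes[0] * np.log2(n_classes)
    for k, start in enumerate(indexes):
        end = indexes[k + 1] if k + 1 < len(indexes) else n_samples
        total += (end - start) * chunk_val_losses[k] / np.log(2)
    return total
# Compression ratio relative to the uniform code, assuming chunk_val_losses
# holds the final val_loss of each chunk:
# prequential_codelength_bits(indexes, chunk_val_losses, x_train.shape[0]) \
#     / (x_train.shape[0] * np.log2(10))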
-------------------------------------------------------------------------------- /prequential/mnist.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 6 | ''' 7 | from __future__ import print_function 8 | import numpy as np 9 | 10 | import keras 11 | from keras.datasets import mnist 12 | from keras.models import Sequential, Model 13 | from keras.layers import ( 14 | Dense, Dropout, Flatten, SpatialDropout2D, BatchNormalization, Input, 15 | Conv2D, MaxPooling2D, ZeroPadding2D, Activation) 16 | from keras import backend as K 17 | from keras.optimizers import RMSprop, Adam 18 | from keras.callbacks import EarlyStopping 19 | from keras.preprocessing.image import ImageDataGenerator 20 | 21 | import matplotlib 22 | matplotlib.use('Agg') 23 | import matplotlib.pyplot as plt 24 | import matplotlib.gridspec as gridspec 25 | from matplotlib.patches import Ellipse 26 | 27 | import pickle as pkl 28 | import pdb 29 | 30 | import os 31 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 32 | 33 | batch_size = 32 34 | num_classes = 10 35 | epochs = 10000000 36 | 37 | # input image dimensions 38 | img_rows, img_cols = 28, 28 39 | 40 | # the data, shuffled and split between train and test sets 41 | 42 | def customload_data(path): 43 | f = np.load(path) 44 | x_train, y_train = f['x_train'], f['y_train'] 45 | x_test, y_test = f['x_test'], f['y_test'] 46 | f.close() 47 | return (x_train, y_train), (x_test, y_test) 48 | (x_train, y_train), (x_test, y_test) = customload_data("mnist.npz") 49 | 50 | if K.image_data_format() == 'channels_first': 51 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 52 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 53 | input_shape = (1, img_rows, img_cols) 54 | else: 55 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 56 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 57 | input_shape = (img_rows, img_cols, 1) 58 | 59 | class MyNormalisation(): 60 | 61 | def __init__(self): 62 | pass 63 | 64 | def tooltransform(self, imagedata): 65 | imagedatanormed = imagedata / 255 66 | yuv_from_rgb = np.array([[ 0.299 , 0.587 , 0.114 ], 67 | [-0.14714119, -0.28886916, 0.43601035 ], 68 | [ 0.61497538, -0.51496512, -0.10001026 ]]) 69 | 70 | #imagedatanormed = np.moveaxis(imagedatanormed, 3, 2) 71 | #imagedatanormed = np.dot(yuv_from_rgb, imagedatanormed) 72 | #imagedatanormed = np.moveaxis(imagedatanormed, 0, 3) 73 | 74 | return imagedatanormed 75 | 76 | 77 | def fit_transform(self, imagedata): 78 | imagedatanormed = self.tooltransform(imagedata) 79 | 80 | self.mean = imagedatanormed.mean() 81 | imagedatanormed -= self.mean 82 | 83 | self.std = imagedatanormed.std() 84 | imagedatanormed /= self.std 85 | 86 | return imagedatanormed 87 | 88 | def transform(self, imagedata): 89 | imagedatanormed = self.tooltransform(imagedata) 90 | imagedatanormed -= self.mean 91 | imagedatanormed /= self.std 92 | return imagedatanormed 93 | 94 | 95 | x_train = x_train.astype('float32') 96 | x_test = x_test.astype('float32') 97 | mynormalisation = MyNormalisation() 98 | x_train = mynormalisation.fit_transform(x_train) 99 | x_test = mynormalisation.transform(x_test) 100 | 101 | print('x_train shape:', x_train.shape) 102 | print(x_train.shape[0], 'train samples') 103 | print(x_test.shape[0], 'test samples') 104 | 
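# In this MNIST version the RGB -> YUV step of MyNormalisation is commented out
# (the images are single-channel), so the normalisation reduces to dividing by
# 255 followed by subtraction of a global mean and division by a global standard
# deviation, both fitted on the training set.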
105 | # convert class vectors to binary class matrices 106 | y_train = keras.utils.to_categorical(y_train, num_classes) 107 | y_test = keras.utils.to_categorical(y_test, num_classes) 108 | 109 | def make_model_vgg(): 110 | input_ = Input(shape=(28,28,1)) 111 | x = input_ 112 | 113 | def convbnrelu(nfilters): 114 | def fun(input_): 115 | x = ZeroPadding2D((1, 1))(input_) 116 | x = Conv2D(nfilters, kernel_size=(3,3))(x) 117 | x = BatchNormalization()(x) 118 | x = Activation("relu")(x) 119 | return x 120 | return fun 121 | 122 | def vgglayer(nlayers, nfilters, dropout=0.4): 123 | def fun(input_): 124 | x = input_ 125 | for _ in range(nlayers - 1): 126 | x = convbnrelu(nfilters)(x) 127 | x = SpatialDropout2D(dropout)(x) 128 | x = convbnrelu(nfilters)(x) 129 | x = MaxPooling2D(pool_size=(2,2))(x) 130 | return x 131 | return fun 132 | 133 | x = vgglayer(2, 32, dropout=0.3)(x) 134 | x = vgglayer(2, 64)(x) 135 | x = vgglayer(2, 128)(x) 136 | x = vgglayer(2, 256)(x) 137 | #x = vgglayer(3, 256)(x) 138 | 139 | x = Flatten()(x) 140 | x = Dropout(0.5)(x) 141 | x = Dense(256)(x) 142 | x = Dropout(0.5)(x) 143 | x = Dense(256)(x) 144 | x = Dropout(0.5)(x) 145 | output = Dense(num_classes, activation='softmax')(x) 146 | optim = Adam(lr=0.001) 147 | 148 | model = Model(inputs=[input_], output=[output]) 149 | 150 | model.compile(loss=keras.losses.categorical_crossentropy, 151 | optimizer=optim, 152 | metrics=['accuracy']) 153 | return model 154 | 155 | 156 | 157 | def make_model_mlp(): 158 | model = Sequential() 159 | model.add(Flatten(input_shape=input_shape)) 160 | model.add(Dense(256, activation='relu')) 161 | model.add(Dropout(0.2)) 162 | model.add(Dense(256, activation='relu')) 163 | model.add(Dropout(0.2)) 164 | model.add(Dense(10, activation='softmax')) 165 | 166 | 167 | model.compile(loss='categorical_crossentropy', 168 | optimizer=Adam(), 169 | metrics=['accuracy']) 170 | return model 171 | 172 | 173 | 174 | loss_train = [] 175 | loss_test = [] 176 | acc_train = [] 177 | acc_test = [] 178 | histlist = [] 179 | 180 | 181 | modelsscoreslist = [] 182 | 183 | v = 0 184 | 185 | 186 | cb1 = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=500, verbose=1, mode='auto') 187 | cb2 = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=50, verbose=1, mode='auto') 188 | 189 | datagen = ImageDataGenerator( 190 | width_shift_range=0.2, 191 | height_shift_range=0.2, 192 | horizontal_flip=True) 193 | 194 | class MyImageGenerator(): 195 | def __init__(self, datagen, imgs, labels, batch_size): 196 | self.datagen = datagen.flow(imgs, labels, batch_size=batch_size) 197 | self.labels = labels 198 | self.batch_size = batch_size 199 | 200 | def __iter__(self): 201 | return self 202 | def __next__(self): 203 | return self.next() 204 | 205 | def next(self): 206 | x, y = next(self.datagen) 207 | #x = x[2:-2,2:-2] 208 | return (x,y) 209 | 210 | datagen.fit(x_train) 211 | 212 | 213 | def computescores(modelgenerator, description, shortdescription, 214 | x_train, indexes, **kwargs): 215 | model = modelgenerator() 216 | model.summary() 217 | v = 0 218 | validation_data = None 219 | #loss_test = [] 220 | #acc_test = [] 221 | #ltrain = [] 222 | #atrain = [] 223 | 224 | for k, idx in enumerate(indexes): 225 | print("===> Training with %d training samples."%(idx)) 226 | model = modelgenerator() 227 | x_reduced_train = x_train[:idx] 228 | y_reduced_train = y_train[:idx] 229 | 230 | mygen = MyImageGenerator(datagen, x_reduced_train, 231 | y_reduced_train, batch_size) 232 | 233 | 234 | if k == len(indexes) - 1: 235 | 
x_valid = x_train[idx:] 236 | y_valid = y_train[idx:] 237 | else: 238 | x_valid = x_train[idx:indexes[k+1]] 239 | y_valid = y_train[idx:indexes[k+1]] 240 | 241 | #v=1 242 | if idx > 10000: 243 | v = 1 244 | cb = cb2 245 | else: 246 | v=2 247 | cb = cb1 248 | 249 | validation_data = (x_valid, y_valid) 250 | 251 | 252 | 253 | steps_per_epoch = int(np.ceil(idx/batch_size )) 254 | hist = model.fit_generator(mygen, steps_per_epoch, 255 | validation_data=validation_data, 256 | verbose=v, callbacks=[cb], 257 | **kwargs) 258 | histlist.append(hist.history) 259 | 260 | score = model.evaluate(x_valid, y_valid, verbose=0) 261 | 262 | loss_test.append(score[0]) 263 | acc_test.append(score[1]) 264 | ltrain = hist.history["loss"][-1] 265 | atrain = hist.history["acc"][-1] 266 | 267 | acc_train.append(atrain) 268 | loss_train.append(ltrain) 269 | 270 | print("Loss : %.3f Accuracy : %.2f Loss train : %.3f Accuracy train %.2f" % (loss_test[-1], acc_test[-1], loss_train[-1], acc_train[-1])) 271 | 272 | rdict = {"description":description, "shortdescription":shortdescription, 273 | "indexes":indexes, "histories":histlist} 274 | return rdict 275 | 276 | 277 | 278 | minidx = num_classes 279 | geomparam = 2 280 | maxk = int(np.floor( (np.log(x_train.shape[0]) - np.log(num_classes)) / np.log(geomparam))) 281 | indexes = [int(np.floor(num_classes * geomparam ** k)) for k in range(maxk + 1 )] 282 | #indexes = indexes[-1:] 283 | print("Indexes : ", indexes) 284 | 285 | modelscores = computescores(make_model_vgg, 286 | "VGG : Same VGG than for CIFAR", 287 | "VGG", x_train, indexes, epochs=epochs) 288 | 289 | -------------------------------------------------------------------------------- /prequential/switch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import matplotlib.pyplot as plt 7 | import matplotlib.gridspec as gridspec 8 | from matplotlib.patches import Ellipse 9 | 10 | import pdb 11 | 12 | def plot_metrics_samples(modelsscoreslist, namefile, subplots=None, 13 | num_classes=10, datasetshape=50000): 14 | maxindexes = max(m["indexes"][-1] for m in modelsscoreslist) 15 | 16 | 17 | 18 | if subplots is None: 19 | subplots = ["val_loss", "loss", "val_acc", "acc", "cost", "comprate", "costlab"] 20 | n_subplots = len(subplots) 21 | #gs = gridspec.GridSpec(3, 1) 22 | #gs.update(left=0.05, right=0.48, wspace=0.05) 23 | #fig, axes = plt.subplots(n_subplots, 1, figsize=(8,3*n_subplots)) 24 | fig, axes = plt.subplots(n_subplots // 2, 2, figsize=(10, 5)) 25 | 26 | 27 | # Loss plot 28 | def loss_subplot(losskey, title, ax): 29 | #ax.set_title(title) 30 | for m in [m for m in modelsscoreslist if losskey in m]: 31 | mloss = m[losskey] 32 | ax.plot(m["indexes"], mloss, label=m["shortdescription"], 33 | linewidth=1., alpha=0.7, color=m.get("color"), 34 | linestyle=m.get("linestyle")) 35 | ax.set_yscale('log') 36 | ax.set_xlim([0., datasetshape]) 37 | #ax.set_xlabel('Mini-batch-number') 38 | ax.set_ylabel('Loss (log-scale)') 39 | ax.set_xlabel('Number of samples') 40 | ax.legend(loc="upper right",fontsize=8) 41 | #ax.set_xscale('log') 42 | ax.get_yaxis().set_label_coords(-0.1,0.5) 43 | 44 | 45 | 46 | def acc_subplot(acckey, title, ax): 47 | #ax.set_title(title) 48 | #ax_acc.grid(axis='y', color='k', linewidth=0.2) 49 | for m in [m for m in modelsscoreslist if acckey in m]: 50 | ax.plot(m["indexes"], m[acckey], 51 | label=m["shortdescription"], linewidth=1., alpha=0.7, 52 | 
color=m.get("color"), linestyle=m.get("linestyle")) 53 | ax.set_xlim([0., datasetshape]) 54 | ax.set_ylim([0., 1.]) 55 | #ax_acc.set_xlabel('Mini-batch-number') 56 | ax.set_ylabel('Accuracy on the next\ndata pack (%)') 57 | ax.set_xlabel('Number of samples') 58 | ax.legend(loc="lower right",fontsize=8., ncol=2) 59 | ax.get_yaxis().set_label_coords(-0.1,0.5) 60 | 61 | 62 | # Cost plot 63 | def cost_subplot(title, ax): 64 | #ax.set_title(title) 65 | 66 | ##### A SUPPRIMER 67 | _, costbase, _, _ = modelsscoreslist[0]["cost"] 68 | for m in [m for m in modelsscoreslist if "cost" in m]: 69 | indexes_cost, cost, _, _ = m["cost"] 70 | ax.plot(indexes_cost, (cost - costbase)/1000, 71 | label=m["shortdescription"], 72 | linewidth=1., alpha=0.7, 73 | color=m.get("color"), 74 | linestyle=m.get("linestyle")) 75 | print(m["shortdescription"], cost[-1]) 76 | #ax.text(indexes_cost[-1] + 100, cost[-1], 77 | # str(int(cost[-1])), fontsize=6., 78 | # #color=m.get("color"), 79 | # ) 80 | 81 | #ax.legend(loc="lower left",fontsize=8.) 82 | 83 | 84 | #ax_loss.set_xlabel('Mini-batch-number') 85 | ax.set_ylabel('Cumulative encoding cost\n(difference with uniform) (kbits)') 86 | #ax.set_xlabel('Number of samples') 87 | ax.set_xlim([0., datasetshape]) 88 | ax.get_yaxis().set_label_coords(-0.1,0.5) 89 | 90 | 91 | def costlab_subplot(title, ax): 92 | #ax.set_title(title) 93 | for m in [m for m in modelsscoreslist if "cost" in m]: 94 | indexes_cost, _, _, costlab = m["cost"] 95 | ax.plot(indexes_cost, costlab, 96 | label=m["shortdescription"], 97 | linewidth=1., alpha=0.7, 98 | color=m.get("color"), 99 | linestyle=m.get("linestyle")) 100 | #ax.legend(loc="upper right",fontsize=8.) 101 | 102 | #ax_loss.set_xlabel('Mini-batch-number') 103 | ax.set_ylabel('Encoding cost per \nsample (bits)') 104 | ax.set_ylim([0., 2*np.log2(10)]) 105 | #ax.set_xlabel('Number of samples') 106 | ax.set_xlim([0., datasetshape]) 107 | #ax.set_yscale('log') 108 | ax.get_yaxis().set_label_coords(-0.1,0.5) 109 | 110 | 111 | def compressionrate_subplot(title, ax): 112 | #ax.set_title(title) 113 | for m in [m for m in modelsscoreslist if "cost" in m]: 114 | indexes_cost, _, comprate, _ = m["cost"] 115 | ax.plot(indexes_cost, comprate, 116 | label=m["shortdescription"], 117 | linewidth=1., alpha=0.7, 118 | color=m.get("color"), 119 | linestyle=m.get("linestyle")) 120 | 121 | ax.set_xlabel('Number of samples') 122 | ax.set_ylabel('Compression ratio') 123 | ax.set_xlim([0., datasetshape]) 124 | ax.set_ylim([0., 2.]) 125 | #ax.set_yscale('log') 126 | #ax.legend(bbox_to_anchor=(0., -.6, 1., -1.6), ncol=2, mode="expand", loc=3, borderaxespad=0., fontsize=10.) 
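# The compression ratio plotted in this subplot is the cumulative prequential
# codelength of the labels divided by the codelength of the uniform code
# (log2(10) bits per label); it corresponds to the `compressionbound` array
# returned by costfun below.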
127 | ax.get_yaxis().set_label_coords(-0.1,0.5) 128 | 129 | 130 | 131 | 132 | for subp, ax in zip(subplots, axes.flat): 133 | if subp == "val_loss": 134 | loss_subplot("val_loss", "Loss (evaluated on the next pack of data)", ax) 135 | if subp == "loss": 136 | loss_subplot("loss", "Loss (train)", ax) 137 | if subp == "val_acc": 138 | acc_subplot("val_acc", "Accuracy (evaluated on the next pack of data)", ax) 139 | if subp == "acc": 140 | acc_subplot("acc", "Accuracy (train)", ax) 141 | if subp == "cost": 142 | cost_subplot("Cumulative encoding cost (difference with uniform encoding cost)", ax) 143 | if subp == "comprate": 144 | compressionrate_subplot("Compression rate", ax) 145 | if subp == "costlab": 146 | costlab_subplot("Encoding cost for each label", ax) 147 | 148 | 149 | 150 | 151 | fig.tight_layout() 152 | plt.savefig(namefile, format="eps") 153 | 154 | 155 | def costfun(indexes, loss, initial_cost, datasetshape, interpolation=False): 156 | 157 | indexes_cost = np.arange(datasetshape + 1) 158 | uniform_cost = initial_cost * indexes_cost 159 | loss_cost = np.zeros(datasetshape + 1) 160 | for k, idx in enumerate(indexes): 161 | if k == len(indexes) - 1: 162 | maxidx = datasetshape + 1 163 | loss_cost[idx:] = loss[-1] 164 | else: 165 | for t in range(idx, indexes[k+1]): 166 | if interpolation: 167 | loss_cost[t] = loss[k] + (t - idx) / (indexes[k+1] - idx) * \ 168 | (loss[k+1] - loss[k]) 169 | else: 170 | loss_cost[t] = loss[k] 171 | #loss_cost = np.zeros(len(loss) +1 ) 172 | loss_cost[:indexes[0]] = initial_cost 173 | #loss_cost[1:] = loss 174 | 175 | cost = loss_cost - initial_cost 176 | 177 | compressionbound = loss_cost.cumsum() / uniform_cost 178 | #cost[1:] = (loss_cost - initial_cost) * (indexes_cost[1:] - indexes_cost[:-1]) 179 | cost = cost.cumsum() 180 | 181 | #cost = cost - 182 | return indexes_cost, cost, compressionbound, loss_cost 183 | 184 | 185 | def switch_loss(modelsscoreslist, num_classes, datasetshape, interpolate=False): 186 | initial_cost = np.log(num_classes) / np.log(2) # HERE IN BITS 187 | maxindexes = datasetshape 188 | 189 | indexes = list(range(maxindexes)) 190 | switchloss = [initial_cost for _ in range(maxindexes)] 191 | for m in modelsscoreslist: 192 | mloss = m["val_loss"] 193 | 194 | for k, idx in enumerate(m["indexes"]): 195 | if k == len(m["indexes"]) - 1: 196 | maxrange = maxindexes 197 | else: 198 | maxrange = m["indexes"][k+1] 199 | for t in range(idx, maxrange): 200 | if k == len(m["indexes"]) - 1: 201 | switchloss[t] = min(mloss[k], switchloss[t]) 202 | else: 203 | if interpolate: 204 | interp = m["val_loss"][k] + \ 205 | (t - idx) / (m["indexes"][k+1] - idx) * \ 206 | (m["val_loss"][k+1] - m["val_loss"][k]) 207 | else: 208 | interp = mloss[k] 209 | switchloss[t] = min(interp, switchloss[t]) 210 | 211 | switchdict = {"description":"Switch", "shortdescription":"Switch", 212 | "indexes":indexes, 213 | "val_loss":switchloss, 214 | "color":"r", 215 | "linestyle":"--", 216 | #"acc_test":(1/num_classes)*np.ones(maxindexes), 217 | #"loss_train":np.log(num_classes)*np.ones(maxindexes), 218 | #"acc_train":(1/num_classes)*np.ones(maxindexes), 219 | } 220 | 221 | return switchdict 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | def makemodelscoreslist(modellist, num_classes=10, datasetshape=50000, autoswitch="none"): 231 | modelsscoreslist = [] 232 | maxindexes = max(m["indexes"][-1] for m in modellist) 233 | 234 | uniform = {"description":"Uniform random", "shortdescription":"uniform", 235 | "indexes":np.arange(maxindexes), 236 | 
"val_loss":np.log2(num_classes)*np.ones(maxindexes), 237 | "val_acc":(1/num_classes)*np.ones(maxindexes), 238 | "loss":np.log2(num_classes)*np.ones(maxindexes), 239 | "acc":(1/num_classes)*np.ones(maxindexes), 240 | "linestyle":":", 241 | "color":"k"} 242 | 243 | modelsscoreslist.append(uniform) 244 | 245 | if True: #if in bits 246 | for m in modellist: 247 | if "histories" in m: 248 | for key in ["loss", "val_loss"]: 249 | for h in m["histories"]: 250 | h[key] /= np.log(2) 251 | else: 252 | if "loss_train" in m: 253 | m["loss_train"] = m["loss_train"] / np.log(2) 254 | 255 | m["loss_test"] = m["loss_test"] / np.log(2) 256 | 257 | 258 | 259 | 260 | 261 | for m in modellist: 262 | newm = {} 263 | for key in ["indexes", "description", "shortdescription"]: 264 | newm[key] = m[key] 265 | 266 | 267 | if "histories" in m: 268 | for key in ["loss", "acc", "val_loss", "val_acc"]: 269 | newm[key] = [h[key][-1] for h in m["histories"]] 270 | if autoswitch == "none" or autoswitch == "both": 271 | modelsscoreslist.append(newm) 272 | else: 273 | if "loss_train" in m: 274 | newm["loss"] = m["loss_train"] 275 | if "acc_train" in m: 276 | newm["acc"] = m["acc_train"] 277 | newm["val_loss"] = m["loss_test"] 278 | newm["val_acc"] = m["acc_test"] 279 | modelsscoreslist.append(newm) 280 | 281 | 282 | countselfsw = 0 283 | for m in [m for m in modellist if "histories" in m]: 284 | autoswitchm = {} 285 | autoswitchm["indexes"] = m["indexes"] 286 | autoswitchm["description"] = m["description"] + " +autoswitch" 287 | autoswitchm["shortdescription"] = m["shortdescription"] + "+SelfSw" 288 | autoswitchm["linestyle"] = "--" 289 | 290 | autoswitchm['color'] = 'C'+str(countselfsw) 291 | 292 | for key in ["loss", "acc", "val_loss", "val_acc"]: 293 | autoswitchm[key] = [] 294 | 295 | tmp=0 296 | for h in m["histories"]: 297 | 298 | 299 | bestl = np.inf 300 | bestk = 0 301 | for (k, l) in enumerate(h["val_loss"]): 302 | if l < bestl: 303 | bestl = l 304 | bestk = k 305 | print(m["shortdescription"], m["indexes"][tmp], bestk) 306 | for key in ["loss", "acc", "val_loss", "val_acc"]: 307 | autoswitchm[key].append(h[key][bestk]) 308 | tmp += 1 309 | if autoswitch == "as" or autoswitch == "both": 310 | modelsscoreslist.append(autoswitchm) 311 | 312 | countselfsw += 1 313 | 314 | 315 | 316 | switchscores = switch_loss(modelsscoreslist, num_classes, datasetshape) 317 | 318 | if autoswitch == "both" or autoswitch == "sw": 319 | pass 320 | #modelsscoreslist.append(switchscores) 321 | 322 | for m in modelsscoreslist: 323 | m["cost"] = costfun(m["indexes"], m["val_loss"], 324 | np.log(num_classes)/np.log(2), datasetshape) 325 | 326 | return modelsscoreslist 327 | 328 | 329 | 330 | with open("metrics.pkl", "rb") as f: 331 | modelsscoreslist = pkl.load(f) 332 | 333 | 334 | for m in modelsscoreslist: 335 | print(m["description"]) 336 | 337 | 338 | 339 | newmodelsscoreslist = makemodelscoreslist(modelsscoreslist, autoswitch="both", datasetshape=50000) 340 | 341 | for m in newmodelsscoreslist: 342 | print(m["shortdescription"]) 343 | 344 | 345 | subplots = ["costlab", "cost", "val_acc", "comprate", ] 346 | plot_metrics_samples(newmodelsscoreslist, "cifarscores.eps", subplots, num_classes=10, datasetshape=50000) 347 | 348 | -------------------------------------------------------------------------------- /variational/var_cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import pyvarinf 4 | import torch 5 | import torch.nn as nn 6 | 
import torch.nn.functional as F 7 | import torch.optim as optim 8 | 9 | from torchvision import datasets, transforms 10 | from torch.autograd import Variable 11 | 12 | import numpy as np 13 | 14 | # Training settings 15 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 16 | parser.add_argument('--batch-size', type=int, default=32, metavar='N', 17 | help='input batch size for training (default: 64)') 18 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 19 | help='input batch size for testing (default: 1000)') 20 | parser.add_argument('--epochs', type=int, default=1000, metavar='N', 21 | help='number of epochs to train (default: 10)') 22 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 23 | help='learning rate (default: 0.01)') 24 | parser.add_argument('--momentum', type=float, default=0.0, metavar='M', 25 | help='SGD momentum (default: 0.5)') 26 | parser.add_argument('--no-cuda', action='store_true', default=False, 27 | help='disables CUDA training') 28 | parser.add_argument('--seed', type=int, default=1, metavar='S', 29 | help='random seed (default: 1)') 30 | parser.add_argument('--log-interval', type=int, default=50, metavar='N', 31 | help='how many batches to wait before logging training status') 32 | parser.add_argument('--prior', type=str, default='gaussian', metavar='P', 33 | help='prior used (default: gaussian)', 34 | choices=['gaussian', 'mixtgauss', 'conjugate', 'conjugate_known_mean']) 35 | 36 | args = parser.parse_args() 37 | args.cuda = not args.no_cuda and torch.cuda.is_available() 38 | 39 | # setting up prior parameters 40 | prior_parameters = {} 41 | if args.prior != 'gaussian': 42 | prior_parameters['n_mc_samples'] = 1 43 | 44 | if args.prior == 'mixtgauss': 45 | prior_parameters['sigma_1'] = 0.02 46 | prior_parameters['sigma_2'] = 0.2 47 | prior_parameters['pi'] = 0.5 48 | if args.prior == 'conjugate': 49 | prior_parameters['mu_0'] = 0. 50 | prior_parameters['kappa_0'] = 3. 51 | prior_parameters['alpha_0'] = .5 52 | prior_parameters['beta_0'] = .5 53 | if args.prior == 'conjugate_known_mean': 54 | prior_parameters['alpha_0'] = .5 55 | prior_parameters['beta_0'] = .5 56 | prior_parameters['mean'] = 0. 
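# For reference (not used by the script): with the parameters above, the
# 'mixtgauss' prior is presumably the two-component scale mixture of zero-mean
# Gaussians of Blundell et al., drawing scale sigma_1 with probability pi and
# sigma_2 otherwise. A minimal sketch of its density (pi_mix corresponds to
# prior_parameters['pi']):
def scale_mixture_pdf(w, pi_mix=0.5, sigma_1=0.02, sigma_2=0.2):
    gauss = lambda x, s: np.exp(-x ** 2 / (2 * s ** 2)) / (s * np.sqrt(2 * np.pi))
    return pi_mix * gauss(w, sigma_1) + (1 - pi_mix) * gauss(w, sigma_2)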
57 | 58 | torch.manual_seed(args.seed) 59 | if args.cuda: 60 | torch.cuda.manual_seed(args.seed) 61 | 62 | 63 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 64 | # train_loader = torch.utils.data.DataLoader( 65 | # datasets.MNIST('~/datasets', train=True, download=True, 66 | # transform=transforms.Compose([ 67 | # transforms.ToTensor(), 68 | # transforms.Normalize((0.1307,), (0.3081,)) 69 | # ])), 70 | # batch_size=args.batch_size, shuffle=True, **kwargs) 71 | # test_loader = torch.utils.data.DataLoader( 72 | # datasets.MNIST('~/datasets', train=False, transform=transforms.Compose([ 73 | # transforms.ToTensor(), 74 | # transforms.Normalize((0.1307,), (0.3081,)) 75 | # ])), 76 | # batch_size=args.batch_size, shuffle=True, **kwargs) 77 | 78 | transform_train = transforms.Compose([ 79 | #transforms.RandomCrop(28), 80 | transforms.ToTensor(), 81 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 82 | ]) 83 | 84 | transform_test = transforms.Compose([ 85 | #transforms.RandomCrop(28), 86 | transforms.ToTensor(), 87 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 88 | ]) 89 | 90 | trainset = datasets.CIFAR10(root='~/datasets', train=True, download=True, transform=transform_train) 91 | train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2) 92 | 93 | testset = datasets.CIFAR10(root='~/datasets', train=False, download=True, transform=transform_test) 94 | test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) 95 | 96 | 97 | 98 | 99 | class Net(nn.Module): 100 | def __init__(self): 101 | super(Net, self).__init__() 102 | self.conv11 = nn.Conv2d(3, 32, kernel_size=3)#, padding=2) 103 | self.conv12 = nn.Conv2d(32, 32, kernel_size=3)#, padding=2) 104 | 105 | self.conv21 = nn.Conv2d(32, 64, kernel_size=3)#, padding=2) 106 | self.conv22 = nn.Conv2d(64, 64, kernel_size=3)#, padding=2) 107 | #self.conv2 = nn.Conv2d(32, 64, kernel_size=3) 108 | 109 | self.fc1 = nn.Linear(5*5*64, 256) 110 | self.fc2 = nn.Linear(256, 256) 111 | self.fc3 = nn.Linear(256, 10) 112 | self.bn1 = nn.BatchNorm2d(32) 113 | self.bn2 = nn.BatchNorm2d(64) 114 | 115 | def forward(self, x): 116 | x = F.relu(self.conv11(x)) 117 | x = F.relu(self.conv12(x)) 118 | x = F.max_pool2d(x, 2) 119 | 120 | x = F.relu(self.conv21(x)) 121 | x = F.relu(self.conv22(x)) 122 | x = F.max_pool2d(x, 2) 123 | 124 | #x = F.max_pool2d(F.relu(self.conv1(x)), 2) 125 | #x = self.bn1(x) 126 | #x = F.max_pool2d(F.relu(self.conv2(x)), 2) 127 | #x = self.bn2(x) 128 | 129 | x = x.view(x.size(0),-1) 130 | x = F.relu(self.fc1(x)) 131 | x = F.relu(self.fc2(x)) 132 | return F.log_softmax(F.relu(self.fc3(x))) 133 | 134 | class MLPNet(nn.Module): 135 | def __init__(self): 136 | super(MLPNet, self).__init__() 137 | self.fc1 = nn.Linear(3*32*32, 256) 138 | #self.fc1 = nn.Linear(1*28*28, 256) 139 | self.bn1 = nn.BatchNorm1d(256) 140 | self.fc2 = nn.Linear(256, 256) 141 | self.bn2 = nn.BatchNorm1d(256) 142 | self.fc3 = nn.Linear(256, 10) 143 | 144 | def forward(self, x): 145 | x = x.view(x.size(0), -1) 146 | x = F.relu(self.fc1(x)) 147 | #x = self.bn1(x) 148 | x = F.relu(self.fc2(x)) 149 | #x = self.bn2(x) 150 | x = self.fc3(x) 151 | return F.log_softmax(x) 152 | 153 | class LeNet(nn.Module): 154 | def __init__(self): 155 | super(LeNet, self).__init__() 156 | self.conv1 = nn.Conv2d(3, 6, 5) 157 | self.conv2 = nn.Conv2d(6, 16, 5) 158 | self.fc1 = nn.Linear(16*5*5, 120) 159 | self.fc2 = nn.Linear(120, 84) 160 | self.fc3 = 
nn.Linear(84, 10) 161 | 162 | def forward(self, x): 163 | out = F.relu(self.conv1(x)) 164 | out = F.max_pool2d(out, 2) 165 | out = F.relu(self.conv2(out)) 166 | out = F.max_pool2d(out, 2) 167 | out = out.view(out.size(0), -1) 168 | out = F.relu(self.fc1(out)) 169 | out = F.relu(self.fc2(out)) 170 | out = self.fc3(out) 171 | return F.log_softmax(out) 172 | 173 | model = Net() 174 | var_model = pyvarinf.Variationalize(model) 175 | var_model.set_prior(args.prior, **prior_parameters) 176 | if args.cuda: 177 | var_model.cuda() 178 | 179 | optimizer = optim.Adam(var_model.parameters(), lr=args.lr) 180 | #optimizer = optim.SGD(var_model.parameters(), lr=args.lr) 181 | 182 | 183 | 184 | def train(epoch): 185 | var_model.train() 186 | for batch_idx, (data, target) in enumerate(train_loader): 187 | if args.cuda: 188 | data, target = data.cuda(), target.cuda() 189 | optimizer.zero_grad() 190 | output = var_model(data) 191 | loss_error = F.nll_loss(output, target) 192 | loss_prior = var_model.prior_loss() / len(train_loader.dataset) 193 | loss = loss_error + loss_prior 194 | loss.backward() 195 | optimizer.step() 196 | if batch_idx % args.log_interval == 0: 197 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tLoss error: {:.6f}\tLoss weights: {:.6f}'.format( 198 | epoch, batch_idx * len(data), len(train_loader.dataset), 199 | 100. * batch_idx / len(train_loader), loss.item(), loss_error.item(), loss_prior.item())) 200 | 201 | 202 | def compressionscores(): 203 | var_model.train() 204 | loss_prior = var_model.prior_loss().item() 205 | loss_error = 0 206 | for batch_idx, (data, target) in enumerate(train_loader): 207 | if args.cuda: 208 | data, target = data.cuda(), target.cuda() 209 | output = var_model(data) 210 | loss_error += F.nll_loss(output, target, size_average=False).item() 211 | 212 | loss = loss_error + loss_prior 213 | print('Compression scores: DL: {:.0f}\tDL error: {:.0f}\tDL weights: {:.6f}\tCompRate: {:.4f}'.format( 214 | loss, loss_error, loss_prior, loss / (len(train_loader.dataset) * np.log(10)))) 215 | 216 | def test(epoch): 217 | var_model.eval() 218 | test_loss = 0 219 | correct = 0 220 | for data, target in test_loader: 221 | with torch.no_grad(): 222 | if args.cuda: 223 | data, target = data.cuda(), target.cuda() 224 | data, target = Variable(data), Variable(target) 225 | output = var_model(data) 226 | test_loss += F.nll_loss(output, target).item() 227 | pred = output.max(1)[1] # get the index of the max log-probability 228 | correct += pred.eq(target).cpu().sum().item() 229 | 230 | test_loss = test_loss 231 | test_loss /= len(test_loader) # loss function already averages over batch size 232 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format( 233 | test_loss, correct, len(test_loader.dataset), 234 | 100. * correct / len(test_loader.dataset))) 235 | 236 | compressionscores() 237 | for epoch in range(1, args.epochs + 1): 238 | train(epoch) 239 | test(epoch) 240 | compressionscores() 241 | --------------------------------------------------------------------------------
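A note on reading the output of var_cifar10.py: compressionscores() reports the variational description length as the sum of the data term (negative log-likelihood summed over the training set, in nats) and the prior term (prior_loss(), an estimate of the KL divergence between the variational posterior and the prior). The CompRate it prints divides this total by N * ln(10), i.e. by the cost of encoding the N training labels with a uniform code over the 10 classes. An example invocation (the hyperparameter values are illustrative, not taken from the paper):

    python var_cifar10.py --prior mixtgauss --lr 0.001 --epochs 100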