├── README.md
├── VRAE.py
├── dataset.py
└── train_VRAE.py

/README.md:
--------------------------------------------------------------------------------
# chainer-Variational-Recurrent-Autoencoder

A Chainer implementation of the Variational Recurrent Autoencoder (VRAE): a recurrent encoder maps an input sequence to a latent code z, and a recurrent decoder reconstructs the sequence from z. `train_VRAE.py` contains training setups for a MIDI piano-roll dataset and for BVH motion-capture data.

## References

* Fabius & van Amersfoort, "Variational Recurrent Auto-Encoders": http://arxiv.org/pdf/1412.6581.pdf
* https://github.com/y0ast/Variational-Recurrent-Autoencoder

--------------------------------------------------------------------------------
/VRAE.py:
--------------------------------------------------------------------------------
import os
import time
import numpy as np


from chainer import cuda, Variable, function, FunctionSet, optimizers
from chainer import functions as F


class VRAE(FunctionSet):
    def __init__(self, **layers):
        super(VRAE, self).__init__(**layers)

    def softplus(self, x):
        return F.log(F.exp(x) + 1)

    def binary_cross_entropy(self, y, t):
        # Computed on raw arrays, so this value does not backpropagate;
        # it is only useful as a monitoring metric.
        ya = y.data
        ta = t.data
        d = -1 * np.sum(ta * np.log(ya) + (1 - ta) * np.log(1 - ya))
        return Variable(np.array(d).astype(np.float32))

    def forward_one_step(self, x_data, state, continuous=True, nonlinear_q='tanh',
                         nonlinear_p='tanh', output_f='sigmoid', gpu=-1):

        output = np.zeros(x_data.shape).astype(np.float32)

        nonlinear = {'sigmoid': F.sigmoid, 'tanh': F.tanh, 'softplus': self.softplus, 'relu': F.relu}
        nonlinear_f_q = nonlinear[nonlinear_q]
        nonlinear_f_p = nonlinear[nonlinear_p]

        output_a_f = nonlinear[output_f]

        # Recognition model: run the whole sequence through the encoder RNN to compute q(z|x).
        for i in range(x_data.shape[0]):
            x_in_t = Variable(x_data[i].reshape((1, x_data.shape[1])))
            hidden_q_t = nonlinear_f_q(self.recog_in_h(x_in_t) + self.recog_h_h(state['recog_h']))
            state['recog_h'] = hidden_q_t

        q_mean = self.recog_mean(state['recog_h'])
        q_log_sigma = 0.5 * self.recog_log_sigma(state['recog_h'])

        # Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I).
        eps = np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32)
        if gpu >= 0:
            eps = cuda.to_gpu(eps)
        eps = Variable(eps)
        z = q_mean + F.exp(q_log_sigma) * eps

        # Generative model: decode p(x|z) one step at a time.
        h0 = nonlinear_f_p(self.z(z))
        out = self.output(h0)
        x_0 = output_a_f(out)
        state['gen_h'] = h0
        if gpu >= 0:
            output[0] = cuda.to_cpu(x_0.data)
        else:
            output[0] = x_0.data

        if continuous == True:
            rec_loss = F.mean_squared_error(x_0, Variable(x_data[0].reshape((1, x_data.shape[1]))))
        else:
            rec_loss = F.sigmoid_cross_entropy(out, Variable(x_data[0].reshape((1, x_data.shape[1])).astype(np.int32)))

        x_t = x_0

        for i in range(1, x_data.shape[0]):
            h_t_1 = nonlinear_f_p(self.gen_in_h(x_t) + self.gen_h_h(state['gen_h']))
            x_t_1 = self.output(h_t_1)
            state['gen_h'] = h_t_1

            if continuous == True:
                output_t = output_a_f(x_t_1)
                rec_loss += F.mean_squared_error(output_t, Variable(x_data[i].reshape((1, x_data.shape[1]))))
            else:
                out = x_t_1
                rec_loss += F.sigmoid_cross_entropy(out, Variable(x_data[i].reshape((1, x_data.shape[1])).astype(np.int32)))
                output_t = output_a_f(x_t_1)

            # Feed the activated output back in as the next decoder input.
            x_t = output_t

            if gpu >= 0:
                output[i] = cuda.to_cpu(output_t.data)
            else:
                output[i] = output_t.data

        # KL-style regularizer on q(z|x); note the small weight (-0.0005) in place of the usual -0.5.
        KLD = -0.0005 * F.sum(1 + q_log_sigma - q_mean**2 - F.exp(q_log_sigma))

        return output, rec_loss, KLD, state

    def generate_z_x(self, seq_length_per_z, sample_z, nonlinear_q='tanh', nonlinear_p='tanh',
                     output_f='sigmoid', gpu=-1):

        output = np.zeros((seq_length_per_z * sample_z.shape[0], self.recog_in_h.W.shape[1]))

        nonlinear = {'sigmoid': F.sigmoid, 'tanh': F.tanh, 'softplus': self.softplus, 'relu': F.relu}
        nonlinear_f_q = nonlinear[nonlinear_q]
        nonlinear_f_p = nonlinear[nonlinear_p]

        output_a_f = nonlinear[output_f]

        state = {}

        for epoch in xrange(sample_z.shape[0]):
            gen_out = np.zeros((seq_length_per_z, output.shape[1]))
            z = Variable(sample_z[epoch].reshape((1, sample_z.shape[1])))

            # Decode p(x|z) starting from the sampled code.
            h0 = nonlinear_f_p(self.z(z))
            x_gen_0 = output_a_f(self.output(h0))
            state['gen_h'] = h0
            if gpu >= 0:
                gen_out[0] = cuda.to_cpu(x_gen_0.data)
            else:
                gen_out[0] = x_gen_0.data

            x_t_1 = x_gen_0

            for i in range(1, seq_length_per_z):
                hidden_p_t = nonlinear_f_p(self.gen_in_h(x_t_1) + self.gen_h_h(state['gen_h']))
                output_t = output_a_f(self.output(hidden_p_t))
                if gpu >= 0:
                    gen_out[i] = cuda.to_cpu(output_t.data)
                else:
                    gen_out[i] = output_t.data
                state['gen_h'] = hidden_p_t
                x_t_1 = output_t

            output[epoch * seq_length_per_z:(epoch + 1) * seq_length_per_z, :] = gen_out

        return output


def make_initial_state(n_units, state_pattern, Train=True):
    return {name: Variable(np.zeros((1, n_units), dtype=np.float32), volatile=not Train)
            for name in state_pattern}
--------------------------------------------------------------------------------
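The core of `forward_one_step` above is the standard VAE reparameterization step plus a Gaussian KL penalty. Below is a minimal NumPy sketch of just that piece, with made-up values for a 2-dimensional latent space; note that the Chainer code halves the linear layer's output before using it as `q_log_sigma` and weights its KL term with -0.0005 rather than the textbook -0.5, so this is the generic form, not a line-for-line copy.

```python
import numpy as np

np.random.seed(0)

# Toy encoder outputs for a 2-dimensional latent space (n_z = 2 in the midi setup).
q_mean = np.array([[0.3, -1.2]], dtype=np.float32)
q_log_sigma = np.array([[-0.5, 0.1]], dtype=np.float32)   # log of the standard deviation

# Reparameterization trick: z = mu + exp(log_sigma) * eps, eps ~ N(0, I),
# mirroring `z = q_mean + F.exp(q_log_sigma) * eps` in forward_one_step.
eps = np.random.normal(0, 1, q_log_sigma.shape).astype(np.float32)
z = q_mean + np.exp(q_log_sigma) * eps

# Textbook Gaussian KL divergence KL(q(z|x) || N(0, I)):
#   -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
kld = -0.5 * np.sum(1 + 2 * q_log_sigma - q_mean ** 2 - np.exp(2 * q_log_sigma))

print z, kld
```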
/dataset.py:
--------------------------------------------------------------------------------
import gzip
import os

import numpy as np
import cPickle as pickle
import six
from six.moves.urllib import request
import scipy
from scipy import io
from sklearn import decomposition


'''

BVH

'''
def load_bvh_data(file_path):
    # Parse the MOTION section of a BVH file and return
    # (frames, frame_time, motion_data) with motion_data of shape (frames, dim).
    frames = 0
    frame_time = 0.0

    with open(file_path, "rb") as f:
        lines = f.readlines()

    # Skip the skeleton definition and find the MOTION section.
    n = 0
    while lines[n].find('MOTION') < 0:
        n += 1

    assert n < len(lines)

    # "Frames: <N>"
    n += 1
    frames = int(lines[n].split(" ")[-1].replace('\n', ''))

    # "Frame Time: <dt>"
    n += 1
    frame_time = float(lines[n].split(" ")[-1].replace('\n', ''))

    # Motion data: one line of space-separated channel values per frame.
    n += 1
    for i in range(frames):
        motion = lines[n + i].split(' ')

        if i == 0:
            dim = len(motion)
            motion_data = np.zeros(frames * dim, dtype=np.float32).reshape((frames, dim))

        for j in range(dim):
            motion_data[i, j] = float(motion[j].replace('\n', ''))

    return frames, frame_time, motion_data
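
# Usage sketch (illustrative): load a capture and min-max normalize each channel into
# [0, 1], as the bvh branch of train_VRAE.py does. 'dataset/bvh/sample.bvh' is an
# assumed example path, not a file shipped with this repository.
#
#   frames, frame_time, motion = load_bvh_data('dataset/bvh/sample.bvh')
#   lo, hi = motion.min(axis=0), motion.max(axis=0)
#   norm_motion = (motion - lo) / (hi - lo)   # a channel that never moves divides by zero here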


'''

MNIST

'''

def load_mnist(images, labels, num):
    dim = 784
    data = np.zeros(num * dim, dtype=np.uint8).reshape((num, dim))
    target = np.zeros(num, dtype=np.uint8).reshape((num, ))

    with gzip.open(images, 'rb') as f_images,\
            gzip.open(labels, 'rb') as f_labels:
        f_images.read(16)   # skip the IDX headers
        f_labels.read(8)
        for i in six.moves.range(num):
            target[i] = ord(f_labels.read(1))
            for j in six.moves.range(dim):
                data[i, j] = ord(f_images.read(1))

    return data, target


def download_mnist_data(data_dir):

    parent = 'http://yann.lecun.com/exdb/mnist'
    train_images = 'train-images-idx3-ubyte.gz'
    train_labels = 'train-labels-idx1-ubyte.gz'
    test_images = 't10k-images-idx3-ubyte.gz'
    test_labels = 't10k-labels-idx1-ubyte.gz'
    num_train = 60000
    num_test = 10000

    print('Downloading {:s}...'.format(train_images))
    request.urlretrieve('{:s}/{:s}'.format(parent, train_images), train_images)
    print('Done')
    print('Downloading {:s}...'.format(train_labels))
    request.urlretrieve('{:s}/{:s}'.format(parent, train_labels), train_labels)
    print('Done')
    print('Downloading {:s}...'.format(test_images))
    request.urlretrieve('{:s}/{:s}'.format(parent, test_images), test_images)
    print('Done')
    print('Downloading {:s}...'.format(test_labels))
    request.urlretrieve('{:s}/{:s}'.format(parent, test_labels), test_labels)
    print('Done')

    print('Converting training data...')
    data_train, target_train = load_mnist(train_images, train_labels, num_train)
    print('Done')
    print('Converting test data...')
    data_test, target_test = load_mnist(test_images, test_labels, num_test)
    mnist = {}
    mnist['data'] = np.append(data_train, data_test, axis=0)
    mnist['target'] = np.append(target_train, target_test, axis=0)

    print('Done')
    print('Save output...')
    if not os.path.exists('%s/mnist' % data_dir):
        os.makedirs('%s/mnist' % data_dir)
    with open('%s/mnist/mnist.pkl' % data_dir, 'wb') as output:
        six.moves.cPickle.dump(mnist, output, -1)
    print('Done')
    print('Convert completed')


def load_mnist_data(data_dir):
    if not os.path.exists('%s/mnist/mnist.pkl' % data_dir):
        download_mnist_data(data_dir)
    with open('%s/mnist/mnist.pkl' % data_dir, 'rb') as mnist_pickle:
        mnist = six.moves.cPickle.load(mnist_pickle)
    return mnist

'''

SVHN

'''

def download_svhn_data(data_dir):

    parent = 'http://ufldl.stanford.edu/housenumbers'
    train_images = 'train_32x32.mat'
    test_images = 'test_32x32.mat'

    data_path = data_dir + "/SVHN/"
    if not os.path.exists(data_path):
        os.mkdir(data_path)

    print('Downloading {:s}...'.format(train_images))
    request.urlretrieve('{:s}/{:s}'.format(parent, train_images), data_path + train_images)
    print('Done')
    print('Downloading {:s}...'.format(test_images))
    request.urlretrieve('{:s}/{:s}'.format(parent, test_images), data_path + test_images)
    print('Done')


def svhn_pickle_checker(data_dir):
    if os.path.exists(data_dir + '/SVHN/train_x.pkl') and os.path.exists(data_dir + '/SVHN/train_y.pkl') \
            and os.path.exists(data_dir + '/SVHN/test_x.pkl') and os.path.exists(data_dir + '/SVHN/test_y.pkl'):
        return 1
    else:
        return 0


def load_svhn(data_dir, toFloat=True, binarize_y=True, dtype=np.float32, pca=False, n_components=1000):

    # if svhn_pickle_checker(data_dir) == 1:
    #     print "load from pickle file."
    #     train_x = pickle.load(open(data_dir + '/SVHN/train_x.pkl'))
    #     train_y = pickle.load(open(data_dir + '/SVHN/train_y.pkl'))
    #     test_x = pickle.load(open(data_dir + '/SVHN/test_x.pkl'))
    #     test_y = pickle.load(open(data_dir + '/SVHN/test_y.pkl'))
    #
    #     return train_x, train_y, test_x, test_y

    if not os.path.exists(data_dir + '/SVHN/train_32x32.mat') or not os.path.exists(data_dir + '/SVHN/test_32x32.mat'):
        download_svhn_data(data_dir)

    train = scipy.io.loadmat(data_dir + '/SVHN/train_32x32.mat')
    train_x = train['X'].swapaxes(0, 1).T.reshape((train['X'].shape[3], -1))
    train_y = train['y'].reshape((-1)) - 1
    test = scipy.io.loadmat(data_dir + '/SVHN/test_32x32.mat')
    test_x = test['X'].swapaxes(0, 1).T.reshape((test['X'].shape[3], -1))
    test_y = test['y'].reshape((-1)) - 1
    if toFloat:
        train_x = train_x.astype(dtype) / 256.
        test_x = test_x.astype(dtype) / 256.
    if binarize_y:
        train_y = binarize_labels(train_y)
        test_y = binarize_labels(test_y)

    # if pca:
    #     x_stack = np.vstack([train_x, test_x])
    #     pca = decomposition.PCA(n_components=n_components)
    #     pca.whiten = True
    #     # pca.fit(x_stack)
    #     # x_pca = pca.transform(x_stack)
    #     x_pca = pca.fit_transform(x_stack)
    #     train_x = x_pca[:train_x.shape[0], :]
    #     test_x = x_pca[train_x.shape[0]:, :]
    #
    #     with open('%s/SVHN/pca.pkl' % data_dir, "wb") as f:
    #         pickle.dump(pca, f)
    #     with open('%s/SVHN/train_x.pkl' % data_dir, "wb") as f:
    #         pickle.dump(train_x, f)
    #     with open('%s/SVHN/train_y.pkl' % data_dir, "wb") as f:
    #         pickle.dump(train_y, f)
    #     with open('%s/SVHN/test_x.pkl' % data_dir, "wb") as f:
    #         pickle.dump(test_x, f)
    #     with open('%s/SVHN/test_y.pkl' % data_dir, "wb") as f:
    #         pickle.dump(test_y, f)

    return train_x, train_y, test_x, test_y


def binarize_labels(y, n_classes=10):
    new_y = np.zeros((y.shape[0], n_classes))
    for i in range(y.shape[0]):
        new_y[i, y[i]] = 1
    return new_y.astype(np.float32)


'''

Shakespeare

'''
def load_shakespeare(data_dir):
    vocab = {}
    words = open('%s/tinyshakespeare/input.txt' % data_dir, 'rb').read()
    words = list(words)
    dataset = np.ndarray((len(words), ), dtype=np.int32)
    for i, word in enumerate(words):
        if word not in vocab:
            vocab[word] = len(vocab)
        dataset[i] = vocab[word]

    return dataset, words, vocab


'''

music

'''

def load_midi_data(data_dir):
    import midi.utils as utils

    midi_data = utils.midiread(data_dir, dt=0.5)

    return midi_data.piano_roll
--------------------------------------------------------------------------------
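`train_VRAE.py` (next) slices the piano roll returned by `load_midi_data` into fixed-length subsequences with `np.vsplit`. The following self-contained sketch reproduces that batching with random stand-in data, so it runs without the `midi` package installed; the 88-column width is only illustrative.

```python
import numpy as np

# Stand-in for dataset.load_midi_data(...): 120 frames of an (illustrative) 88-key piano roll.
piano_roll = (np.random.rand(120, 88) > 0.9).astype(np.float32)

train_x = piano_roll[:120]
n_batch = 6
seq_length = train_x.shape[0] / n_batch   # 20 frames per subsequence (Python 2 integer division)

split_x = np.vsplit(train_x, n_batch)     # six arrays, each of shape (20, 88)
print len(split_x), split_x[0].shape
```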
/train_VRAE.py:
--------------------------------------------------------------------------------
#%%
import time
import math
import sys
import argparse
import cPickle as pickle
import copy
import os
import six

import numpy as np
from chainer import cuda, Variable, FunctionSet, optimizers
import chainer.functions as F
from VRAE import VRAE, make_initial_state

import dataset

parser = argparse.ArgumentParser()
parser.add_argument('--data_path', type=str, default="dataset")
parser.add_argument('--output_dir', type=str, default="model")
parser.add_argument('--dataset', type=str, default="midi")
parser.add_argument('--init_from', type=str, default="")
parser.add_argument('--clip_grads', type=int, default=5)
parser.add_argument('--gpu', type=int, default=-1)

args = parser.parse_args()

if not os.path.exists(args.output_dir):
    os.mkdir(args.output_dir)


if args.dataset == 'midi':
    midi = dataset.load_midi_data('%s/midi/sample.mid' % args.data_path)
    train_x = midi[:120].astype(np.float32)

    n_x = train_x.shape[1]
    n_hidden = [500]
    n_z = 2
    n_y = n_x

    frames = train_x.shape[0]
    n_batch = 6
    seq_length = frames / n_batch

    split_x = np.vsplit(train_x, n_batch)

    n_epochs = 500
    continuous = False


if args.dataset == 'bvh':
    frames, frame_time, motion_data = dataset.load_bvh_data("%s/bvh/sample.bvh" % args.data_path)
    max_motion = np.max(motion_data, axis=0)
    min_motion = np.min(motion_data, axis=0)

    norm_motion_data = (motion_data - min_motion) / (max_motion - min_motion)
    train_x = norm_motion_data
    train_y = norm_motion_data

    n_x = train_x.shape[1]
    n_hidden = [250]
    n_z = 10
    n_y = n_x

    n_online = 10
    n_batch = train_x.shape[0] / n_online

    # Drop trailing frames so the data splits evenly into n_batch subsequences.
    if train_x.shape[0] % n_online != 0:
        reduced_sample = train_x.shape[0] % n_online
        train_x = train_x[:train_x.shape[0] - reduced_sample]

    seq_length = n_online
    split_x = np.vsplit(train_x, n_batch)

    n_epochs = 500
    continuous = True


n_hidden_recog = n_hidden
n_hidden_gen = n_hidden
n_layers_recog = len(n_hidden_recog)
n_layers_gen = len(n_hidden_gen)

layers = {}

# Recognition model.
rec_layer_sizes = [(train_x.shape[1], n_hidden_recog[0])]
rec_layer_sizes += zip(n_hidden_recog[:-1], n_hidden_recog[1:])
rec_layer_sizes += [(n_hidden_recog[-1], n_z)]

layers['recog_in_h'] = F.Linear(train_x.shape[1], n_hidden_recog[0], nobias=True)
layers['recog_h_h'] = F.Linear(n_hidden_recog[0], n_hidden_recog[0])

layers['recog_mean'] = F.Linear(n_hidden_recog[-1], n_z)
layers['recog_log_sigma'] = F.Linear(n_hidden_recog[-1], n_z)

# Generating model.
gen_layer_sizes = [(n_z, n_hidden_gen[0])]
gen_layer_sizes += zip(n_hidden_gen[:-1], n_hidden_gen[1:])
gen_layer_sizes += [(n_hidden_gen[-1], train_x.shape[1])]

layers['z'] = F.Linear(n_z, n_hidden_gen[0])
layers['gen_in_h'] = F.Linear(train_x.shape[1], n_hidden_gen[0], nobias=True)
layers['gen_h_h'] = F.Linear(n_hidden_gen[0], n_hidden_gen[0])

layers['output'] = F.Linear(n_hidden_gen[-1], train_x.shape[1])
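
# How these layers are wired inside VRAE.forward_one_step (with the tanh/sigmoid
# choices passed in below):
#   encoder step:  h_t     = tanh(recog_in_h(x_t) + recog_h_h(h_{t-1}))
#   latent code:   mu      = recog_mean(h_T)
#                  log_sig = 0.5 * recog_log_sigma(h_T)
#                  z       = mu + exp(log_sig) * eps,  eps ~ N(0, I)
#   decoder init:  h'_0    = tanh(layers['z'](z))
#   decoder step:  h'_t    = tanh(gen_in_h(x'_{t-1}) + gen_h_h(h'_{t-1}))
#   emission:      x'_t    = sigmoid(output(h'_t))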

if args.init_from == "":
    model = VRAE(**layers)
else:
    model = pickle.load(open(args.init_from))

# state pattern
state_pattern = ['recog_h', 'gen_h']

if args.gpu >= 0:
    cuda.init(args.gpu)
    model.to_gpu()


# use Adam
optimizer = optimizers.Adam()
optimizer.setup(model.collect_parameters())

total_losses = np.zeros(n_epochs, dtype=np.float32)

for epoch in xrange(1, n_epochs + 1):
    print 'epoch', epoch

    t1 = time.time()
    total_rec_loss = 0.0
    total_kl_loss = 0.0
    total_loss = 0.0
    outputs = np.zeros(train_x.shape, dtype=np.float32)
    # state = make_initial_state(n_hidden_recog[0], state_pattern)
    for i in xrange(n_batch):
        # Reset the recurrent states at the start of every subsequence.
        state = make_initial_state(n_hidden_recog[0], state_pattern)
        x_batch = split_x[i]

        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)

        output, rec_loss, kl_loss, state = model.forward_one_step(
            x_batch, state, continuous,
            nonlinear_q='tanh', nonlinear_p='tanh', output_f='sigmoid', gpu=args.gpu)

        outputs[i * seq_length:(i + 1) * seq_length, :] = output

        loss = rec_loss + kl_loss
        total_loss += loss
        total_rec_loss += rec_loss
        total_kl_loss += kl_loss
        total_losses[epoch - 1] = total_loss.data

        optimizer.zero_grads()
        loss.backward()
        loss.unchain_backward()
        optimizer.clip_grads(args.clip_grads)
        optimizer.update()

    saved_output = outputs

    print "{}/{}, train_loss = {}, total_rec_loss = {}, time = {}".format(
        epoch, n_epochs, total_loss.data, total_rec_loss.data, time.time() - t1)

    if epoch % 100 == 0:
        model_path = "%s/VRAE_%s_%d.pkl" % (args.output_dir, args.dataset, epoch)
        with open(model_path, "wb") as f:
            pickle.dump(copy.deepcopy(model).to_cpu(), f)
--------------------------------------------------------------------------------
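`generate_z_x` is defined in `VRAE.py` but never called by the training script. Below is a hedged sketch of how a saved checkpoint might be used to decode sequences from random latent codes; the checkpoint name `model/VRAE_midi_500.pkl` and the 20-step length are illustrative values, and the snippet assumes the same Python 2 / Chainer 1.x environment as the rest of the repository.

```python
import numpy as np
import cPickle as pickle

from VRAE import VRAE  # the class must be importable for unpickling

n_z = 2                                                # must match the training configuration
model = pickle.load(open('model/VRAE_midi_500.pkl'))   # illustrative checkpoint name

# Decode four random latent codes into 20-step sequences each.
sample_z = np.random.normal(0, 1, (4, n_z)).astype(np.float32)
generated = model.generate_z_x(20, sample_z)
print generated.shape                                  # (80, n_x)
```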