├── .DS_Store ├── .gitignore ├── lib ├── datasets.py ├── dcn.py ├── dec.py ├── denoisingAutoencoder.py ├── idec.py ├── ops.py ├── stackedDAE.py └── utils.py ├── logs └── mnist.log ├── run-experiment.qsub.sh ├── run_experiment.py ├── test_dcn.py ├── test_dec.py ├── test_idec.py ├── test_sdae.py └── test_sdae_for_dcn.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eelxpeng/dec-pytorch/5ef10376f95f5d47fd34025b3dc0d7065a5d75bd/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dataset/ 2 | model/ 3 | *.pyc 4 | -------------------------------------------------------------------------------- /lib/datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import errno 4 | import numpy as np 5 | import torch 6 | import torch.utils.data as data 7 | 8 | class MNIST(data.Dataset): 9 | """`MNIST `_ Dataset. 10 | Args: 11 | root (string): Root directory of dataset where ``processed/training.pt`` 12 | and ``processed/test.pt`` exist. 13 | train (bool, optional): If True, creates dataset from ``training.pt``, 14 | otherwise from ``test.pt``. 15 | download (bool, optional): If true, downloads the dataset from the internet and 16 | puts it in root directory. If dataset is already downloaded, it is not 17 | downloaded again. 18 | transform (callable, optional): A function/transform that takes in an PIL image 19 | and returns a transformed version. E.g, ``transforms.RandomCrop`` 20 | target_transform (callable, optional): A function/transform that takes in the 21 | target and transforms it. 22 | """ 23 | urls = [ 24 | 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', 25 | 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 26 | 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', 27 | 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 28 | ] 29 | raw_folder = 'raw' 30 | processed_folder = 'processed' 31 | training_file = 'training.pt' 32 | test_file = 'test.pt' 33 | classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', 34 | '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine'] 35 | class_to_idx = {_class: i for i, _class in enumerate(classes)} 36 | 37 | @property 38 | def targets(self): 39 | if self.train: 40 | return self.train_labels 41 | else: 42 | return self.test_labels 43 | 44 | def __init__(self, root, train=True, transform=None, target_transform=None, download=False): 45 | self.root = os.path.expanduser(root) 46 | self.transform = transform 47 | self.target_transform = target_transform 48 | self.train = train # training set or test set 49 | self.use_cuda = torch.cuda.is_available() 50 | 51 | if download: 52 | self.download() 53 | 54 | if not self._check_exists(): 55 | raise RuntimeError('Dataset not found.' 
+ 56 | ' You can use download=True to download it') 57 | 58 | if self.train: 59 | self.train_data, self.train_labels = torch.load( 60 | os.path.join(self.root, self.processed_folder, self.training_file)) 61 | self.train_data = self.train_data.view(self.train_data.size(0), -1).float()*0.02 62 | # self.train_data = self.train_data.view(self.train_data.size(0), -1).float()/255 63 | self.train_labels = self.train_labels.int() 64 | if self.use_cuda: 65 | self.train_data = self.train_data.cuda() 66 | self.train_labels = self.train_labels.cuda() 67 | else: 68 | self.test_data, self.test_labels = torch.load( 69 | os.path.join(self.root, self.processed_folder, self.test_file)) 70 | self.test_data = self.test_data.view(self.test_data.size(0), -1).float()*0.02 71 | # self.test_data = self.test_data.view(self.test_data.size(0), -1).float()/255 72 | self.test_labels = self.test_labels.int() 73 | if self.use_cuda: 74 | self.test_data = self.test_data.cuda() 75 | self.test_labels = self.test_labels.cuda() 76 | 77 | def __getitem__(self, index): 78 | """ 79 | Args: 80 | index (int): Index 81 | Returns: 82 | tuple: (image, target) where target is index of the target class. 83 | """ 84 | if self.train: 85 | img, target = self.train_data[index], self.train_labels[index] 86 | else: 87 | img, target = self.test_data[index], self.test_labels[index] 88 | 89 | return img, target 90 | 91 | def __len__(self): 92 | if self.train: 93 | return len(self.train_data) 94 | else: 95 | return len(self.test_data) 96 | 97 | def _check_exists(self): 98 | return os.path.exists(os.path.join(self.root, self.processed_folder, self.training_file)) and \ 99 | os.path.exists(os.path.join(self.root, self.processed_folder, self.test_file)) 100 | 101 | def download(self): 102 | """Download the MNIST data if it doesn't exist in processed_folder already.""" 103 | from six.moves import urllib 104 | import gzip 105 | 106 | if self._check_exists(): 107 | return 108 | 109 | # download files 110 | try: 111 | os.makedirs(os.path.join(self.root, self.raw_folder)) 112 | os.makedirs(os.path.join(self.root, self.processed_folder)) 113 | except OSError as e: 114 | if e.errno == errno.EEXIST: 115 | pass 116 | else: 117 | raise 118 | 119 | for url in self.urls: 120 | print('Downloading ' + url) 121 | data = urllib.request.urlopen(url) 122 | filename = url.rpartition('/')[2] 123 | file_path = os.path.join(self.root, self.raw_folder, filename) 124 | with open(file_path, 'wb') as f: 125 | f.write(data.read()) 126 | with open(file_path.replace('.gz', ''), 'wb') as out_f, \ 127 | gzip.GzipFile(file_path) as zip_f: 128 | out_f.write(zip_f.read()) 129 | os.unlink(file_path) 130 | 131 | # process and save as torch files 132 | print('Processing...') 133 | 134 | training_set = ( 135 | read_image_file(os.path.join(self.root, self.raw_folder, 'train-images-idx3-ubyte')), 136 | read_label_file(os.path.join(self.root, self.raw_folder, 'train-labels-idx1-ubyte')) 137 | ) 138 | test_set = ( 139 | read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte')), 140 | read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte')) 141 | ) 142 | with open(os.path.join(self.root, self.processed_folder, self.training_file), 'wb') as f: 143 | torch.save(training_set, f) 144 | with open(os.path.join(self.root, self.processed_folder, self.test_file), 'wb') as f: 145 | torch.save(test_set, f) 146 | 147 | print('Done!') 148 | 149 | def __repr__(self): 150 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' 151 | fmt_str += ' Number 
of datapoints: {}\n'.format(self.__len__()) 152 | tmp = 'train' if self.train is True else 'test' 153 | fmt_str += ' Split: {}\n'.format(tmp) 154 | fmt_str += ' Root Location: {}\n'.format(self.root) 155 | tmp = ' Transforms (if any): ' 156 | fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 157 | tmp = ' Target Transforms (if any): ' 158 | fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 159 | return fmt_str 160 | 161 | 162 | def read_label_file(path): 163 | with open(path, 'rb') as f: 164 | data = f.read() 165 | assert get_int(data[:4]) == 2049 166 | length = get_int(data[4:8]) 167 | parsed = np.frombuffer(data, dtype=np.uint8, offset=8) 168 | return torch.from_numpy(parsed).view(length).long() 169 | 170 | 171 | def read_image_file(path): 172 | with open(path, 'rb') as f: 173 | data = f.read() 174 | assert get_int(data[:4]) == 2051 175 | length = get_int(data[4:8]) 176 | num_rows = get_int(data[8:12]) 177 | num_cols = get_int(data[12:16]) 178 | images = [] 179 | parsed = np.frombuffer(data, dtype=np.uint8, offset=16) 180 | return torch.from_numpy(parsed).view(length, num_rows, num_cols) -------------------------------------------------------------------------------- /lib/dcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import torchvision 7 | from torchvision import datasets, transforms 8 | from torch.autograd import Variable 9 | 10 | import numpy as np 11 | import math 12 | from lib.utils import Dataset, masking_noise 13 | from lib.ops import MSELoss, BCELoss 14 | from lib.denoisingAutoencoder import DenoisingAutoencoder 15 | from lib.utils import acc 16 | from sklearn.metrics.cluster import normalized_mutual_info_score 17 | from sklearn.cluster import KMeans 18 | 19 | def buildNetwork(layers, activation="relu", dropout=0): 20 | net = [] 21 | for i in range(1, len(layers)): 22 | net.append(nn.Linear(layers[i-1], layers[i])) 23 | if activation=="relu": 24 | net.append(nn.ReLU()) 25 | elif activation=="sigmoid": 26 | net.append(nn.Sigmoid()) 27 | if dropout > 0: 28 | net.append(nn.Dropout(dropout)) 29 | return nn.Sequential(*net) 30 | 31 | def batch_km(data, center, count): 32 | """ 33 | Function to perform a KMeans update on a batch of data, center is the 34 | centroid from last iteration. 
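Each sample is assigned to its nearest centroid (Euclidean distance); the chosen centroid is then pulled toward the sample with a per-cluster step size eta = 1/count[c], i.e. an online running-mean (mini-batch k-means) update, where count tracks how many points each cluster has accumulated so far.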
35 | 36 | """ 37 | N = data.shape[0] 38 | K = center.shape[0] 39 | 40 | # update assignment 41 | idx = np.zeros(N, dtype=np.int) 42 | for i in range(N): 43 | dist = np.inf 44 | ind = 0 45 | for j in range(K): 46 | temp_dist = np.linalg.norm(data[i] - center[j]) 47 | if temp_dist < dist: 48 | dist = temp_dist 49 | ind = j 50 | idx[i] = ind 51 | 52 | # update centriod 53 | center_new = center 54 | for i in range(N): 55 | c = idx[i] 56 | count[c] += 1 57 | eta = 1.0/count[c] 58 | center_new[c] = (1 - eta) * center_new[c] + eta * data[i] 59 | center_new.astype(np.float32) 60 | return idx, center_new, count 61 | 62 | class DeepClusteringNetwork(nn.Module): 63 | def __init__(self, input_dim=784, z_dim=10, n_centroids=10, binary=True, 64 | encodeLayer=[400], decodeLayer=[400], activation="relu", 65 | dropout=0, tied=False): 66 | super(self.__class__, self).__init__() 67 | self.z_dim = z_dim 68 | self.layers = [input_dim] + encodeLayer + [z_dim] 69 | self.activation = activation 70 | self.dropout = dropout 71 | self.encoder = buildNetwork([input_dim] + encodeLayer, activation=activation, dropout=dropout) 72 | self.decoder = buildNetwork([z_dim] + decodeLayer, activation=activation, dropout=dropout) 73 | self._enc_mu = nn.Linear(encodeLayer[-1], z_dim) 74 | 75 | self._dec = nn.Linear(decodeLayer[-1], input_dim) 76 | self._dec_act = None 77 | if binary: 78 | self._dec_act = nn.Sigmoid() 79 | 80 | def decode(self, z): 81 | h = self.decoder(z) 82 | x = self._dec(h) 83 | if self._dec_act is not None: 84 | x = self._dec_act(x) 85 | return x 86 | 87 | def loss_function(self, recon_x, x, z, center): 88 | if self._dec_act is not None: 89 | recon_loss = -torch.mean(torch.sum(x*torch.log(torch.clamp(recon_x, min=1e-10))+ 90 | (1-x)*torch.log(torch.clamp(1-recon_x, min=1e-10)), 1)) 91 | else: 92 | recon_loss = torch.mean(torch.sum((x - recon_x)**2, 1)) 93 | 94 | cluster_loss = torch.mean(torch.sum((center - z)**2, 1)) 95 | loss = cluster_loss + recon_loss 96 | return loss, recon_loss, cluster_loss 97 | 98 | def forward(self, x): 99 | h = self.encoder(x) 100 | z = self._enc_mu(h) 101 | 102 | return z, self.decode(z) 103 | 104 | def save_model(self, path): 105 | torch.save(self.state_dict(), path) 106 | 107 | def load_model(self, path): 108 | pretrained_dict = torch.load(path, map_location=lambda storage, loc: storage) 109 | model_dict = self.state_dict() 110 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 111 | model_dict.update(pretrained_dict) 112 | self.load_state_dict(model_dict) 113 | 114 | def pretrain(self, trainloader, validloader, lr=0.001, batch_size=128, num_epochs=10, corrupt=0.3, loss_type="mse"): 115 | trloader = trainloader 116 | valoader = validloader 117 | daeLayers = [] 118 | for l in range(1, len(self.layers)): 119 | infeatures = self.layers[l-1] 120 | outfeatures = self.layers[l] 121 | dae = DenoisingAutoencoder(infeatures, outfeatures, activation=self.activation, dropout=self.dropout, tied=True) 122 | if l==1: 123 | dae.fit(trloader, valoader, lr=lr, batch_size=batch_size, num_epochs=num_epochs, corrupt=corrupt, loss_type=loss_type) 124 | else: 125 | if self.activation=="sigmoid": 126 | dae.fit(trloader, valoader, lr=lr, batch_size=batch_size, num_epochs=num_epochs, corrupt=corrupt, loss_type="cross-entropy") 127 | else: 128 | dae.fit(trloader, valoader, lr=lr, batch_size=batch_size, num_epochs=num_epochs, corrupt=corrupt, loss_type="mse") 129 | data_x = dae.encodeBatch(trloader) 130 | valid_x = dae.encodeBatch(valoader) 131 | trainset = Dataset(data_x, 
data_x) 132 | trloader = torch.utils.data.DataLoader( 133 | trainset, batch_size=batch_size, shuffle=True, num_workers=2) 134 | validset = Dataset(valid_x, valid_x) 135 | valoader = torch.utils.data.DataLoader( 136 | validset, batch_size=1000, shuffle=False, num_workers=2) 137 | daeLayers.append(dae) 138 | 139 | self.copyParam(daeLayers) 140 | 141 | def copyParam(self, daeLayers): 142 | if self.dropout==0: 143 | every = 2 144 | else: 145 | every = 3 146 | # input layer 147 | # copy encoder weight 148 | self.encoder[0].weight.data.copy_(daeLayers[0].weight.data) 149 | self.encoder[0].bias.data.copy_(daeLayers[0].bias.data) 150 | self._dec.weight.data.copy_(daeLayers[0].deweight.data) 151 | self._dec.bias.data.copy_(daeLayers[0].vbias.data) 152 | 153 | for l in range(1, len(self.layers)-2): 154 | # copy encoder weight 155 | self.encoder[l*every].weight.data.copy_(daeLayers[l].weight.data) 156 | self.encoder[l*every].bias.data.copy_(daeLayers[l].bias.data) 157 | 158 | # copy decoder weight 159 | self.decoder[-(l-1)*every-2].weight.data.copy_(daeLayers[l].deweight.data) 160 | self.decoder[-(l-1)*every-2].bias.data.copy_(daeLayers[l].vbias.data) 161 | 162 | # z layer 163 | self._enc_mu.weight.data.copy_(daeLayers[-1].weight.data) 164 | self._enc_mu.bias.data.copy_(daeLayers[-1].bias.data) 165 | self.decoder[0].weight.data.copy_(daeLayers[-1].deweight.data) 166 | self.decoder[0].bias.data.copy_(daeLayers[-1].vbias.data) 167 | 168 | def encodeBatch(self, data): 169 | use_cuda = torch.cuda.is_available() 170 | if use_cuda: 171 | self.cuda() 172 | data = data.cuda() 173 | z, _ = self.forward(data) 174 | return z.data.cpu() 175 | 176 | def initialize_cluster(self, trainX, trainY, init="k-means++"): 177 | trainX = self.encodeBatch(trainX) 178 | trainX = trainX.cpu().numpy() 179 | trainY = trainY.cpu().numpy() 180 | n_components = len(np.unique(trainY)) 181 | km = KMeans(n_clusters=n_components, init=init).fit(trainX) 182 | y_pred = km.predict(trainX) 183 | print("acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred), normalized_mutual_info_score(trainY, y_pred))) 184 | 185 | u_p = km.cluster_centers_ 186 | return u_p, y_pred 187 | 188 | def fit(self, trainX, trainY, lr=0.001, batch_size=128, num_epochs=10): 189 | n_components = len(np.unique(trainY)) 190 | use_cuda = torch.cuda.is_available() 191 | if use_cuda: 192 | self.cuda() 193 | print("=====Initialize Cluster Centers=======") 194 | centers, assignments = self.initialize_cluster(trainX, trainY) 195 | 196 | print("=====Stacked Denoising Autoencoding layer=======") 197 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 198 | num_train = trainX.shape[0] 199 | n_batches = int(math.ceil(num_train / batch_size)) 200 | count = 100*np.ones(n_components, dtype=np.int) 201 | for epoch in range(num_epochs): 202 | # train 1 epoch 203 | train_loss = 0.0 204 | train_recon_loss = 0.0 205 | train_cluster_loss = 0.0 206 | for batch_idx in range(n_batches): 207 | inputs = trainX[batch_idx*batch_size : min((batch_idx+1)*batch_size, num_train)] 208 | labels = assignments[batch_idx*batch_size : min((batch_idx+1)*batch_size, num_train)] 209 | inputs = inputs.view(inputs.size(0), -1).float() 210 | centers_batch_tensor = torch.from_numpy(centers[labels]) 211 | if use_cuda: 212 | inputs = inputs.cuda() 213 | centers_batch_tensor = centers_batch_tensor.cuda() 214 | optimizer.zero_grad() 215 | inputs = Variable(inputs) 216 | centers_batch_tensor = Variable(centers_batch_tensor) 217 | 218 | z, outputs = self.forward(inputs) 219 | loss, recon_loss, 
cluster_loss = self.loss_function(outputs, inputs, z, centers_batch_tensor) 220 | train_loss += loss.data*len(inputs) 221 | train_recon_loss += recon_loss.data*len(inputs) 222 | train_cluster_loss += cluster_loss.data*len(inputs) 223 | loss.backward() 224 | optimizer.step() 225 | 226 | # Perform mini-batch KM 227 | temp_idx, centers, count = batch_km(z.data.cpu().numpy(), centers, count) 228 | assignments[batch_idx*batch_size : min((batch_idx+1)*batch_size, num_train)] = temp_idx 229 | 230 | print("#Epoch %3d: Loss: %.3f, Recon Loss: %.3f, Cluster Loss: %.3f" % ( 231 | epoch+1, train_loss / num_train, train_recon_loss/num_train, train_cluster_loss/num_train)) 232 | 233 | if (epoch+1) % 10 == 0: 234 | centers, assignments = self.initialize_cluster(trainX, trainY, centers) 235 | 236 | 237 | -------------------------------------------------------------------------------- /lib/dec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import torchvision 7 | from torchvision import datasets, transforms 8 | from torch.autograd import Variable 9 | 10 | import numpy as np 11 | import math 12 | from lib.utils import acc 13 | from sklearn.metrics.cluster import normalized_mutual_info_score 14 | from sklearn.cluster import KMeans 15 | 16 | def buildNetwork(layers, activation="relu", dropout=0): 17 | net = [] 18 | for i in range(1, len(layers)): 19 | net.append(nn.Linear(layers[i-1], layers[i])) 20 | if activation=="relu": 21 | net.append(nn.ReLU()) 22 | elif activation=="sigmoid": 23 | net.append(nn.Sigmoid()) 24 | if dropout > 0: 25 | net.append(nn.Dropout(dropout)) 26 | return nn.Sequential(*net) 27 | 28 | class DEC(nn.Module): 29 | def __init__(self, input_dim=784, z_dim=10, n_clusters=10, 30 | encodeLayer=[400], activation="relu", dropout=0, alpha=1.): 31 | super(self.__class__, self).__init__() 32 | self.z_dim = z_dim 33 | self.layers = [input_dim] + encodeLayer + [z_dim] 34 | self.activation = activation 35 | self.dropout = dropout 36 | self.encoder = buildNetwork([input_dim] + encodeLayer, activation=activation, dropout=dropout) 37 | self._enc_mu = nn.Linear(encodeLayer[-1], z_dim) 38 | 39 | self.n_clusters = n_clusters 40 | self.alpha = alpha 41 | self.mu = Parameter(torch.Tensor(n_clusters, z_dim)) 42 | 43 | def save_model(self, path): 44 | torch.save(self.state_dict(), path) 45 | 46 | def load_model(self, path): 47 | pretrained_dict = torch.load(path, map_location=lambda storage, loc: storage) 48 | model_dict = self.state_dict() 49 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 50 | model_dict.update(pretrained_dict) 51 | self.load_state_dict(model_dict) 52 | 53 | def forward(self, x): 54 | h = self.encoder(x) 55 | z = self._enc_mu(h) 56 | # compute q -> NxK 57 | q = 1.0 / (1.0 + torch.sum((z.unsqueeze(1) - self.mu)**2, dim=2) / self.alpha) 58 | q = q**(self.alpha+1.0)/2.0 59 | q = q / torch.sum(q, dim=1, keepdim=True) 60 | return z, q 61 | 62 | def encodeBatch(self, dataloader, islabel=False): 63 | use_cuda = torch.cuda.is_available() 64 | if use_cuda: 65 | self.cuda() 66 | 67 | encoded = [] 68 | ylabels = [] 69 | self.eval() 70 | for batch_idx, (inputs, labels) in enumerate(dataloader): 71 | inputs = Variable(inputs) 72 | z,_ = self.forward(inputs) 73 | encoded.append(z.data.cpu()) 74 | ylabels.append(labels) 75 | 76 | encoded = torch.cat(encoded, dim=0) 77 | ylabels = 
torch.cat(ylabels) 78 | if islabel: 79 | out = (encoded, ylabels) 80 | else: 81 | out = encoded 82 | return out 83 | 84 | def loss_function(self, p, q): 85 | def kld(target, pred): 86 | return torch.mean(torch.sum(target*torch.log(target/(pred+1e-6)), dim=1)) 87 | 88 | loss = kld(p, q) 89 | return loss 90 | 91 | def target_distribution(self, q): 92 | p = q**2 / torch.sum(q, dim=0) 93 | p = p / torch.sum(p, dim=1, keepdim=True) 94 | return p 95 | 96 | def fit(self, X, y=None, lr=0.001, batch_size=256, num_epochs=10, update_interval=1, tol=1e-3): 97 | '''X: tensor data''' 98 | use_cuda = torch.cuda.is_available() 99 | if use_cuda: 100 | self.cuda() 101 | print("=====Training DEC=======") 102 | # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 103 | optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9) 104 | 105 | print("Initializing cluster centers with kmeans.") 106 | kmeans = KMeans(self.n_clusters, n_init=20) 107 | data, _ = self.forward(X) 108 | y_pred = kmeans.fit_predict(data.data.cpu().numpy()) 109 | y_pred_last = y_pred 110 | self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_)) 111 | if y is not None: 112 | y = y.cpu().numpy() 113 | print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred))) 114 | 115 | self.train() 116 | num = X.shape[0] 117 | num_batch = int(math.ceil(1.0*X.shape[0]/batch_size)) 118 | for epoch in range(num_epochs): 119 | if epoch%update_interval == 0: 120 | # update the targe distribution p 121 | _, q = self.forward(X) 122 | p = self.target_distribution(q).data 123 | 124 | # evalute the clustering performance 125 | y_pred = torch.argmax(q, dim=1).data.cpu().numpy() 126 | if y is not None: 127 | print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred))) 128 | 129 | # check stop criterion 130 | delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num 131 | y_pred_last = y_pred 132 | if epoch>0 and delta_label < tol: 133 | print('delta_label ', delta_label, '< tol ', tol) 134 | print("Reach tolerance threshold. 
Stopping training.") 135 | break 136 | 137 | # train 1 epoch 138 | train_loss = 0.0 139 | for batch_idx in range(num_batch): 140 | xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)] 141 | pbatch = p[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)] 142 | 143 | optimizer.zero_grad() 144 | inputs = Variable(xbatch) 145 | target = Variable(pbatch) 146 | 147 | z, qbatch = self.forward(inputs) 148 | loss = self.loss_function(target, qbatch) 149 | train_loss += loss.data*len(inputs) 150 | loss.backward() 151 | optimizer.step() 152 | 153 | print("#Epoch %3d: Loss: %.4f" % ( 154 | epoch+1, train_loss / num)) 155 | 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /lib/denoisingAutoencoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import torchvision 7 | from torchvision import datasets, transforms 8 | from torch.autograd import Variable 9 | 10 | import numpy as np 11 | import math 12 | from lib.utils import Dataset, masking_noise 13 | from lib.ops import MSELoss, BCELoss 14 | 15 | def adjust_learning_rate(init_lr, optimizer, epoch): 16 | lr = init_lr * (0.1 ** (epoch//100)) 17 | toprint = True 18 | for param_group in optimizer.param_groups: 19 | if param_group["lr"]!=lr: 20 | param_group["lr"] = lr 21 | if toprint: 22 | print("Switching to learning rate %f" % lr) 23 | toprint = False 24 | 25 | class DenoisingAutoencoder(nn.Module): 26 | def __init__(self, in_features, out_features, activation="relu", 27 | dropout=0.2, tied=False): 28 | super(self.__class__, self).__init__() 29 | self.in_features = in_features 30 | self.out_features = out_features 31 | self.weight = Parameter(torch.Tensor(out_features, in_features)) 32 | if tied: 33 | self.deweight = self.weight.t() 34 | else: 35 | self.deweight = Parameter(torch.Tensor(in_features, out_features)) 36 | self.bias = Parameter(torch.Tensor(out_features)) 37 | self.vbias = Parameter(torch.Tensor(in_features)) 38 | 39 | if activation=="relu": 40 | self.enc_act_func = nn.ReLU() 41 | elif activation=="sigmoid": 42 | self.enc_act_func = nn.Sigmoid() 43 | elif activation=="none": 44 | self.enc_act_func = None 45 | self.dropout = nn.Dropout(p=dropout) 46 | 47 | self.reset_parameters() 48 | 49 | def reset_parameters(self): 50 | stdv = 0.01 51 | self.weight.data.uniform_(-stdv, stdv) 52 | self.bias.data.uniform_(-stdv, stdv) 53 | stdv = 0.01 54 | self.deweight.data.uniform_(-stdv, stdv) 55 | self.vbias.data.uniform_(-stdv, stdv) 56 | 57 | def forward(self, x): 58 | if self.enc_act_func is not None: 59 | return self.dropout(self.enc_act_func(F.linear(x, self.weight, self.bias))) 60 | else: 61 | return self.dropout(F.linear(x, self.weight, self.bias)) 62 | 63 | def encode(self, x, train=True): 64 | if train: 65 | self.dropout.train() 66 | else: 67 | self.dropout.eval() 68 | if self.enc_act_func is not None: 69 | return self.dropout(self.enc_act_func(F.linear(x, self.weight, self.bias))) 70 | else: 71 | return self.dropout(F.linear(x, self.weight, self.bias)) 72 | 73 | def encodeBatch(self, dataloader): 74 | use_cuda = torch.cuda.is_available() 75 | encoded = [] 76 | for batch_idx, (inputs, _) in enumerate(dataloader): 77 | inputs = inputs.view(inputs.size(0), -1).float() 78 | if use_cuda: 79 | inputs = inputs.cuda() 80 | inputs = Variable(inputs) 81 | hidden = self.encode(inputs, train=False) 
82 | encoded.append(hidden.data.cpu()) 83 | 84 | encoded = torch.cat(encoded, dim=0) 85 | return encoded 86 | 87 | def decode(self, x, binary=False): 88 | if not binary: 89 | return F.linear(x, self.deweight, self.vbias) 90 | else: 91 | return F.sigmoid(F.linear(x, self.deweight, self.vbias)) 92 | 93 | def fit(self, trainloader, validloader, lr=0.001, batch_size=128, num_epochs=10, corrupt=0.3, 94 | loss_type="mse"): 95 | """ 96 | data_x: FloatTensor 97 | valid_x: FloatTensor 98 | """ 99 | use_cuda = torch.cuda.is_available() 100 | if use_cuda: 101 | self.cuda() 102 | print("=====Denoising Autoencoding layer=======") 103 | # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 104 | optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9) 105 | if loss_type=="mse": 106 | criterion = MSELoss() 107 | elif loss_type=="cross-entropy": 108 | criterion = BCELoss() 109 | 110 | # validate 111 | total_loss = 0.0 112 | total_num = 0 113 | for batch_idx, (inputs, _) in enumerate(validloader): 114 | # inputs = inputs.view(inputs.size(0), -1).float() 115 | # if use_cuda: 116 | # inputs = inputs.cuda() 117 | inputs = Variable(inputs) 118 | hidden = self.encode(inputs) 119 | if loss_type=="cross-entropy": 120 | outputs = self.decode(hidden, binary=True) 121 | else: 122 | outputs = self.decode(hidden) 123 | 124 | valid_recon_loss = criterion(outputs, inputs) 125 | total_loss += valid_recon_loss.data * len(inputs) 126 | total_num += inputs.size()[0] 127 | 128 | valid_loss = total_loss / total_num 129 | print("#Epoch 0: Valid Reconstruct Loss: %.4f" % (valid_loss)) 130 | 131 | self.train() 132 | for epoch in range(num_epochs): 133 | # train 1 epoch 134 | train_loss = 0.0 135 | adjust_learning_rate(lr, optimizer, epoch) 136 | for batch_idx, (inputs, _) in enumerate(trainloader): 137 | # inputs = inputs.view(inputs.size(0), -1).float() 138 | inputs_corr = masking_noise(inputs, corrupt) 139 | # if use_cuda: 140 | # inputs = inputs.cuda() 141 | # inputs_corr = inputs_corr.cuda() 142 | optimizer.zero_grad() 143 | inputs = Variable(inputs) 144 | inputs_corr = Variable(inputs_corr) 145 | 146 | hidden = self.encode(inputs_corr) 147 | if loss_type=="cross-entropy": 148 | outputs = self.decode(hidden, binary=True) 149 | else: 150 | outputs = self.decode(hidden) 151 | recon_loss = criterion(outputs, inputs) 152 | train_loss += recon_loss.data*len(inputs) 153 | recon_loss.backward() 154 | optimizer.step() 155 | 156 | # validate 157 | valid_loss = 0.0 158 | for batch_idx, (inputs, _) in enumerate(validloader): 159 | # inputs = inputs.view(inputs.size(0), -1).float() 160 | # if use_cuda: 161 | # inputs = inputs.cuda() 162 | inputs = Variable(inputs) 163 | hidden = self.encode(inputs, train=False) 164 | if loss_type=="cross-entropy": 165 | outputs = self.decode(hidden, binary=True) 166 | else: 167 | outputs = self.decode(hidden) 168 | 169 | valid_recon_loss = criterion(outputs, inputs) 170 | valid_loss += valid_recon_loss.data * len(inputs) 171 | 172 | print("#Epoch %3d: Reconstruct Loss: %.4f, Valid Reconstruct Loss: %.4f" % ( 173 | epoch+1, train_loss / len(trainloader.dataset), valid_loss / len(validloader.dataset))) 174 | 175 | def extra_repr(self): 176 | return 'in_features={}, out_features={}, bias={}'.format( 177 | self.in_features, self.out_features, self.bias is not None 178 | ) 179 | 180 | -------------------------------------------------------------------------------- /lib/idec.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import torchvision 7 | from torchvision import datasets, transforms 8 | from torch.autograd import Variable 9 | 10 | import numpy as np 11 | import math 12 | from lib.utils import acc 13 | from sklearn.metrics.cluster import normalized_mutual_info_score 14 | from sklearn.cluster import KMeans 15 | 16 | class MSELoss(nn.Module): 17 | def __init__(self): 18 | super(self.__class__, self).__init__() 19 | 20 | def forward(self, input, target): 21 | return torch.mean((input-target)**2) 22 | 23 | def buildNetwork(layers, activation="relu", dropout=0): 24 | net = [] 25 | for i in range(1, len(layers)): 26 | net.append(nn.Linear(layers[i-1], layers[i])) 27 | if activation=="relu": 28 | net.append(nn.ReLU()) 29 | elif activation=="sigmoid": 30 | net.append(nn.Sigmoid()) 31 | if dropout > 0: 32 | net.append(nn.Dropout(dropout)) 33 | return nn.Sequential(*net) 34 | 35 | class IDEC(nn.Module): 36 | def __init__(self, input_dim=784, z_dim=10, n_clusters=10, 37 | encodeLayer=[400], decodeLayer=[400], activation="relu", dropout=0, alpha=1., gamma=0.1): 38 | super(self.__class__, self).__init__() 39 | self.z_dim = z_dim 40 | self.layers = [input_dim] + encodeLayer + [z_dim] 41 | self.activation = activation 42 | self.dropout = dropout 43 | self.encoder = buildNetwork([input_dim] + encodeLayer, activation=activation, dropout=dropout) 44 | self.decoder = buildNetwork([z_dim] + decodeLayer, activation=activation, dropout=dropout) 45 | self._enc_mu = nn.Linear(encodeLayer[-1], z_dim) 46 | self._dec = nn.Linear(decodeLayer[-1], input_dim) 47 | 48 | self.n_clusters = n_clusters 49 | self.alpha = alpha 50 | self.gamma = gamma 51 | self.mu = Parameter(torch.Tensor(n_clusters, z_dim)) 52 | 53 | def save_model(self, path): 54 | torch.save(self.state_dict(), path) 55 | 56 | def load_model(self, path): 57 | pretrained_dict = torch.load(path, map_location=lambda storage, loc: storage) 58 | model_dict = self.state_dict() 59 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 60 | model_dict.update(pretrained_dict) 61 | self.load_state_dict(model_dict) 62 | 63 | def forward(self, x): 64 | h = self.encoder(x) 65 | z = self._enc_mu(h) 66 | h = self.decoder(z) 67 | xrecon = self._dec(h) 68 | # compute q -> NxK 69 | q = self.soft_assign(z) 70 | return z, q, xrecon 71 | 72 | def soft_assign(self, z): 73 | q = 1.0 / (1.0 + torch.sum((z.unsqueeze(1) - self.mu)**2, dim=2) / self.alpha) 74 | q = q**(self.alpha+1.0)/2.0 75 | q = q / torch.sum(q, dim=1, keepdim=True) 76 | return q 77 | 78 | def encodeBatch(self, X, batch_size=256): 79 | use_cuda = torch.cuda.is_available() 80 | if use_cuda: 81 | self.cuda() 82 | 83 | encoded = [] 84 | self.eval() 85 | num = X.shape[0] 86 | num_batch = int(math.ceil(1.0*X.shape[0]/batch_size)) 87 | for batch_idx in range(num_batch): 88 | xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)] 89 | inputs = Variable(xbatch) 90 | z,_, _ = self.forward(inputs) 91 | encoded.append(z.data) 92 | 93 | encoded = torch.cat(encoded, dim=0) 94 | return encoded 95 | 96 | def loss_function(self, x, xrecon, p, q): 97 | def kld(target, pred): 98 | return torch.mean(torch.sum(target*torch.log(target/(pred+1e-6)), dim=1)) 99 | 100 | kldloss = kld(p, q) 101 | recon_loss = torch.mean((xrecon-x)**2) 102 | loss = recon_loss + self.gamma*kldloss 103 | 
return loss 104 | 105 | def target_distribution(self, q): 106 | p = q**2 / torch.sum(q, dim=0) 107 | p = p / torch.sum(p, dim=1, keepdim=True) 108 | return p 109 | 110 | def fit(self, X, y=None, lr=0.001, batch_size=256, num_epochs=10, update_interval=1, tol=1e-3): 111 | '''X: tensor data''' 112 | use_cuda = torch.cuda.is_available() 113 | if use_cuda: 114 | self.cuda() 115 | print("=====Training IDEC=======") 116 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 117 | # optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9) 118 | 119 | 120 | print("Initializing cluster centers with kmeans.") 121 | kmeans = KMeans(self.n_clusters, n_init=20) 122 | data = self.encodeBatch(X) 123 | y_pred = kmeans.fit_predict(data.data.cpu().numpy()) 124 | y_pred_last = y_pred 125 | self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_)) 126 | if y is not None: 127 | y = y.cpu().numpy() 128 | print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred))) 129 | 130 | self.train() 131 | num = X.shape[0] 132 | num_batch = int(math.ceil(1.0*X.shape[0]/batch_size)) 133 | for epoch in range(num_epochs): 134 | if epoch%update_interval == 0: 135 | # update the targe distribution p 136 | latent = self.encodeBatch(X) 137 | q = self.soft_assign(latent) 138 | p = self.target_distribution(q).data 139 | 140 | # evalute the clustering performance 141 | y_pred = torch.argmax(q, dim=1).data.cpu().numpy() 142 | if y is not None: 143 | print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred))) 144 | 145 | # check stop criterion 146 | delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num 147 | y_pred_last = y_pred 148 | if epoch>0 and delta_label < tol: 149 | print('delta_label ', delta_label, '< tol ', tol) 150 | print("Reach tolerance threshold. 
Stopping training.") 151 | break 152 | 153 | # train 1 epoch 154 | train_loss = 0.0 155 | for batch_idx in range(num_batch): 156 | xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)] 157 | pbatch = p[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)] 158 | 159 | optimizer.zero_grad() 160 | inputs = Variable(xbatch) 161 | target = Variable(pbatch) 162 | 163 | z, qbatch, xrecon = self.forward(inputs) 164 | loss = self.loss_function(inputs, xrecon, target, qbatch) 165 | train_loss += loss.data*len(inputs) 166 | loss.backward() 167 | optimizer.step() 168 | 169 | print("#Epoch %3d: Loss: %.4f" % ( 170 | epoch+1, train_loss / num)) 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /lib/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import math 7 | 8 | 9 | class MSELoss(nn.Module): 10 | def __init__(self): 11 | super(self.__class__, self).__init__() 12 | 13 | def forward(self, input, target): 14 | return 0.5 * torch.mean((input-target)**2) 15 | 16 | class BCELoss(nn.Module): 17 | def __init__(self): 18 | super(self.__class__, self).__init__() 19 | 20 | def forward(self, input, target): 21 | return -torch.mean(torch.sum(target*torch.log(torch.clamp(input, min=1e-10))+ 22 | (1-target)*torch.log(torch.clamp(1-input, min=1e-10)), 1)) 23 | -------------------------------------------------------------------------------- /lib/stackedDAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import torchvision 7 | from torchvision import datasets, transforms 8 | from torch.autograd import Variable 9 | 10 | import numpy as np 11 | import math 12 | from lib.utils import Dataset, masking_noise 13 | from lib.ops import MSELoss, BCELoss 14 | from lib.denoisingAutoencoder import DenoisingAutoencoder 15 | 16 | def buildNetwork(layers, activation="relu", dropout=0): 17 | net = [] 18 | for i in range(1, len(layers)): 19 | net.append(nn.Linear(layers[i-1], layers[i])) 20 | if activation=="relu": 21 | net.append(nn.ReLU()) 22 | elif activation=="sigmoid": 23 | net.append(nn.Sigmoid()) 24 | if dropout > 0: 25 | net.append(nn.Dropout(dropout)) 26 | return nn.Sequential(*net) 27 | 28 | def adjust_learning_rate(init_lr, optimizer, epoch): 29 | lr = init_lr * (0.1 ** (epoch//100)) 30 | toprint = True 31 | for param_group in optimizer.param_groups: 32 | if param_group["lr"]!=lr: 33 | param_group["lr"] = lr 34 | if toprint: 35 | print("Switching to learning rate %f" % lr) 36 | toprint = False 37 | 38 | class StackedDAE(nn.Module): 39 | def __init__(self, input_dim=784, z_dim=10, binary=True, 40 | encodeLayer=[400], decodeLayer=[400], activation="relu", 41 | dropout=0, tied=False): 42 | super(self.__class__, self).__init__() 43 | self.z_dim = z_dim 44 | self.layers = [input_dim] + encodeLayer + [z_dim] 45 | self.activation = activation 46 | self.dropout = dropout 47 | self.encoder = buildNetwork([input_dim] + encodeLayer, activation=activation, dropout=dropout) 48 | self.decoder = buildNetwork([z_dim] + decodeLayer, activation=activation, dropout=dropout) 49 | self._enc_mu = nn.Linear(encodeLayer[-1], z_dim) 50 | 51 | self._dec = nn.Linear(decodeLayer[-1], 
input_dim) 52 | self._dec_act = None 53 | if binary: 54 | self._dec_act = nn.Sigmoid() 55 | 56 | def decode(self, z): 57 | h = self.decoder(z) 58 | x = self._dec(h) 59 | if self._dec_act is not None: 60 | x = self._dec_act(x) 61 | return x 62 | 63 | def loss_function(self, recon_x, x): 64 | loss = -torch.mean(torch.sum(x*torch.log(torch.clamp(recon_x, min=1e-10))+ 65 | (1-x)*torch.log(torch.clamp(1-recon_x, min=1e-10)), 1)) 66 | 67 | return loss 68 | 69 | def forward(self, x): 70 | h = self.encoder(x) 71 | z = self._enc_mu(h) 72 | 73 | return z, self.decode(z) 74 | 75 | def save_model(self, path): 76 | torch.save(self.state_dict(), path) 77 | 78 | def load_model(self, path): 79 | pretrained_dict = torch.load(path, map_location=lambda storage, loc: storage) 80 | model_dict = self.state_dict() 81 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 82 | model_dict.update(pretrained_dict) 83 | self.load_state_dict(model_dict) 84 | 85 | def pretrain(self, trainloader, validloader, lr=0.001, batch_size=128, num_epochs=10, corrupt=0.2, loss_type="cross-entropy"): 86 | trloader = trainloader 87 | valoader = validloader 88 | daeLayers = [] 89 | for l in range(1, len(self.layers)): 90 | infeatures = self.layers[l-1] 91 | outfeatures = self.layers[l] 92 | if l!= len(self.layers)-1: 93 | dae = DenoisingAutoencoder(infeatures, outfeatures, activation=self.activation, dropout=corrupt) 94 | else: 95 | dae = DenoisingAutoencoder(infeatures, outfeatures, activation="none", dropout=0) 96 | print(dae) 97 | if l==1: 98 | dae.fit(trloader, valoader, lr=lr, batch_size=batch_size, num_epochs=num_epochs, corrupt=corrupt, loss_type=loss_type) 99 | else: 100 | if self.activation=="sigmoid": 101 | dae.fit(trloader, valoader, lr=lr, batch_size=batch_size, num_epochs=num_epochs, corrupt=corrupt, loss_type="cross-entropy") 102 | else: 103 | dae.fit(trloader, valoader, lr=lr, batch_size=batch_size, num_epochs=num_epochs, corrupt=corrupt, loss_type="mse") 104 | data_x = dae.encodeBatch(trloader) 105 | valid_x = dae.encodeBatch(valoader) 106 | trainset = Dataset(data_x, data_x) 107 | trloader = torch.utils.data.DataLoader( 108 | trainset, batch_size=batch_size, shuffle=True, num_workers=0) 109 | validset = Dataset(valid_x, valid_x) 110 | valoader = torch.utils.data.DataLoader( 111 | validset, batch_size=1000, shuffle=False, num_workers=0) 112 | daeLayers.append(dae) 113 | 114 | self.copyParam(daeLayers) 115 | 116 | def copyParam(self, daeLayers): 117 | if self.dropout==0: 118 | every = 2 119 | else: 120 | every = 3 121 | # input layer 122 | # copy encoder weight 123 | self.encoder[0].weight.data.copy_(daeLayers[0].weight.data) 124 | self.encoder[0].bias.data.copy_(daeLayers[0].bias.data) 125 | self._dec.weight.data.copy_(daeLayers[0].deweight.data) 126 | self._dec.bias.data.copy_(daeLayers[0].vbias.data) 127 | 128 | for l in range(1, len(self.layers)-2): 129 | # copy encoder weight 130 | self.encoder[l*every].weight.data.copy_(daeLayers[l].weight.data) 131 | self.encoder[l*every].bias.data.copy_(daeLayers[l].bias.data) 132 | 133 | # copy decoder weight 134 | self.decoder[-(l-1)*every-2].weight.data.copy_(daeLayers[l].deweight.data) 135 | self.decoder[-(l-1)*every-2].bias.data.copy_(daeLayers[l].vbias.data) 136 | 137 | # z layer 138 | self._enc_mu.weight.data.copy_(daeLayers[-1].weight.data) 139 | self._enc_mu.bias.data.copy_(daeLayers[-1].bias.data) 140 | self.decoder[0].weight.data.copy_(daeLayers[-1].deweight.data) 141 | self.decoder[0].bias.data.copy_(daeLayers[-1].vbias.data) 142 | 143 | 
def fit(self, trainloader, validloader, lr=0.001, num_epochs=10, corrupt=0.3, 144 | loss_type="mse"): 145 | """ 146 | data_x: FloatTensor 147 | valid_x: FloatTensor 148 | """ 149 | use_cuda = torch.cuda.is_available() 150 | if use_cuda: 151 | self.cuda() 152 | print("=====Stacked Denoising Autoencoding Layer=======") 153 | # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr) 154 | optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9) 155 | if loss_type=="mse": 156 | criterion = MSELoss() 157 | elif loss_type=="cross-entropy": 158 | criterion = BCELoss() 159 | 160 | # validate 161 | total_loss = 0.0 162 | total_num = 0 163 | for batch_idx, (inputs, _) in enumerate(validloader): 164 | inputs = inputs.view(inputs.size(0), -1).float() 165 | if use_cuda: 166 | inputs = inputs.cuda() 167 | inputs = Variable(inputs) 168 | z, outputs = self.forward(inputs) 169 | 170 | valid_recon_loss = criterion(outputs, inputs) 171 | total_loss += valid_recon_loss.data * len(inputs) 172 | total_num += inputs.size()[0] 173 | 174 | valid_loss = total_loss / total_num 175 | print("#Epoch 0: Valid Reconstruct Loss: %.4f" % (valid_loss)) 176 | self.train() 177 | for epoch in range(num_epochs): 178 | # train 1 epoch 179 | adjust_learning_rate(lr, optimizer, epoch) 180 | train_loss = 0.0 181 | for batch_idx, (inputs, _) in enumerate(trainloader): 182 | inputs = inputs.view(inputs.size(0), -1).float() 183 | inputs_corr = masking_noise(inputs, corrupt) 184 | if use_cuda: 185 | inputs = inputs.cuda() 186 | inputs_corr = inputs_corr.cuda() 187 | optimizer.zero_grad() 188 | inputs = Variable(inputs) 189 | inputs_corr = Variable(inputs_corr) 190 | 191 | z, outputs = self.forward(inputs_corr) 192 | recon_loss = criterion(outputs, inputs) 193 | train_loss += recon_loss.data*len(inputs) 194 | recon_loss.backward() 195 | optimizer.step() 196 | 197 | # validate 198 | valid_loss = 0.0 199 | for batch_idx, (inputs, _) in enumerate(validloader): 200 | inputs = inputs.view(inputs.size(0), -1).float() 201 | if use_cuda: 202 | inputs = inputs.cuda() 203 | inputs = Variable(inputs) 204 | z, outputs = self.forward(inputs) 205 | 206 | valid_recon_loss = criterion(outputs, inputs) 207 | valid_loss += valid_recon_loss.data * len(inputs) 208 | 209 | print("#Epoch %3d: Reconstruct Loss: %.4f, Valid Reconstruct Loss: %.4f" % ( 210 | epoch+1, train_loss / len(trainloader.dataset), valid_loss / len(validloader.dataset))) 211 | 212 | 213 | -------------------------------------------------------------------------------- /lib/utils.py: -------------------------------------------------------------------------------- 1 | '''Some helper functions for PyTorch, including: 2 | - get_mean_and_std: calculate the mean and std value of dataset. 3 | - msr_init: net parameter initialization. 4 | - progress_bar: progress bar mimic xlua.progress. 
5 | ''' 6 | import os 7 | import sys 8 | import time 9 | import math 10 | import numpy as np 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.init as init 15 | import torch.utils.data as data 16 | from PIL import Image 17 | 18 | def weights_xavier_init(m): 19 | if isinstance(m, nn.Linear): 20 | nn.init.xavier_uniform(m.weight.data) 21 | nn.init.constant(m.bias.data, 0) 22 | 23 | class Dataset(data.Dataset): 24 | def __init__(self, data, labels, transform=None, target_transform=None): 25 | self.transform = transform 26 | self.target_transform = target_transform 27 | self.data = data 28 | self.labels = labels 29 | if torch.cuda.is_available(): 30 | self.data = self.data.cuda() 31 | self.labels = self.labels.cuda() 32 | 33 | def __getitem__(self, index): 34 | img, target = self.data[index], self.labels[index] 35 | # img = Image.fromarray(img) 36 | if self.transform is not None: 37 | img = self.transform(img) 38 | 39 | if self.target_transform is not None: 40 | target = self.target_transform(target) 41 | 42 | return img, target 43 | 44 | def __len__(self): 45 | return len(self.data) 46 | 47 | def masking_noise(data, frac): 48 | """ 49 | data: Tensor 50 | frac: fraction of unit to be masked out 51 | """ 52 | data_noise = data.clone() 53 | rand = torch.rand(data.size()) 54 | data_noise[rand < frac] = 0 55 | return data_noise 56 | 57 | def acc(y_true, y_pred): 58 | """Clustering accuracy: best one-to-one match between predicted clusters and true labels (Hungarian assignment).""" 59 | from scipy.optimize import linear_sum_assignment 60 | y_true = y_true.astype(np.int64) 61 | assert y_pred.size == y_true.size 62 | D = max(y_pred.max(), y_true.max()) + 1 63 | w = np.zeros((D, D), dtype=np.int64) 64 | for i in range(y_pred.size): 65 | w[y_pred[i], y_true[i]] += 1 66 | row_ind, col_ind = linear_sum_assignment(w.max() - w) 67 | return w[row_ind, col_ind].sum() * 1.0 / y_pred.size -------------------------------------------------------------------------------- /logs/mnist.log: -------------------------------------------------------------------------------- 1765 | dec = DEC(input_dim=784, z_dim=10, n_clusters=10, 1766 | NameError: name 'DEC' is not defined 1767 | -------------------------------------------------------------------------------- /run-experiment.qsub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #$ -S /bin/bash 4 | #$ -V 5 | #$ -cwd 6 | 7 | #$ -w e 8 | #$ -l h=client110 9 | 10 | datasets="mnist stl reuters10k har" 11 | for dataset in $datasets 12 | do 13 | python test_ae-3layer.py --dataset $dataset --save model/pretrained_"$dataset".pt 14 | python test_ltvae-3layer.py --dataset $dataset --lr 0.002 --epochs 20 --everyepochs 5 --pretrain model/pretrained_"$dataset".pt 15 | done 16 | # python test_ae-3layer.py --dataset mnist --lr 0.001 --epochs 100 -------------------------------------------------------------------------------- /run_experiment.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | from torchvision import datasets, transforms 4 | import numpy as np 5 | import argparse 6 | from lib.stackedDAE import StackedDAE 7 | from lib.dec import DEC 8 | from lib.datasets import MNIST 9 | 10 | dataset = "mnist" 11 | repeat = 10 12 | batch_size = 256 13 | 14 | for i in range(1, repeat+1): 15 | print("Experiment #%d" % i) 16 | 17 | train_loader = torch.utils.data.DataLoader( 18 | MNIST('./dataset/mnist', train=True, download=True), 19 | batch_size=batch_size, shuffle=True, num_workers=0) 20 | test_loader = torch.utils.data.DataLoader( 21 | MNIST('./dataset/mnist', train=False), 22 | batch_size=batch_size, shuffle=False, num_workers=0) 23 | # pretrain 24 | sdae = StackedDAE(input_dim=784, z_dim=10, binary=False, 25 | encodeLayer=[500,500,2000], decodeLayer=[2000,500,500], activation="relu", 26 | dropout=0) 27 | print(sdae) 28 | sdae.pretrain(train_loader, test_loader, lr=0.1, batch_size=batch_size, 29 | num_epochs=300, corrupt=0.2, loss_type="mse") 30 | sdae.fit(train_loader, test_loader, lr=0.1, num_epochs=500, corrupt=0.2, loss_type="mse") 31 | sdae_savepath = ("model/sdae-run-%d.pt" % i) 32 | sdae.save_model(sdae_savepath) 33 | 34 | # finetune 35 | mnist_train = MNIST('./dataset/mnist',
train=True, download=True) 36 | mnist_test = MNIST('./dataset/mnist', train=False) 37 | X = mnist_train.train_data 38 | y = mnist_train.train_labels 39 | 40 | dec = DEC(input_dim=784, z_dim=10, n_clusters=10, 41 | encodeLayer=[500,500,2000], activation="relu", dropout=0) 42 | print(dec) 43 | dec.load_model(sdae_savepath) 44 | dec.fit(X, y, lr=0.01, batch_size=256, num_epochs=100, 45 | update_interval=1) 46 | dec_savepath = ("model/dec-run-%d.pt" % i) 47 | dec.save_model(dec_savepath) -------------------------------------------------------------------------------- /test_dcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | python test_dcn.py --lr 0.0001 --epochs 50 --pretrain model/sdae.pt 3 | On MNIST achieves acc: 0.86325, nmi: 0.82759 4 | """ 5 | import sys 6 | sys.path.append("..") 7 | import torch 8 | import torch.utils.data 9 | from torchvision import datasets, transforms 10 | import numpy as np 11 | import argparse 12 | from lib.dcn import DeepClusteringNetwork 13 | from lib.datasets import MNIST 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser(description='VAE MNIST Example') 17 | parser.add_argument('--lr', type=float, default=0.001, metavar='N', 18 | help='learning rate for training (default: 0.001)') 19 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 20 | help='input batch size for training (default: 128)') 21 | parser.add_argument('--update-interval', type=int, default=1, metavar='N', 22 | help='number of epochs to train (default: 10)') 23 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 24 | help='number of epochs to train (default: 10)') 25 | parser.add_argument('--pretrain', type=str, default="", metavar='N', 26 | help='number of epochs to train (default: 10)') 27 | args = parser.parse_args() 28 | 29 | # according to the released code, mnist data is multiplied by 0.02 30 | # 255*0.02 = 5.1. 
transforms.ToTensor() coverts 255 -> 1.0 31 | # so add a customized Scale transform to multiple by 5.1 32 | mnist_train = MNIST('./dataset/mnist', train=True, download=True) 33 | mnist_test = MNIST('./dataset/mnist', train=False) 34 | X = mnist_train.train_data 35 | y = mnist_train.train_labels 36 | 37 | dcn = DeepClusteringNetwork(input_dim=784, z_dim=10, n_centroids=10, binary=False, 38 | encodeLayer=[500,500,2000], decodeLayer=[2000,500,500], activation="relu", dropout=0) 39 | print(dcn) 40 | dcn.load_model(args.pretrain) 41 | 42 | dcn.fit(X, y, lr=args.lr, batch_size=args.batch_size, num_epochs=args.epochs) 43 | dcn.save_model("model/dcn.pt") 44 | -------------------------------------------------------------------------------- /test_dec.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import torch 4 | import torch.utils.data 5 | from torchvision import datasets, transforms 6 | import numpy as np 7 | import argparse 8 | from lib.dec import DEC 9 | from lib.datasets import MNIST 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser(description='VAE MNIST Example') 13 | parser.add_argument('--lr', type=float, default=0.01, metavar='N', 14 | help='learning rate for training (default: 0.001)') 15 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 16 | help='input batch size for training (default: 128)') 17 | parser.add_argument('--update-interval', type=int, default=1, metavar='N', 18 | help='number of epochs to train (default: 10)') 19 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 20 | help='number of epochs to train (default: 10)') 21 | parser.add_argument('--pretrain', type=str, default="", metavar='N', 22 | help='number of epochs to train (default: 10)') 23 | args = parser.parse_args() 24 | 25 | # according to the released code, mnist data is multiplied by 0.02 26 | # 255*0.02 = 5.1. 
transforms.ToTensor() coverts 255 -> 1.0 27 | # so add a customized Scale transform to multiple by 5.1 28 | mnist_train = MNIST('./dataset/mnist', train=True, download=True) 29 | mnist_test = MNIST('./dataset/mnist', train=False) 30 | X = mnist_train.train_data 31 | y = mnist_train.train_labels 32 | 33 | dec = DEC(input_dim=784, z_dim=10, n_clusters=10, 34 | encodeLayer=[500,500,2000], activation="relu", dropout=0) 35 | print(dec) 36 | dec.load_model(args.pretrain) 37 | dec.fit(X, y, lr=args.lr, batch_size=args.batch_size, num_epochs=args.epochs, 38 | update_interval=args.update_interval) 39 | dec.save_model("model/dec.pt") 40 | -------------------------------------------------------------------------------- /test_idec.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import torch 4 | import torch.utils.data 5 | from torchvision import datasets, transforms 6 | import numpy as np 7 | import argparse 8 | from lib.idec import IDEC 9 | from lib.datasets import MNIST 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser(description='VAE MNIST Example') 13 | parser.add_argument('--lr', type=float, default=0.001, metavar='N', 14 | help='learning rate for training (default: 0.001)') 15 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 16 | help='input batch size for training (default: 128)') 17 | parser.add_argument('--update-interval', type=int, default=1, metavar='N', 18 | help='number of epochs to train (default: 10)') 19 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 20 | help='number of epochs to train (default: 10)') 21 | parser.add_argument('--pretrain', type=str, default="", metavar='N', 22 | help='number of epochs to train (default: 10)') 23 | args = parser.parse_args() 24 | 25 | # according to the released code, mnist data is multiplied by 0.02 26 | # 255*0.02 = 5.1. 
transforms.ToTensor() coverts 255 -> 1.0 27 | # so add a customized Scale transform to multiple by 5.1 28 | mnist_train = MNIST('./dataset/mnist', train=True, download=True) 29 | mnist_test = MNIST('./dataset/mnist', train=False) 30 | X = mnist_train.train_data 31 | y = mnist_train.train_labels 32 | 33 | idec = IDEC(input_dim=784, z_dim=10, n_clusters=10, 34 | encodeLayer=[500,500,2000], decodeLayer=[2000,500,500], activation="relu", dropout=0) 35 | print(idec) 36 | idec.load_model(args.pretrain) 37 | idec.fit(X, y, lr=args.lr, batch_size=args.batch_size, num_epochs=args.epochs, 38 | update_interval=args.update_interval) 39 | idec.save_model("model/idec.pt") 40 | -------------------------------------------------------------------------------- /test_sdae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import torch 4 | import torch.utils.data 5 | from torchvision import datasets, transforms 6 | import numpy as np 7 | import argparse 8 | from lib.stackedDAE import StackedDAE 9 | from lib.datasets import MNIST 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser(description='VAE MNIST Example') 13 | parser.add_argument('--lr', type=float, default=0.1, metavar='N', 14 | help='learning rate for training (default: 0.001)') 15 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 16 | help='input batch size for training (default: 128)') 17 | parser.add_argument('--pretrainepochs', type=int, default=10, metavar='N', 18 | help='number of epochs to train (default: 10)') 19 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 20 | help='number of epochs to train (default: 10)') 21 | args = parser.parse_args() 22 | 23 | # according to the released code, mnist data is multiplied by 0.02 24 | # 255*0.02 = 5.1. 
transforms.ToTensor() converts 255 -> 1.0 25 | # so add a customized Scale transform to multiply by 5.1 26 | train_loader = torch.utils.data.DataLoader( 27 | MNIST('./dataset/mnist', train=True, download=True), 28 | batch_size=args.batch_size, shuffle=True, num_workers=0) 29 | test_loader = torch.utils.data.DataLoader( 30 | MNIST('./dataset/mnist', train=False), 31 | batch_size=args.batch_size, shuffle=False, num_workers=0) 32 | 33 | sdae = StackedDAE(input_dim=784, z_dim=10, binary=False, 34 | encodeLayer=[500,500,2000], decodeLayer=[2000,500,500], activation="relu", 35 | dropout=0) 36 | print(sdae) 37 | sdae.pretrain(train_loader, test_loader, lr=args.lr, batch_size=args.batch_size, 38 | num_epochs=args.pretrainepochs, corrupt=0.2, loss_type="mse") 39 | sdae.fit(train_loader, test_loader, lr=args.lr, num_epochs=args.epochs, corrupt=0.2, loss_type="mse") 40 | sdae.save_model("model/sdae.pt") 41 | -------------------------------------------------------------------------------- /test_sdae_for_dcn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | import torch 4 | import torch.utils.data 5 | from torchvision import datasets, transforms 6 | import numpy as np 7 | import argparse 8 | from lib.stackedDAE import StackedDAE 9 | from lib.datasets import MNIST 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser(description='Stacked DAE pretraining for DCN on MNIST') 13 | parser.add_argument('--lr', type=float, default=0.1, metavar='N', 14 | help='learning rate for training (default: 0.1)') 15 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 16 | help='input batch size for training (default: 256)') 17 | parser.add_argument('--pretrainepochs', type=int, default=10, metavar='N', 18 | help='number of layer-wise pretraining epochs (default: 10)') 19 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 20 | help='number of epochs to train (default: 10)') 21 | args = parser.parse_args() 22 | 23 | # according to the released code, mnist data is multiplied by 0.02 24 | # 255*0.02 = 5.1. transforms.ToTensor() converts 255 -> 1.0 25 | # so add a customized Scale transform to multiply by 5.1 26 | train_loader = torch.utils.data.DataLoader( 27 | MNIST('./dataset/mnist', train=True, download=True), 28 | batch_size=args.batch_size, shuffle=True, num_workers=0) 29 | test_loader = torch.utils.data.DataLoader( 30 | MNIST('./dataset/mnist', train=False), 31 | batch_size=args.batch_size, shuffle=False, num_workers=0) 32 | 33 | sdae = StackedDAE(input_dim=784, z_dim=10, binary=False, 34 | encodeLayer=[500,500,2000], decodeLayer=[2000,500,500], activation="relu", 35 | dropout=0) 36 | print(sdae) 37 | sdae.pretrain(train_loader, test_loader, lr=args.lr, batch_size=args.batch_size, 38 | num_epochs=args.pretrainepochs, corrupt=0.2, loss_type="mse") 39 | # sdae.fit(train_loader, test_loader, lr=args.lr, num_epochs=args.epochs, corrupt=0.2, loss_type="mse") 40 | sdae.save_model("model/sdae-dcn.pt") 41 | --------------------------------------------------------------------------------
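
The test scripts above repeatedly note that ToTensor() maps 255 -> 1.0, so matching the released preprocessing (x * 0.02, i.e. 255 * 0.02 = 5.1) would require a "customized Scale transform". A minimal sketch of such a transform, assuming torchvision's stock MNIST dataset is used in place of lib/datasets.MNIST; the Scale class below is illustrative and not defined anywhere in this repository.

from torchvision import datasets, transforms

class Scale(object):
    """Multiply a tensor by a constant factor (illustrative helper, not part of this repo)."""
    def __init__(self, factor):
        self.factor = factor
    def __call__(self, x):
        return x * self.factor

# ToTensor() maps uint8 values in [0, 255] to floats in [0, 1], so multiplying by
# 5.1 reproduces the 255 * 0.02 scaling applied inside lib/datasets.MNIST.
transform = transforms.Compose([
    transforms.ToTensor(),
    Scale(5.1),
    transforms.Lambda(lambda t: t.view(-1)),  # flatten 1x28x28 -> 784
])
mnist_train = datasets.MNIST('./dataset/mnist', train=True, download=True,
                             transform=transform)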