├── README.md
├── classification_aleatoric.py
├── classification_combined.py
├── classification_epistemic.py
└── classification_normal.py


/README.md:
--------------------------------------------------------------------------------
# My-implementation-of-What-Uncertainties-Do-We-Need-in-Bayesian-Deep-Learning-for-Computer-Vision
This is my implementation of the classification task in the paper _What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?_.

In this repo, I model aleatoric uncertainty, epistemic uncertainty, and both of them together for a classification task on the MNIST dataset. I also implement a vanilla network for comparison.

However, these are all based on my understanding of the paper. In a regression task it is easy to compute `var(mean)` (i.e. epistemic uncertainty) and `mean(var)` (i.e. aleatoric uncertainty), but in a classification task I don't know how to compute aleatoric and epistemic uncertainty for each sample, since the predictions are vectors rather than single values. Even if I could compute them, I also don't know how to plot these uncertainties the way one does for regression. (**New**: after reading many papers, I think the most practical way to quantify uncertainty in a classification task is entropy, even though it is not directly comparable with the variance used in regression. In this setting, the `sigma` vector **is only used in the loss function to mitigate the influence of noisy samples**; explicit quantification of uncertainty is better done with the entropy of the softmax of `mu`. I'll implement this later.)

~~Besides, I am still not sure whether each logit value is drawn from a Gaussian, so that the whole logit vector is drawn from a multivariate Gaussian distribution. I saw other repos predict the variance of each sample as a single value, while I think it should be a vector, so that the covariance is a 'diagonal matrix with one element for each logit value'.~~ (This is discussed in issue #1.)

The results suggest that modeling aleatoric uncertainty can improve the model's performance.

Feedback and discussion are welcome.

--------------------------------------------------------------------------------
/classification_aleatoric.py:
--------------------------------------------------------------------------------
# When modeling aleatoric uncertainty, we perform MAP inference using learned loss attenuation.
# That is, we put a distribution on the outputs of the network to capture the data's inherent noise.
# We assume that the logits are drawn from a Gaussian distribution.
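
# --- Illustrative sketch (my addition, not part of the original script) -------
# The training loop below minimizes a Monte Carlo approximation of the
# classification loss-attenuation objective: sample corrupted logits
# logit_t = mu + sigma * eps_t with eps_t ~ N(0, I), average the softmax
# probabilities over NUM_SAMPLES draws, and take the negative log of the
# averaged probability of the true class. The helper name `attenuated_nll`
# is mine; it only restates what the loop below computes.
import torch
import torch.nn.functional as F


def attenuated_nll(mu, sigma, target, num_samples=10):
    probs = torch.zeros(num_samples, mu.size(0), mu.size(1))
    for t in range(num_samples):
        eps = torch.randn_like(sigma)                  # eps_t ~ N(0, I)
        probs[t] = F.softmax(mu + sigma * eps, dim=1)  # softmax of corrupted logits
    prob_ave = probs.mean(dim=0)                       # average over the samples
    return F.nll_loss(torch.log(prob_ave), target)     # -log(mean prob of true class)
# ------------------------------------------------------------------------------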


import os
import matplotlib.pyplot as plt
import time

import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F


EPOCH = 10
BATCH_SIZE = 100
LR = 0.001
DOWNLOAD_MNIST = False
CLASS_NUM = 10
NUM_SAMPLES = 10


def get_hms(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return h, m, s


if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    DOWNLOAD_MNIST = True


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=True, download=DOWNLOAD_MNIST,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=BATCH_SIZE, shuffle=True)


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(       # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,            # input channels
                out_channels=16,          # number of filters
                kernel_size=5,            # filter size
                stride=1,                 # filter movement/step
                padding=2,                # padding=(kernel_size-1)/2 keeps the spatial size when stride=1
            ),                            # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # take the max value in each 2x2 area, output shape (16, 14, 14)
            nn.Dropout(0.5)
        )
        self.conv2 = nn.Sequential(       # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),   # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),              # output shape (32, 7, 7)
            nn.Dropout(0.5)
        )
        # self.linear = nn.Linear(32 * 7 * 7, 10)  # fully connected layer, output 10 classes [batch_size, 10]
        self.linear = nn.Linear(32 * 7 * 7, CLASS_NUM * 2)  # predicts both mu and sigma, [batch_size, 2 * CLASS_NUM]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        logit = self.linear(x)
        mu, sigma = logit.split(CLASS_NUM, 1)
        return mu, sigma


cnn = CNN()
print(cnn)  # net architecture

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)


best_acc = 0

elapsed_time = 0
start_time = time.time()

for epoch in range(EPOCH):
    cnn.train()
    for batch_idx, (train_x, train_y) in enumerate(train_loader):

        # Dropout is active here because the model is in train mode; keeping dropout
        # active at test time as well is how epistemic uncertainty would be modeled.
        mu, sigma = cnn(train_x)

        prob_total = torch.zeros((NUM_SAMPLES, train_y.size(0), CLASS_NUM))
        for t in range(NUM_SAMPLES):
            # Assume each logit value is drawn from a Gaussian, so the whole logit
            # vector is drawn from a multivariate Gaussian with diagonal covariance.
            epsilon = torch.randn(sigma.size())
            logit = mu + torch.mul(sigma, epsilon)
            prob_total[t] = F.softmax(logit, dim=1)

        prob_ave = torch.mean(prob_total, 0)
        loss = F.nll_loss(torch.log(prob_ave), train_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch: ', epoch, '| batch: ', batch_idx, '| train loss: %.4f' % loss.data.numpy())
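
    # --- Illustrative note (my addition, not part of the original script) ------
    # The README points out that it is unclear how to reduce the per-class
    # `sigma` vector to a single aleatoric-uncertainty value per sample. One
    # possible (assumed, not taken from the paper) summary for the eval loop
    # below would be:
    #     aleatoric = test_sigma.pow(2).mean(dim=1)   # shape: (batch,)
    # i.e. the mean predicted variance over the CLASS_NUM logits of each sample.
    # ----------------------------------------------------------------------------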

    cnn.eval()
    correct = 0
    for batch_idx, (test_x, test_y) in enumerate(test_loader):
        test_mu, test_sigma = cnn(test_x)

        pred_y = torch.max(test_mu, 1)[1].data.numpy()
        correct += float((pred_y == test_y.data.numpy()).astype(int).sum())

        # Aleatoric uncertainty is measured by some function of test_sigma.

    accuracy = correct / float(len(test_loader.dataset))
    print('-> Epoch: ', epoch, '| test accuracy: %.4f' % accuracy)
    if accuracy > best_acc:
        best_acc = accuracy


elapsed_time = time.time() - start_time
print('Best test accuracy is: ', best_acc)  # 0.9918
print('Elapsed time : %d:%02d:%02d' % (get_hms(elapsed_time)))

--------------------------------------------------------------------------------
/classification_combined.py:
--------------------------------------------------------------------------------
# When modeling epistemic and aleatoric uncertainty together, we use MC dropout as well as loss attenuation to capture both model uncertainty and data uncertainty.
# We put distributions on both the weights of the network and the outputs of the network.


import os
import matplotlib.pyplot as plt
import time

import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F

EPOCH = 10
BATCH_SIZE = 100
LR = 0.001
DOWNLOAD_MNIST = False
CLASS_NUM = 10
NUM_SAMPLES = 10


def get_hms(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return h, m, s


def apply_dropout(m):
    if type(m) == nn.Dropout:
        m.train()


if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    DOWNLOAD_MNIST = True


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=True, download=DOWNLOAD_MNIST,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=BATCH_SIZE, shuffle=True)


print('train data len: ', len(train_loader.dataset))
print('test data len: ', len(test_loader.dataset))


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(       # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,            # input channels
                out_channels=16,          # number of filters
                kernel_size=5,            # filter size
                stride=1,                 # filter movement/step
                padding=2,
            ),                            # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # take the max value in each 2x2 area, output shape (16, 14, 14)
            nn.Dropout(0.5)
        )
        self.conv2 = nn.Sequential(       # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),   # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),              # output shape (32, 7, 7)
            nn.Dropout(0.5)
        )
        self.linear = nn.Linear(32 * 7 * 7, CLASS_NUM * 2)  # fully connected layer, predicts mu and sigma, [batch_size, 2 * CLASS_NUM]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        logit = self.linear(x)
        mu, sigma = logit.split(CLASS_NUM, 1)
        return mu, sigma
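

# --- Illustrative sketch (my addition, not part of the original script) -------
# The eval loop below collects `prob_total` and `sigma_total`, both of shape
# (NUM_SAMPLES, batch, CLASS_NUM), from NUM_SAMPLES stochastic forward passes
# with dropout kept on. One possible (assumed) way to turn them into per-sample
# scalars, following the README's suggestion to use entropy:
#   * epistemic/predictive uncertainty: entropy of the MC-averaged softmax;
#   * aleatoric uncertainty: mean predicted variance from the sigma head.
def decompose_uncertainty(prob_total, sigma_total):
    prob_ave = prob_total.mean(dim=0)                                    # (batch, CLASS_NUM)
    entropy = -(prob_ave * prob_ave.clamp_min(1e-12).log()).sum(dim=1)   # (batch,)
    aleatoric = sigma_total.pow(2).mean(dim=0).mean(dim=1)               # (batch,)
    return entropy, aleatoric
# ------------------------------------------------------------------------------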


cnn = CNN()
print(cnn)  # net architecture

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)

best_acc = 0

elapsed_time = 0
start_time = time.time()

for epoch in range(EPOCH):
    cnn.train()
    for batch_idx, (train_x, train_y) in enumerate(train_loader):

        mu, sigma = cnn(train_x)

        prob_total = torch.zeros((NUM_SAMPLES, train_y.size(0), CLASS_NUM))
        for t in range(NUM_SAMPLES):
            # Assume each logit value is drawn from a Gaussian, so the whole logit
            # vector is drawn from a multivariate Gaussian with diagonal covariance.
            epsilon = torch.randn(sigma.size())
            logit = mu + torch.mul(sigma, epsilon)
            prob_total[t] = F.softmax(logit, dim=1)

        prob_ave = torch.mean(prob_total, 0)
        loss = F.nll_loss(torch.log(prob_ave), train_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch: ', epoch, '| batch: ', batch_idx, '| train loss: %.4f' % loss.data.numpy())


    cnn.eval()
    cnn.apply(apply_dropout)  # keep dropout layers in train mode at test time (MC dropout)
    correct = 0
    for batch_idx, (test_x, test_y) in enumerate(test_loader):
        prob_total = torch.zeros((NUM_SAMPLES, test_y.size(0), CLASS_NUM))
        sigma_total = torch.zeros((NUM_SAMPLES, test_y.size(0), CLASS_NUM))
        for t in range(NUM_SAMPLES):
            test_mu, test_sigma = cnn(test_x)
            prob_total[t] = F.softmax(test_mu, dim=1)
            sigma_total[t] = test_sigma

        prob_ave = torch.mean(prob_total, 0)
        pred_y = torch.max(prob_ave, 1)[1].data.numpy()
        correct += float((pred_y == test_y.data.numpy()).astype(int).sum())

        sigma_ave = torch.mean(sigma_total, 0)
        # Aleatoric uncertainty is measured by some function of sigma_ave.
        # Epistemic uncertainty is measured by some function of prob_ave (e.g. entropy).

    accuracy = correct / float(len(test_loader.dataset))
    print('-> Epoch: ', epoch, '| test accuracy: %.4f' % accuracy)
    if accuracy > best_acc:
        best_acc = accuracy


elapsed_time = time.time() - start_time
print('Best test accuracy is: ', best_acc)  # 0.9893
print('Elapsed time : %d:%02d:%02d' % (get_hms(elapsed_time)))

--------------------------------------------------------------------------------
/classification_epistemic.py:
--------------------------------------------------------------------------------
# When modeling epistemic uncertainty, we enable dropout during training as well as at test time.
# This time, we put a distribution on the weights of the network to capture model uncertainty.

# 1. In Monte Carlo dropout, the approximating distribution is a mixture of two Gaussian components with small variances, where the mean of one component is fixed at zero.
#    (Sampling from it can be seen as sampling from a Bernoulli distribution, but we can't define the approximating distribution itself as Bernoulli, since our prior is Gaussian and the KL divergence between a Bernoulli and a Gaussian is not defined.)
#    reference: http://mlg.eng.cam.ac.uk/yarin/blog_3d801aa532c1ce.html

# 2. In Monte Carlo dropout, we sample from a Bernoulli distribution to approximate the spike-and-slab prior (i.e. a mixture of two Gaussian components with small variances, where the mean of one component is fixed at zero).
#    reference: Yarin Gal's PhD thesis

# I don't know which of these two descriptions is correct.


import os
import matplotlib.pyplot as plt
import time

import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F


EPOCH = 10
BATCH_SIZE = 100
LR = 0.001
DOWNLOAD_MNIST = False
CLASS_NUM = 10
NUM_SAMPLES = 10


def get_hms(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return h, m, s


def apply_dropout(m):
    if type(m) == nn.Dropout:
        m.train()


if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    DOWNLOAD_MNIST = True


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=True, download=DOWNLOAD_MNIST,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=BATCH_SIZE, shuffle=True)


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(       # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,            # input channels
                out_channels=16,          # number of filters
                kernel_size=5,            # filter size
                stride=1,                 # filter movement/step
                padding=2,                # padding=(kernel_size-1)/2 keeps the spatial size when stride=1
            ),                            # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # take the max value in each 2x2 area, output shape (16, 14, 14)
            nn.Dropout(0.5)
        )
        self.conv2 = nn.Sequential(       # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),   # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),              # output shape (32, 7, 7)
            nn.Dropout(0.5)
        )
        self.linear = nn.Linear(32 * 7 * 7, CLASS_NUM)  # fully connected layer, output 10 classes [batch_size, 10]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        logit = self.linear(x)
        mu = logit
        return mu


cnn = CNN()
print(cnn)  # net architecture

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()


best_acc = 0

elapsed_time = 0
start_time = time.time()

for epoch in range(EPOCH):
    cnn.train()
    for batch_idx, (train_x, train_y) in enumerate(train_loader):

        mu = cnn(train_x)
        loss = loss_func(mu, train_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch: ', epoch, '| batch: ', batch_idx, '| train loss: %.4f' % loss.data.numpy())


    cnn.eval()
    cnn.apply(apply_dropout)  # keep dropout layers in train mode at test time (MC dropout)
    correct = 0
    for batch_idx, (test_x, test_y) in enumerate(test_loader):
        prob_total = torch.zeros((NUM_SAMPLES, test_y.size(0), CLASS_NUM))
        for t in range(NUM_SAMPLES):
            test_mu = cnn(test_x)
            prob_total[t] = F.softmax(test_mu, dim=1)

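        # --- Illustrative note (my addition, not part of the original script) ---
        # Following the README, a per-sample epistemic/predictive uncertainty can
        # be summarized (one possible choice, not from the paper) as the entropy
        # of the MC-averaged softmax, computed from prob_total of shape
        # (NUM_SAMPLES, batch, CLASS_NUM):
        #     mean_prob = prob_total.mean(dim=0)
        #     predictive_entropy = -(mean_prob * mean_prob.clamp_min(1e-12).log()).sum(dim=1)
        # It is not used below; accuracy is still computed from prob_ave.
        # -------------------------------------------------------------------------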
        prob_ave = torch.mean(prob_total, 0)
        pred_y = torch.max(prob_ave, 1)[1].data.numpy()
        correct += float((pred_y == test_y.data.numpy()).astype(int).sum())

        # Epistemic uncertainty is measured by some function of prob_ave (e.g. entropy).

    accuracy = correct / float(len(test_loader.dataset))
    print('-> Epoch: ', epoch, '| test accuracy: %.4f' % accuracy)
    if accuracy > best_acc:
        best_acc = accuracy


elapsed_time = time.time() - start_time
print('Best test accuracy is: ', best_acc)  # 0.9893
print('Elapsed time : %d:%02d:%02d' % (get_hms(elapsed_time)))

--------------------------------------------------------------------------------
/classification_normal.py:
--------------------------------------------------------------------------------
# This is a normal neural network, used as the vanilla baseline.


import os
import matplotlib.pyplot as plt
import time

import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F


EPOCH = 10
BATCH_SIZE = 100
LR = 0.001
DOWNLOAD_MNIST = False
CLASS_NUM = 10
NUM_SAMPLES = 10


def get_hms(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return h, m, s


if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    DOWNLOAD_MNIST = True


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=True, download=DOWNLOAD_MNIST,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=BATCH_SIZE, shuffle=True)


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(       # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,            # input channels
                out_channels=16,          # number of filters
                kernel_size=5,            # filter size
                stride=1,                 # filter movement/step
                padding=2,                # padding=(kernel_size-1)/2 keeps the spatial size when stride=1
            ),                            # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # take the max value in each 2x2 area, output shape (16, 14, 14)
            nn.Dropout(0.5)
        )
        self.conv2 = nn.Sequential(       # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),   # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),              # output shape (32, 7, 7)
            nn.Dropout(0.5)
        )
        self.linear = nn.Linear(32 * 7 * 7, CLASS_NUM)  # fully connected layer, output 10 classes [batch_size, 10]

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        logit = self.linear(x)
        mu = logit
        return mu


cnn = CNN()
print(cnn)  # net architecture

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()


best_acc = 0

elapsed_time = 0
start_time = time.time()

for epoch in range(EPOCH):
    cnn.train()
    for batch_idx, (train_x, train_y) in enumerate(train_loader):

        mu = cnn(train_x)
        loss = loss_func(mu, train_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch: ', epoch, '| batch: ', batch_idx, '| train loss: %.4f' % loss.data.numpy())


    cnn.eval()
    correct = 0
    for batch_idx, (test_x, test_y) in enumerate(test_loader):

        test_mu = cnn(test_x)
        prob = F.softmax(test_mu, dim=1)

        pred_y = torch.max(prob, 1)[1].data.numpy()
        correct += float((pred_y == test_y.data.numpy()).astype(int).sum())

    accuracy = correct / float(len(test_loader.dataset))
    print('-> Epoch: ', epoch, '| test accuracy: %.4f' % accuracy)
    if accuracy > best_acc:
        best_acc = accuracy


elapsed_time = time.time() - start_time
print('Best test accuracy is: ', best_acc)  # 0.9914
print('Elapsed time : %d:%02d:%02d' % (get_hms(elapsed_time)))

--------------------------------------------------------------------------------