├── figures
│   ├── NPN.jpg
│   ├── nn-vs-npn.png
│   ├── nn-vs-npn-op.png
│   └── acc-var-MNIST.jpg
├── boston_housing_nor_val.pkl
├── boston_housing_nor_train.pkl
├── utils.py
├── .gitignore
├── datasets_boston_housing.py
├── regress_mlp.sh
├── regress_npn.sh
├── mlp.sh
├── npn.sh
├── cnn_mlp.sh
├── cnn_npn.sh
├── mlp-att.sh
├── npnlite.sh
├── README.md
├── npn.py
└── main_mlp.py

/figures/NPN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/js05212/PyTorch-for-NPN/HEAD/figures/NPN.jpg -------------------------------------------------------------------------------- /figures/nn-vs-npn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/js05212/PyTorch-for-NPN/HEAD/figures/nn-vs-npn.png -------------------------------------------------------------------------------- /figures/nn-vs-npn-op.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/js05212/PyTorch-for-NPN/HEAD/figures/nn-vs-npn-op.png -------------------------------------------------------------------------------- /boston_housing_nor_val.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/js05212/PyTorch-for-NPN/HEAD/boston_housing_nor_val.pkl -------------------------------------------------------------------------------- /figures/acc-var-MNIST.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/js05212/PyTorch-for-NPN/HEAD/figures/acc-var-MNIST.jpg -------------------------------------------------------------------------------- /boston_housing_nor_train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/js05212/PyTorch-for-NPN/HEAD/boston_housing_nor_train.pkl -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | def plain_log(filename, text): 2 | fp = open(filename,'a') 3 | fp.write(text) 4 | fp.close() 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp* 2 | *~ 3 | __pycache__ 4 | *.model 5 | *.pyc 6 | *.py_* 7 | *.sh_* 8 | README 9 | hao-att.sh 10 | cp.sh 11 | weight_info* 12 | main_npn.py 13 | main.py 14 | npn_example.py 15 | -------------------------------------------------------------------------------- /datasets_boston_housing.py: -------------------------------------------------------------------------------- 1 | from torch.utils import data 2 | import pickle 3 | import numpy as np 4 | 5 | class Dataset_boston_housing(data.Dataset): 6 | # boston housing dataset 7 | def __init__(self, path): 8 | # read data pickle as a dict, where keys are X, X_labels 9 | with open(path, 'rb') as f: 10 | self.data = pickle.load(f) 11 | 12 | def __len__(self): 13 | return self.data['X'].shape[0] 14 | 15 | def __getitem__(self, index): 16 | index = index % self.data['X'].shape[0] 17 | return self.data['X'][index].astype('float32'), np.asarray([self.data['X_labels'][index]]).astype('float32') 18 | -------------------------------------------------------------------------------- /regress_mlp.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 |
dropout='0.0' 3 | lr='1e0' 4 | output_s='0e1' 5 | type='regress_mlp' 6 | loss='mse' 7 | evaluate='' 8 | checkpoint='none' 9 | save_head='tmp' 10 | epo='1000' 11 | save_interval='100' 12 | batch_size='16' 13 | log_file='tmp_mlp' 14 | seed='1112' 15 | CUDA_VISIBLE_DEVICES=$gpuid python3.5 main_mlp.py \ 16 | --lr $lr \ 17 | --epochs $epo \ 18 | --batch-size $batch_size \ 19 | --log_file $log_file \ 20 | --dropout $dropout \ 21 | --save_interval $save_interval \ 22 | --loss $loss \ 23 | --save_head $save_head \ 24 | $evaluate \ 25 | --checkpoint $checkpoint \ 26 | --type $type \ 27 | --output_s $output_s \ 28 | --seed $seed 29 | -------------------------------------------------------------------------------- /regress_npn.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.0' 3 | lr='1e0' 4 | output_s='0e1' 5 | type='regress_npn' 6 | loss='gaussian' 7 | evaluate='' 8 | checkpoint='none' 9 | save_head='tmp' 10 | epo='1000' 11 | save_interval='100' 12 | batch_size='16' 13 | log_file='tmp_mlp' 14 | seed='1112' 15 | CUDA_VISIBLE_DEVICES=$gpuid python3.5 main_mlp.py \ 16 | --lr $lr \ 17 | --epochs $epo \ 18 | --batch-size $batch_size \ 19 | --log_file $log_file \ 20 | --dropout $dropout \ 21 | --save_interval $save_interval \ 22 | --loss $loss \ 23 | --save_head $save_head \ 24 | $evaluate \ 25 | --checkpoint $checkpoint \ 26 | --type $type \ 27 | --output_s $output_s \ 28 | --seed $seed 29 | -------------------------------------------------------------------------------- /mlp.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.2' 3 | lr='5e0' # 1e-4 4 | lambda='1e-1' 5 | type='mlp' 6 | num_train='100' 7 | loss='default' # default 8 | evaluate='' 9 | checkpoint='none' 10 | save_head='tmp' 11 | epo='500' 12 | save_interval='100' 13 | batch_size='128' 14 | log_file='tmp_mlp' 15 | seed='2' 16 | CUDA_VISIBLE_DEVICES=$gpuid python main_mlp.py \ 17 | --lr $lr \ 18 | --epochs $epo \ 19 | --num_train $num_train \ 20 | --batch-size $batch_size \ 21 | --log_file $log_file \ 22 | --dropout $dropout \ 23 | --save_interval $save_interval \ 24 | --loss $loss \ 25 | --save_head $save_head \ 26 | $evaluate \ 27 | --checkpoint $checkpoint \ 28 | --type $type \ 29 | --output_s $lambda \ 30 | --seed $seed 31 | -------------------------------------------------------------------------------- /npn.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.2' 3 | lr='5e0' # 1e-4 4 | lambda='1e-1' 5 | type='npn' 6 | num_train='100' 7 | loss='default' # default 8 | evaluate='' 9 | checkpoint='none' 10 | save_head='tmp' 11 | epo='500' 12 | save_interval='100' 13 | batch_size='128' 14 | log_file='tmp_mlp' 15 | seed='2' 16 | CUDA_VISIBLE_DEVICES=$gpuid python main_mlp.py \ 17 | --lr $lr \ 18 | --epochs $epo \ 19 | --num_train $num_train \ 20 | --batch-size $batch_size \ 21 | --log_file $log_file \ 22 | --dropout $dropout \ 23 | --save_interval $save_interval \ 24 | --loss $loss \ 25 | --save_head $save_head \ 26 | $evaluate \ 27 | --checkpoint $checkpoint \ 28 | --type $type \ 29 | --output_s $lambda \ 30 | --seed $seed 31 | -------------------------------------------------------------------------------- /cnn_mlp.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.2' 3 | lr='5e0' # 1e-4 4 | lambda='1e-1' 5 | type='cnn' 6 | num_train='100' 7 | loss='default' # default 8 | evaluate='' 9 | 
checkpoint='none' 10 | save_head='tmp' 11 | epo='500' 12 | save_interval='100' 13 | batch_size='128' 14 | log_file='tmp_mlp' 15 | seed='2' 16 | CUDA_VISIBLE_DEVICES=$gpuid python main_mlp.py \ 17 | --lr $lr \ 18 | --epochs $epo \ 19 | --num_train $num_train \ 20 | --batch-size $batch_size \ 21 | --log_file $log_file \ 22 | --dropout $dropout \ 23 | --save_interval $save_interval \ 24 | --loss $loss \ 25 | --save_head $save_head \ 26 | $evaluate \ 27 | --checkpoint $checkpoint \ 28 | --type $type \ 29 | --output_s $lambda \ 30 | --seed $seed 31 | -------------------------------------------------------------------------------- /cnn_npn.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.2' 3 | lr='5e0' # 1e-4 4 | lambda='1e-1' 5 | type='npncnn' 6 | num_train='100' 7 | loss='default' # default 8 | evaluate='' 9 | checkpoint='none' 10 | save_head='tmp' 11 | epo='500' 12 | save_interval='100' 13 | batch_size='128' 14 | log_file='tmp_mlp' 15 | seed='2' 16 | CUDA_VISIBLE_DEVICES=$gpuid python main_mlp.py \ 17 | --lr $lr \ 18 | --epochs $epo \ 19 | --num_train $num_train \ 20 | --batch-size $batch_size \ 21 | --log_file $log_file \ 22 | --dropout $dropout \ 23 | --save_interval $save_interval \ 24 | --loss $loss \ 25 | --save_head $save_head \ 26 | $evaluate \ 27 | --checkpoint $checkpoint \ 28 | --type $type \ 29 | --output_s $lambda \ 30 | --seed $seed 31 | -------------------------------------------------------------------------------- /mlp-att.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.2' 3 | lr='5e0' # 1e-4 4 | lambda='1e-1' 5 | type='npn' 6 | num_train='100' 7 | loss='default' # default 8 | evaluate='' 9 | checkpoint='none' 10 | save_head='tmp' 11 | epo='500' 12 | save_interval='100' 13 | batch_size='128' 14 | log_file='tmp_mlp' 15 | seed='1112' 16 | CUDA_VISIBLE_DEVICES=$gpuid python3.5 main_mlp.py \ 17 | --lr $lr \ 18 | --epochs $epo \ 19 | --num_train $num_train \ 20 | --batch-size $batch_size \ 21 | --log_file $log_file \ 22 | --dropout $dropout \ 23 | --save_interval $save_interval \ 24 | --loss $loss \ 25 | --save_head $save_head \ 26 | $evaluate \ 27 | --checkpoint $checkpoint \ 28 | --type $type \ 29 | --output_s $lambda \ 30 | --seed $seed 31 | -------------------------------------------------------------------------------- /npnlite.sh: -------------------------------------------------------------------------------- 1 | gpuid='0' 2 | dropout='0.2' 3 | lr='5e0' # 1e-4 4 | lambda='1e-1' 5 | type='npn_lite' 6 | num_train='100' 7 | loss='default' # default 8 | evaluate='' 9 | checkpoint='none' 10 | save_head='tmp' 11 | epo='500' 12 | save_interval='100' 13 | batch_size='128' 14 | log_file='tmp_mlp' 15 | seed='2' 16 | CUDA_VISIBLE_DEVICES=$gpuid python main_mlp.py \ 17 | --lr $lr \ 18 | --epochs $epo \ 19 | --num_train $num_train \ 20 | --batch-size $batch_size \ 21 | --log_file $log_file \ 22 | --dropout $dropout \ 23 | --save_interval $save_interval \ 24 | --loss $loss \ 25 | --save_head $save_head \ 26 | $evaluate \ 27 | --checkpoint $checkpoint \ 28 | --type $type \ 29 | --output_s $lambda \ 30 | --seed $seed 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Natural-Parameter Networks (NPN) in PyTorch 2 | ============ 3 | 4 | This is the PyTorch code for the NIPS paper ['Natural-Parameter Networks: A Class of Probabilistic 
Neural Networks'](http://wanghao.in/paper/NIPS16_NPN.pdf). 5 | 6 | It is a class of probabilistic neural networks that treat both weights and neurons as distributions rather than just points in high-dimensional space. Distributions are first-class citizens in the networks, and the design allows them to feed forward and backpropagate through the network. Given an input data point, NPN outputs a predicted distribution that carries information on both the prediction and its uncertainty. 7 | 8 | NPN can be used either independently or as a building block for [Bayesian Deep Learning](http://wanghao.in/paper/TKDE16_BDL.pdf) (BDL). 9 | 10 | Note that this is the code for Gaussian NPN to run on the MNIST and Boston 11 | Housing datasets. For Gamma NPN or Poisson NPN, please refer to the other repo. 12 | 13 | ## Neural networks vs. natural-parameter networks in two figures: 14 | 15 | ### Distributions as first-class citizens: 16 | 17 |
![Neural network vs. NPN](./figures/nn-vs-npn.png) 18 | 19 |
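In code, "distributions as first-class citizens" means that every activation is a (mean, variance) pair rather than a single tensor. Below is a minimal sketch of this interface (assuming `npn.py` from this repo is importable and a PyTorch version recent enough to operate on plain tensors; under PyTorch 0.2 the tensors would need to be wrapped in `Variable`):

```python
import torch
from npn import NPNSigmoid

act = NPNSigmoid()
o_m = torch.zeros(2, 3)        # mean of a pre-activation
o_s = 0.1 * torch.ones(2, 3)   # variance of the same pre-activation
a_m, a_s = act((o_m, o_s))     # closed-form output moments of a sigmoid applied to N(o_m, o_s)
```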
20 | 21 | ### Closed-form operations to handle uncertainty: 22 | 23 |
![Closed-form operations in NPN](./figures/nn-vs-npn-op.png) 24 | 25 |
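Concretely, a Gaussian NPN linear layer propagates the input mean x_m and variance x_s through weight means W_m, weight variances W_s, and biases (b_m, b_s) in closed form; this is exactly the forward pass of `NPNLinear` in `npn.py` below:

```
o_m = x_m W_m + b_m
o_s = x_s W_s + x_s (W_m ∘ W_m) + (x_m ∘ x_m) W_s + b_s
```

where ∘ denotes the element-wise product. The activations (`NPNSigmoid`, `NPNRelu`) likewise map input moments to closed-form approximations of the output moments.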
26 | 27 | ## Example results on uncertainty-aware prediction: 28 | ### Output both prediction and uncertainty for regression: 29 |
![Predictive distribution of NPN on a toy regression task](./figures/NPN.jpg) 30 | 31 |
32 | Above is the predictive distribution for NPN. The shaded regions correspond 33 | to 3 standard deviations. The black curve is the data-generating function, and the blue curves 34 | show the means of the predictive distributions. Red stars are the training data. 35 | 36 | ### Accuracy versus uncertainty (variance): 37 |
![Accuracy vs. variance on MNIST](./figures/acc-var-MNIST.jpg) 38 | 39 |
40 | Above is the classification accuracy for different levels of variance (uncertainty). Note that ‘1’ on the x-axis means the variance is in the range [0, 0.04), ‘2’ means the variance is in the range [0.04, 0.08), etc. 41 | 42 | ## Accuracy: 43 | 44 | Using only 100 training samples from the MNIST training set: 45 | 46 | 47 | | Method | Accuracy | 48 | | -------|----------| 49 | | NPN (ours) | 74.58% | 50 | | MLP | 69.02% | 51 | | CNN+NPN (ours) | 86.87% | 52 | | CNN+MLP | 82.90% | 53 | 54 | ## RMSE: 55 | 56 | Regression task on Boston Housing: 57 | 58 | | Method | RMSE | 59 | | -------|----------| 60 | | NPN (ours) | 3.2197 | 61 | | MLP | 3.5748 | 62 | 63 | ## How to run the code: 64 | 65 | * In general, to train the model, run the command: `sh mlp-att.sh` 66 | * To train NPN (fully connected), run the command: `sh npn.sh` 67 | * To train MLP (fully connected), run the command: `sh mlp.sh` 68 | * To train CNN+NPN, run the command: `sh cnn_npn.sh` 69 | * To train CNN+MLP, run the command: `sh cnn_mlp.sh` 70 | * For regression tasks (Boston Housing) using NPN, run the command: `sh regress_npn.sh` 71 | * For regression tasks (Boston Housing) using MLP, run the command: `sh regress_mlp.sh` 72 | 73 | ## Short code example: 74 | This is *everything* needed to implement a three-layer NPN in PyTorch, assuming `torch.nn` is imported as `nn` (essentially you only need to replace `nn.Linear` with `NPNLinear`; a minimal inference sketch follows near the end of this README): 75 | ```python 76 | from npn import NPNLinear 77 | from npn import NPNSigmoid 78 | class NPNNet(nn.Module): 79 | def __init__(self): 80 | super(NPNNet, self).__init__() 81 | 82 | # Last parameter of NPNLinear 83 | # True: input contains both the mean and variance 84 | # False: input contains only the mean 85 | self.fc1 = NPNLinear(784, 800, False) 86 | self.sigmoid1 = NPNSigmoid() 87 | self.fc2 = NPNLinear(800, 800) 88 | self.sigmoid2 = NPNSigmoid() 89 | self.fc3 = NPNLinear(800, 10) 90 | self.sigmoid3 = NPNSigmoid() 91 | 92 | def forward(self, x): 93 | x = self.sigmoid1(self.fc1(x)) 94 | x = self.sigmoid2(self.fc2(x)) 95 | # output mean (x) and variance (s) of Gaussian NPN 96 | x, s = self.sigmoid3(self.fc3(x)) 97 | return x, s 98 | ``` 99 | 100 | ## Install: 101 | 102 | The code is tested under PyTorch 0.2.03 and Python 3.5.2. 103 | 104 | ## Official Matlab implementation: 105 | 106 | The official Matlab version (with GPU support) can be found [here](https://github.com/js05212/NPN). 107 | 108 | ## Other implementations (third-party): 109 | 110 | Another PyTorch/Python version (with an extension to GRU) by [sohamghosh121](https://github.com/sohamghosh121/natural-parameter-networks).
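## Inference sketch:

The snippet below is not part of the repo's scripts; it is a minimal sketch of how the three-layer `NPNNet` from the short code example might be queried at test time. It assumes `npn.py` is importable, `NPNNet` is defined as above, and a recent PyTorch (under PyTorch 0.2, inputs would need to be wrapped in `Variable`); the 0.04 threshold is illustrative only.

```python
import torch

model = NPNNet()  # the example network defined above
model.eval()

x = torch.rand(5, 784)   # a batch of 5 flattened 28x28 images
mean, var = model(x)     # Gaussian NPN outputs per-class mean and variance
_, pred = mean.max(1)    # predicted class = argmax of the output mean
pred_var = var.gather(1, pred.unsqueeze(1)).squeeze(1)  # variance of the predicted class

# flag high-variance predictions as 'uncertain', mirroring the
# accuracy-versus-variance analysis above
uncertain = pred_var > 0.04
```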
111 | 112 | ## Reference: 113 | [Natural-Parameter Networks: A Class of Probabilistic Neural Networks](http://wanghao.in/paper/NIPS16_NPN.pdf) 114 | ``` 115 | @inproceedings{DBLP:conf/nips/WangSY16, 116 | author = {Hao Wang and 117 | Xingjian Shi and 118 | Dit{-}Yan Yeung}, 119 | title = {Natural-Parameter Networks: {A} Class of Probabilistic Neural Networks}, 120 | booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference 121 | on Neural Information Processing Systems 2016, December 5-10, 2016, 122 | Barcelona, Spain}, 123 | pages = {118--126}, 124 | year = {2016} 125 | } 126 | ``` 127 | 128 | -------------------------------------------------------------------------------- /npn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Variable 3 | import torch.optim as optim 4 | import torch 5 | import torch.nn.functional as F 6 | import math 7 | import random 8 | 9 | class NPNLinear(nn.Module): 10 | def positive_s(self, x, use_sigmoid = 0): 11 | if use_sigmoid == 0: 12 | y = torch.log(torch.exp(x) + 1) 13 | else: 14 | y = F.sigmoid(x) 15 | return y 16 | 17 | def positive_s_inv(self, x, use_sigmoid = 0): 18 | if use_sigmoid == 0: 19 | y = torch.log(torch.exp(x) - 1) 20 | else: 21 | y = - torch.log(1 / x - 1) 22 | return y 23 | 24 | def __init__(self, in_channels, out_channels, dual_input = True, init_type = 0): 25 | # init_type 0: normal, 1: mixture of delta distr' 26 | super(NPNLinear, self).__init__() 27 | self.in_channels = in_channels 28 | self.out_channels = out_channels 29 | self.dual_input = dual_input 30 | 31 | self.W_m = nn.Parameter(2 * math.sqrt(6) / math.sqrt(in_channels + out_channels) * (torch.rand(in_channels, out_channels) - 0.5)) 32 | if init_type == 0: 33 | #W_s_init = 1 * math.sqrt(6) / math.sqrt(in_channels + out_channels) * torch.rand(in_channels, out_channels) 34 | W_s_init = 0.01 * math.sqrt(6) / math.sqrt(in_channels + out_channels) * torch.rand(in_channels, out_channels) 35 | else: 36 | bern = torch.bernoulli(torch.ones(in_channels, out_channels) * 0.5) 37 | W_s_init = bern * math.exp(-2) + (1 - bern) * math.exp(-14) 38 | print(W_s_init[:4,:4]) 39 | #self.W_s_ = nn.Parameter(torch.log(torch.exp(W_s_init) - 1)) 40 | self.W_s_ = nn.Parameter(self.positive_s_inv(W_s_init, 0)) 41 | #self.W_m = nn.Parameter(torch.zeros(in_channels, out_channels)) 42 | #self.W_s_ = nn.Parameter(torch.zeros(in_channels, out_channels)) 43 | 44 | self.bias_m = nn.Parameter(torch.zeros(out_channels)) 45 | #self.bias_s_ = nn.Parameter(torch.zeros(out_channels)) 46 | if init_type == 0: 47 | self.bias_s_ = nn.Parameter(torch.ones(out_channels) * (-10)) 48 | #self.bias_s_ = nn.Parameter(torch.ones(out_channels) * (-1)) 49 | else: 50 | bern = torch.bernoulli(torch.ones(out_channels) * 0.5) 51 | bias_s_init = bern * math.exp(-2) + (1 - bern) * math.exp(-14) 52 | self.bias_s_ = nn.Parameter(self.positive_s_inv(bias_s_init, 0)) 53 | 54 | def forward(self, x): 55 | if self.dual_input: 56 | x_m, x_s = x 57 | else: 58 | x_m = x 59 | #x_s = Variable(torch.zeros(x_m.size())) 60 | x_s = x.clone() 61 | x_s = 0 * x_s 62 | 63 | o_m = torch.mm(x_m, self.W_m) 64 | o_m = o_m + self.bias_m.expand_as(o_m) 65 | 66 | #W_s = torch.log(torch.exp(self.W_s_) + 1) 67 | #bias_s = torch.log(torch.exp(self.bias_s_) + 1) 68 | W_s = self.positive_s(self.W_s_, 0) 69 | bias_s = self.positive_s(self.bias_s_, 0) 70 | 71 | o_s = torch.mm(x_s, W_s) + torch.mm(x_s, self.W_m * self.W_m) + torch.mm(x_m * x_m, W_s) 72 | o_s 
= o_s + bias_s.expand_as(o_s) 73 | 74 | #print('bingo om os') 75 | #print(o_m.data) 76 | #print(o_s.data) 77 | 78 | return o_m, o_s 79 | 80 | 81 | class NPNLinearLite(nn.Module): 82 | def __init__(self, in_channels, out_channels, dual_input = True, init_type = 0): 83 | super(NPNLinearLite, self).__init__() 84 | self.in_channels = in_channels 85 | self.out_channels = out_channels 86 | self.dual_input = dual_input 87 | 88 | self.W_m = nn.Parameter(2 * math.sqrt(6) / math.sqrt(in_channels + out_channels) * (torch.rand(in_channels, out_channels) - 0.5)) 89 | self.bias_m = nn.Parameter(torch.zeros(out_channels)) 90 | 91 | def forward(self, x): 92 | if self.dual_input: 93 | x_m, x_s = x 94 | else: 95 | x_m = x 96 | x_s = x.clone() 97 | x_s = 0 * x_s 98 | 99 | o_m = torch.mm(x_m, self.W_m) 100 | o_m = o_m + self.bias_m.expand_as(o_m) 101 | 102 | o_s = torch.mm(x_s, self.W_m * self.W_m) 103 | 104 | return o_m, o_s 105 | 106 | class Linear2Branch(nn.Module): 107 | def __init__(self, in_channels, out_channels, dual_input = True, init_type = 0): 108 | super(Linear2Branch, self).__init__() 109 | self.in_channels = in_channels 110 | self.out_channels = out_channels 111 | self.dual_input = dual_input 112 | 113 | self.W_m = nn.Parameter(2 * math.sqrt(6) / math.sqrt(in_channels + out_channels) * (torch.rand(in_channels, out_channels) - 0.5)) 114 | self.W_s = nn.Parameter(2 * math.sqrt(6) / math.sqrt(in_channels + out_channels) * (torch.rand(in_channels, out_channels) - 0.5)) 115 | 116 | self.bias_m = nn.Parameter(torch.zeros(out_channels)) 117 | self.bias_s = nn.Parameter(torch.zeros(out_channels)) 118 | 119 | def forward(self, x): 120 | if self.dual_input: 121 | x_m, x_s = x 122 | else: 123 | x_m = x 124 | x_s = x 125 | 126 | o_m = torch.mm(x_m, self.W_m) 127 | o_m = o_m + self.bias_m.expand_as(o_m) 128 | 129 | o_s = torch.mm(x_s, self.W_s) 130 | o_s = o_s + self.bias_s.expand_as(o_s) 131 | o_s = o_s.exp() 132 | 133 | return o_m, o_s 134 | 135 | class NPNRelu(nn.Module): 136 | def __init__(self): 137 | super(NPNRelu, self).__init__() 138 | self.scale = math.sqrt(8/math.pi) # sqrt(8/pi) 139 | 140 | def forward(self, x): 141 | assert(len(x) == 2) 142 | o_m, o_s = x 143 | a_m = F.sigmoid(self.scale * o_m * (o_s ** (-0.5))) * o_m + torch.sqrt(o_s) / math.sqrt(2 * math.pi) * torch.exp(-o_m ** 2 / (2 * o_s)) 144 | a_s = F.sigmoid(self.scale * o_m * (o_s ** (-0.5))) * (o_m ** 2 + o_s) + o_m * torch.sqrt(o_s) / math.sqrt(2 * math.pi) * torch.exp(-o_m ** 2 / (2 * o_s)) - a_m ** 2 # mbr 145 | return a_m, a_s 146 | 147 | class NPNSigmoid(nn.Module): 148 | def __init__(self): 149 | super(NPNSigmoid, self).__init__() 150 | self.xi_sq = math.pi / 8 151 | self.alpha = 4 - 2 * math.sqrt(2) 152 | self.beta = - math.log(math.sqrt(2) + 1) 153 | 154 | def forward(self, x): 155 | assert(len(x) == 2) 156 | o_m, o_s = x 157 | a_m = F.sigmoid(o_m / (1 + self.xi_sq * o_s) ** 0.5) 158 | a_s = F.sigmoid(self.alpha * (o_m + self.beta) / (1 + self.xi_sq * self.alpha ** 2 * o_s) ** 0.5) - a_m ** 2 159 | return a_m, a_s 160 | 161 | #class NPNDropout(nn.Module): 162 | # def __init__(self, rate): 163 | # super(NPNDropout, self).__init__() 164 | # self.dropout = nn.Dropout(p = rate) 165 | # def forward(self, x): 166 | # assert(len(x) == 2) 167 | # if self.training: 168 | # self.dropout.train() 169 | # else: 170 | # self.dropout.eval() 171 | # x_m, x_s = x 172 | # y_m = self.dropout(x_m) 173 | # y_s = self.dropout(x_s) 174 | # return y_m, y_s 175 | 176 | class NPNDropout(nn.Module): 177 | def __init__(self, rate): 178 | super(NPNDropout, 
self).__init__() 179 | self.dropout = nn.Dropout2d(p = rate) 180 | def forward(self, x): 181 | assert(len(x) == 2) 182 | if self.training: 183 | self.dropout.train() 184 | else: 185 | self.dropout.eval() 186 | x_m, x_s = x 187 | x_m = x_m.unsqueeze(2) 188 | x_s = x_s.unsqueeze(2) 189 | x_com = torch.cat((x_m, x_s), dim = 2) 190 | x_com = x_com.unsqueeze(3) 191 | x_com = self.dropout(x_com) 192 | y_m = x_com[:,:,0,0] 193 | y_s = x_com[:,:,1,0] 194 | return y_m, y_s 195 | 196 | def NPNBCELoss(pred_m, pred_s, label): 197 | loss = -torch.sum((torch.log(pred_m + 1e-10) * label + torch.log(1 - pred_m + 1e-10) * (1 - label))/ (pred_s + 1e-10) - torch.log(pred_s+ 1e-10)) 198 | #loss = -torch.sum((torch.log(pred_m) * label + torch.log(1 - pred_m) * (1 - label))/ (pred_s + 1e-10) - torch.log(pred_s+ 1e-10)) / pred_m.size()[0] 199 | #loss = -torch.sum((torch.log(torch.max(pred_m, 1e-10)) * label + torch.log(torch.max(1 - pred_m, 1e-10)) * (1 - label))/ (torch.max(pred_s, 1e-10)) - torch.log(torch.max(pred_s, 1e-10))) 200 | return loss 201 | 202 | def KL_BG(pred_m, pred_s, label): 203 | #loss = 0.5 * torch.sum((1 - label) * (pred_m ** 2 / pred_s + torch.log(math.pi * 2 * pred_s)) + label * ((pred_m - 1) ** 2 / pred_s + torch.log(math.pi * 2 * pred_s))) / pred_m.size()[0] 204 | loss = 0.5 * torch.sum((1 - label) * (pred_m ** 2 / pred_s + torch.log(torch.clamp(math.pi * 2 * pred_s, min=1e-6))) + label * ((pred_m - 1) ** 2 / pred_s + torch.log(torch.clamp(math.pi * 2 * pred_s, min=1e-6)))) / pred_m.size()[0] # min = 1e-6 205 | #loss = 0.5 * torch.sum((1 - label) * (pred_m ** 2 / (pred_s) + torch.log(math.pi * 2 * pred_s)) + label * ((pred_s - 1) ** 2 / pred_s + torch.log(math.pi * 2 * pred_s))) / pred_m.size()[0] 206 | return loss 207 | 208 | def L2_loss(pred, label): 209 | loss = torch.sum((pred - label) ** 2) 210 | return loss 211 | 212 | def KL_loss(pred, label): 213 | assert(len(pred) == 2) 214 | pred_m, pred_s = pred 215 | #loss = 0.5 * torch.sum(((pred_m - label) ** 2) / (pred_s + 1e-10) + torch.log(pred_s)) # may need epsilon 216 | loss = 0.5 * torch.sum(((pred_m - label) ** 2) / (pred_s) + torch.log(pred_s)) # may need epsilon 217 | return loss 218 | 219 | def multi_logistic_loss(pred, label): 220 | assert(len(label.size()) == 1) 221 | print('bingo type\n', label.data.type()) 222 | print('bingo label\n', pred[:, label]) 223 | log_prob = torch.sum(torch.log(1 - pred)) + torch.sum(torch.log(pred[:, label.data]) - torch.log(1 - pred[:, label.data])) 224 | return -log_prob 225 | 226 | def RMSE(pred, label): 227 | loss = torch.mean(torch.sum((pred - label) ** 2, 1), 0) ** 0.5 228 | return loss 229 | -------------------------------------------------------------------------------- /main_mlp.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from torch.utils.data.sampler import SubsetRandomSampler 3 | import numpy as np 4 | import random 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torchvision import datasets, transforms 11 | from torch.autograd import Variable 12 | from utils import plain_log 13 | from npn import NPNLinear 14 | from npn import Linear2Branch 15 | from npn import NPNLinearLite 16 | from npn import NPNSigmoid 17 | from npn import NPNRelu 18 | from npn import NPNDropout 19 | from npn import multi_logistic_loss 20 | from npn import NPNBCELoss 21 | from npn import KL_BG 22 | from npn import KL_loss 23 | from npn import RMSE 24 |
from datasets_boston_housing import Dataset_boston_housing 25 | from torch.utils import data 26 | 27 | # Training settings 28 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 29 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 30 | help='input batch size for training (default: 64)') 31 | parser.add_argument('--num_workers', type=int, default=2, 32 | help='number of workers') 33 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 34 | help='input batch size for testing (default: 1000)') 35 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 36 | help='number of epochs to train (default: 10)') 37 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 38 | help='learning rate (default: 0.01)') 39 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 40 | help='SGD momentum (default: 0.5)') 41 | parser.add_argument('--output_s', type=float, default=1.0, 42 | help='lambda of output_s') 43 | parser.add_argument('--dropout', type=float, default=0.0, 44 | help='dropout rate') 45 | parser.add_argument('--no-cuda', action='store_true', default=False, 46 | help='disables CUDA training') 47 | parser.add_argument('--evaluate', action='store_true', default=False, 48 | help='evaluate only') 49 | parser.add_argument('--seed', type=int, default=1, metavar='S', 50 | help='random seed (default: 1)') 51 | parser.add_argument('--log-interval', type=int, default=1000, metavar='N', 52 | help='how many batches to wait before logging training status') 53 | parser.add_argument('--checkpoint', type=str, default='none', 54 | help='file name of checkpoint model') 55 | parser.add_argument('--save_interval', type=int, default=100, metavar='N', 56 | help='how many epochs to wait before saving model') 57 | parser.add_argument('--log_file', type=str, default='tmp', 58 | help='log file name') 59 | parser.add_argument('--save_head', type=str, default='tmp', 60 | help='file name head for saving') 61 | parser.add_argument('--type', type=str, default='mlp', 62 | help='mlp/npn') 63 | parser.add_argument('--loss', type=str, default='default', 64 | help='default/npnbce/kl') 65 | parser.add_argument('--num_train', type=int, default=60000, 66 | help='num train') 67 | args = parser.parse_args() 68 | args.cuda = not args.no_cuda and torch.cuda.is_available() 69 | 70 | torch.manual_seed(args.seed) 71 | if args.cuda: 72 | torch.cuda.manual_seed(args.seed) 73 | random.seed(args.seed) 74 | torch.manual_seed(int(args.seed)) 75 | 76 | if args.type.startswith('regress_'): 77 | bh_train_dataset = Dataset_boston_housing('./boston_housing_nor_train.pkl') 78 | bh_val_dataset = Dataset_boston_housing('./boston_housing_nor_val.pkl') 79 | 80 | train_loader = data.DataLoader( 81 | dataset = bh_train_dataset, 82 | batch_size = args.batch_size, 83 | shuffle = False, 84 | num_workers = args.num_workers, 85 | pin_memory = False 86 | ) 87 | 88 | test_loader = data.DataLoader( 89 | dataset = bh_val_dataset, 90 | batch_size = args.test_batch_size, 91 | shuffle = False, 92 | num_workers = args.num_workers, 93 | pin_memory = False 94 | ) 95 | else: 96 | mnist_train = datasets.MNIST('../data', train=True, download=True, 97 | transform=transforms.Compose([ 98 | transforms.ToTensor(), 99 | transforms.Normalize((0.1307,), (0.3081,)) 100 | ])) 101 | size_train = len(mnist_train) 102 | indices = list(range(size_train)) 103 | np.random.shuffle(indices) 104 | num_train = args.num_train 105 | train_ind = indices[:num_train] 106 | 
train_sampler = SubsetRandomSampler(train_ind) 107 | 108 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 109 | train_loader = torch.utils.data.DataLoader( 110 | datasets.MNIST('../data', train=True, download=True, 111 | transform=transforms.Compose([ 112 | transforms.ToTensor() 113 | ])), 114 | batch_size=args.batch_size, sampler=train_sampler, **kwargs) 115 | #batch_size=args.batch_size, shuffle=False, **kwargs) 116 | test_loader = torch.utils.data.DataLoader( 117 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 118 | transforms.ToTensor() 119 | ])), 120 | batch_size=args.test_batch_size, shuffle=False, **kwargs) 121 | 122 | 123 | class Net(nn.Module): 124 | def __init__(self): 125 | super(Net, self).__init__() 126 | self.fc1 = nn.Linear(784, 800) 127 | self.fc2 = nn.Linear(800, 800) 128 | self.fc3 = nn.Linear(800, 10) 129 | self.dropout = args.dropout 130 | 131 | self.drop1 = nn.Dropout(self.dropout) 132 | self.drop2 = nn.Dropout(self.dropout) 133 | 134 | def forward(self, x): 135 | x = x.view(-1, 784) 136 | x = F.sigmoid(self.fc1(x)) 137 | x = self.drop1(x) 138 | x = F.sigmoid(self.fc2(x)) 139 | x = self.drop2(x) 140 | #x = F.relu(self.fc1(x)) 141 | #x = F.relu(self.fc2(x)) 142 | x = self.fc3(x) 143 | return F.log_softmax(x) 144 | ##x = torch.log(F.sigmoid(x)) 145 | #x = torch.log(F.softmax(F.sigmoid(x))) 146 | #return x 147 | 148 | class NPNNet(nn.Module): 149 | def __init__(self): 150 | super(NPNNet, self).__init__() 151 | self.dropout = args.dropout 152 | 153 | self.fc1 = NPNLinear(784, 800, False) 154 | self.sigmoid1 = NPNSigmoid() 155 | #self.sigmoid1 = NPNRelu() 156 | self.fc2 = NPNLinear(800, 800) 157 | self.sigmoid2 = NPNSigmoid() 158 | #self.sigmoid2 = NPNRelu() 159 | self.fc3 = NPNLinear(800, 10) 160 | self.sigmoid3 = NPNSigmoid() 161 | 162 | self.drop1 = NPNDropout(self.dropout) 163 | self.drop2 = NPNDropout(self.dropout) 164 | self.drop3 = NPNDropout(self.dropout) 165 | 166 | def forward(self, x): 167 | x = x.view(-1, 784) 168 | x = self.fc1(x) 169 | x = self.sigmoid1(x) 170 | x = self.drop1(x) 171 | x = self.fc2(x) 172 | x = self.sigmoid2(x) 173 | x = self.drop2(x) 174 | x, s = self.sigmoid3(self.fc3(x)) 175 | return x, s 176 | 177 | class NPNNetLite(nn.Module): 178 | def __init__(self): 179 | super(NPNNetLite, self).__init__() 180 | self.dropout = args.dropout 181 | 182 | self.fc1 = Linear2Branch(784, 800, False) 183 | self.sigmoid1 = NPNSigmoid() 184 | #self.sigmoid1 = NPNRelu() 185 | self.fc2 = NPNLinearLite(800, 800) 186 | self.sigmoid2 = NPNSigmoid() 187 | #self.sigmoid2 = NPNRelu() 188 | self.fc3 = NPNLinearLite(800, 10) 189 | self.sigmoid3 = NPNSigmoid() 190 | 191 | self.drop1 = NPNDropout(self.dropout) 192 | self.drop2 = NPNDropout(self.dropout) 193 | self.drop3 = NPNDropout(self.dropout) 194 | 195 | def forward(self, x): 196 | x = x.view(-1, 784) 197 | x = self.fc1(x) 198 | x = self.sigmoid1(x) 199 | x = self.drop1(x) 200 | x = self.fc2(x) 201 | x = self.sigmoid2(x) 202 | x = self.drop2(x) 203 | x, s = self.sigmoid3(self.fc3(x)) 204 | return x, s 205 | 206 | class CNN(nn.Module): 207 | def __init__(self): 208 | super(CNN, self).__init__() 209 | self.dropout = args.dropout 210 | 211 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 212 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 213 | self.conv2_drop = nn.Dropout2d() 214 | self.fc1 = nn.Linear(320, 50) 215 | self.fc2 = nn.Linear(50, 10) 216 | 217 | self.drop1 = nn.Dropout(self.dropout) 218 | 219 | def forward(self, x): 220 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 
221 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 222 | x = x.view(-1, 320) 223 | x = F.relu(self.fc1(x)) 224 | #x = F.dropout(x, training=self.training) 225 | x = self.drop1(x) 226 | x = self.fc2(x) 227 | return F.log_softmax(x) 228 | 229 | class NPNCNN(nn.Module): 230 | def __init__(self): 231 | super(NPNCNN, self).__init__() 232 | self.dropout = args.dropout 233 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 234 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 235 | self.conv2_drop = nn.Dropout2d() 236 | self.fc1 = NPNLinear(320, 50, dual_input=False) 237 | self.relu1 = NPNRelu() 238 | self.drop1 = NPNDropout(self.dropout) 239 | self.fc2 = NPNLinear(50, 10) 240 | self.sigmoid1 = NPNSigmoid() 241 | 242 | def forward(self, x): 243 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 244 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 245 | x = x.view(-1, 320) 246 | x = self.relu1(self.fc1(x)) 247 | x = self.drop1(x) 248 | x = self.fc2(x) 249 | if args.loss == 'nll': 250 | x, _ = x 251 | return F.log_softmax(x), x 252 | else: 253 | x, s = self.sigmoid1(x) 254 | return x, s 255 | 256 | class ReNPN(nn.Module): 257 | def __init__(self): 258 | super(ReNPN, self).__init__() 259 | self.dropout = args.dropout 260 | 261 | self.fc1 = NPNLinear(13, 50, False) 262 | self.relu1 = NPNRelu() 263 | self.fc2 = NPNLinear(50, 1) 264 | 265 | def forward(self, x): 266 | x = self.fc1(x) 267 | x = self.relu1(x) 268 | x, s = self.fc2(x) 269 | return x, s 270 | 271 | class ReMLP(nn.Module): 272 | def __init__(self): 273 | super(ReMLP, self).__init__() 274 | self.dropout = args.dropout 275 | 276 | self.fc1 = nn.Linear(13, 50) 277 | self.fc2 = nn.Linear(50, 1) 278 | 279 | def forward(self, x): 280 | x = self.fc1(x) 281 | x = F.relu(x) 282 | x = self.fc2(x) 283 | return x 284 | 285 | if args.type == 'mlp': 286 | model = Net() 287 | elif args.type == 'npn': 288 | model = NPNNet() 289 | elif args.type == 'npn_lite': 290 | model = NPNNetLite() 291 | elif args.type == 'cnn': 292 | model = CNN() 293 | elif args.type == 'npncnn': 294 | model = NPNCNN() 295 | elif args.type == 'regress_npn': 296 | model = ReNPN() 297 | elif args.type == 'regress_mlp': 298 | model = ReMLP() 299 | if args.cuda: 300 | model.cuda() 301 | 302 | #optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 303 | optimizer = optim.Adadelta(model.parameters(), lr = args.lr, eps = 1e-7) # lr default 0.02 304 | #optimizer = optim.Adam(model.parameters(), lr = args.lr) # lr 305 | ind = list(range(args.batch_size)) 306 | ind_test = list(range(1000)) 307 | bce = nn.BCELoss() 308 | mse = nn.MSELoss() 309 | 310 | def train(epoch): 311 | model.train() 312 | sum_loss = 0 313 | for batch_idx, (data, target) in enumerate(train_loader): 314 | # TODO: expand label here 315 | if not args.type.startswith('regress_'): 316 | target_ex = torch.zeros(target.size()[0], 10) 317 | target_ex[ind[:min(args.batch_size, target.size()[0])], target] = 1 318 | 319 | if args.type == 'npn' or args.type == 'npncnn' or args.type == 'npn_lite': 320 | target = target_ex 321 | if args.cuda: 322 | data, target = data.cuda(), target.cuda() 323 | data, target = Variable(data), Variable(target) 324 | optimizer.zero_grad() 325 | output = model(data) 326 | if args.type == 'mlp' or args.type == 'cnn': 327 | loss = F.nll_loss(output, target) 328 | else: 329 | if args.type != 'regress_mlp': 330 | x, s = output 331 | #loss = F.nll_loss(torch.log(x+1e-10), target) + args.output_s * torch.sum(s) 332 | #loss = multi_logistic_loss(x, target) + args.output_s * 
torch.sum(s) 333 | if args.loss == 'default': 334 | loss = bce(x, target) + args.output_s * torch.sum(s ** 2) 335 | elif args.loss == 'npnbce': 336 | loss = NPNBCELoss(x, s, target) + args.output_s * torch.sum(s ** 2) 337 | elif args.loss == 'kl': 338 | loss = KL_BG(x, s, target) + args.output_s * torch.sum(s ** 2) 339 | elif args.loss == 'nll': 340 | loss = F.nll_loss(x, target) 341 | elif args.loss == 'gaussian': 342 | loss = KL_loss(output, target) + 0.5 * args.output_s * torch.sum(s ** 2) 343 | elif args.loss == 'mse': 344 | loss = mse(output, target) 345 | # TODO: use BCELoss 346 | #sum_loss += loss.data[0] 347 | sum_loss += loss.item() 348 | loss.backward() 349 | optimizer.step() 350 | if batch_idx % args.log_interval == 0 and batch_idx != 0: 351 | log_txt = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.7f}'.format( 352 | epoch, batch_idx * len(data), len(train_loader.dataset), 353 | 100. * batch_idx / len(train_loader), loss.item()) 354 | #100. * batch_idx / len(train_loader), loss.data[0]) 355 | print(log_txt) 356 | plain_log(args.log_file,log_txt+'\n') 357 | avg_loss = sum_loss / len(train_loader.dataset) * args.batch_size 358 | log_txt = 'Train Epoch {}: Average Loss = {:.7f}'.format(epoch, avg_loss) 359 | print(log_txt) 360 | plain_log(args.log_file,log_txt+'\n') 361 | if epoch % args.save_interval == 0 and epoch != 0: 362 | torch.save(model, '%s.model' % args.save_head) 363 | 364 | def test(): 365 | model.eval() 366 | test_loss = 0 367 | rmse_loss = 0 368 | correct = 0 369 | for data, target in test_loader: 370 | if not args.type.startswith('regress_'): 371 | target_ex = torch.zeros(target.size()[0], 10) 372 | target_ex[ind_test[:min(1000, target.size()[0])], target] = 1 373 | 374 | if args.cuda: 375 | if not args.type.startswith('regress_'): 376 | data, target_ex, target = data.cuda(), target_ex.cuda(), target.cuda() 377 | else: 378 | data, target = data.cuda(), target.cuda() 379 | if not args.type.startswith('regress_'): 380 | data, target_ex = Variable(data, volatile=True), Variable(target_ex) 381 | else: 382 | data, target = Variable(data, volatile=True), Variable(target) 383 | output = model(data) 384 | if args.type == 'npn' or args.type == 'npncnn' or args.type == 'npn_lite' or args.type.startswith('regress_'): 385 | if args.type != 'regress_mlp': 386 | output, s = output 387 | #test_loss += F.nll_loss(torch.log(output+1e-10), target, size_average=False).data[0] # sum up batch loss 388 | if args.loss == 'default': 389 | test_loss += (bce(output, target_ex) + args.output_s * torch.sum(s ** 2)).item() 390 | elif args.loss == 'npnbce': 391 | test_loss += (NPNBCELoss(output, s, target_ex) + args.output_s * torch.sum(s ** 2)).item() 392 | elif args.loss == 'kl': 393 | test_loss += (KL_BG(output, s, target_ex) + args.output_s * torch.sum(s ** 2)).item() 394 | elif args.loss == 'gaussian': 395 | test_loss += KL_loss((output, s), target).item() 396 | rmse_loss += RMSE(output, target).item() 397 | elif args.loss == 'mse': 398 | test_loss += mse(output, target).item() 399 | rmse_loss += RMSE(output, target).item() 400 | else: 401 | test_loss += F.nll_loss(output, target, size_average=False).item() # sum up batch loss 402 | if not args.type.startswith('regress_'): 403 | pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability 404 | correct += pred.eq(target.view_as(pred)).cpu().sum() 405 | 406 | test_loss /= len(test_loader.dataset) 407 | if not args.type.startswith('regress_'): 408 | log_txt = 'Test set: Average loss: {:.7f}, Accuracy: {}/{} 
({:.0f}%)'.format( 409 | test_loss, correct, len(test_loader.dataset), 410 | 100. * correct / len(test_loader.dataset)) 411 | else: 412 | log_txt = 'Test set: Average loss: {:.7f}, RMSE: {:.6f}'.format(test_loss, rmse_loss) 413 | print(log_txt) 414 | plain_log(args.log_file,log_txt+'\n') 415 | 416 | if args.checkpoint != 'none': 417 | model = torch.load(args.checkpoint) 418 | print(str(model)) 419 | for key, module in model._modules.items(): 420 | print('key', key) 421 | print('module', module) 422 | if module.__class__.__name__ == 'NPNLinear': 423 | print('para\n', torch.log(torch.exp(module.W_s_[:8,:8])+1)) 424 | 425 | if not args.evaluate: 426 | for epoch in range(1, args.epochs + 1): 427 | train(epoch) 428 | if epoch % 1 == 0: 429 | test() 430 | test() 431 | --------------------------------------------------------------------------------