├── .gitignore
├── main.py
├── src
│   ├── __init__.py
│   ├── data
│   │   ├── PermutedMNIST.py
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── model
│   │   ├── ProgressiveNeuralNetworks.py
│   │   └── __init__.py
│   └── tools
│       ├── __init__.py
│       ├── arg_parser_actions.py
│       └── evaluation.py
└── test
    ├── __init__.py
    ├── hod_pytorch.py
    ├── hod_tf.py
    └── test_rand_perm.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
__pycache__/

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import argparse
import logging
import os

import numpy as np
import torch
import torch.nn.functional as F
import visdom
from torch.autograd import Variable
from tqdm import tqdm

from src.data.PermutedMNIST import get_permuted_MNIST
from src.model.ProgressiveNeuralNetworks import PNN
from src.tools.arg_parser_actions import LengthCheckAction
from src.tools.evaluation import evaluate_model

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


def get_args():
    parser = argparse.ArgumentParser(description='Progressive Neural Networks')
    parser.add_argument('-path', default='/local/veniat/data', type=str, help='path to the data')
    parser.add_argument('-cuda', default=-1, type=int, help='Cuda device to use (-1 for none)')
    parser.add_argument('-visdom_url', default="http://37.187.126.101", type=str, help='Visdom server url')
    parser.add_argument('-visdom_port', default=8100, type=int, help='Visdom server port')

    parser.add_argument('--layers', metavar='L', type=int, default=3, help='Number of layers per task')
    parser.add_argument('--sizes', dest='sizes', type=int, default=[784, 1024, 512, 10], nargs='+',
                        action=LengthCheckAction, help='Input size followed by one output size per layer')

    parser.add_argument('--n_tasks', dest='n_tasks', type=int, default=5)
    parser.add_argument('--epochs', dest='epochs', type=int, default=10)
    parser.add_argument('--bs', dest='batch_size', type=int, default=50)
    parser.add_argument('--lr', dest='lr', type=float, default=1e-3, help='Optimizer learning rate')
    parser.add_argument('--wd', dest='wd', type=float, default=1e-4, help='Optimizer weight decay')
    parser.add_argument('--momentum', dest='momentum', type=float, default=1e-4, help='Optimizer momentum')

    args = parser.parse_known_args()
    return args[0]


def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args['cuda'])
    viz = visdom.Visdom(server=args['visdom_url'], port=args['visdom_port'], env='PNN tests')

    model = PNN(args['layers'])

    tasks_data = [get_permuted_MNIST(args['path'], args['batch_size']) for _ in range(args['n_tasks'])]

    x = torch.Tensor()
    y = torch.LongTensor()

    if args['cuda'] != -1:
        logger.info('Running with cuda (GPU n°{})'.format(args['cuda']))
        model.cuda()
        x = x.cuda()
        y = y.cuda()
    else:
        logger.warning('Running WITHOUT cuda')

    for task_id, (train_set, val_set, test_set) in enumerate(tasks_data):
        # val_perf = evaluate_model(model, x, y, val_set, task_id=task_id)

        # Freeze all existing columns, then grow a fresh column for the new task;
        # only the new column's parameters are handed to the optimizer.
        model.freeze_columns()
        model.new_task(args['sizes'])

        optimizer = torch.optim.RMSprop(model.parameters(task_id), lr=args['lr'],
                                        weight_decay=args['wd'], momentum=args['momentum'])

        train_accs = []
        train_losses = []
        for epoch in range(args['epochs']):
            total_samples = 0
            total_loss = 0
            correct_samples = 0
            for inputs, labels in tqdm(train_set):
                x.resize_(inputs.size()).copy_(inputs)
                y.resize_(labels.size()).copy_(labels)

                x = x.view(x.size(0), -1)
                predictions = model(Variable(x))

                _, predicted = torch.max(predictions.data, 1)
                total_samples += y.size(0)
                correct_samples += (predicted == y).sum()

                indiv_loss = F.cross_entropy(predictions, Variable(y))
                total_loss += indiv_loss.data[0]

                optimizer.zero_grad()
                indiv_loss.backward()
                optimizer.step()

            train_accs.append(correct_samples / total_samples)
            train_losses.append(total_loss / total_samples)
            logger.info('[T{}][{}/{}] Loss={}, Acc={}'.format(task_id, epoch, args['epochs'],
                                                              train_losses[-1], train_accs[-1]))
            viz.line(np.array(train_accs), X=np.arange(epoch + 1), win='tacc{}'.format(task_id),
                     opts={'title': 'Task {}: train accuracy'.format(task_id)})
            viz.line(np.array(train_losses), X=np.arange(epoch + 1), win='tloss{}'.format(task_id),
                     opts={'title': 'Task {}: train loss'.format(task_id)})

        # After learning task k, evaluate on every task seen so far.
        perfs = []
        logger.info('Evaluation after task {}:'.format(task_id))
        for i in range(task_id + 1):
            _, val, test = tasks_data[i]
            val_perf = evaluate_model(model, x, y, val, task_id=i)
            test_perf = evaluate_model(model, x, y, test, task_id=i)
            perfs.append([val_perf, test_perf])
            logger.info('\tT n°{} - Val:{}%, test:{}%'.format(i, val_perf, test_perf))

        viz.line(np.array(perfs), X=np.arange(task_id + 1), win='all_task',
                 opts={'title': 'Evaluation on all tasks', 'legend': ['Val', 'Test']})


if __name__ == '__main__':
    main(vars(get_args()))
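With the arguments defined in get_args, a typical session might look like the following (illustrative values; the script assumes a reachable Visdom server, here a local one):

    python -m visdom.server -port 8100
    python main.py -path ./data -cuda 0 -visdom_url http://localhost -visdom_port 8100 --n_tasks 5 --epochs 10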
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/__init__.py

--------------------------------------------------------------------------------
/src/data/PermutedMNIST.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST

from src.data.utils import validation_split


def get_permuted_MNIST(path, batch_size):
    im_width = im_height = 28
    val_size = 10000

    rand_perm = RandomPermutation(0, 0, im_width, im_height)
    normalization = transforms.Normalize((0.1307,), (0.3081,))

    # TODO: rethink the RandomPermutation usage: permuting inside the transform slows
    # down data loading by a factor > 6; applying the permutation directly on batches
    # should be faster (see the sketch after this file).
    transform = transforms.Compose([
        transforms.ToTensor(),
        rand_perm,
        normalization
    ])

    train_set = MNIST(root=path, train=True, download=True, transform=transform)
    test_set = MNIST(root=path, train=False, download=True, transform=transform)
    train_set, val_set = validation_split(train_set, transform, transform, val_size=val_size)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) if train_set is not None else None
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False) if test_set is not None else None
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False) if val_set is not None else None

    return train_loader, val_loader, test_loader


class RandomPermutation(object):
    """
    Applies a constant random permutation to the images.
    """

    def __init__(self, x_off=0, y_off=0, width=None, height=None):
        self.x_off = x_off
        self.y_off = y_off
        self.width = width
        self.height = height
        self.x_max = x_off + width
        self.y_max = y_off + height
        self.kernel = torch.randperm(width * height)

    def __call__(self, input):
        return rand_perm_(input, self.x_off, self.y_off, self.x_max, self.y_max, self.kernel)


def rand_perm_(img, x, y, x_max, y_max, kernel):
    """
    Applies IN PLACE the random permutation defined in `kernel` to the image `img`
    on the zone defined by `x`, `y`, `x_max`, `y_max`.
    :param img: input image of dimension (C, W, H)
    :param x: offset on the x axis
    :param y: offset on the y axis
    :param x_max: end of the zone to permute on the x axis
    :param y_max: end of the zone to permute on the y axis
    :param kernel: LongTensor of dim 1 containing one value for each point in the zone to permute
    :return: the permuted image (even though the permutation is done in place).
    """
    zone = img[:, x:x_max, y:y_max].contiguous()
    img[:, x:x_max, y:y_max] = zone.view(img.size(0), -1).index_select(1, kernel).view(zone.size())
    return img
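A batched variant of the permutation, as hinted at in the TODO above (an illustrative sketch, not part of the repository; permute_batch is a hypothetical helper meant to be applied to DataLoader batches, with ToTensor/Normalize kept in the per-sample transform):

import torch

def permute_batch(inputs, kernel):
    # inputs: a (B, C, H, W) batch; kernel: LongTensor of size H*W, as in RandomPermutation
    b, c, h, w = inputs.size()
    flat = inputs.view(b, c, h * w)
    # one index_select per batch instead of one per image
    return flat.index_select(2, kernel).view(b, c, h, w)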
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/data/__init__.py

--------------------------------------------------------------------------------
/src/data/utils.py:
--------------------------------------------------------------------------------
import torch


class PartialDataset(torch.utils.data.Dataset):
    def __init__(self, parent_ds, offset, length, transform):
        self.parent_ds = parent_ds
        self.offset = offset
        self.length = length
        self.transform = transform
        assert len(parent_ds) >= offset + length, "Parent Dataset not long enough"
        super(PartialDataset, self).__init__()

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        self.parent_ds.transform = self.transform
        return self.parent_ds[i + self.offset]


def validation_split(dataset, train_transforms, val_transforms, val_size=None, val_share=0.1):
    """
    Split a (training and validation combined) dataset into training and validation.
    Note that to be statistically sound, the items in the dataset should be statistically
    independent (e.g. not sorted by class, not several instances of the same sample that
    could end up in either set).

    inputs:
        dataset: ("training") dataset to split into training and validation
        train_transforms: transform applied to the training portion
        val_transforms: transform applied to the validation portion
        val_size: absolute size of the validation set (takes precedence over val_share)
        val_share: fraction of validation data (should be 0 < val_share < 1, default: 0.1)
    returns: the input dataset split into a training set and a validation set
    """
    val_offset = len(dataset) - val_size if val_size is not None else int(len(dataset) * (1 - val_share))
    assert val_offset > 0, "Can't extract a size {} validation set out of a size {} dataset".format(val_size, len(dataset))
    return (PartialDataset(dataset, 0, val_offset, train_transforms),
            PartialDataset(dataset, val_offset, len(dataset) - val_offset, val_transforms))
--------------------------------------------------------------------------------
/src/model/ProgressiveNeuralNetworks.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class PNNLinearBlock(nn.Module):
    def __init__(self, col, depth, n_in, n_out):
        super(PNNLinearBlock, self).__init__()
        self.col = col
        self.depth = depth
        self.n_in = n_in
        self.n_out = n_out
        self.w = nn.Linear(n_in, n_out)

        # Lateral connections: one adapter per previous column (only past depth 0).
        self.u = nn.ModuleList()
        if self.depth > 0:
            self.u.extend([nn.Linear(n_in, n_out) for _ in range(col)])

    def forward(self, inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]
        # inputs[-1] comes from this column, inputs[:-1] from the previous ones.
        cur_column_out = self.w(inputs[-1])
        prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)]

        return F.relu(cur_column_out + sum(prev_columns_out))


class PNN(nn.Module):
    def __init__(self, n_layers):
        super(PNN, self).__init__()
        self.n_layers = n_layers
        self.columns = nn.ModuleList([])

        self.use_cuda = False

    def forward(self, x, task_id=-1):
        assert self.columns, 'PNN should have at least one column (missing call to `new_task`?)'
        inputs = [c[0](x) for c in self.columns]

        for l in range(1, self.n_layers):
            outputs = []

            # TODO: use task_id to check whether all columns are really needed
            for i, column in enumerate(self.columns):
                outputs.append(column[l](inputs[:i + 1]))

            inputs = outputs

        return inputs[task_id]

    def new_task(self, sizes):
        msg = "Should have the out size for each layer + input size (got {} sizes but {} layers)."
        assert len(sizes) == self.n_layers + 1, msg.format(len(sizes), self.n_layers)
        task_id = len(self.columns)

        modules = []
        for i in range(0, self.n_layers):
            modules.append(PNNLinearBlock(task_id, i, sizes[i], sizes[i + 1]))
        new_column = nn.ModuleList(modules)
        self.columns.append(new_column)

        if self.use_cuda:
            self.cuda()

    def freeze_columns(self, skip=None):
        if skip is None:
            skip = []

        for i, c in enumerate(self.columns):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

    def parameters(self, col=None):
        if col is None:
            return super(PNN, self).parameters()
        return self.columns[col].parameters()

    def cuda(self, *args, **kwargs):
        self.use_cuda = True
        super(PNN, self).cuda(*args, **kwargs)

    def cpu(self):
        self.use_cuda = False
        super(PNN, self).cpu()
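A minimal smoke test of the column mechanics above (an illustrative sketch, mirroring the sizes used in main.py):

import torch
from torch.autograd import Variable

from src.model.ProgressiveNeuralNetworks import PNN

pnn = PNN(n_layers=3)
pnn.new_task([784, 1024, 512, 10])   # column 0
pnn.freeze_columns()
pnn.new_task([784, 1024, 512, 10])   # column 1, laterally connected to column 0
out = pnn(Variable(torch.rand(4, 784)), task_id=1)
print(out.size())                    # (4, 10): predictions read from column 1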
--------------------------------------------------------------------------------
/src/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/model/__init__.py

--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/tools/__init__.py

--------------------------------------------------------------------------------
/src/tools/arg_parser_actions.py:
--------------------------------------------------------------------------------
import argparse


class LengthCheckAction(argparse.Action):

    def __call__(self, parser, namespace, values, option_string=None):
        # PNN.new_task expects one output size per layer plus the input size,
        # i.e. L + 1 values for L layers.
        if len(values) != namespace.layers + 1:
            msg = "Sizes must have length L+1 (one size per layer plus the input size). L={}, got {} values"
            parser.error(msg.format(namespace.layers, len(values)))

        setattr(namespace, self.dest, values)
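The action can be exercised on its own (an illustrative sketch; the parser mirrors the relevant main.py arguments):

import argparse

from src.tools.arg_parser_actions import LengthCheckAction

parser = argparse.ArgumentParser()
parser.add_argument('--layers', type=int, default=3)
parser.add_argument('--sizes', type=int, nargs='+', action=LengthCheckAction)

print(parser.parse_args(['--sizes', '784', '1024', '512', '10']).sizes)  # [784, 1024, 512, 10]
# parser.parse_args(['--sizes', '784', '512', '10']) would exit via parser.error,
# since 3 values were given for L=3 layers (L+1 expected).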
--------------------------------------------------------------------------------
/src/tools/evaluation.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from tqdm import tqdm


def evaluate_model(model, x, y, dataset_loader, **kwargs):
    total = 0
    correct = 0
    for images, labels in tqdm(dataset_loader, ascii=True):
        x.resize_(images.size()).copy_(images)
        y.resize_(labels.size()).copy_(labels)

        # volatile=True disables graph construction during inference (pre-0.4 idiom).
        inputs = Variable(x.view(x.size(0), -1), volatile=True)
        preds = model(inputs, **kwargs)

        _, predicted = torch.max(preds.data, 1)

        total += labels.size(0)
        correct += (predicted == y).sum()

    return 100.0 * correct / total
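On PyTorch >= 0.4, where volatile became a no-op, the equivalent inference guard would be torch.no_grad(), roughly (sketch of the modernized loop body):

with torch.no_grad():
    preds = model(x.view(x.size(0), -1), **kwargs)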
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/test/__init__.py

--------------------------------------------------------------------------------
/test/hod_pytorch.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from sklearn.datasets import make_blobs


def vectorize(tensors):
    # Flatten and concatenate a list of tensors into a single 1-D tensor.
    res = None
    for t in tensors:
        if res is None:
            res = t.view(-1)
        else:
            res = torch.cat([res, t.view(-1)])
    return res


# ## Create Dataset

np.random.seed(666)
nb_samples = 2000
X, Y = make_blobs(n_samples=nb_samples, n_features=2, centers=2, cluster_std=1.1, random_state=2000)

# Transform the original dataset so as to learn the bias like any other parameter
Xc = np.ones((nb_samples, X.shape[1] + 1), dtype=np.float32)
Yc = np.zeros((nb_samples, 1), dtype=np.float32)

Xc[:, 0:2] = X
Yc[:, 0] = Y

# ## TF

# Create Tensorflow graph
graph = tf.Graph()

with graph.as_default():
    Xi = tf.placeholder(tf.float32, Xc.shape)
    Yi = tf.placeholder(tf.float32, Yc.shape)

    # Weights (+ bias)
    W = tf.Variable(tf.random_normal([Xc.shape[1], 1], 0.0, 0.01))

    # Z = wx + b
    Z = tf.matmul(Xi, W)

    # (Negative) log-likelihood: summed sigmoid cross-entropy
    log_likelihood = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=Z, labels=Yi))

    # Cost function (log-likelihood + L2 penalty)
    cost = log_likelihood + 0.5  # * tf.norm(W, ord=2)

    trainer = tf.train.GradientDescentOptimizer(0.0025)
    training_step = trainer.minimize(cost)

    # Compute the FIM as the outer product of the score
    dW = tf.gradients(-log_likelihood, W)
    FIM = tf.matmul(tf.reshape(dW, (Xc.shape[1], 1)), tf.reshape(dW, (Xc.shape[1], 1)), transpose_b=True)

    # FIM_h = tf.gradients(tf.gradients(-log_likelihood, W)[0], W)[0]
    # FIM_h = tf.hessians(-log_likelihood, tf.squeeze(W))
    # Hessian row by row: [grad(dW_0, W), grad(dW_1, W), ...]
    hess_list = [tf.gradients(y_, W)[0] for y_ in tf.unstack(tf.squeeze(dW))]
    FIM_h = tf.stack(hess_list)

# Create Tensorflow session
session = tf.InteractiveSession(graph=graph)

# Initialize all variables
tf.global_variables_initializer().run()

# Run a training cycle.
# The model is quite simple; a check on the cost function should nonetheless be performed.
for _ in range(3500):
    _, c, z_out = session.run([training_step, cost, Z], feed_dict={Xi: Xc, Yi: Yc})

# Compute the Fisher Information Matrix at the MLE
tf_w, tf_ll, tf_grads, tf_fim, tf_fim_h = session.run([W, log_likelihood, dW, FIM, FIM_h], feed_dict={Xi: Xc, Yi: Yc})

# ## Pytorch

X_py = torch.from_numpy(X).float()
Y_py = Variable(torch.from_numpy(Y)).float().unsqueeze(1)

n_step = 3500
model = nn.Linear(2, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0025)

for _ in range(n_step):
    preds = model(Variable(X_py))
    loss = F.binary_cross_entropy(preds.sigmoid(), Y_py, size_average=False) + 0.5
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

pytorch_p = model(Variable(X_py)).sigmoid()
pytorch_w = vectorize(model.parameters())
pytorch_ll = -(Y_py * pytorch_p.log() + (1 - Y_py) * torch.log((1 - pytorch_p) + 1e-9)).sum()
# pytorch_ll = F.binary_cross_entropy(pytorch_p, Y_py, size_average=False)
pytorch_grads = vectorize(torch.autograd.grad(-pytorch_ll, model.parameters(), create_graph=True))
pytorch_fim = torch.mm(pytorch_grads.view(-1, 1), pytorch_grads.view(1, -1))

pytorch_hess_fim = np.array(
    [vectorize(torch.autograd.grad(v, model.parameters(), create_graph=True)).data.numpy() for v in pytorch_grads])

# ## Create a new pytorch model from TF weights

new_model = nn.Linear(2, 1)
new_model.weight = nn.Parameter(torch.from_numpy(tf_w[:2]).view(1, 2))
new_model.bias = nn.Parameter(torch.from_numpy(tf_w[2]))

new_p = new_model(Variable(X_py)).sigmoid()

new_pytorch_w = vectorize(new_model.parameters())
new_pytorch_ll = F.binary_cross_entropy(new_p, Y_py, size_average=False)
new_pytorch_grads = vectorize(torch.autograd.grad(-new_pytorch_ll, new_model.parameters(), create_graph=True))
new_pytorch_fim = torch.mm(new_pytorch_grads.view(-1, 1), new_pytorch_grads.view(1, -1))

# new_pytorch_hess_fim = [torch.autograd.grad(v, new_model.parameters()) for v in vectorize(new_pytorch_grads)]


# ## RES:

# ### Pytorch

print("Pytorch params: {}".format(pytorch_w.data.numpy()))
print("Pytorch Log-Likelihood: {}".format(pytorch_ll.data[0]))
print("Pytorch LL-grad: {}".format(pytorch_grads.data.numpy()))
print("Pytorch FIM:\n {}".format(pytorch_fim.data.numpy()))
print("Pytorch FIM (Hess):\n {}".format(pytorch_hess_fim))

# ### TF

print("TF params: {}".format(tf_w))
print("TF Log-Likelihood: {}".format(tf_ll))
print("TF LL-grad: {}".format(tf_grads))
print("TF FIM:\n {}".format(tf_fim))
print("TF FIM_h:\n {}".format(tf_fim_h))

# ### TF copy (Pytorch B)

print("New model params: {}".format(new_pytorch_w.data.numpy()))
print("New model Log-Likelihood: {}".format(new_pytorch_ll.data[0]))
print("New model LL-grad: {}".format(new_pytorch_grads.data.numpy()))
print("New model FIM:\n {}".format(new_pytorch_fim.data.numpy()))
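As a quick numerical sanity check on the quantities printed above (an illustrative addition, not part of the original script): both FIM estimates are 3x3 here (two weights plus a bias), the outer-product estimate is symmetric by construction, and the Hessian-based one should be positive semi-definite at the MLE, since the logistic negative log-likelihood is convex:

fim_outer = pytorch_fim.data.numpy()
assert fim_outer.shape == (3, 3) and np.allclose(fim_outer, fim_outer.T)
assert np.all(np.linalg.eigvalsh(pytorch_hess_fim) >= -1e-6)  # PSD up to float noise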
--------------------------------------------------------------------------------
/test/hod_tf.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

from sklearn.datasets import make_blobs

# ## Create Dataset

np.random.seed(666)
nb_samples = 2000
X, Y = make_blobs(n_samples=nb_samples, n_features=2, centers=2, cluster_std=1.1, random_state=2000)

# Transform the original dataset so as to learn the bias like any other parameter
Xc = np.ones((nb_samples, X.shape[1] + 1), dtype=np.float32)
Yc = np.zeros((nb_samples, 1), dtype=np.float32)

Xc[:, 0:2] = X
Yc[:, 0] = Y

# Create Tensorflow graph
graph = tf.Graph()

with graph.as_default():
    Xi = tf.placeholder(tf.float32, Xc.shape)
    Yi = tf.placeholder(tf.float32, Yc.shape)

    # Weights (+ bias)
    W = tf.Variable(tf.random_normal([Xc.shape[1], 1], 0.0, 0.01))

    # Z = wx + b
    Z = tf.matmul(Xi, W)

    # (Negative) log-likelihood: summed sigmoid cross-entropy
    log_likelihood = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=Z, labels=Yi))

    # Cost function (log-likelihood + L2 penalty)
    cost = log_likelihood + 0.5  # * tf.norm(W, ord=2)

    trainer = tf.train.GradientDescentOptimizer(0.0025)
    training_step = trainer.minimize(cost)

    # Compute the FIM as the outer product of the score
    dW = tf.gradients(-log_likelihood, W)
    FIM = tf.matmul(tf.reshape(dW, (Xc.shape[1], 1)), tf.reshape(dW, (Xc.shape[1], 1)), transpose_b=True)

    # FIM_h = tf.gradients(tf.gradients(-log_likelihood, W)[0], W)[0]
    # FIM_h = tf.hessians(-log_likelihood, tf.squeeze(W))
    # Hessian row by row: [grad(dW_0, W), grad(dW_1, W), ...]
    hess_list = [tf.gradients(y_, W)[0] for y_ in tf.unstack(tf.squeeze(dW))]
    FIM_h = tf.stack(hess_list)

# Create Tensorflow session
session = tf.InteractiveSession(graph=graph)

# Initialize all variables
tf.global_variables_initializer().run()

# Run a training cycle.
# The model is quite simple; a check on the cost function should nonetheless be performed.
for _ in range(3500):
    _, c, z_out = session.run([training_step, cost, Z], feed_dict={Xi: Xc, Yi: Yc})

# Compute the Fisher Information Matrix at the MLE
tf_w, tf_ll, tf_grads, tf_fim, tf_fim_h = session.run([W, log_likelihood, dW, FIM, FIM_h], feed_dict={Xi: Xc, Yi: Yc})

# ## RES:

print("TF params: {}".format(tf_w))
print("TF Log-Likelihood: {}".format(tf_ll))
print("TF LL-grad: {}".format(tf_grads))
print("TF FIM:\n {}".format(tf_fim))
print("TF FIM_h:\n {}".format(tf_fim_h))
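One caveat about the FIM computed in both scripts (an illustrative numpy sketch, not part of the repository): dW is the gradient of the log-likelihood summed over all samples, so dW dW^T is a rank-1 matrix, whereas the empirical Fisher averages per-sample outer products:

import numpy as np

g = np.random.randn(2000, 3)                        # stand-in for per-sample score vectors g_n
fim_rank1 = np.outer(g.sum(0), g.sum(0))            # what the scripts compute (up to scaling)
fim_emp = (g[:, :, None] @ g[:, None, :]).mean(0)   # empirical Fisher: mean of g_n g_n^T
print(np.linalg.matrix_rank(fim_rank1), np.linalg.matrix_rank(fim_emp))  # 1 vs. 3 (generically)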
--------------------------------------------------------------------------------
/test/test_rand_perm.py:
--------------------------------------------------------------------------------
import unittest

import torch

from src.data.PermutedMNIST import RandomPermutation


class MyTestCase(unittest.TestCase):
    def test_RandomPermutation(self):
        # A given RandomPermutation instance must be deterministic: applying it
        # repeatedly to copies of the same image yields identical results.
        n = 10
        in_img = torch.rand(3, 224, 224)
        for i in range(n):
            rand_perm = RandomPermutation(0, 0, 224, 224)
            permuted = rand_perm(in_img.clone())
            for j in range(n):
                self.assertTrue(torch.equal(permuted, rand_perm(in_img.clone())))


if __name__ == '__main__':
    unittest.main()

--------------------------------------------------------------------------------
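The test assumes it is launched from the repository root (so that the src package is importable), e.g. with:

    python -m unittest test.test_rand_perm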