├── .gitignore
├── main.py
├── src
│   ├── __init__.py
│   ├── data
│   │   ├── PermutedMNIST.py
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── model
│   │   ├── ProgressiveNeuralNetworks.py
│   │   └── __init__.py
│   └── tools
│       ├── __init__.py
│       ├── arg_parser_actions.py
│       └── evaluation.py
└── test
    ├── __init__.py
    ├── hod_pytorch.py
    ├── hod_tf.py
    └── test_rand_perm.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
__pycache__/

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import argparse
import logging
import os

import numpy as np
import torch
import torch.nn.functional as F
import visdom
from torch.autograd import Variable
from tqdm import tqdm

from src.data.PermutedMNIST import get_permuted_MNIST
from src.model.ProgressiveNeuralNetworks import PNN
from src.tools.arg_parser_actions import LengthCheckAction
from src.tools.evaluation import evaluate_model

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


def get_args():
    parser = argparse.ArgumentParser(description='Progressive Neural Networks')
    parser.add_argument('-path', default='/local/veniat/data', type=str, help='path to the data')
    parser.add_argument('-cuda', default=-1, type=int, help='Cuda device to use (-1 for none)')
    parser.add_argument('-visdom_url', default="http://37.187.126.101", type=str, help='Visdom server url')
    parser.add_argument('-visdom_port', default=8100, type=int, help='Visdom server port')

    parser.add_argument('--layers', metavar='L', type=int, default=3, help='Number of layers per task')
    parser.add_argument('--sizes', dest='sizes', type=int, default=[784, 1024, 512, 10], nargs='+',
                        action=LengthCheckAction, help='Input size followed by one output size per layer')

    parser.add_argument('--n_tasks', dest='n_tasks', type=int, default=5)
    parser.add_argument('--epochs', dest='epochs', type=int, default=10)
    parser.add_argument('--bs', dest='batch_size', type=int, default=50)
    parser.add_argument('--lr', dest='lr', type=float, default=1e-3, help='Optimizer learning rate')
    parser.add_argument('--wd', dest='wd', type=float, default=1e-4, help='Optimizer weight decay')
    parser.add_argument('--momentum', dest='momentum', type=float, default=1e-4, help='Optimizer momentum')

    args = parser.parse_known_args()
    return args[0]


def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args['cuda'])
    viz = visdom.Visdom(server=args['visdom_url'], port=args['visdom_port'], env='PNN tests')

    model = PNN(args['layers'])

    tasks_data = [get_permuted_MNIST(args['path'], args['batch_size']) for _ in range(args['n_tasks'])]

    x = torch.Tensor()
    y = torch.LongTensor()

    if args['cuda'] != -1:
        logger.info('Running with cuda (GPU n°{})'.format(args['cuda']))
        model.cuda()
        x = x.cuda()
        y = y.cuda()
    else:
        logger.warning('Running WITHOUT cuda')

    for task_id, (train_set, val_set, test_set) in enumerate(tasks_data):
        # val_perf = evaluate_model(model, x, y, val_set, task_id=task_id)

        # Freeze all existing columns, then grow a fresh column for the new task;
        # only the new column's parameters are handed to the optimizer.
        model.freeze_columns()
        model.new_task(args['sizes'])

        optimizer = torch.optim.RMSprop(model.parameters(task_id), lr=args['lr'],
                                        weight_decay=args['wd'], momentum=args['momentum'])

        train_accs = []
        train_losses = []
        for epoch in range(args['epochs']):
            total_samples = 0
            total_loss = 0
            correct_samples = 0
            for inputs, labels in tqdm(train_set):
                x.resize_(inputs.size()).copy_(inputs)
                y.resize_(labels.size()).copy_(labels)

                x = x.view(x.size(0), -1)
                predictions = model(Variable(x))

                _, predicted = torch.max(predictions.data, 1)
                total_samples += y.size(0)
                correct_samples += (predicted == y).sum()

                indiv_loss = F.cross_entropy(predictions, Variable(y))
                total_loss += indiv_loss.data[0]

                optimizer.zero_grad()
                indiv_loss.backward()
                optimizer.step()

            train_accs.append(correct_samples / total_samples)
            train_losses.append(total_loss / total_samples)
            logger.info('[T{}][{}/{}] Loss={}, Acc={}'.format(task_id, epoch, args['epochs'],
                                                              train_losses[-1], train_accs[-1]))
            viz.line(np.array(train_accs), X=np.arange(epoch + 1), win='tacc{}'.format(task_id),
                     opts={'title': 'Task {}: train accuracy'.format(task_id)})
            viz.line(np.array(train_losses), X=np.arange(epoch + 1), win='tloss{}'.format(task_id),
                     opts={'title': 'Task {}: train loss'.format(task_id)})

        # After learning task k, evaluate on every task seen so far.
        perfs = []
        logger.info('Evaluation after task {}:'.format(task_id))
        for i in range(task_id + 1):
            _, val, test = tasks_data[i]
            val_perf = evaluate_model(model, x, y, val, task_id=i)
            test_perf = evaluate_model(model, x, y, test, task_id=i)
            perfs.append([val_perf, test_perf])
            logger.info('\tT n°{} - Val:{}%, test:{}%'.format(i, val_perf, test_perf))

        viz.line(np.array(perfs), X=np.arange(task_id + 1), win='all_task',
                 opts={'title': 'Evaluation on all tasks', 'legend': ['Val', 'Test']})


if __name__ == '__main__':
    main(vars(get_args()))
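With the arguments defined in get_args, a typical session might look like the following (illustrative values; the script assumes a reachable Visdom server, here a local one):

    python -m visdom.server -port 8100
    python main.py -path ./data -cuda 0 -visdom_url http://localhost -visdom_port 8100 --n_tasks 5 --epochs 10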
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/__init__.py

--------------------------------------------------------------------------------
/src/data/PermutedMNIST.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST

from src.data.utils import validation_split


def get_permuted_MNIST(path, batch_size):
    im_width = im_height = 28
    val_size = 10000

    rand_perm = RandomPermutation(0, 0, im_width, im_height)
    normalization = transforms.Normalize((0.1307,), (0.3081,))

    # TODO: rethink the RandomPermutation usage: permuting inside the transform slows
    # down data loading by a factor > 6; applying the permutation directly on batches
    # should be faster (see the sketch after this file).
    transform = transforms.Compose([
        transforms.ToTensor(),
        rand_perm,
        normalization
    ])

    train_set = MNIST(root=path, train=True, download=True, transform=transform)
    test_set = MNIST(root=path, train=False, download=True, transform=transform)
    train_set, val_set = validation_split(train_set, transform, transform, val_size=val_size)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) if train_set is not None else None
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False) if test_set is not None else None
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False) if val_set is not None else None

    return train_loader, val_loader, test_loader


class RandomPermutation(object):
    """
    Applies a constant random permutation to the images.
    """

    def __init__(self, x_off=0, y_off=0, width=None, height=None):
        self.x_off = x_off
        self.y_off = y_off
        self.width = width
        self.height = height
        self.x_max = x_off + width
        self.y_max = y_off + height
        self.kernel = torch.randperm(width * height)

    def __call__(self, input):
        return rand_perm_(input, self.x_off, self.y_off, self.x_max, self.y_max, self.kernel)


def rand_perm_(img, x, y, x_max, y_max, kernel):
    """
    Applies IN PLACE the random permutation defined in `kernel` to the image `img`
    on the zone defined by `x`, `y`, `x_max`, `y_max`.
    :param img: input image of dimension (C, W, H)
    :param x: offset on the x axis
    :param y: offset on the y axis
    :param x_max: end of the zone to permute on the x axis
    :param y_max: end of the zone to permute on the y axis
    :param kernel: LongTensor of dim 1 containing one value for each point in the zone to permute
    :return: the permuted image (even though the permutation is done in place).
    """
    zone = img[:, x:x_max, y:y_max].contiguous()
    img[:, x:x_max, y:y_max] = zone.view(img.size(0), -1).index_select(1, kernel).view(zone.size())
    return img
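A batched variant of the permutation, as hinted at in the TODO above (an illustrative sketch, not part of the repository; permute_batch is a hypothetical helper meant to be applied to DataLoader batches, with ToTensor/Normalize kept in the per-sample transform):

import torch

def permute_batch(inputs, kernel):
    # inputs: a (B, C, H, W) batch; kernel: LongTensor of size H*W, as in RandomPermutation
    b, c, h, w = inputs.size()
    flat = inputs.view(b, c, h * w)
    # one index_select per batch instead of one per image
    return flat.index_select(2, kernel).view(b, c, h, w)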
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/data/__init__.py

--------------------------------------------------------------------------------
/src/data/utils.py:
--------------------------------------------------------------------------------
import torch


class PartialDataset(torch.utils.data.Dataset):
    def __init__(self, parent_ds, offset, length, transform):
        self.parent_ds = parent_ds
        self.offset = offset
        self.length = length
        self.transform = transform
        assert len(parent_ds) >= offset + length, "Parent Dataset not long enough"
        super(PartialDataset, self).__init__()

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        self.parent_ds.transform = self.transform
        return self.parent_ds[i + self.offset]


def validation_split(dataset, train_transforms, val_transforms, val_size=None, val_share=0.1):
    """
    Split a (training and validation combined) dataset into training and validation.
    Note that to be statistically sound, the items in the dataset should be statistically
    independent (e.g. not sorted by class, not several instances of the same sample that
    could end up in either set).

    inputs:
        dataset: ("training") dataset to split into training and validation
        train_transforms: transform applied to the training portion
        val_transforms: transform applied to the validation portion
        val_size: absolute size of the validation set (takes precedence over val_share)
        val_share: fraction of validation data (should be 0 < val_share < 1, default: 0.1)
    returns: the input dataset split into a training set and a validation set
    """
    val_offset = len(dataset) - val_size if val_size is not None else int(len(dataset) * (1 - val_share))
    assert val_offset > 0, "Can't extract a size {} validation set out of a size {} dataset".format(val_size, len(dataset))
    return (PartialDataset(dataset, 0, val_offset, train_transforms),
            PartialDataset(dataset, val_offset, len(dataset) - val_offset, val_transforms))
--------------------------------------------------------------------------------
/src/model/ProgressiveNeuralNetworks.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class PNNLinearBlock(nn.Module):
    def __init__(self, col, depth, n_in, n_out):
        super(PNNLinearBlock, self).__init__()
        self.col = col
        self.depth = depth
        self.n_in = n_in
        self.n_out = n_out
        self.w = nn.Linear(n_in, n_out)

        # Lateral connections: one adapter per previous column (only past depth 0).
        self.u = nn.ModuleList()
        if self.depth > 0:
            self.u.extend([nn.Linear(n_in, n_out) for _ in range(col)])

    def forward(self, inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]
        # inputs[-1] comes from this column, inputs[:-1] from the previous ones.
        cur_column_out = self.w(inputs[-1])
        prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)]

        return F.relu(cur_column_out + sum(prev_columns_out))


class PNN(nn.Module):
    def __init__(self, n_layers):
        super(PNN, self).__init__()
        self.n_layers = n_layers
        self.columns = nn.ModuleList([])

        self.use_cuda = False

    def forward(self, x, task_id=-1):
        assert self.columns, 'PNN should have at least one column (missing call to `new_task`?)'
        inputs = [c[0](x) for c in self.columns]

        for l in range(1, self.n_layers):
            outputs = []

            # TODO: use task_id to check whether all columns are really needed
            for i, column in enumerate(self.columns):
                outputs.append(column[l](inputs[:i + 1]))

            inputs = outputs

        return inputs[task_id]

    def new_task(self, sizes):
        msg = "Should have the out size for each layer + input size (got {} sizes but {} layers)."
        assert len(sizes) == self.n_layers + 1, msg.format(len(sizes), self.n_layers)
        task_id = len(self.columns)

        modules = []
        for i in range(0, self.n_layers):
            modules.append(PNNLinearBlock(task_id, i, sizes[i], sizes[i + 1]))
        new_column = nn.ModuleList(modules)
        self.columns.append(new_column)

        if self.use_cuda:
            self.cuda()

    def freeze_columns(self, skip=None):
        if skip is None:
            skip = []

        for i, c in enumerate(self.columns):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

    def parameters(self, col=None):
        if col is None:
            return super(PNN, self).parameters()
        return self.columns[col].parameters()

    def cuda(self, *args, **kwargs):
        self.use_cuda = True
        super(PNN, self).cuda(*args, **kwargs)

    def cpu(self):
        self.use_cuda = False
        super(PNN, self).cpu()
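A minimal smoke test of the column mechanics above (an illustrative sketch, mirroring the sizes used in main.py):

import torch
from torch.autograd import Variable

from src.model.ProgressiveNeuralNetworks import PNN

pnn = PNN(n_layers=3)
pnn.new_task([784, 1024, 512, 10])   # column 0
pnn.freeze_columns()
pnn.new_task([784, 1024, 512, 10])   # column 1, laterally connected to column 0
out = pnn(Variable(torch.rand(4, 784)), task_id=1)
print(out.size())                    # (4, 10): predictions read from column 1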
--------------------------------------------------------------------------------
/src/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/model/__init__.py

--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/src/tools/__init__.py

--------------------------------------------------------------------------------
/src/tools/arg_parser_actions.py:
--------------------------------------------------------------------------------
import argparse


class LengthCheckAction(argparse.Action):

    def __call__(self, parser, namespace, values, option_string=None):
        # PNN.new_task expects one output size per layer plus the input size,
        # i.e. L + 1 values for L layers.
        if len(values) != namespace.layers + 1:
            msg = "Sizes must have length L+1 (one size per layer plus the input size). L={}, got {} values"
            parser.error(msg.format(namespace.layers, len(values)))

        setattr(namespace, self.dest, values)
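The action can be exercised on its own (an illustrative sketch; the parser mirrors the relevant main.py arguments):

import argparse

from src.tools.arg_parser_actions import LengthCheckAction

parser = argparse.ArgumentParser()
parser.add_argument('--layers', type=int, default=3)
parser.add_argument('--sizes', type=int, nargs='+', action=LengthCheckAction)

print(parser.parse_args(['--sizes', '784', '1024', '512', '10']).sizes)  # [784, 1024, 512, 10]
# parser.parse_args(['--sizes', '784', '512', '10']) would exit via parser.error,
# since 3 values were given for L=3 layers (L+1 expected).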
--------------------------------------------------------------------------------
/src/tools/evaluation.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from tqdm import tqdm


def evaluate_model(model, x, y, dataset_loader, **kwargs):
    total = 0
    correct = 0
    for images, labels in tqdm(dataset_loader, ascii=True):
        x.resize_(images.size()).copy_(images)
        y.resize_(labels.size()).copy_(labels)

        # volatile=True disables graph construction during inference (pre-0.4 idiom).
        inputs = Variable(x.view(x.size(0), -1), volatile=True)
        preds = model(inputs, **kwargs)

        _, predicted = torch.max(preds.data, 1)

        total += labels.size(0)
        correct += (predicted == y).sum()

    return 100.0 * correct / total
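On PyTorch >= 0.4, where volatile became a no-op, the equivalent inference guard would be torch.no_grad(), roughly (sketch of the modernized loop body):

with torch.no_grad():
    preds = model(x.view(x.size(0), -1), **kwargs)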
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomVeniat/ProgressiveNeuralNetworks.pytorch/a24f1ef2632055b849dc2c93d82c3d740ffc86c1/test/__init__.py

--------------------------------------------------------------------------------
/test/hod_pytorch.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from sklearn.datasets import make_blobs


def vectorize(tensors):
    # Flatten and concatenate a list of tensors into a single 1-D tensor.
    res = None
    for t in tensors:
        if res is None:
            res = t.view(-1)
        else:
            res = torch.cat([res, t.view(-1)])
    return res


# ## Create Dataset

np.random.seed(666)
nb_samples = 2000
X, Y = make_blobs(n_samples=nb_samples, n_features=2, centers=2, cluster_std=1.1, random_state=2000)

# Transform the original dataset so as to learn the bias like any other parameter
Xc = np.ones((nb_samples, X.shape[1] + 1), dtype=np.float32)
Yc = np.zeros((nb_samples, 1), dtype=np.float32)

Xc[:, 0:2] = X
Yc[:, 0] = Y

# ## TF

# Create Tensorflow graph
graph = tf.Graph()

with graph.as_default():
    Xi = tf.placeholder(tf.float32, Xc.shape)
    Yi = tf.placeholder(tf.float32, Yc.shape)

    # Weights (+ bias)
    W = tf.Variable(tf.random_normal([Xc.shape[1], 1], 0.0, 0.01))

    # Z = wx + b
    Z = tf.matmul(Xi, W)

    # (Negative) log-likelihood: summed sigmoid cross-entropy
    log_likelihood = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=Z, labels=Yi))

    # Cost function (log-likelihood + L2 penalty)
    cost = log_likelihood + 0.5  # * tf.norm(W, ord=2)

    trainer = tf.train.GradientDescentOptimizer(0.0025)
    training_step = trainer.minimize(cost)

    # Compute the FIM as the outer product of the score
    dW = tf.gradients(-log_likelihood, W)
    FIM = tf.matmul(tf.reshape(dW, (Xc.shape[1], 1)), tf.reshape(dW, (Xc.shape[1], 1)), transpose_b=True)

    # FIM_h = tf.gradients(tf.gradients(-log_likelihood, W)[0], W)[0]
    # FIM_h = tf.hessians(-log_likelihood, tf.squeeze(W))
    # Hessian row by row: [grad(dW_0, W), grad(dW_1, W), ...]
    hess_list = [tf.gradients(y_, W)[0] for y_ in tf.unstack(tf.squeeze(dW))]
    FIM_h = tf.stack(hess_list)

# Create Tensorflow session
session = tf.InteractiveSession(graph=graph)

# Initialize all variables
tf.global_variables_initializer().run()

# Run a training cycle.
# The model is quite simple; a check on the cost function should nonetheless be performed.
for _ in range(3500):
    _, c, z_out = session.run([training_step, cost, Z], feed_dict={Xi: Xc, Yi: Yc})

# Compute the Fisher Information Matrix at the MLE
tf_w, tf_ll, tf_grads, tf_fim, tf_fim_h = session.run([W, log_likelihood, dW, FIM, FIM_h], feed_dict={Xi: Xc, Yi: Yc})

# ## Pytorch

X_py = torch.from_numpy(X).float()
Y_py = Variable(torch.from_numpy(Y)).float().unsqueeze(1)

n_step = 3500
model = nn.Linear(2, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0025)

for _ in range(n_step):
    preds = model(Variable(X_py))
    loss = F.binary_cross_entropy(preds.sigmoid(), Y_py, size_average=False) + 0.5
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

pytorch_p = model(Variable(X_py)).sigmoid()
pytorch_w = vectorize(model.parameters())
pytorch_ll = -(Y_py * pytorch_p.log() + (1 - Y_py) * torch.log((1 - pytorch_p) + 1e-9)).sum()
# pytorch_ll = F.binary_cross_entropy(pytorch_p, Y_py, size_average=False)
pytorch_grads = vectorize(torch.autograd.grad(-pytorch_ll, model.parameters(), create_graph=True))
pytorch_fim = torch.mm(pytorch_grads.view(-1, 1), pytorch_grads.view(1, -1))

pytorch_hess_fim = np.array(
    [vectorize(torch.autograd.grad(v, model.parameters(), create_graph=True)).data.numpy() for v in pytorch_grads])

# ## Create a new pytorch model from TF weights

new_model = nn.Linear(2, 1)
new_model.weight = nn.Parameter(torch.from_numpy(tf_w[:2]).view(1, 2))
new_model.bias = nn.Parameter(torch.from_numpy(tf_w[2]))

new_p = new_model(Variable(X_py)).sigmoid()

new_pytorch_w = vectorize(new_model.parameters())
new_pytorch_ll = F.binary_cross_entropy(new_p, Y_py, size_average=False)
new_pytorch_grads = vectorize(torch.autograd.grad(-new_pytorch_ll, new_model.parameters(), create_graph=True))
new_pytorch_fim = torch.mm(new_pytorch_grads.view(-1, 1), new_pytorch_grads.view(1, -1))

# new_pytorch_hess_fim = [torch.autograd.grad(v, new_model.parameters()) for v in vectorize(new_pytorch_grads)]


# ## RES:

# ### Pytorch

print("Pytorch params: {}".format(pytorch_w.data.numpy()))
print("Pytorch Log-Likelihood: {}".format(pytorch_ll.data[0]))
print("Pytorch LL-grad: {}".format(pytorch_grads.data.numpy()))
print("Pytorch FIM:\n {}".format(pytorch_fim.data.numpy()))
print("Pytorch FIM (Hess):\n {}".format(pytorch_hess_fim))

# ### TF

print("TF params: {}".format(tf_w))
print("TF Log-Likelihood: {}".format(tf_ll))
print("TF LL-grad: {}".format(tf_grads))
print("TF FIM:\n {}".format(tf_fim))
print("TF FIM_h:\n {}".format(tf_fim_h))

# ### TF copy (Pytorch B)

print("New model params: {}".format(new_pytorch_w.data.numpy()))
print("New model Log-Likelihood: {}".format(new_pytorch_ll.data[0]))
print("New model LL-grad: {}".format(new_pytorch_grads.data.numpy()))
print("New model FIM:\n {}".format(new_pytorch_fim.data.numpy()))
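As a quick numerical sanity check on the quantities printed above (an illustrative addition, not part of the original script): both FIM estimates are 3x3 here (two weights plus a bias), the outer-product estimate is symmetric by construction, and the Hessian-based one should be positive semi-definite at the MLE, since the logistic negative log-likelihood is convex:

fim_outer = pytorch_fim.data.numpy()
assert fim_outer.shape == (3, 3) and np.allclose(fim_outer, fim_outer.T)
assert np.all(np.linalg.eigvalsh(pytorch_hess_fim) >= -1e-6)  # PSD up to float noise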
--------------------------------------------------------------------------------
/test/hod_tf.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

from sklearn.datasets import make_blobs

# ## Create Dataset

np.random.seed(666)
nb_samples = 2000
X, Y = make_blobs(n_samples=nb_samples, n_features=2, centers=2, cluster_std=1.1, random_state=2000)

# Transform the original dataset so as to learn the bias like any other parameter
Xc = np.ones((nb_samples, X.shape[1] + 1), dtype=np.float32)
Yc = np.zeros((nb_samples, 1), dtype=np.float32)

Xc[:, 0:2] = X
Yc[:, 0] = Y

# Create Tensorflow graph
graph = tf.Graph()

with graph.as_default():
    Xi = tf.placeholder(tf.float32, Xc.shape)
    Yi = tf.placeholder(tf.float32, Yc.shape)

    # Weights (+ bias)
    W = tf.Variable(tf.random_normal([Xc.shape[1], 1], 0.0, 0.01))

    # Z = wx + b
    Z = tf.matmul(Xi, W)

    # (Negative) log-likelihood: summed sigmoid cross-entropy
    log_likelihood = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=Z, labels=Yi))

    # Cost function (log-likelihood + L2 penalty)
    cost = log_likelihood + 0.5  # * tf.norm(W, ord=2)

    trainer = tf.train.GradientDescentOptimizer(0.0025)
    training_step = trainer.minimize(cost)

    # Compute the FIM as the outer product of the score
    dW = tf.gradients(-log_likelihood, W)
    FIM = tf.matmul(tf.reshape(dW, (Xc.shape[1], 1)), tf.reshape(dW, (Xc.shape[1], 1)), transpose_b=True)

    # FIM_h = tf.gradients(tf.gradients(-log_likelihood, W)[0], W)[0]
    # FIM_h = tf.hessians(-log_likelihood, tf.squeeze(W))
    # Hessian row by row: [grad(dW_0, W), grad(dW_1, W), ...]
    hess_list = [tf.gradients(y_, W)[0] for y_ in tf.unstack(tf.squeeze(dW))]
    FIM_h = tf.stack(hess_list)

# Create Tensorflow session
session = tf.InteractiveSession(graph=graph)

# Initialize all variables
tf.global_variables_initializer().run()

# Run a training cycle.
# The model is quite simple; a check on the cost function should nonetheless be performed.
for _ in range(3500):
    _, c, z_out = session.run([training_step, cost, Z], feed_dict={Xi: Xc, Yi: Yc})

# Compute the Fisher Information Matrix at the MLE
tf_w, tf_ll, tf_grads, tf_fim, tf_fim_h = session.run([W, log_likelihood, dW, FIM, FIM_h], feed_dict={Xi: Xc, Yi: Yc})

# ## RES:

print("TF params: {}".format(tf_w))
print("TF Log-Likelihood: {}".format(tf_ll))
print("TF LL-grad: {}".format(tf_grads))
print("TF FIM:\n {}".format(tf_fim))
print("TF FIM_h:\n {}".format(tf_fim_h))
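One caveat about the FIM computed in both scripts (an illustrative numpy sketch, not part of the repository): dW is the gradient of the log-likelihood summed over all samples, so dW dW^T is a rank-1 matrix, whereas the empirical Fisher averages per-sample outer products:

import numpy as np

g = np.random.randn(2000, 3)                        # stand-in for per-sample score vectors g_n
fim_rank1 = np.outer(g.sum(0), g.sum(0))            # what the scripts compute (up to scaling)
fim_emp = (g[:, :, None] @ g[:, None, :]).mean(0)   # empirical Fisher: mean of g_n g_n^T
print(np.linalg.matrix_rank(fim_rank1), np.linalg.matrix_rank(fim_emp))  # 1 vs. 3 (generically)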
--------------------------------------------------------------------------------
/test/test_rand_perm.py:
--------------------------------------------------------------------------------
import unittest

import torch

from src.data.PermutedMNIST import RandomPermutation


class MyTestCase(unittest.TestCase):
    def test_RandomPermutation(self):
        # A given RandomPermutation instance must be deterministic: applying it
        # repeatedly to copies of the same image yields identical results.
        n = 10
        in_img = torch.rand(3, 224, 224)
        for i in range(n):
            rand_perm = RandomPermutation(0, 0, 224, 224)
            permuted = rand_perm(in_img.clone())
            for j in range(n):
                self.assertTrue(torch.equal(permuted, rand_perm(in_img.clone())))


if __name__ == '__main__':
    unittest.main()

--------------------------------------------------------------------------------
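The test assumes it is launched from the repository root (so that the src package is importable), e.g. with:

    python -m unittest test.test_rand_perm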