├── lnets ├── __init__.py ├── data │ ├── __init__.py │ ├── utils.py │ ├── small_data.py │ ├── generate_data_indices.py │ ├── data_transforms.py │ └── load_data.py ├── tasks │ ├── __init__.py │ ├── gan │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── data_loader.py │ │ ├── mains │ │ │ ├── __init__.py │ │ │ └── train_gan.py │ │ ├── models │ │ │ └── __init__.py │ │ ├── configs │ │ │ ├── train_GAN.json │ │ │ └── train_LWGAN.json │ │ └── gan_utils.py │ ├── dualnets │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── estimate_wde_gan.json │ │ │ ├── absolute_value_experiment.json │ │ │ ├── three_cones_experiment.json │ │ │ └── high_dimensional_cone_experiment.json │ │ ├── distrib │ │ │ ├── __init__.py │ │ │ ├── base_distrib.py │ │ │ ├── gan_sampler.py │ │ │ ├── multi_spherical_shell.py │ │ │ └── load_distrib.py │ │ ├── visualize │ │ │ ├── __init__.py │ │ │ └── visualize_dualnet.py │ │ └── mains │ │ │ └── train_dual.py │ ├── adversarial │ │ ├── __init__.py │ │ ├── mains │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ ├── check_adv_gradients.py │ │ │ ├── train_pgd.py │ │ │ └── eval_adv_robustness.py │ │ ├── configs │ │ │ ├── check_adv_gradients.json │ │ │ ├── CWAttack.json │ │ │ ├── boundary_attack.json │ │ │ ├── fast_gradient_sign.json │ │ │ └── projected_gradient_descent.json │ │ └── attack │ │ │ └── perform_attack.py │ └── classification │ │ ├── mains │ │ ├── generate_data_indices.py │ │ ├── ortho_finetune.py │ │ ├── check_undead.py │ │ ├── eval_classifier.py │ │ └── train_classifier.py │ │ └── configs │ │ ├── small_mnist │ │ ├── lenet_bjorck.json │ │ └── lenet_standard.json │ │ └── standard │ │ ├── fc_classification.json │ │ ├── fc_classification_bjorck.json │ │ ├── fc_classification_l_inf.json │ │ ├── fc_classification_parseval.json │ │ ├── fc_classification_dropout.json │ │ ├── fc_classification_spec_jac.json │ │ ├── fc_classification_spec_norm.json │ │ └── fc_classification_l_inf_margin.json ├── utils │ ├── __init__.py │ ├── math │ │ 
├── __init__.py │ │ ├── projections │ │ │ ├── __init__.py │ │ │ ├── project.py │ │ │ ├── l2_ball.py │ │ │ └── linf_ball.py │ │ └── autodiff.py │ ├── seeding.py │ ├── dynamic_importer.py │ ├── misc.py │ ├── logging.py │ ├── config.py │ ├── saving_and_loading.py │ └── training_getters.py ├── models │ ├── regularization │ │ ├── __init__.py │ │ └── spec_jac.py │ ├── layers │ │ ├── __init__.py │ │ ├── conv │ │ │ ├── __init__.py │ │ │ ├── standard_conv2d.py │ │ │ ├── base_conv2d.py │ │ │ ├── bjorck_conv2d.py │ │ │ └── l_inf_projected_conv2d.py │ │ ├── dense │ │ │ ├── __init__.py │ │ │ ├── parseval_l2_linear.py │ │ │ ├── standard_linear.py │ │ │ ├── l_inf_projected.py │ │ │ ├── bjorck_linear.py │ │ │ ├── base_dense_linear.py │ │ │ └── spectral_normal.py │ │ └── scale.py │ ├── utils │ │ ├── __init__.py │ │ ├── selections.py │ │ └── conversion.py │ ├── architectures │ │ ├── __init__.py │ │ ├── hard_coded │ │ │ ├── __init__.py │ │ │ ├── parseval_infogan_discriminator.py │ │ │ ├── lenet.py │ │ │ └── resnet.py │ │ ├── base_architecture.py │ │ ├── fully_convolutional_2d.py │ │ └── fully_connected.py │ ├── activations │ │ ├── base_activation.py │ │ ├── __init__.py │ │ ├── identity.py │ │ ├── group_sort.py │ │ └── maxout.py │ ├── model_types │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── dual_optim_model.py │ │ └── classification_model.py │ └── __init__.py ├── optimizers │ └── aggmo.py └── trainers │ └── trainer.py ├── setup.py └── .gitignore /lnets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /lnets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/gan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/utils/math/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/dualnets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/gan/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/gan/mains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/adversarial/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/gan/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/models/regularization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lnets/tasks/adversarial/mains/__init__.py: 
class BaseDistrib(object):
    """Abstract base class for the distributions of the dualnets task.

    Stores the configuration it is given; concrete distributions must
    override ``__call__``.
    """

    def __init__(self, config):
        # Keep the configuration around so subclasses can read it.
        self.config = config

    def __call__(self, size):
        """Must be overridden by subclasses; the base class always raises.

        :param size: argument forwarded by the caller (unused here).
        :raises NotImplementedError: always.
        """
        raise NotImplementedError()
class Activation(nn.Module):
    """Common ``nn.Module`` base class for the activation functions of this package."""

    def __init__(self):
        super(Activation, self).__init__()

    def forward(self, x):
        """Must be overridden by concrete activations; the base class always raises.

        :param x: input passed to the module.
        :raises NotImplementedError: always.
        """
        raise NotImplementedError()
def set_experiment_seed(seed):
    """Seed every random number generator touched by an experiment.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (via ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``), then
    sets ``torch.backends.cudnn.deterministic`` to ``True``.

    :param seed: seed value handed to each generator.
    """
    # Python-level generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU and CUDA).
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Request deterministic cuDNN behaviour.
    torch.backends.cudnn.deterministic = True
"out/adversarual", 4 | "pretrained_path": "out/path/to/model", 5 | "attack_kwargs": { 6 | "epsilons": 50, 7 | "max_epsilon": 1.0 8 | }, 9 | "data": { 10 | "batch_size": 1 11 | }, 12 | "num_examples": 500, 13 | "distance": "Linfinity", 14 | "cuda": true 15 | } -------------------------------------------------------------------------------- /lnets/utils/dynamic_importer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import importlib 4 | 5 | 6 | def dynamic_import(filepath, class_name): 7 | tail, head = os.path.split(filepath) 8 | sys.path.append(tail) 9 | module = importlib.import_module(".".join(head.split(".")[:-1])) 10 | sys.path.remove(tail) 11 | 12 | my_class = getattr(module, class_name) 13 | 14 | return my_class 15 | -------------------------------------------------------------------------------- /lnets/tasks/adversarial/configs/projected_gradient_descent.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ProjectedGradientDescent", 3 | "output_root": "out/adversarial", 4 | "pretrained_path": "out/path/to/model", 5 | "attack_kwargs": { 6 | "binary_search": 5, 7 | "iterations": 20 8 | }, 9 | "data": { 10 | "batch_size": 1 11 | }, 12 | "num_examples": 500, 13 | "distance": "Linfinity", 14 | "cuda": true 15 | } -------------------------------------------------------------------------------- /lnets/models/model_types/__init__.py: -------------------------------------------------------------------------------- 1 | from lnets.models.model_types.base_model import ExperimentModel 2 | from lnets.models.model_types.classification_model import ClassificationModel, HingeLossClassificationModel 3 | from lnets.models.model_types.classification_model import JacSpecClassificationModel 4 | from lnets.models.model_types.classification_model import MarginClassificationModel 5 | from lnets.models.model_types.dual_optim_model import DualOptimModel 6 | 
def initialize_best_val(criterion):
    """Return the initial "best value" for an optimization criterion.

    The returned sentinel is chosen so that any finite value compares as an
    improvement over it.

    :param criterion: ``'min'`` or ``'max'``.
    :return: ``np.inf`` for ``'min'``, ``-np.inf`` for ``'max'``.
    :raises ValueError: if ``criterion`` is neither ``'min'`` nor ``'max'``.
        (Previously this case only printed a message and implicitly returned
        ``None``, deferring the failure to a later comparison.)
    """
    if criterion == 'min':
        return np.inf
    if criterion == 'max':
        return -np.inf
    raise ValueError(
        "The optimization criterion must be either 'max' or 'min', got {!r}.".format(criterion))
class StandardLinear(DenseLinear):
    r"""
    Applies a linear transformation to the incoming data: :math:`y = Ax + b`

    No projection or normalisation is applied to the weight in ``forward``.

    :param in_features: forwarded to ``_set_network_parameters``.
    :param out_features: forwarded to ``_set_network_parameters``.
    :param bias: forwarded to ``_set_network_parameters``.
    :param config: configuration object; despite the ``None`` default it is
        required, because ``config.cuda`` is read during construction.
    """

    def __init__(self, in_features, out_features, bias=True, config=None):
        # Start the initialiser chain at this class. ``super(DenseLinear, self)``
        # would begin the MRO lookup *after* DenseLinear and skip its __init__,
        # unlike the sibling layers (BjorckLinear, ParsevalL2Linear).
        super(StandardLinear, self).__init__()
        self._set_config(config)
        self._set_network_parameters(in_features, out_features, bias, cuda=config.cuda)

    def forward(self, x):
        """Return ``F.linear(x, self.weight, self.bias)``."""
        return F.linear(x, self.weight, self.bias)
class Scale(nn.Module):
    r"""Multiplies its input by a fixed scalar factor.

    :param factor: the multiplier.
    :param cuda: when true, the factor is stored as a one-element CUDA tensor
        instead of the raw value.
    """

    def __init__(self, factor, cuda=False):
        super(Scale, self).__init__()
        self.factor = factor
        if not cuda:
            return
        # Keep the factor on the GPU as a one-element tensor.
        self.factor = torch.Tensor([self.factor]).cuda()

    def reset_parameters(self):
        """Nothing to reset: the module holds no learnable parameters."""
        return None

    def forward(self, input):
        scale = self.factor
        # A unit factor returns the input untouched, so the multiplication is
        # neither computed nor backpropagated through.
        return input if scale == 1 else scale * input

    def extra_repr(self):
        return 'factor={}'.format(self.factor)
class ExperimentModel(nn.Module):
    """Wraps a network together with the meters that track its training loss.

    Subclasses implement :meth:`loss`; the wrapper forwards inputs to the
    underlying ``model`` and keeps an ordered dictionary of meters, which by
    default contains a single ``'loss'`` average meter.

    :param model: the wrapped network; it is called in :meth:`forward` and its
        ``input_size`` attribute is exposed through :meth:`input_size`.
    """

    def __init__(self, model):
        super(ExperimentModel, self).__init__()
        self.model = model
        self._init_meters()

    def forward(self, x):
        """Run ``x`` through the wrapped model."""
        return self.model(x)

    def loss(self, sample, test=False):
        """Compute the loss for ``sample``; must be implemented by subclasses."""
        raise NotImplementedError

    def input_size(self):
        """Return the ``input_size`` attribute of the wrapped model."""
        return self.model.input_size

    def _init_meters(self):
        # One running-average meter for the loss; subclasses may add more.
        self.meters = OrderedDict([('loss', tnt.meter.AverageValueMeter())])

    def reset_meters(self):
        """Reset every registered meter."""
        for meter in self.meters.values():
            meter.reset()

    def add_to_meters(self, state):
        """Record ``state['loss']`` in the loss meter.

        :param state: mapping whose ``'loss'`` entry is a single-element tensor.
        """
        # ``.item()`` works for both 0-dim and one-element tensors, whereas the
        # former ``.data[0]`` indexing fails on 0-dim tensors.
        self.meters['loss'].add(state['loss'].item())
from os import path 6 | import os 7 | 8 | working_dir = path.abspath(path.dirname(__file__)) 9 | ROOT = os.path.abspath(os.path.dirname(__file__)) 10 | 11 | # Read the README. 12 | with open(os.path.join(ROOT, 'README.md'), encoding="utf-8") as f: 13 | README = f.read() 14 | 15 | setup(name='lnets', 16 | version='0.0.1', 17 | description='Implementation and evaluation of lipschitz neural networks. ', 18 | long_description=README, 19 | long_description_content_type='text/markdown', 20 | packages=find_packages(exclude=['tests*']), 21 | setup_requires=["cython", "numpy", "torch", "torchvision"], 22 | install_requires=["numpy", "scipy", "cython", "matplotlib", "jupyter", "POT", "scikit-learn", "tqdm", "munch", 23 | "pytest", "torchnet", "foolbox", "imageio"], 24 | ) 25 | -------------------------------------------------------------------------------- /lnets/models/model_types/dual_optim_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | 4 | from lnets.models.model_types.base_model import ExperimentModel 5 | 6 | 7 | class DualOptimModel(ExperimentModel): 8 | def _init_meters(self): 9 | super(DualOptimModel, self)._init_meters() 10 | 11 | def loss(self, sample, test=False): 12 | # d1 stands for distribution 1. 13 | # d2 stands for distribution 2. 
class Architecture(nn.Module):
    """Base class for network architectures built around ``self.model``.

    Subclasses are expected to set ``self.model`` to an indexable, iterable
    container of layers and to implement :meth:`forward`.
    :meth:`get_activations` additionally relies on ``self.input_dim``, which
    is not defined in this class.
    """

    def __init__(self):
        super(Architecture, self).__init__()

    def __len__(self):
        """Number of entries in ``self.model``."""
        return len(self.model)

    def __getitem__(self, idx):
        """Return the entry of ``self.model`` at ``idx``."""
        return self.model[idx]

    def forward(self, x):
        """Must be implemented by subclasses."""
        raise NotImplementedError

    def project_network_weights(self, proj_config):
        """Project the weights of every layer that supports projection.

        Layers without a callable ``project_weights`` attribute are skipped.
        Errors raised *inside* a layer's ``project_weights`` now propagate;
        they used to be swallowed by a bare ``except`` (which even caught
        ``SystemExit``), hiding misconfigured projections.

        :param proj_config: projection configuration handed to each layer.
        """
        # Project the weights on the manifold of orthonormal matrices.
        for layer in self.model:
            project = getattr(layer, "project_weights", None)
            if callable(project):
                project(proj_config)

    def get_activations(self, x):
        """Return detached copies of the outputs of all non-``Activation`` layers.

        ``x`` is reshaped to ``(-1, self.input_dim)`` and passed through the
        layers of ``self.model`` in order; the output of each layer that is
        not an ``Activation`` instance is recorded.

        :param x: input tensor.
        :return: list of tensors, one per recorded layer.
        """
        activations = []
        x = x.view(-1, self.input_dim)
        for m in self.model:
            x = m(x)
            if not isinstance(m, Activation):
                activations.append(x.detach().clone())
        return activations
def project_weights(weight, proj_config, cuda=False):
    """Project ``weight`` according to ``proj_config.type``, without tracking gradients.

    Supported types:

    * ``"l_2"`` - ``project_on_l2_ball`` applied to the transposed weight
      (using ``bjorck_iter``, ``bjorck_order`` and ``bjorck_beta`` from the
      config); the result is transposed back.
    * ``"l_inf_projected"`` - ``project_on_linf_ball``.
    * ``"l_inf_scaled"`` - ``scale_on_linf_ball`` with ``proj_config.scale_all``.

    :param weight: weight tensor to project.
    :param proj_config: configuration object exposing ``type`` and the
        type-specific fields listed above.
    :param cuda: forwarded to the projection helpers.
    :return: the projected weights.
    :raises ValueError: if ``proj_config.type`` is not one of the supported
        types. (Previously the function printed a message and called the
        interactive-shell helper ``exit(-1)``.)
    """
    with torch.no_grad():
        if proj_config.type == "l_2":
            projected_weights = project_on_l2_ball(weight.t(),
                                                   bjorck_iter=proj_config.bjorck_iter,
                                                   bjorck_order=proj_config.bjorck_order,
                                                   bjorck_beta=proj_config.bjorck_beta,
                                                   cuda=cuda).t()

        elif proj_config.type == "l_inf_projected":
            projected_weights = project_on_linf_ball(weight, cuda)

        elif proj_config.type == "l_inf_scaled":
            projected_weights = scale_on_linf_ball(weight,
                                                   scale_all=proj_config.scale_all,
                                                   cuda=cuda)

        else:
            raise ValueError(
                "Requested projection type not recognized: {!r}.".format(proj_config.type))

    return projected_weights
Use the spectralnorm as a regularization term requiring only one additional backward pass 9 | """ 10 | 11 | import torch.nn.functional as F 12 | from torch.autograd import grad 13 | 14 | 15 | def jac_spectral_norm(output, x, u): 16 | """ 17 | Returns updated estimates of spectral norm and u 18 | 19 | (Might need to average over batch before-hand for 20 | correct stochastic computation) 21 | 22 | 23 | RETURNS: s, u 24 | spectral norm, leading singular vector 25 | """ 26 | # First we compute the update for u. 27 | u = u.clone() 28 | u.requires_grad = True 29 | vjp = grad(output.mean(0), x, u, create_graph=True)[0].view(x.size(0), -1) 30 | v = F.normalize(vjp).detach() 31 | 32 | # A new trick for calculating Jacobian vector products. 33 | # https://www.reddit.com/r/MachineLearning/comments/6gvv0o/r_first_blog_post_a_new_trick_for_calculating/ 34 | jvp = grad(vjp, u, v, create_graph=True)[0] 35 | u = F.normalize(jvp, dim=0).detach() 36 | spectral_norm = u.dot(jvp) 37 | return spectral_norm, u 38 | -------------------------------------------------------------------------------- /lnets/data/generate_data_indices.py: -------------------------------------------------------------------------------- 1 | from lnets.data.load_data import get_datasets 2 | from lnets.data.utils import save_indices 3 | 4 | import argparse 5 | import os 6 | from munch import Munch 7 | 8 | 9 | def main(opt): 10 | opt.data.transform = Munch(type='none') 11 | 12 | indices_path = os.path.join(opt.data.root, opt.data.name) 13 | 14 | train_data, _, _ = get_datasets(opt) 15 | 16 | save_indices(train_data, indices_path, opt.per_class_count, opt.data.class_count, opt.val_size) 17 | 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser(description='Generate data indices. 
') 21 | parser.add_argument('--data.name', type=str, metavar='MODELPATH', 22 | help="location of pretrained model weights to evaluate") 23 | parser.add_argument('--data.root', type=str, help='output directory to which results should be saved') 24 | parser.add_argument('--data.class_count', type=int, help='total number of classes in dataset') 25 | parser.add_argument('--per_class_count', type=int, help="How many training data points per class") 26 | parser.add_argument('--val_size', type=int, help="Total number of validation points") 27 | args = vars(parser.parse_args()) 28 | 29 | opt = {} 30 | for k, v in args.items(): 31 | cur = opt 32 | tokens = k.split('.') 33 | for token in tokens[:-1]: 34 | if token not in cur: 35 | cur[token] = {} 36 | cur = cur[token] 37 | cur[tokens[-1]] = v 38 | main(Munch.fromDict(opt)) 39 | -------------------------------------------------------------------------------- /lnets/tasks/classification/mains/generate_data_indices.py: -------------------------------------------------------------------------------- 1 | from lnets.data.load_data import get_datasets 2 | from lnets.data.utils import save_indices 3 | 4 | import argparse 5 | import os 6 | from munch import Munch 7 | 8 | 9 | def main(opt): 10 | opt.data.transform = Munch(type='none') 11 | 12 | indices_path = os.path.join(opt.data.root, opt.data.name) 13 | 14 | train_data, _, _ = get_datasets(opt) 15 | 16 | save_indices(train_data, indices_path, opt.per_class_count, opt.data.class_count, opt.val_size) 17 | 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser(description='Evaluate adversarial robustness of classification network') 21 | parser.add_argument('--data.name', type=str, metavar='MODELPATH', 22 | help="location of pretrained model weights to evaluate") 23 | parser.add_argument('--data.root', type=str, help='output directory to which results should be saved') 24 | parser.add_argument('--data.class_count', type=int, help='total number of classes in 
dataset') 25 | parser.add_argument('--per_class_count', type=int, help="How many training data points per class") 26 | parser.add_argument('--val_size', type=int, help="Total number of validation points") 27 | args = vars(parser.parse_args()) 28 | 29 | opt = {} 30 | for k, v in args.items(): 31 | cur = opt 32 | tokens = k.split('.') 33 | for token in tokens[:-1]: 34 | if token not in cur: 35 | cur[token] = {} 36 | cur = cur[token] 37 | cur[tokens[-1]] = v 38 | main(Munch.fromDict(opt)) 39 | -------------------------------------------------------------------------------- /lnets/models/utils/selections.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from lnets.models.layers import * 4 | from lnets.models.activations import * 5 | 6 | 7 | def select_linear_layer(linear_type): 8 | if linear_type == "standard": 9 | return StandardLinear 10 | elif linear_type == "spectral_normal": 11 | return SpectralNormLinear 12 | elif linear_type == "bjorck": 13 | return BjorckLinear 14 | elif linear_type == "l_inf_projected": 15 | return LInfProjectedLinear 16 | elif linear_type == "parseval_l2": 17 | return ParsevalL2Linear 18 | elif linear_type == "standard_conv2d": 19 | return StandardConv2d 20 | elif linear_type == "bjorck_conv2d": 21 | return BjorckConv2d 22 | elif linear_type == "l_inf_projected_conv2d": 23 | return LInfProjectedConv2D 24 | else: 25 | print("The requested dense linear layer is not supported yet. 
") 26 | exit(-1) 27 | 28 | 29 | def select_activation_function(activation): 30 | if activation == 'identity': 31 | act_func = Identity 32 | elif activation == 'relu': 33 | act_func = nn.ReLU 34 | elif activation == "abs": 35 | act_func = Abs 36 | elif activation == 'sigmoid': 37 | act_func = nn.Sigmoid 38 | elif activation == 'tanh': 39 | act_func = nn.Tanh 40 | elif activation == 'maxout': 41 | act_func = Maxout 42 | elif activation == 'maxmin': 43 | act_func = MaxMin 44 | elif activation == "group_sort": 45 | act_func = GroupSort 46 | else: 47 | act_func = None 48 | raise Exception('Unexpected activation function. ') 49 | return act_func 50 | -------------------------------------------------------------------------------- /lnets/models/layers/dense/base_dense_linear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import Parameter 5 | 6 | from lnets.utils.math.projections import * 7 | 8 | 9 | class DenseLinear(nn.Module): 10 | 11 | def __init__(self): 12 | super(DenseLinear, self).__init__() 13 | 14 | def _set_network_parameters(self, in_features, out_features, bias=True, cuda=None): 15 | self.in_features = in_features 16 | self.out_features = out_features 17 | 18 | # Set weights and biases. 19 | self.weight = Parameter(torch.Tensor(out_features, in_features)) 20 | if bias: 21 | self.bias = Parameter(torch.Tensor(out_features)) 22 | else: 23 | self.register_parameter('bias', None) 24 | self.reset_parameters() 25 | 26 | def _set_config(self, config): 27 | self.config = config 28 | 29 | def reset_parameters(self): 30 | stdv = 1. 
/ np.sqrt(self.weight.size(1)) 31 | nn.init.orthogonal_(self.weight, gain=stdv) 32 | if self.bias is not None: 33 | self.bias.data.uniform_(-stdv, stdv) 34 | 35 | def forward(self, x): 36 | raise NotImplementedError 37 | 38 | def project_weights(self, proj_config): 39 | with torch.no_grad(): 40 | projected_weights = project_weights(self.weight, proj_config, self.config.cuda) 41 | # Override the previous weights. 42 | self.weight.data.copy_(projected_weights) 43 | 44 | def extra_repr(self): 45 | return 'in_features={}, out_features={}, bias={}'.format( 46 | self.in_features, self.out_features, self.bias is not None) 47 | 48 | -------------------------------------------------------------------------------- /lnets/models/layers/conv/base_conv2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.conv import _ConvNd 3 | from torch.nn.modules.utils import _pair 4 | 5 | from lnets.utils.math.projections import project_weights 6 | 7 | 8 | class BaseConv2D(_ConvNd): 9 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 10 | padding=0, dilation=1, groups=1, bias=True): 11 | kernel_size = _pair(kernel_size) 12 | stride = _pair(stride) 13 | padding = _pair(padding) 14 | dilation = _pair(dilation) 15 | super(BaseConv2D, self).__init__( 16 | in_channels, out_channels, kernel_size, stride, padding, dilation, 17 | False, _pair(0), groups, bias) 18 | 19 | self.original_shape = self.weight.shape 20 | self.out_channels = out_channels 21 | 22 | def _set_config(self, config): 23 | self.config = config 24 | 25 | def project_weights(self, proj_config): 26 | with torch.no_grad(): 27 | flattened_weights = self.conv_form_to_matrix_form(self.weight, (self.out_channels, -1)) 28 | 29 | flattened_projected_weights = project_weights(flattened_weights, proj_config, cuda=self.config.cuda) 30 | 31 | projected_weights = self.matrix_form_to_conv_form(flattened_projected_weights, self.original_shape) 32 | 
33 | self.weight.data.copy_(projected_weights) 34 | 35 | @staticmethod 36 | def conv_form_to_matrix_form(weight, matrix_form_shape): 37 | return weight.view(matrix_form_shape) 38 | 39 | @staticmethod 40 | def matrix_form_to_conv_form(weight, conv_form_shape): 41 | return weight.view(conv_form_shape) 42 | 43 | def forward(self, x): 44 | raise NotImplementedError 45 | 46 | -------------------------------------------------------------------------------- /lnets/models/layers/dense/spectral_normal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from torch.nn import Parameter 5 | 6 | from lnets.models.layers.dense.base_dense_linear import DenseLinear 7 | 8 | 9 | class SpectralNormLinear(DenseLinear): 10 | r""" 11 | Applies a linear transformation to incoming distrib: :math:`y = Ax + b` such that A has spectral norm close to 1. 12 | """ 13 | 14 | def __init__(self, in_features, out_features, bias=True, config=None): 15 | super(SpectralNormLinear, self).__init__() 16 | self._set_config(config) 17 | self._set_network_parameters(in_features, out_features, bias, cuda=config.cuda) 18 | 19 | with torch.no_grad(): 20 | self.u = Parameter(torch.Tensor(out_features)) 21 | self.v = Parameter(torch.Tensor(in_features)) 22 | 23 | self.reset_u() 24 | 25 | self.power_iters = config.model.linear.power_iters 26 | 27 | def forward(self, x): 28 | self.power_iteration() 29 | spectral_norm = self.u.dot(self.weight.matmul(self.v)) 30 | normalized_w = self.weight / spectral_norm 31 | 32 | return F.linear(x, normalized_w, self.bias) 33 | 34 | def reset_u(self): 35 | stdv = 1. 
/ np.sqrt(self.weight.size(1)) 36 | self.u.data.normal_(-stdv, stdv) 37 | 38 | def power_iteration(self): 39 | with torch.no_grad(): 40 | for _ in range(self.power_iters): 41 | w_t_u = self.weight.t().matmul(self.u) 42 | w_t_u.div_(w_t_u.norm()) 43 | self.v.data.copy_(w_t_u.data) 44 | w_v = self.weight.matmul(self.v) 45 | w_v.div_(w_v.norm()) 46 | self.u.data.copy_(w_v) 47 | -------------------------------------------------------------------------------- /lnets/models/layers/conv/bjorck_conv2d.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.utils import _pair 2 | import torch.nn.functional as F 3 | 4 | from lnets.models.layers.conv.base_conv2d import BaseConv2D 5 | from lnets.utils.math.projections import bjorck_orthonormalize, get_safe_bjorck_scaling 6 | 7 | 8 | class BjorckConv2d(BaseConv2D): 9 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 10 | padding=0, dilation=1, groups=1, bias=True, config=None): 11 | kernel_size = _pair(kernel_size) 12 | stride = _pair(stride) 13 | padding = _pair(padding) 14 | dilation = _pair(dilation) 15 | super(BaseConv2D, self).__init__( 16 | in_channels, out_channels, kernel_size, stride, padding, dilation, 17 | False, _pair(0), groups, bias) 18 | 19 | self._set_config(config) 20 | self.original_shape = self.weight.shape 21 | self.out_channels = out_channels 22 | 23 | if stride == 1 or stride == [1, 1]: 24 | print("BEWARE: Norm is not being preserved due to stride > 1. ") 25 | 26 | def forward(self, x): 27 | # Reshape and put in a matrix form. 28 | flattened_weights = self.conv_form_to_matrix_form(self.weight, (self.out_channels, -1)) 29 | 30 | # Orthonormalize. The scaling makes sure the singular values of the matrix are constrained by 1. 31 | scaling = get_safe_bjorck_scaling(flattened_weights, cuda=self.config.cuda) 32 | ortho_weight_flattened = bjorck_orthonormalize(flattened_weights / scaling) 33 | 34 | # Reshape back. 
35 | ortho_weights = self.matrix_form_to_conv_form(ortho_weight_flattened, self.original_shape) 36 | 37 | return F.conv2d(x, ortho_weights, self.bias, self.stride, self.padding, self.dilation, self.groups) 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | led / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # 
mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Misc. 107 | .idea 108 | out 109 | /data 110 | 111 | -------------------------------------------------------------------------------- /lnets/tasks/dualnets/distrib/gan_sampler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pprint 3 | from munch import Munch 4 | pp = pprint.PrettyPrinter() 5 | 6 | from lnets.tasks.dualnets.distrib.base_distrib import BaseDistrib 7 | from lnets.tasks.gan.models.WGAN import WGAN 8 | 9 | 10 | class GANSampler(BaseDistrib): 11 | def __init__(self, config): 12 | super(GANSampler, self).__init__(config) 13 | 14 | self.config = config 15 | 16 | # Load GAN hyperparameters from GAN training json. 17 | self.gan_config_json_path = config.gan_config_json_path 18 | self.gan_config = Munch(json.load(open(self.gan_config_json_path))) 19 | print('-------- GAN Training Config --------') 20 | pp.pprint(self.gan_config) 21 | print('------------------------') 22 | 23 | # Instantiate the GAN model class. 24 | self.gan = self.instantiate_gan() 25 | 26 | # Load weights. 27 | self.gan.load() 28 | 29 | # Whether we want to sample real of generated images. 30 | self.generate_type = self.config.generate_type 31 | assert self.generate_type == "real" or self.generate_type == "generated", \ 32 | "Must be one of 'generated', or 'real'. " 33 | 34 | def __call__(self, size): 35 | assert size == self.gan_config.batch_size 36 | 37 | if self.generate_type == "generated": 38 | samples = self.gan.get_generated(size) 39 | elif self.generate_type == "real": 40 | samples = self.gan.get_real(size) 41 | 42 | return samples 43 | 44 | def instantiate_gan(self): 45 | if self.gan_config.gan_type == 'WGAN': 46 | gan = WGAN(self.gan_config) 47 | else: 48 | raise Exception("[!] 
There is no option for " + self.gan_config.gan_type) 49 | 50 | return gan 51 | -------------------------------------------------------------------------------- /lnets/models/activations/group_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | class GroupSort(nn.Module): 6 | 7 | def __init__(self, num_units, axis=-1): 8 | super(GroupSort, self).__init__() 9 | self.num_units = num_units 10 | self.axis = axis 11 | 12 | def forward(self, x): 13 | group_sorted = group_sort(x, self.num_units, self.axis) 14 | assert check_group_sorted(group_sorted, self.num_units, axis=self.axis) == 1, "GroupSort failed. " 15 | 16 | return group_sorted 17 | 18 | def extra_repr(self): 19 | return 'num_groups: {}'.format(self.num_units) 20 | 21 | 22 | def process_group_size(x, num_units, axis=-1): 23 | size = list(x.size()) 24 | num_channels = size[axis] 25 | 26 | if num_channels % num_units: 27 | raise ValueError('number of features({}) is not a ' 28 | 'multiple of num_units({})'.format(num_channels, num_units)) 29 | size[axis] = -1 30 | if axis == -1: 31 | size += [num_channels // num_units] 32 | else: 33 | size.insert(axis+1, num_channels // num_units) 34 | return size 35 | 36 | 37 | def group_sort(x, num_units, axis=-1): 38 | size = process_group_size(x, num_units, axis) 39 | grouped_x = x.view(*size) 40 | sort_dim = axis if axis == -1 else axis + 1 41 | sorted_grouped_x, _ = grouped_x.sort(dim=sort_dim) 42 | sorted_x = sorted_grouped_x.view(*list(x.shape)) 43 | 44 | return sorted_x 45 | 46 | 47 | def check_group_sorted(x, num_units, axis=-1): 48 | size = process_group_size(x, num_units, axis) 49 | 50 | x_np = x.cpu().data.numpy() 51 | x_np = x_np.reshape(*size) 52 | axis = axis if axis == -1 else axis + 1 53 | x_np_diff = np.diff(x_np, axis=axis) 54 | 55 | # Return 1 iff all elements are increasing. 
56 | if np.sum(x_np_diff < 0) > 0: 57 | return 0 58 | else: 59 | return 1 -------------------------------------------------------------------------------- /lnets/data/data_transforms.py: -------------------------------------------------------------------------------- 1 | import torchvision.transforms as transforms 2 | 3 | 4 | def get_data_transforms(config): 5 | # train_transform = None 6 | test_transform = None 7 | 8 | if config.data.transform.type == 'cifar': 9 | train_transform, test_transform = get_cifar_transform(config) 10 | elif config.data.transform.type == 'imagenet': 11 | train_transform, test_transform = get_imagenet_transform(config) 12 | else: 13 | train_transform = transforms.ToTensor() 14 | 15 | # Make sure to turn the input images into PyTorch tensors. 16 | if test_transform is None: 17 | test_transform = transforms.ToTensor() 18 | 19 | return train_transform, test_transform 20 | 21 | 22 | def get_cifar_transform(config): 23 | normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], 24 | std=[0.2023, 0.1994, 0.2010]) 25 | train_transform = transforms.Compose([ 26 | transforms.RandomCrop(32, padding=4), 27 | transforms.RandomHorizontalFlip(), 28 | transforms.ToTensor(), 29 | normalize 30 | ]) 31 | test_transform = transforms.Compose([ 32 | transforms.ToTensor(), 33 | normalize 34 | ]) 35 | 36 | return train_transform, test_transform 37 | 38 | 39 | def get_imagenet_transform(config): 40 | normalize = transforms.Normalize(mean=config.data.transform.norm_mean, 41 | std=config.data.transform.norm_std) 42 | train_transform = transforms.Compose([ 43 | transforms.RandomResizedCrop(224), 44 | transforms.RandomHorizontalFlip(), 45 | transforms.ToTensor(), 46 | normalize, 47 | ]) 48 | test_transform = transforms.Compose([ 49 | transforms.Resize(256), 50 | transforms.CenterCrop(224), 51 | transforms.ToTensor(), 52 | normalize, 53 | ]) 54 | 55 | return train_transform, test_transform 56 | 
# --------------------------------------------------------------------------------
# /lnets/models/activations/maxout.py:
# --------------------------------------------------------------------------------
import torch

from lnets.models.activations.base_activation import Activation


class Maxout(Activation):
    """Maxout activation over contiguous groups of features.

    The ``axis`` dimension of the input is split into ``num_units`` contiguous
    groups and every group is replaced by its maximum, so the output keeps
    ``num_units`` entries along ``axis``.

    Args:
        num_units: Number of groups (and therefore of output features) along
            ``axis``. The input size along ``axis`` must be a multiple of it.
        axis: Dimension that holds the features. Negative values count from
            the end.
    """

    def __init__(self, num_units, axis=-1):
        super(Maxout, self).__init__()
        self.num_units = num_units
        self.axis = axis

    def forward(self, x):
        """Return the per-group maxima of ``x`` along ``self.axis``."""
        return maxout(x, self.num_units, self.axis)

    def extra_repr(self):
        return 'num_units: {}'.format(self.num_units)


class MaxMin(Activation):
    """MaxMin activation: per-group maxima followed by per-group minima.

    The input is grouped exactly as in :class:`Maxout`. The output holds the
    ``num_units`` group maxima followed by the ``num_units`` group minima, so
    it has ``2 * num_units`` entries along ``axis``.

    Args:
        num_units: Number of groups along ``axis``. The input size along
            ``axis`` must be a multiple of it.
        axis: Dimension that holds the features. Negative values count from
            the end.
    """

    def __init__(self, num_units, axis=-1):
        super(MaxMin, self).__init__()
        self.num_units = num_units
        self.axis = axis

    def forward(self, x):
        """Return ``cat(group_max(x), group_min(x))`` along ``self.axis``."""
        maxes = maxout(x, self.num_units, self.axis)
        mins = minout(x, self.num_units, self.axis)
        # Concatenate along the grouped axis itself. The previous hard-coded
        # ``dim=1`` only coincided with it for 2-D inputs or ``axis == 1``;
        # for e.g. a (batch, seq, features) input with ``axis=-1`` it stacked
        # the two halves along the wrong dimension.
        return torch.cat((maxes, mins), dim=self.axis)

    def extra_repr(self):
        return 'num_units: {}'.format(self.num_units)


def _normalize_axis(x, axis):
    """Map a possibly negative ``axis`` onto ``[0, x.dim())``.

    Raises:
        IndexError: If ``axis`` does not address a dimension of ``x``.
    """
    ndim = x.dim()
    if not -ndim <= axis < ndim:
        raise IndexError('axis {} is out of range for a tensor with {} '
                         'dimension(s)'.format(axis, ndim))
    return axis % ndim


def process_maxmin_size(x, num_units, axis=-1):
    """Build the view shape that splits ``axis`` into groups.

    The returned list equals ``x.size()`` except that the entry at ``axis`` is
    ``-1`` (it resolves to ``num_units`` when used in ``view``) and a new
    entry of ``num_channels // num_units`` (the group length) is inserted
    directly after it.

    Raises:
        ValueError: If the size of ``x`` along ``axis`` is not a multiple of
            ``num_units``.
        IndexError: If ``axis`` is out of range for ``x``.
    """
    size = list(x.size())
    # Work with a non-negative axis so that every negative axis (not only -1)
    # places the group dimension right after the feature dimension.
    axis = _normalize_axis(x, axis)
    num_channels = size[axis]

    if num_channels % num_units:
        raise ValueError('number of features({}) is not a '
                         'multiple of num_units({})'.format(num_channels, num_units))
    size[axis] = -1
    size.insert(axis + 1, num_channels // num_units)
    return size


def _grouped_view(x, num_units, axis):
    """Return ``x`` viewed with an extra group dimension, and that dimension's index."""
    axis = _normalize_axis(x, axis)
    grouped = x.view(*process_maxmin_size(x, num_units, axis))
    return grouped, axis + 1


def maxout(x, num_units, axis=-1):
    """Return the maximum of each of the ``num_units`` groups along ``axis``."""
    grouped, group_dim = _grouped_view(x, num_units, axis)
    # torch.max with a dim returns (values, indices); only the values are used.
    return torch.max(grouped, group_dim)[0]


def minout(x, num_units, axis=-1):
    """Return the minimum of each of the ``num_units`` groups along ``axis``."""
    grouped, group_dim = _grouped_view(x, num_units, axis)
    # torch.min with a dim returns (values, indices); only the values are used.
    return torch.min(grouped, group_dim)[0]

-------------------------------------------------------------------------------- /lnets/models/layers/conv/l_inf_projected_conv2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.utils import _pair 3 | import torch.nn.functional as F 4 | 5 | from lnets.models.layers.conv.base_conv2d import BaseConv2D 6 | from lnets.utils.math.projections import get_weight_signs, get_linf_projection_threshold 7 | 8 | 9 | class LInfProjectedConv2D(BaseConv2D): 10 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 11 | padding=0, dilation=1, groups=1, bias=True, config=None): 12 | kernel_size = _pair(kernel_size) 13 | stride = _pair(stride) 14 | padding = _pair(padding) 15 | dilation = _pair(dilation) 16 | super(BaseConv2D, self).__init__( 17 | in_channels, out_channels, kernel_size, stride, padding, dilation, 18 | False, _pair(0), groups, bias) 19 | 20 | self._set_config(config) 21 | self.original_shape = self.weight.shape 22 | self.out_channels = out_channels 23 | 24 | if stride == 1 or stride == [1, 1]: 25 | print("BEWARE: Norm is not being preserved due to stride > 1. ") 26 | 27 | def forward(self, x): 28 | # Reshape and put in a matrix form. 29 | flattened_weights = self.conv_form_to_matrix_form(self.weight, (self.out_channels, -1)) 30 | 31 | # Orthonormalize. The scaling makes sure the singular values of the matrix are constrained by 1. 32 | thresholds = get_linf_projection_threshold(flattened_weights, self.config.cuda) 33 | signs = get_weight_signs(flattened_weights) 34 | flattened_projected_weights = signs * torch.clamp(torch.abs(flattened_weights) - thresholds.unsqueeze(-1), 35 | min=torch.tensor(0).float()) 36 | 37 | # Reshape back. 
38 | projected_weights = self.matrix_form_to_conv_form(flattened_projected_weights, self.original_shape) 39 | 40 | return F.conv2d(x, projected_weights, self.bias, self.stride, self.padding, self.dilation, self.groups) 41 | -------------------------------------------------------------------------------- /lnets/tasks/gan/data/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torch.utils.data import DataLoader 4 | from torchvision import datasets, transforms 5 | 6 | 7 | def dataloader(dataset, input_size, batch_size, data_root="data", split='train'): 8 | transform = transforms.Compose([transforms.Resize((input_size, input_size)), transforms.ToTensor(), 9 | transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) 10 | if dataset == 'mnist': 11 | data_path = os.path.join(data_root, "mnist") 12 | data_loader = DataLoader( 13 | datasets.MNIST(data_path, train=True, download=True, transform=transform), 14 | batch_size=batch_size, shuffle=True) 15 | elif dataset == 'fashion-mnist': 16 | data_path = os.path.join(data_root, "fashion-mnist") 17 | data_loader = DataLoader( 18 | datasets.FashionMNIST(data_path, train=True, download=True, transform=transform), 19 | batch_size=batch_size, shuffle=True) 20 | elif dataset == 'cifar10': 21 | data_path = os.path.join(data_root, "cifar10") 22 | data_loader = DataLoader( 23 | datasets.CIFAR10(data_path, train=True, download=True, transform=transform), 24 | batch_size=batch_size, shuffle=True) 25 | elif dataset == 'svhn': 26 | data_path = os.path.join(data_root, "svhn") 27 | data_loader = DataLoader( 28 | datasets.SVHN(data_path, split=split, download=True, transform=transform), 29 | batch_size=batch_size, shuffle=True) 30 | elif dataset == 'stl10': 31 | data_path = os.path.join(data_root, "stl10") 32 | data_loader = DataLoader( 33 | datasets.STL10(data_path, split=split, download=True, transform=transform), 34 | batch_size=batch_size, shuffle=True) 35 | 
elif dataset == 'lsun-bed': 36 | data_path = os.path.join(data_root, "lsun") 37 | data_loader = DataLoader( 38 | datasets.LSUN(data_path, classes=['bedroom_train'], transform=transform), 39 | batch_size=batch_size, shuffle=True) 40 | 41 | return data_loader 42 | -------------------------------------------------------------------------------- /lnets/models/utils/conversion.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from lnets.models.layers import DenseLinear, StandardLinear 4 | from lnets.models.layers import BjorckLinear 5 | from lnets.utils.math.projections import bjorck_orthonormalize 6 | 7 | 8 | def convert_to_bjorck_module(module_list, config): 9 | for i in range(len(module_list)): 10 | m = module_list[i] 11 | if isinstance(m, DenseLinear): 12 | new_linear = BjorckLinear(m.in_features, m.out_features, m.bias is not None, config) 13 | new_linear.weight.data.copy_(bjorck_orthonormalize(m.weight, iters=30)) 14 | new_linear.bias.data.copy_(m.bias) 15 | module_list[i] = new_linear 16 | if isinstance(m, nn.Sequential): 17 | module_list[i] = nn.Sequential(*convert_to_bjorck_module(list(m.children()), config)) 18 | return module_list 19 | 20 | 21 | def convert_from_bjorck_module(module_list, config): 22 | for i in range(len(module_list)): 23 | m = module_list[i] 24 | if isinstance(m, BjorckLinear): 25 | new_linear = StandardLinear(m.in_features, m.out_features, m.bias is not None, config) 26 | new_linear.weight = m.weight 27 | new_linear.bias = m.bias 28 | module_list[i] = new_linear 29 | if isinstance(m, nn.Sequential): 30 | module_list[i] = nn.Sequential(*convert_from_bjorck_module(list(m.children()), config)) 31 | return module_list 32 | 33 | 34 | def convert_model_to_bjorck(model, config): 35 | if not isinstance(model.model.model, nn.Sequential): 36 | raise Exception('Model type different. 
') 37 | 38 | module_list = convert_to_bjorck_module(list(model.model.model.children()), config) 39 | 40 | model.model.model = nn.Sequential(*module_list) 41 | return model 42 | 43 | 44 | def convert_model_from_bjorck(model, config): 45 | if not isinstance(model.model.model, nn.Sequential): 46 | raise Exception('Model type different. ') 47 | 48 | module_list = convert_from_bjorck_module(list(model.model.model.children()), config) 49 | 50 | model.model.model = nn.Sequential(*module_list) 51 | return model 52 | -------------------------------------------------------------------------------- /lnets/utils/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import sys 4 | import json 5 | 6 | 7 | class Logger(object): 8 | """ 9 | Base Logger object. 10 | Initializes the log directory and creates log files given by name in arguments. 11 | Can be used to append future log values to each file. 12 | """ 13 | 14 | def __init__(self, log_dir, *args): 15 | self.log_dir = log_dir 16 | 17 | try: 18 | os.makedirs(log_dir) 19 | except OSError as e: 20 | if e.errno != errno.EEXIST: 21 | raise 22 | 23 | with open(os.path.join(self.log_dir, 'cmd.txt'), 'w') as f: 24 | f.write(" ".join(sys.argv)) 25 | 26 | self.log_names = [a for a in args] 27 | for arg in self.log_names: 28 | setattr(self, 'log_{}'.format(arg), lambda epoch, value, name=arg: self.log(name, epoch, value)) 29 | self.init_logfile(arg) 30 | 31 | def log_config(self, config): 32 | with open(os.path.join(self.log_dir, 'config.json'), 'w') as f: 33 | json.dump(config, f) 34 | 35 | def init_logfile(self, name): 36 | fname = self.get_log_fname(name) 37 | 38 | with open(fname, 'w') as log_file: 39 | log_file.write("epoch,{}\n".format(name)) 40 | 41 | def get_log_fname(self, name): 42 | return os.path.join(self.log_dir, '{}.log'.format(name)) 43 | 44 | def log(self, name, epoch, value): 45 | if name not in self.log_names: 46 | self.init_logfile(name) 47 | 
self.log_names.append(name) 48 | fname = self.get_log_fname(name) 49 | 50 | with open(fname, 'a') as log_file: 51 | log_file.write("{},{}\n".format(epoch, value)) 52 | 53 | def log_test_value(self, name, value): 54 | test_name = 'test_' + name 55 | self.init_logfile(test_name) 56 | self.log(test_name, 0, value) 57 | 58 | def log_meters(self, prefix, state): 59 | """ 60 | Log the parameters tracked by the training. 61 | """ 62 | if 'epoch' in state: 63 | epoch = state['epoch'] 64 | else: 65 | epoch = 0 66 | for tag, meter in state['model'].meters.items(): 67 | file_id = '{}_{}'.format(prefix, tag) 68 | self.log(file_id, epoch, meter.value()[0]) 69 | -------------------------------------------------------------------------------- /lnets/utils/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import collections 4 | from munch import Munch 5 | import ast 6 | import shlex 7 | 8 | 9 | def update(d, u): 10 | for k, v in u.items(): 11 | if isinstance(v, collections.Mapping): 12 | d[k] = update(d.get(k, {}), v) 13 | else: 14 | if '+' in v: 15 | # This typing is a bit hacky 16 | # Assumes something is in the list 17 | v = [type(d[k][0])(x) for x in v.split('+')] 18 | try: 19 | d[k] = type(d[k])(v) 20 | except (TypeError, ValueError) as e: 21 | raise TypeError(e) # Types not compatible. 22 | except KeyError: 23 | d[k] = v # No matching key in dict. 
class ConfigParse(argparse.Action):
    """argparse action that turns ``a.b.c=value`` overrides into a nested dict.

    The option value is tokenised with ``shlex.split``, so several overrides
    are given as one whitespace-separated argument (quoted in a shell). Dotted
    keys create nested dictionaries. Values written as ``[...]`` are parsed
    with ``ast.literal_eval``; every other value is kept as a string.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        options_dict = {}
        for override in shlex.split(values):
            # Split on the first '=' only, so values may themselves contain '='.
            k, sep, v = override.partition('=')
            if not sep:
                raise ValueError(
                    "Config override '{}' is not of the form key=value".format(override))
            k_parts = k.split('.')
            dic = options_dict
            for key in k_parts[:-1]:
                dic = dic.setdefault(key, {})
            if v.startswith('[') and v.endswith(']'):
                v = ast.literal_eval(v)
            dic[k_parts[-1]] = v
        setattr(namespace, self.dest, options_dict)


def get_config_overrides():
    """Parse the command line: a base config path plus optional ``-o`` overrides.

    Returns:
        The argparse namespace with ``config`` (path to the base config file)
        and ``o`` (nested override dict, or ``None`` when ``-o`` is absent).
    """
    parser = argparse.ArgumentParser(description='Experiments with Lipschitz networks')
    parser.add_argument('config', help='Base config file')
    # The help text describes the syntax ConfigParse actually accepts
    # (whitespace-separated tokens inside one argument).
    parser.add_argument('-o', action=ConfigParse,
                        help='Config option overrides, whitespace-separated inside one argument, '
                             'e.g. -o "optim.lr_init=1.0 optim.lr_decay=0.1"')
    return parser.parse_args()


def process_config(verbose=True):
    """Load the base JSON config, apply command-line overrides and wrap it in a Munch.

    Args:
        verbose: When true, pretty-print the final configuration.

    Returns:
        The configuration as a ``Munch`` object (attribute-style access).
    """
    args = get_config_overrides()
    # Context manager so the config file handle is closed deterministically.
    with open(args.config) as config_file:
        config = json.load(config_file)
    if args.o is not None:
        print(args.o)
        config = update(config, args.o)

    if verbose:
        import pprint
        pp = pprint.PrettyPrinter()
        print('-------- Config --------')
        pp.pprint(config)
        print('------------------------')

    # Use a munch object for ease of access. Munch is almost the same as Bunch, but better integrated with Python 3.
    config = Munch.fromDict(config)

    return config
"save_best": false 90 | }, 91 | 92 | "cuda": false, 93 | "task": "classify", 94 | "output_root": "out/classification/small_mnist", 95 | "exp_name": "lipschitz" 96 | } -------------------------------------------------------------------------------- /lnets/tasks/classification/configs/small_mnist/lenet_standard.json: -------------------------------------------------------------------------------- 1 | { 2 | "seed": 0, 3 | "model": { 4 | "pretrained_best_path": null, 5 | "name": "lenet_classify", 6 | "activation": "relu", 7 | "linear": { 8 | "type": "standard", 9 | "safe_scaling": false, 10 | "power_iters": 2, 11 | "bjorck_beta": 0.5, 12 | "bjorck_iter": 5, 13 | "bjorck_order": 1, 14 | "bias": true 15 | }, 16 | "dropout_on": false, 17 | "l_constant": 1.0, 18 | "output_dim": 10, 19 | 20 | "per_epoch_proj": { 21 | "turned_on": false, 22 | "every_n_epochs": 100000000, 23 | "type": "l_2", 24 | "bjorck_beta": 0.5, 25 | "bjorck_iter": 20, 26 | "bjorck_order": 1, 27 | "reset_optimizer": false 28 | }, 29 | 30 | "per_update_proj": { 31 | "turned_on": false, 32 | "type": "l_2", 33 | "bjorck_beta": 0.5, 34 | "bjorck_iter": 12, 35 | "bjorck_order": 1 36 | } 37 | }, 38 | 39 | "optim": { 40 | "optimizer": "sgd", 41 | "lr_schedule": { 42 | "name": "step", 43 | "lr_init": 0.01, 44 | "lr_decay": 0.1, 45 | "milestones": [ 46 | 100, 47 | 150 48 | ], 49 | "last_epoch": -1 50 | }, 51 | "epochs": 200, 52 | "batch_size": 32, 53 | "momentum": 0.9, 54 | "betas": [ 55 | 0.0, 56 | 0.9, 57 | 0.99 58 | ], 59 | "wdecay": 0.0, 60 | "criterion": { 61 | "tag": "loss", 62 | "minmax": "min" 63 | }, 64 | "patience": 250, 65 | "max_grad_norm": 1e8 66 | }, 67 | 68 | "data": { 69 | "name": "mnist", 70 | "root": "data", 71 | "indices_path": "./data/small_mnist/mnist", 72 | "per_class_count": 30, 73 | "transform": { 74 | "type": "mnist" 75 | }, 76 | "validation": false, 77 | "train_size": 0.9, 78 | "input_dim": 784, 79 | "im_height": 28, 80 | "im_width": 28, 81 | "in_channels": 1, 82 | "num_workers": 0, 83 | 
class ParsevalInfoGanDiscriminator(Architecture):
    """Discriminator laid out like the one in infoGAN (https://arxiv.org/abs/1606.03657).

    Architecture: (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S, with the BatchNorm
    layers removed and a MaxMin activation after every hidden layer.

    Args:
        input_dim: Number of input channels passed to the first convolution.
        output_dim: Width of the final linear layer.
        input_size: Spatial side length of the input; the flattened feature
            count is derived from ``input_size // 4``.
        parseval: Accepted for interface compatibility; not read by this constructor.
        config: Forwarded to every StandardConv2d / StandardLinear layer.
    """

    def __init__(self, input_dim=1, output_dim=1, input_size=32, parseval=True, config=None):
        super(ParsevalInfoGanDiscriminator, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.input_size = input_size

        # Feature count after the two convolutions: 128 channels on an
        # (input_size // 4) x (input_size // 4) grid.
        # NOTE(review): presumably each conv is kernel 4 / stride 2 / padding 1 - confirm in StandardConv2d.
        flat_features = 128 * (self.input_size // 4) * (self.input_size // 4)

        self.model = nn.Sequential(
            # Conv + activation.
            StandardConv2d(self.input_dim, 64, 4, 2, 1, config=config),
            MaxMin(num_units=32, axis=1),
            # Conv + activation.
            StandardConv2d(64, 128, 4, 2, 1, config=config),
            MaxMin(num_units=64, axis=1),
            # Flatten to (batch, flat_features).
            InfoGanFlatten(input_size=input_size),
            # Linear + activation.
            StandardLinear(flat_features, 1024, config=config),
            MaxMin(num_units=512),
            # Output layer.
            StandardLinear(1024, self.output_dim, config=config),
        )

        initialize_weights(self)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.model(x)
def initialize_weights(net):
    """Re-initialise every Conv2d, ConvTranspose2d and Linear module inside ``net``.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.02; biases are set to zero. Layers created without a bias
    (``bias=False``) are handled - only their weights are touched.

    Args:
        net: Any ``nn.Module``; all of its sub-modules are visited.
    """
    for m in net.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            m.weight.data.normal_(0, 0.02)
            # ``bias`` is None when the layer was built with bias=False.
            if m.bias is not None:
                m.bias.data.zero_()


class InfoGanFlatten(nn.Module):
    """Flatten a conv feature map to ``(batch, 128 * (input_size // 4) ** 2)``.

    Args:
        input_size: Spatial side length of the network input; the incoming
            feature map is expected to hold 128 * (input_size // 4) ** 2
            values per sample.
    """

    def __init__(self, input_size):
        super(InfoGanFlatten, self).__init__()
        self.input_size = input_size

    def forward(self, x):
        side = self.input_size // 4
        return x.view(-1, 128 * side * side)
class AggMo(Optimizer):
    r"""Implements Aggregated Momentum Gradient Descent.
    https://arxiv.org/abs/1804.00325

    One velocity buffer is kept per damping coefficient in ``momentum``; each
    step subtracts ``lr / len(momentum)`` times every buffer from the parameter.

    Arguments:
        params: Iterable of parameters or dicts defining parameter groups.
        lr: Learning rate.
        momentum: Sequence of damping coefficients (one buffer each).
        weight_decay: L2 penalty coefficient added to the gradient.
    """

    def __init__(self, params, lr=required, momentum=(0.0, 0.9, 0.99, 0.999), weight_decay=0):
        # Copy into a fresh list: avoids a shared mutable default and keeps the
        # param groups independent of the caller's sequence.
        defaults = dict(lr=lr, momentum=list(momentum), weight_decay=weight_decay)
        super(AggMo, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(AggMo, self).__setstate__(state)

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            total_mom = float(len(momentum))

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    # Out-of-place so the stored gradient is not polluted with
                    # the decay term; keyword ``alpha`` replaces the deprecated
                    # positional add_(Number, Tensor) overload.
                    d_p = d_p.add(p.data, alpha=weight_decay)
                buffers = self.state[p].setdefault('momentum_buffer', {})
                for beta in momentum:
                    buf = buffers.get(beta)
                    if buf is None:
                        # Created lazily, which also covers betas introduced
                        # later through set_momentum().
                        buf = buffers[beta] = torch.zeros_like(p.data)
                    buf.mul_(beta).add_(d_p)
                    p.data.sub_(buf, alpha=group['lr'] / total_mom)
        return loss

    def zero_momentum_buffers(self):
        """Reset every momentum buffer of every parameter to zeros."""
        for group in self.param_groups:
            momentum = group['momentum']
            for p in group['params']:
                param_state = self.state[p]
                param_state['momentum_buffer'] = {
                    beta: torch.zeros_like(p.data) for beta in momentum
                }

    def set_momentum(self, momentum):
        """Replace the damping coefficients of all parameter groups."""
        for group in self.param_groups:
            group['momentum'] = momentum

    def update_hparam(self, name, value):
        """Set hyper-parameter ``name`` to ``value`` in all parameter groups."""
        for param_group in self.param_groups:
            param_group[name] = value
true 19 | }, 20 | "layers": [ 21 | 1024, 22 | 1024, 23 | 1024, 24 | 10 25 | ], 26 | "groupings": [ 27 | 2, 28 | 2, 29 | 2, 30 | 1 31 | ], 32 | "l_constant": 1.0, 33 | "per_epoch_proj": { 34 | "turned_on": false, 35 | "every_n_epochs": 1, 36 | "type": "l_2", 37 | "bjorck_beta": 0.5, 38 | "bjorck_iter": 20, 39 | "bjorck_order": 1, 40 | "reset_optimizer": false 41 | }, 42 | 43 | "per_update_proj": { 44 | "turned_on": false, 45 | "type": "l_2", 46 | "bjorck_beta": 0.5, 47 | "bjorck_iter": 1, 48 | "bjorck_order": 1 49 | } 50 | }, 51 | "optim": { 52 | "optimizer": "adam", 53 | "lr_schedule": { 54 | "name": "step", 55 | "lr_init": 0.001, 56 | "lr_decay": 0.1, 57 | "milestones": [ 58 | 60, 59 | 100, 60 | 150 61 | ], 62 | "last_epoch": -1 63 | }, 64 | "epochs": 200, 65 | "batch_size": 128, 66 | "momentum": 0.9, 67 | "betas": [ 68 | 0.0, 69 | 0.9, 70 | 0.99 71 | ], 72 | "wdecay": 0.0, 73 | "criterion": { 74 | "tag": "acc", 75 | "minmax": "max" 76 | }, 77 | "patience": 50, 78 | "max_grad_norm": 1e8 79 | }, 80 | "data": { 81 | "name": "mnist", 82 | "root": "data", 83 | "indices_path": null, 84 | "per_class_count": 30, 85 | "transform": { 86 | "type": "mnist" 87 | }, 88 | "validation": true, 89 | "train_size": 0.9, 90 | "input_dim": 784, 91 | "im_height": 28, 92 | "im_width": 28, 93 | "num_workers": 0, 94 | "class_count": 10 95 | }, 96 | "logging": { 97 | "report_freq": 1, 98 | "save_model": false, 99 | "save_best": true 100 | } 101 | } -------------------------------------------------------------------------------- /lnets/tasks/classification/configs/standard/fc_classification_bjorck.json: -------------------------------------------------------------------------------- 1 | { 2 | "cuda": false, 3 | "task": "classify", 4 | "output_root": "out/classification/mnist", 5 | "exp_name": "lipschitz-bjorck", 6 | "seed": 0, 7 | "model": { 8 | "pretrained_best_path": null, 9 | "name": "classify_fc", 10 | "activation": "relu", 11 | "linear": { 12 | "type": "bjorck", 13 | "parseval": false, 
14 | "safe_scaling": false, 15 | "bjorck_beta": 0.5, 16 | "bjorck_iter": 20, 17 | "bjorck_order": 1, 18 | "bias": true 19 | }, 20 | "layers": [ 21 | 1024, 22 | 1024, 23 | 1024, 24 | 10 25 | ], 26 | "groupings": [ 27 | 2, 28 | 2, 29 | 2, 30 | 1 31 | ], 32 | "l_constant": 1.0, 33 | "per_epoch_proj": { 34 | "turned_on": false, 35 | "every_n_epochs": 1, 36 | "type": "l_2", 37 | "bjorck_beta": 0.5, 38 | "bjorck_iter": 20, 39 | "bjorck_order": 1, 40 | "reset_optimizer": false 41 | }, 42 | 43 | "per_update_proj": { 44 | "turned_on": false, 45 | "type": "l_2", 46 | "bjorck_beta": 0.5, 47 | "bjorck_iter": 1, 48 | "bjorck_order": 1 49 | } 50 | }, 51 | "optim": { 52 | "optimizer": "adam", 53 | "lr_schedule": { 54 | "name": "step", 55 | "lr_init": 0.001, 56 | "lr_decay": 0.1, 57 | "milestones": [ 58 | 60, 59 | 100, 60 | 150 61 | ], 62 | "last_epoch": -1 63 | }, 64 | "epochs": 200, 65 | "batch_size": 128, 66 | "momentum": 0.9, 67 | "betas": [ 68 | 0.0, 69 | 0.9, 70 | 0.99 71 | ], 72 | "wdecay": 0.0, 73 | "criterion": { 74 | "tag": "acc", 75 | "minmax": "max" 76 | }, 77 | "patience": 50, 78 | "max_grad_norm": 1e8 79 | }, 80 | "data": { 81 | "name": "mnist", 82 | "root": "data", 83 | "indices_path": null, 84 | "per_class_count": 30, 85 | "transform": { 86 | "type": "mnist" 87 | }, 88 | "validation": true, 89 | "train_size": 0.9, 90 | "input_dim": 784, 91 | "im_height": 28, 92 | "im_width": 28, 93 | "num_workers": 0, 94 | "class_count": 10 95 | }, 96 | "logging": { 97 | "report_freq": 1, 98 | "save_model": false, 99 | "save_best": true 100 | } 101 | } -------------------------------------------------------------------------------- /lnets/tasks/classification/configs/standard/fc_classification_l_inf.json: -------------------------------------------------------------------------------- 1 | { 2 | "cuda": false, 3 | "task": "classify", 4 | "output_root": "out/classification/mnist", 5 | "exp_name": "lipschitz-l-inf", 6 | "seed": 0, 7 | "model": { 8 | "pretrained_best_path": null, 9 | 
"name": "classify_fc", 10 | "activation": "relu", 11 | "linear": { 12 | "type": "l_inf_projected", 13 | "parseval": false, 14 | "safe_scaling": false, 15 | "bjorck_beta": 0.5, 16 | "bjorck_iter": 10, 17 | "bjorck_order": 1, 18 | "bias": true 19 | }, 20 | "layers": [ 21 | 1024, 22 | 1024, 23 | 1024, 24 | 10 25 | ], 26 | "groupings": [ 27 | 2, 28 | 2, 29 | 2, 30 | 1 31 | ], 32 | "l_constant": 1.0, 33 | "per_epoch_proj": { 34 | "turned_on": false, 35 | "every_n_epochs": 1, 36 | "type": "l_2", 37 | "bjorck_beta": 0.5, 38 | "bjorck_iter": 20, 39 | "bjorck_order": 1, 40 | "reset_optimizer": false 41 | }, 42 | 43 | "per_update_proj": { 44 | "turned_on": false, 45 | "type": "l_2", 46 | "bjorck_beta": 0.5, 47 | "bjorck_iter": 1, 48 | "bjorck_order": 1 49 | } 50 | }, 51 | "optim": { 52 | "optimizer": "adam", 53 | "lr_schedule": { 54 | "name": "step", 55 | "lr_init": 0.001, 56 | "lr_decay": 0.1, 57 | "milestones": [ 58 | 60, 59 | 100, 60 | 150 61 | ], 62 | "last_epoch": -1 63 | }, 64 | "epochs": 200, 65 | "batch_size": 128, 66 | "momentum": 0.9, 67 | "betas": [ 68 | 0.0, 69 | 0.9, 70 | 0.99 71 | ], 72 | "wdecay": 0.0, 73 | "criterion": { 74 | "tag": "acc", 75 | "minmax": "max" 76 | }, 77 | "patience": 50, 78 | "max_grad_norm": 1e8 79 | }, 80 | "data": { 81 | "name": "mnist", 82 | "root": "data", 83 | "indices_path": null, 84 | "per_class_count": 30, 85 | "transform": { 86 | "type": "mnist" 87 | }, 88 | "validation": true, 89 | "train_size": 0.9, 90 | "input_dim": 784, 91 | "im_height": 28, 92 | "im_width": 28, 93 | "num_workers": 0, 94 | "class_count": 10 95 | }, 96 | "logging": { 97 | "report_freq": 1, 98 | "save_model": false, 99 | "save_best": true 100 | } 101 | } -------------------------------------------------------------------------------- /lnets/tasks/classification/configs/standard/fc_classification_parseval.json: -------------------------------------------------------------------------------- 1 | { 2 | "cuda": false, 3 | "task": "classify", 4 | "output_root": 
"out/classification/mnist", 5 | "exp_name": "lipschitz-parseval", 6 | "seed": 0, 7 | "model": { 8 | "pretrained_best_path": null, 9 | "name": "classify_fc", 10 | "activation": "relu", 11 | "linear": { 12 | "type": "standard", 13 | "parseval": false, 14 | "safe_scaling": false, 15 | "bjorck_beta": 0.5, 16 | "bjorck_iter": 10, 17 | "bjorck_order": 1, 18 | "bias": true 19 | }, 20 | "layers": [ 21 | 1024, 22 | 1024, 23 | 1024, 24 | 10 25 | ], 26 | "groupings": [ 27 | 2, 28 | 2, 29 | 2, 30 | 1 31 | ], 32 | "l_constant": 1.0, 33 | "per_epoch_proj": { 34 | "turned_on": false, 35 | "every_n_epochs": 1, 36 | "type": "l_2", 37 | "bjorck_beta": 0.5, 38 | "bjorck_iter": 20, 39 | "bjorck_order": 1, 40 | "reset_optimizer": false 41 | }, 42 | 43 | "per_update_proj": { 44 | "turned_on": true, 45 | "type": "l_2", 46 | "bjorck_beta": 0.5, 47 | "bjorck_iter": 1, 48 | "bjorck_order": 1 49 | } 50 | }, 51 | "optim": { 52 | "optimizer": "adam", 53 | "lr_schedule": { 54 | "name": "step", 55 | "lr_init": 0.001, 56 | "lr_decay": 0.1, 57 | "milestones": [ 58 | 60, 59 | 100, 60 | 150 61 | ], 62 | "last_epoch": -1 63 | }, 64 | "epochs": 200, 65 | "batch_size": 128, 66 | "momentum": 0.9, 67 | "betas": [ 68 | 0.0, 69 | 0.9, 70 | 0.99 71 | ], 72 | "wdecay": 0.0, 73 | "criterion": { 74 | "tag": "acc", 75 | "minmax": "max" 76 | }, 77 | "patience": 50, 78 | "max_grad_norm": 1e8 79 | }, 80 | "data": { 81 | "name": "mnist", 82 | "root": "data", 83 | "indices_path": null, 84 | "per_class_count": 30, 85 | "transform": { 86 | "type": "mnist" 87 | }, 88 | "validation": true, 89 | "train_size": 0.9, 90 | "input_dim": 784, 91 | "im_height": 28, 92 | "im_width": 28, 93 | "num_workers": 0, 94 | "class_count": 10 95 | }, 96 | "logging": { 97 | "report_freq": 1, 98 | "save_model": false, 99 | "save_best": true 100 | } 101 | } -------------------------------------------------------------------------------- /lnets/tasks/classification/configs/standard/fc_classification_dropout.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "cuda": false, 3 | "task": "classify", 4 | "output_root": "out/classification/mnist", 5 | "exp_name": "lipschitz-dropout", 6 | "seed": 0, 7 | "model": { 8 | "pretrained_best_path": null, 9 | "name": "classify_fc_dropout", 10 | "activation": "relu", 11 | "linear": { 12 | "type": "standard", 13 | "parseval": false, 14 | "safe_scaling": false, 15 | "bjorck_beta": 0.5, 16 | "bjorck_iter": 10, 17 | "bjorck_order": 1, 18 | "bias": true 19 | }, 20 | "layers": [ 21 | 1024, 22 | 1024, 23 | 1024, 24 | 10 25 | ], 26 | "groupings": [ 27 | 2, 28 | 2, 29 | 2, 30 | 1 31 | ], 32 | "l_constant": 1.0, 33 | "per_epoch_proj": { 34 | "turned_on": false, 35 | "every_n_epochs": 1, 36 | "type": "l_2", 37 | "bjorck_beta": 0.5, 38 | "bjorck_iter": 20, 39 | "bjorck_order": 1, 40 | "reset_optimizer": false 41 | }, 42 | 43 | "per_update_proj": { 44 | "turned_on": false, 45 | "type": "l_2", 46 | "bjorck_beta": 0.5, 47 | "bjorck_iter": 1, 48 | "bjorck_order": 1 49 | } 50 | }, 51 | "optim": { 52 | "optimizer": "adam", 53 | "lr_schedule": { 54 | "name": "step", 55 | "lr_init": 0.001, 56 | "lr_decay": 0.1, 57 | "milestones": [ 58 | 60, 59 | 100, 60 | 150 61 | ], 62 | "last_epoch": -1 63 | }, 64 | "epochs": 200, 65 | "batch_size": 128, 66 | "momentum": 0.9, 67 | "betas": [ 68 | 0.0, 69 | 0.9, 70 | 0.99 71 | ], 72 | "wdecay": 0.0, 73 | "criterion": { 74 | "tag": "acc", 75 | "minmax": "max" 76 | }, 77 | "patience": 50, 78 | "max_grad_norm": 1e8 79 | }, 80 | "data": { 81 | "name": "mnist", 82 | "root": "data", 83 | "indices_path": null, 84 | "per_class_count": 30, 85 | "transform": { 86 | "type": "mnist" 87 | }, 88 | "validation": true, 89 | "train_size": 0.9, 90 | "input_dim": 784, 91 | "im_height": 28, 92 | "im_width": 28, 93 | "num_workers": 0, 94 | "class_count": 10 95 | }, 96 | "logging": { 97 | "report_freq": 1, 98 | "save_model": false, 99 | "save_best": true 100 | } 101 | } 
-------------------------------------------------------------------------------- /lnets/tasks/classification/configs/standard/fc_classification_spec_jac.json: -------------------------------------------------------------------------------- 1 | { 2 | "cuda": false, 3 | "task": "classify", 4 | "output_root": "out/classification/mnist", 5 | "exp_name": "lipschitz-spec-jac", 6 | "seed": 0, 7 | "model": { 8 | "pretrained_best_path": null, 9 | "name": "classify_fc_spec_jac", 10 | "activation": "relu", 11 | "linear": { 12 | "type": "standard", 13 | "parseval": false, 14 | "safe_scaling": false, 15 | "bjorck_beta": 0.5, 16 | "bjorck_iter": 10, 17 | "bjorck_order": 1, 18 | "bias": true 19 | }, 20 | "layers": [ 21 | 1024, 22 | 1024, 23 | 1024, 24 | 10 25 | ], 26 | "groupings": [ 27 | 2, 28 | 2, 29 | 2, 30 | 1 31 | ], 32 | "sn_reg": 0.05, 33 | "l_constant": 1.0, 34 | "per_epoch_proj": { 35 | "turned_on": false, 36 | "every_n_epochs": 1, 37 | "type": "l_2", 38 | "bjorck_beta": 0.5, 39 | "bjorck_iter": 20, 40 | "bjorck_order": 1, 41 | "reset_optimizer": false 42 | }, 43 | 44 | "per_update_proj": { 45 | "turned_on": false, 46 | "type": "l_2", 47 | "bjorck_beta": 0.5, 48 | "bjorck_iter": 1, 49 | "bjorck_order": 1 50 | } 51 | }, 52 | "optim": { 53 | "optimizer": "adam", 54 | "lr_schedule": { 55 | "name": "step", 56 | "lr_init": 0.001, 57 | "lr_decay": 0.1, 58 | "milestones": [ 59 | 60, 60 | 100, 61 | 150 62 | ], 63 | "last_epoch": -1 64 | }, 65 | "epochs": 200, 66 | "batch_size": 128, 67 | "momentum": 0.9, 68 | "betas": [ 69 | 0.0, 70 | 0.9, 71 | 0.99 72 | ], 73 | "wdecay": 0.0, 74 | "criterion": { 75 | "tag": "acc", 76 | "minmax": "max" 77 | }, 78 | "patience": 50, 79 | "max_grad_norm": 1e8 80 | }, 81 | "data": { 82 | "name": "mnist", 83 | "root": "data", 84 | "indices_path": null, 85 | "per_class_count": 30, 86 | "transform": { 87 | "type": "mnist" 88 | }, 89 | "validation": true, 90 | "train_size": 0.9, 91 | "input_dim": 784, 92 | "im_height": 28, 93 | "im_width": 28, 94 | 
"num_workers": 0, 95 | "class_count": 10 96 | }, 97 | "logging": { 98 | "report_freq": 1, 99 | "save_model": false, 100 | "save_best": true 101 | } 102 | } -------------------------------------------------------------------------------- /lnets/tasks/classification/configs/standard/fc_classification_spec_norm.json: -------------------------------------------------------------------------------- 1 | { 2 | "cuda": false, 3 | "task": "classify", 4 | "output_root": "out/classification/mnist", 5 | "exp_name": "lipschitz-spec-norm", 6 | "seed": 0, 7 | "model": { 8 | "pretrained_best_path": null, 9 | "name": "classify_fc", 10 | "activation": "relu", 11 | "linear": { 12 | "type": "spectral_normal", 13 | "parseval": false, 14 | "safe_scaling": false, 15 | "bjorck_beta": 0.5, 16 | "bjorck_iter": 10, 17 | "bjorck_order": 1, 18 | "power_iters": 1, 19 | "bias": true 20 | }, 21 | "layers": [ 22 | 1024, 23 | 1024, 24 | 1024, 25 | 10 26 | ], 27 | "groupings": [ 28 | 2, 29 | 2, 30 | 2, 31 | 1 32 | ], 33 | "l_constant": 1.0, 34 | "per_epoch_proj": { 35 | "turned_on": false, 36 | "every_n_epochs": 1, 37 | "type": "l_2", 38 | "bjorck_beta": 0.5, 39 | "bjorck_iter": 20, 40 | "bjorck_order": 1, 41 | "reset_optimizer": false 42 | }, 43 | 44 | "per_update_proj": { 45 | "turned_on": false, 46 | "type": "l_2", 47 | "bjorck_beta": 0.5, 48 | "bjorck_iter": 1, 49 | "bjorck_order": 1 50 | } 51 | }, 52 | "optim": { 53 | "optimizer": "adam", 54 | "lr_schedule": { 55 | "name": "step", 56 | "lr_init": 0.001, 57 | "lr_decay": 0.1, 58 | "milestones": [ 59 | 60, 60 | 100, 61 | 150 62 | ], 63 | "last_epoch": -1 64 | }, 65 | "epochs": 200, 66 | "batch_size": 128, 67 | "momentum": 0.9, 68 | "betas": [ 69 | 0.0, 70 | 0.9, 71 | 0.99 72 | ], 73 | "wdecay": 0.0, 74 | "criterion": { 75 | "tag": "acc", 76 | "minmax": "max" 77 | }, 78 | "patience": 50, 79 | "max_grad_norm": 1e8 80 | }, 81 | "data": { 82 | "name": "mnist", 83 | "root": "data", 84 | "indices_path": null, 85 | "per_class_count": 30, 86 | 
class MultiSphericalShell(BaseDistrib):
    """Sampler for points on spherical shells whose centres lie on the first axis.

    Reads ``dim``, ``empty_dim``, ``center_x``, ``radius`` and
    ``reshape_to_grid`` from ``config``. Each sample lies at distance
    ``radius`` from a centre whose first coordinate is drawn uniformly from
    ``center_x`` and whose remaining coordinates are zero.
    """

    def __init__(self, config):
        super(MultiSphericalShell, self).__init__(config)

        # Shell geometry.
        self.dim = config.dim
        self.empty_dim = config.empty_dim
        self.center_x = config.center_x
        self.radius = config.radius

        # Optional (height, width) pair used to reshape every sample into a grid.
        self.reshape_to_grid = config.reshape_to_grid

        assert self.dim > 0, "Dimensionality must be larger than 0. "
        assert self.empty_dim >= 0, "The number of empty dimensions must be at least 0. "
        assert self.radius > 0, "Radius of the sphere must be strictly larger than 0. "

    def __call__(self, size):
        """Draw ``size`` samples.

        Returns an array of shape ``(size, dim + empty_dim)``, or
        ``(size, 1, H, W)`` when ``reshape_to_grid`` is set.
        """
        # Step 1: shells centred at the origin.
        if self.dim > 1:
            # Normalising isotropic Gaussian draws places them on the sphere.
            gaussian = np.random.multivariate_normal(mean=np.zeros(shape=self.dim), cov=np.eye(self.dim),
                                                     size=size)
            norms = np.linalg.norm(gaussian, axis=1)[..., None]
            shell = gaussian * (self.radius / norms)
        else:
            # In 1D the "sphere" is the pair of points at +radius and -radius.
            signs = np.random.binomial(1, 0.5, size=size) - 0.5
            shell = (self.radius * 2 * signs)[..., None]

        # Step 2: pick one centre per sample; only the first coordinate is non-zero.
        x_choices = np.array(self.center_x)
        picked = np.random.randint(x_choices.shape[0], size=size)
        offsets = np.zeros(shape=(size, self.dim))
        offsets[:, 0:1] = x_choices[picked][..., None]

        # Step 3: translate the shells to their centres.
        points = offsets + shell

        # Step 4: pad with zero-valued dimensions, embedding the shells in a higher-dimensional space.
        if self.empty_dim != 0:
            padding = np.zeros(shape=(size, self.empty_dim))
            points = np.concatenate((points, padding), axis=1)

        # Step 5: optionally reshape into single-channel 2D grids (useful for conv nets).
        if self.reshape_to_grid:
            height, width = self.reshape_to_grid[0], self.reshape_to_grid[1]
            assert height * width == points.shape[1]
            points = points.reshape((points.shape[0], 1, height, width))

        return points
class LeNet(nn.Module):
    """LeNet-style classifier with selectable layer type and activation.

    Two 5x5 convolutions (10 and 20 output channels), each followed by 2x2
    max-pooling, then two dense layers (320 -> 50 -> ``output_dim``). The
    output of each of the four layers is passed through a shared ``Scale``
    module built from ``config.model.l_constant ** 0.25``.

    Args:
        in_channels: Number of input channels for the first convolution.
        output_dim: Width of the final dense layer.
        linear_type: ``"bjorck"`` or ``"standard"``.
        activation: ``"relu"``, ``"maxmin"`` or ``"group_sort"``.
        dropout_on: Whether dropout is applied in ``forward``.
        config: Experiment config; forwarded to every layer.
    """

    def __init__(self, in_channels, output_dim, linear_type, activation, dropout_on, config):
        super(LeNet, self).__init__()
        self.config = config

        # NOTE(review): presumably the four applications multiply out to l_constant - confirm in Scale.
        self.scale = Scale(config.model.l_constant ** 0.25, config.cuda)

        # Pick the convolution / dense implementations.
        if linear_type == "bjorck":
            conv, linear = BjorckConv2d, BjorckLinear
        elif linear_type == "standard":
            conv, linear = StandardConv2d, StandardLinear
        else:
            conv, linear = None, None
            print("Layer type not supported. ")
            exit(-1)

        # Pick the three activations (after conv1, conv2 and fc1).
        self.act_type = activation
        if activation == "relu":
            self.act1, self.act2, self.act3 = nn.ReLU(), nn.ReLU(), nn.ReLU()
        elif activation == "maxmin":
            self.act1, self.act2, self.act3 = MaxMin(5, axis=1), MaxMin(10, axis=1), MaxMin(25)
        elif activation == "group_sort":
            self.act1, self.act2, self.act3 = GroupSort(1, axis=1), GroupSort(1, axis=1), GroupSort(1)
        else:
            print("Activation not supported. ")
            exit(-1)

        # Remember whether dropout should be used in forward().
        self.dropout_on = dropout_on

        # Convolutional part.
        self.conv1 = conv(in_channels, 10, kernel_size=5, config=self.config)
        self.conv2 = conv(10, 20, kernel_size=5, config=self.config)
        self.conv2_drop = nn.Dropout2d()

        # Dense part.
        self.fc1 = linear(320, 50, config=config)
        self.fc2 = linear(50, output_dim, config=config)

    def forward(self, x):
        """Compute the network output for a batch ``x``."""
        # Layer 1: conv -> pool -> activation -> scale.
        x = self.scale(self.act1(F.max_pool2d(self.conv1(x), 2)))

        # Layer 2: conv -> (dropout) -> pool -> activation -> scale.
        x = self.conv2(x)
        if self.dropout_on:
            x = self.conv2_drop(x)
        x = self.scale(self.act2(F.max_pool2d(x, 2)))

        # Flatten to the 320 features expected by fc1.
        x = x.view(-1, 320)

        # Layer 3: dense -> activation -> scale -> (dropout).
        x = self.scale(self.act3(self.fc1(x)))
        if self.dropout_on:
            x = F.dropout(x, training=self.training)

        # Layer 4: dense -> scale.
        return self.scale(self.fc2(x))
def main(opt):
    """Fine-tune a trained classifier after converting it to Bjorck layers.

    Loads the logged config and the best checkpoint from the experiment
    directory, converts the model with ``convert_model_to_bjorck``, raises
    the Bjorck iteration count on every ``BjorckLinear`` module and runs
    ``train`` for a few epochs at a small learning rate.

    Args:
        opt: Nested dict of command-line options. Reads ``opt['output_root']``,
            ``opt['model']['exp_path']`` and ``opt['data']['cuda']``.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(opt['output_root'], exist_ok=True)

    exp_dir = opt['model']['exp_path']

    model_path = os.path.join(exp_dir, 'checkpoints', 'best', 'best_model.pt')
    with open(os.path.join(exp_dir, 'logs', 'config.json'), 'r') as f:
        model_config = Munch.fromDict(json.load(f))

    # Required hack to fix groupings: the saved config may carry a leading -1
    # placeholder in front of the real grouping sizes. Compare by value
    # (``==``), not identity, and tolerate an empty list.
    groupings = model_config.model.get('groupings')
    if groupings and groupings[0] == -1:
        model_config.model.groupings = groupings[1:]

    model = get_model(model_config)
    # The model is still on the CPU here, so map the checkpoint to the CPU;
    # this also lets GPU-saved checkpoints load on CPU-only machines.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    if opt['data']['cuda']:
        print('Using CUDA')
        model.cuda()

    model_config.data.cuda = opt['data']['cuda']
    data = load_data(model_config)

    # Change the model to use ortho layers by copying the base weights.
    bjorck_iters = 50
    model = convert_model_to_bjorck(model, model_config)
    for m in model.modules():
        if isinstance(m, BjorckLinear):
            m.config.model.linear.bjorck_iter = bjorck_iters

    # Short, low learning-rate fine-tuning run written to the requested output root.
    model_config.output_root = opt['output_root']
    model_config.optim.lr_schedule.lr_init = 1e-5
    model_config.optim.epochs = 5
    train(model, data, model_config)
class DistribLoader(object):
    """Iterator yielding paired sample batches from two distributions.

    Each pass over the loader yields ``config.optim.epoch_len`` pairs. In
    "train" mode each distribution is asked for its ``sample_size`` samples,
    in "test" mode for its ``test_sample_size`` samples.
    """

    def __init__(self, config, mode="train"):
        assert mode in ("train", "test"), "Mode must be either 'train' or 'test'."
        self.distrib1 = construct_distrib_instance(config.distrib1)
        self.distrib2 = construct_distrib_instance(config.distrib2)
        self.config = config
        self.mode = mode

    @staticmethod
    def _as_float_tensor(samples):
        # Tensors are used as they are; anything else is treated as a numpy array.
        if not isinstance(samples, torch.Tensor):
            samples = torch.from_numpy(samples)
        return samples.float()

    def __iter__(self):
        # Restart the per-epoch counter every time iteration begins.
        self.sampled_so_far = 0
        return self

    def __next__(self):
        # Stop once epoch_len batches have been produced in this pass.
        if not self.sampled_so_far < self.config.optim.epoch_len:
            raise StopIteration
        self.sampled_so_far += 1

        if self.mode == "train":
            first = self.distrib1(self.config.distrib1.sample_size)
            second = self.distrib2(self.config.distrib2.sample_size)
        elif self.mode == "test":
            first = self.distrib1(self.config.distrib1.test_sample_size)
            second = self.distrib2(self.config.distrib2.test_sample_size)

        first = self._as_float_tensor(first)
        second = self._as_float_tensor(second)
        return (first, second)
def get_exp_name(config):
    """Build a unique experiment name from the config and the current time.

    The name has the form ``<task>_<exp_name>_<dataset>_<gan_type>_<timestamp>``,
    where the timestamp goes down to microseconds.

    Args:
        config: Object exposing ``exp_name``, ``task``, ``dataset`` and ``gan_type``.

    Returns:
        The experiment name as a string.
    """
    # Microsecond resolution keeps names distinct for runs started close together.
    timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")

    name_parts = (config.task, config.exp_name, config.dataset, config.gan_type, timestamp)
    return "{}_{}_{}_{}_{}".format(*name_parts)
def save_hparams(config):
    """Write the configuration as JSON to ``<config.hparams_dir>/hparams.json``.

    Args:
        config: JSON-serializable mapping that also exposes ``hparams_dir``
            as an attribute.
    """
    # Serialize before opening the file so a serialization error leaves no file behind.
    serialized = json.dumps(config)

    destination = os.path.join(config.hparams_dir, "hparams.json")
    with open(destination, "w") as out_file:
        out_file.write(serialized)
def get_undead_rate(model, data, threshold=0.8, cuda=True):
    """Compute, per layer, the fraction of units that are active often enough.

    A unit counts as active on a sample when its activation is strictly
    positive. Its firing rate is averaged over the batch axis and then over
    batches; the unit is counted when that rate is at least ``threshold``.

    Args:
        model: Object whose ``model.get_activations(x)`` returns an iterable
            of per-layer activation tensors.
        data: Iterable of ``(inputs, targets)`` pairs; targets are ignored.
        threshold: Minimum mean firing rate for a unit to be counted.
        cuda: If True, move each input batch to the GPU first.

    Returns:
        1-D numpy array with one fraction per layer.
    """
    undead_rate = []
    for x, _ in data:
        if cuda:
            x = x.cuda()
        # No gradients are needed here, and .numpy() refuses tensors that
        # require grad, so compute without autograd and detach defensively.
        with torch.no_grad():
            activations = model.model.get_activations(x)
        # np.float was removed from NumPy (it was an alias of the builtin
        # float); np.float64 is the same dtype.
        undead_rate.append([
            (layer_a.detach().cpu().numpy() > 0).astype(np.float64).mean(0)
            for layer_a in activations
        ])

    # Average the per-unit rates across batches (each batch weighted equally).
    # NOTE(review): stacking presumably needs every layer to have the same
    # number of units - verify against the architectures this is used with.
    undead_rate = np.array(undead_rate).mean(0)
    return (undead_rate >= threshold).astype(np.float64).mean(1)
def bjorck_orthonormalize(w, beta=0.5, iters=20, order=1):
    """
    Iteratively push a matrix towards one with orthonormal columns.

    Bjorck, Ake, and Clazett Bowie. "An iterative algorithm for computing the best estimate of an orthogonal matrix."
    SIAM Journal on Numerical Analysis 8.2 (1971): 358-364.

    Each iteration applies a fixed polynomial in ``w^T w`` to ``w``. Orders 1
    to 4 are supported, and orders above 1 are only defined for ``beta == 0.5``.
    Unsupported settings print a message and exit with status -1.

    :param w: 2-D tensor to orthonormalize.
    :param beta: Step coefficient of the first-order update.
    :param iters: Number of iterations to run.
    :param order: Order of the polynomial update, 1 through 4.
    :return: The tensor after ``iters`` updates (``w`` itself if ``iters`` is 0).
    """
    # TODO: Make sure the higher order terms can be implemented more efficiently.
    if order not in (1, 2, 3, 4):
        print("The requested order for orthonormalization is not supported. ")
        exit(-1)

    if order != 1 and beta != 0.5:
        print("Bjorck orthonormalization with order more than 1 requires a beta of 0.5. ")
        exit(-1)

    current = w
    for _ in range(iters):
        gram = current.t().mm(current)
        if order == 1:
            current = (1 + beta) * current - beta * current.mm(gram)
            continue

        gram_pow2 = gram.mm(gram)
        if order == 2:
            current = ((15 / 8) * current
                       - (5 / 4) * current.mm(gram)
                       + (3 / 8) * current.mm(gram_pow2))
            continue

        gram_pow3 = gram.mm(gram_pow2)
        if order == 3:
            current = ((35 / 16) * current
                       - (35 / 16) * current.mm(gram)
                       + (21 / 16) * current.mm(gram_pow2)
                       - (5 / 16) * current.mm(gram_pow3))
            continue

        # Only order 4 remains.
        gram_pow4 = gram.mm(gram_pow3)
        current = ((315 / 128) * current
                   - (105 / 32) * current.mm(gram)
                   + (189 / 64) * current.mm(gram_pow2)
                   - (45 / 32) * current.mm(gram_pow3)
                   + (35 / 128) * current.mm(gram_pow4))

    return current
| 32, 57 | 64 58 | ], 59 | "last_epoch": -1 60 | }, 61 | "epoch_len": 16, 62 | "epochs": 225, 63 | "momentum": 0.9, 64 | "betas": [ 65 | 0.0, 66 | 0.9, 67 | 0.99 68 | ], 69 | "wdecay": 0.0, 70 | "criterion": { 71 | "tag": "loss", 72 | "minmax": "min" 73 | }, 74 | "patience": 250, 75 | "max_grad_norm": 10 76 | }, 77 | "distrib1": { 78 | "name": "MultiSphericalShell", 79 | "filepath": "lnets/tasks/dualnets/distrib/multi_spherical_shell.py", 80 | "dim": 1, 81 | "empty_dim": 0, 82 | "num_shells": 1, 83 | "radius": 0.0000001, 84 | "center_x": [ 85 | 0.0 86 | ], 87 | "reshape_to_grid": false, 88 | "sample_size": 32, 89 | "test_sample_size": 2048 90 | }, 91 | "distrib2": { 92 | "name": "MultiSphericalShell", 93 | "filepath": "lnets/tasks/dualnets/distrib/multi_spherical_shell.py", 94 | "dim": 1, 95 | "empty_dim": 0, 96 | "num_shells": 1, 97 | "radius": 1, 98 | "center_x": [ 99 | 0.0 100 | ], 101 | "reshape_to_grid": false, 102 | "sample_size": 32, 103 | "test_sample_size": 2048 104 | }, 105 | "visualize_2d": { 106 | "xrange": [ 107 | -4.1, 108 | 4.1 109 | ], 110 | "yrange": [ 111 | -1.1, 112 | 1.1 113 | ], 114 | "step": 0.02, 115 | "fig_types": [ 116 | "contour" 117 | ], 118 | "elev": [ 119 | 0, 120 | 60, 121 | 90 122 | ], 123 | "azim": [ 124 | 0 125 | ] 126 | }, 127 | "visualize_1d": { 128 | "xrange": [ 129 | -1.2, 130 | 1.2 131 | ], 132 | "step": 0.02 133 | }, 134 | "logging": { 135 | "report_freq": 1, 136 | "save_model": false, 137 | "save_best": false 138 | }, 139 | "cuda": false, 140 | "visualize": true 141 | } -------------------------------------------------------------------------------- /lnets/tasks/dualnets/configs/three_cones_experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "task": "wasserstein_distance_estimation", 3 | "output_root": "out/wde", 4 | "exp_name": "three_cones_experiment", 5 | "seed": 0, 6 | "model": { 7 | "pretrained_best_path": null, 8 | "name": "dual_fc", 9 | "activation": "maxmin", 10 
| "linear": { 11 | "type": "bjorck", 12 | "safe_scaling": true, 13 | "power_iters": 2, 14 | "bjorck_beta": 0.5, 15 | "bjorck_iter": 20, 16 | "bjorck_order": 1, 17 | "bias": true 18 | }, 19 | "layers": [ 20 | 312, 21 | 312, 22 | 312, 23 | 1 24 | ], 25 | "groupings": [ 26 | 2, 27 | 2, 28 | 2, 29 | 1 30 | ], 31 | "l_constant": 1, 32 | 33 | "per_epoch_proj": { 34 | "turned_on": false, 35 | "every_n_epochs": 100000000, 36 | "type": "l_2", 37 | "bjorck_beta": 0.5, 38 | "bjorck_iter": 20, 39 | "bjorck_order": 1, 40 | "reset_optimizer": false 41 | }, 42 | 43 | "per_update_proj": { 44 | "turned_on": false, 45 | "type": "l_2", 46 | "bjorck_beta": 0.5, 47 | "bjorck_iter": 12, 48 | "bjorck_order": 1 49 | } 50 | }, 51 | "optim": { 52 | "optimizer": "aggmo", 53 | "lr_schedule": { 54 | "name": "step", 55 | "lr_init": 0.01, 56 | "lr_decay": 0.9, 57 | "milestones": [ 58 | 32, 59 | 64 60 | ], 61 | "last_epoch": -1 62 | }, 63 | "epoch_len": 16, 64 | "epochs": 600, 65 | "momentum": 0.9, 66 | "betas": [ 67 | 0.0, 68 | 0.9, 69 | 0.99 70 | ], 71 | "wdecay": 0.0, 72 | "criterion": { 73 | "tag": "loss", 74 | "minmax": "min" 75 | }, 76 | "patience": 250, 77 | "max_grad_norm": 10 78 | }, 79 | "distrib1": { 80 | "name": "MultiSphericalShell", 81 | "filepath": "lnets/tasks/dualnets/distrib/multi_spherical_shell.py", 82 | "dim": 2, 83 | "empty_dim": 0, 84 | "num_shells": 1, 85 | "radius": 0.0000001, 86 | "center_x": [ 87 | -2.0, 0.0, 2.0 88 | ], 89 | "reshape_to_grid": false, 90 | "sample_size": 32, 91 | "test_sample_size": 2048 92 | }, 93 | "distrib2": { 94 | "name": "MultiSphericalShell", 95 | "filepath": "lnets/tasks/dualnets/distrib/multi_spherical_shell.py", 96 | "dim": 2, 97 | "empty_dim": 0, 98 | "num_shells": 1, 99 | "radius": 1, 100 | "center_x": [ 101 | -2.0, 0.0, 2.0 102 | ], 103 | "reshape_to_grid": false, 104 | "sample_size": 32, 105 | "test_sample_size": 2048 106 | }, 107 | "visualize_2d": { 108 | "xrange": [ 109 | -4.1, 110 | 4.1 111 | ], 112 | "yrange": [ 113 | -1.1, 114 | 1.1 
def check_logit_margins(model, data):
    """Collect top-1 versus top-2 logit margins of correctly classified samples.

    Args:
        model: Callable mapping an input batch to a logit tensor with the
            classes on dimension 1.
        data: Iterable of ``(inputs, labels)`` batches.

    Returns:
        1-D tensor with the absolute difference between the two largest
        logits, for every sample whose top-1 prediction equals its label.
    """
    # Run on whatever device the model lives on instead of always calling
    # .cuda(), which crashes when the script runs without --cuda. If the
    # device cannot be determined, keep the previous always-CUDA behavior.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = None

    # NOTE(review): the model's train/eval mode is left untouched. main() calls
    # this right after Trainer.test(), which ends with model.train() - confirm
    # whether margins should be computed in eval mode.
    logit_margins = []
    # Margins are a diagnostic only; skipping autograd avoids keeping one
    # graph per batch alive for the whole dataset.
    with torch.no_grad():
        for x, y in data:
            if device is None:
                x, y = x.cuda(), y.cuda()
            else:
                x, y = x.to(device), y.to(device)
            logits = model(x)
            top, indices = logits.topk(2, 1)
            correct = indices[:, 0] == y
            logit_margins.append(torch.abs(top[:, 0] - top[:, 1])[correct])
    return torch.cat(logit_margins)
opt['cuda'] 42 | model = get_model(model_config) 43 | model.load_state_dict(torch.load(model_path)) 44 | 45 | if opt['cuda']: 46 | print('Using CUDA') 47 | model.cuda() 48 | 49 | def on_sample(state): 50 | if opt['cuda']: 51 | state['sample'] = [x.cuda() for x in state['sample']] 52 | 53 | def on_forward(state): 54 | state['model'].add_to_meters(state) 55 | 56 | data = load_data(model_config) 57 | 58 | # Change the model to use ortho layers by copying the base weights 59 | model = convert_model_from_bjorck(model, model_config) 60 | # model.model.project_network_weights(Munch.fromDict({'type': 'l_inf_projected'})) 61 | 62 | # Instantiate the trainer. 63 | trainer = Trainer() 64 | 65 | trainer.hooks['on_sample'] = on_sample 66 | trainer.hooks['on_forward'] = on_forward 67 | print('TESTING') 68 | state = trainer.test(model, data['test']) 69 | for tag, meter in state['model'].meters.items(): 70 | print(tag, meter.value()) 71 | logit_margins = check_logit_margins(model, data['test']) 72 | print(logit_margins.min().item(), logit_margins.max().item(), logit_margins.mean().item()) 73 | plt.hist(logit_margins.detach().cpu().numpy()) 74 | plt.show() 75 | 76 | 77 | if __name__ == '__main__': 78 | parser = argparse.ArgumentParser(description='Evaluate trained classification network') 79 | 80 | parser.add_argument('--model.exp_path', type=str, metavar='MODELPATH', 81 | help="location of pretrained model weights to evaluate") 82 | parser.add_argument('--cuda', action='store_true', help="run in CUDA mode (default: False)") 83 | 84 | args = vars(parser.parse_args()) 85 | 86 | opt = {} 87 | for k, v in args.items(): 88 | cur = opt 89 | tokens = k.split('.') 90 | for token in tokens[:-1]: 91 | if token not in cur: 92 | cur[token] = {} 93 | cur = cur[token] 94 | cur[tokens[-1]] = v 95 | 96 | main(opt) 97 | -------------------------------------------------------------------------------- /lnets/tasks/dualnets/configs/high_dimensional_cone_experiment.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "task": "wasserstein_distance_estimation", 3 | "output_root": "out/wde", 4 | "exp_name": "high_dimensional_cone_experiment", 5 | "seed": 0, 6 | "model": { 7 | "pretrained_best_path": null, 8 | "name": "dual_fc", 9 | "activation": "group_sort", 10 | "linear": { 11 | "type": "bjorck", 12 | "safe_scaling": true, 13 | "power_iters": 2, 14 | "bjorck_beta": 0.5, 15 | "bjorck_iter": 20, 16 | "bjorck_order": 1, 17 | "bias": true 18 | }, 19 | "layers": [ 20 | 256, 21 | 256, 22 | 256, 23 | 256, 24 | 256, 25 | 256, 26 | 1 27 | ], 28 | "groupings": [ 29 | 256, 30 | 256, 31 | 256, 32 | 256, 33 | 256, 34 | 256, 35 | 1 36 | ], 37 | "l_constant": 1, 38 | 39 | "per_epoch_proj": { 40 | "turned_on": false, 41 | "every_n_epochs": 100000000, 42 | "type": "l_2", 43 | "bjorck_beta": 0.5, 44 | "bjorck_iter": 20, 45 | "bjorck_order": 1, 46 | "reset_optimizer": false 47 | }, 48 | 49 | "per_update_proj": { 50 | "turned_on": false, 51 | "type": "l_2", 52 | "bjorck_beta": 0.5, 53 | "bjorck_iter": 12, 54 | "bjorck_order": 1 55 | } 56 | }, 57 | "optim": { 58 | "optimizer": "aggmo", 59 | "lr_schedule": { 60 | "name": "step", 61 | "lr_init": 0.005, 62 | "lr_decay": 0.9, 63 | "milestones": [ 64 | 15, 65 | 30, 66 | 60, 67 | 90 68 | ], 69 | "last_epoch": -1 70 | }, 71 | "epoch_len": 128, 72 | "epochs": 150, 73 | "momentum": 0.9, 74 | "betas": [ 75 | 0.0, 76 | 0.9, 77 | 0.99 78 | ], 79 | "wdecay": 0.0, 80 | "criterion": { 81 | "tag": "loss", 82 | "minmax": "min" 83 | }, 84 | "patience": 250, 85 | "max_grad_norm": 1000 86 | }, 87 | "distrib1": { 88 | "name": "MultiSphericalShell", 89 | "filepath": "lnets/tasks/dualnets/distrib/multi_spherical_shell.py", 90 | "dim": 128, 91 | "empty_dim": 0, 92 | "num_shells": 1, 93 | "radius": 0.0000001, 94 | "center_x": [ 95 | 0.0 96 | ], 97 | "reshape_to_grid": false, 98 | "sample_size": 32, 99 | "test_sample_size": 2048 100 | }, 101 | "distrib2": { 102 | "name": 
class Trainer(object):
    """Minimal hook-driven training and evaluation loop.

    Callbacks are registered in ``self.hooks`` under names such as
    ``on_start``, ``on_sample``, ``on_forward``, ``on_update``,
    ``on_end_epoch`` and ``on_end``. Each callback receives the mutable
    ``state`` dict describing the current run.
    """

    def __init__(self):
        self.hooks = {}

    def hook(self, name, state):
        """Call the callback registered under ``name``, if there is one."""
        if name not in self.hooks:
            return
        self.hooks[name](state)

    def train(self, model, iterator, maxepoch, optimizer):
        """Train ``model`` for up to ``maxepoch`` passes over ``iterator``.

        Training ends early when a hook sets ``state['stop']``; the flag is
        checked once per epoch. Returns the final state dict.
        """
        # The state fully describes the status of training and is shared with all hooks.
        state = dict(
            model=model,
            iterator=iterator,
            maxepoch=maxepoch,
            optimizer=optimizer,
            epoch=0,
            t=0,
            train=True,
            stop=False,
        )

        def forward_backward():
            # Closure handed to optimizer.step(): computes the loss on the
            # current sample, backpropagates and reports via 'on_forward'.
            loss, output = state['model'].loss(state['sample'])
            state['output'] = output
            state['loss'] = loss
            loss.backward()
            self.hook('on_forward', state)
            return loss

        model.train()  # Switch to training mode.
        self.hook('on_start', state)

        while True:
            if not state['epoch'] < state['maxepoch'] or state['stop']:
                break

            self.hook('on_start_epoch', state)

            for batch in state['iterator']:
                state['sample'] = batch
                self.hook('on_sample', state)

                state['optimizer'].zero_grad()
                state['optimizer'].step(forward_backward)
                self.hook('on_update', state)

                state['t'] += 1

            # The epoch counter is advanced before 'on_end_epoch' fires.
            state['epoch'] += 1
            self.hook('on_end_epoch', state)

        self.hook('on_end', state)

        return state

    def test(self, model, iterator):
        """Evaluate ``model`` for one pass over ``iterator``.

        The model is put in evaluation mode for the pass and switched back to
        training mode afterwards. Returns the final state dict.
        """
        state = dict(model=model, iterator=iterator, t=0, train=False)
        model.eval()  # Set the PyTorch model to evaluation mode.

        self.hook('on_start', state)
        self.hook('on_start_val', state)

        for batch in state['iterator']:
            state['sample'] = batch
            self.hook('on_sample', state)

            # Forward pass only; the model is told it is being tested.
            loss, output = state['model'].loss(state['sample'], test=True)
            state['output'] = output
            state['loss'] = loss
            self.hook('on_forward', state)

            state['t'] += 1

        self.hook('on_end_val', state)
        self.hook('on_end', state)
        model.train()
        return state
# --- lnets/data/load_data.py ---
def get_datasets(config):
    """Build the (train, validation, test) datasets named by ``config['data']['name']``.

    For the torchvision built-ins the validation set is created from the same
    (default) split as the training set but with the test-time transform;
    ``build_loaders`` later carves the two apart. For 'imagenet-torchvision'
    no test set is loaded (``None`` is returned in its place).

    Raises:
        NotImplementedError: if the dataset name is not supported.
    """
    data_name = config['data']['name'].lower()
    path = os.path.join(config['data']['root'], data_name)

    train_transform, test_transform = get_data_transforms(config)

    builtin_sets = {
        'mnist': 'MNIST',
        'cifar10': 'CIFAR10',
        'cifar100': 'CIFAR100',
        'fashion-mnist': 'FashionMNIST',
    }

    if data_name in builtin_sets:
        dataset_cls = getattr(datasets, builtin_sets[data_name])
        train_data = dataset_cls(path, download=True, transform=train_transform)
        val_data = dataset_cls(path, download=True, transform=test_transform)
        test_data = dataset_cls(path, train=False, download=True, transform=test_transform)
    elif data_name == 'imagenet-torchvision':
        train_data = datasets.ImageFolder(os.path.join(path, 'train'), transform=train_transform)
        val_data = datasets.ImageFolder(os.path.join(path, 'valid'), transform=test_transform)
        # Currently not loaded.
        test_data = None
    else:
        raise NotImplementedError('Data name %s not supported' % data_name)

    return train_data, val_data, test_data


def build_loaders(config, train_data, val_data, test_data):
    """Wrap the three datasets in DataLoaders keyed 'train', 'validation', 'test'.

    If ``config['data']['indices_path']`` is set, train/validation become
    ``Subset``s selected by the stored indices. Otherwise (except for
    'imagenet-torchvision') the datasets' ``train_data``/``train_labels``
    attributes are sliced in place: the first ``train_size`` fraction goes to
    training and the remainder to validation.

    NOTE(review): when ``train_size == 1`` the validation dataset is ``None``
    and is still handed to ``DataLoader``; the same holds for the ImageNet
    test set. Confirm callers never iterate those loaders.
    """
    data_cfg = config['data']
    data_name = data_cfg['name'].lower()
    batch_size = config['optim']['batch_size']
    num_workers = data_cfg['num_workers']

    if data_cfg['indices_path'] is not None:
        train_indices, val_indices = load_indices(data_cfg['indices_path'], data_cfg['per_class_count'])
        train_data = Subset(train_data, train_indices)
        val_data = Subset(val_data, val_indices)
    elif data_name != 'imagenet-torchvision':
        # Manually readjust train/val size for memory saving.
        split = int(len(train_data) * data_cfg['train_size'])

        train_data.train_data = train_data.train_data[:split]
        train_data.train_labels = train_data.train_labels[:split]

        if data_cfg['train_size'] != 1:
            val_data.train_data = val_data.train_data[split:]
            val_data.train_labels = val_data.train_labels[split:]
        else:
            val_data = None

    shared = dict(batch_size=batch_size, num_workers=num_workers)
    return {
        'train': DataLoader(train_data, shuffle=True, **shared),
        'validation': DataLoader(val_data, **shared),
        'test': DataLoader(test_data, **shared),
    }


def load_data(config):
    """Create the datasets for ``config`` and return their DataLoaders."""
    return build_loaders(config, *get_datasets(config))
# --- lnets/models/architectures/fully_convolutional_2d.py ---
class FullyConv2D(Architecture):
    """Stack of convolution + scale layers with an activation between consecutive convolutions.

    The per-layer scale is chosen so that the product over all layers equals
    ``l_constant * l_correction_constant`` taken from ``config.model``.
    """

    def __init__(self, in_channels, channels, kernels, strides, linear_type, activation, bias=True, config=None):
        super(FullyConv2D, self).__init__()
        self.config = config

        # Channel bookkeeping: the input channel count is prepended to a copy of `channels`.
        self.in_channels = in_channels
        self.channels = [self.in_channels] + channels.copy()
        self.num_layers = len(self.channels)

        # Kernel sizes and strides, indexed per convolution.
        self.kernels = kernels
        self.strides = strides

        # Lipschitz constant of the whole module. l_correction_constant is used to make sure the Lipschitz
        # constant of the convnet is 1 _without_ the other lipschitz constant.
        total_constant = config.model.l_constant * config.model.l_correction_constant
        l_constant_per_layer = total_constant ** (1.0 / (self.num_layers - 1))

        # Remaining keyword arguments forwarded to every convolution.
        self.conv_parameters = {
            'padding': config.model.padding,
            'dilation': config.model.dilation,
            'groups': config.model.groups,
            'bias': bias,
            'config': config,
        }

        # Activation function and (optional) grouping sizes.
        self.act_func = select_activation_function(activation)

        if "groupings" in self.config.model:
            # NOTE: this is the config's own list, so the insert below mutates the config in place.
            self.groupings = self.config.model.groupings
            self.groupings.insert(0, -1)  # For easier bookkeeping later on.

        # Linear (convolution) layer type.
        self.linear_type = linear_type
        self.use_bias = bias
        self.linear = select_linear_layer(self.linear_type)

        # Sequence of linear + activation layers; the last layer is linear (followed by its scale).
        self.model = nn.Sequential(*self._get_sequential_layers(activation, l_constant_per_layer, self.linear))

    def forward(self, x):
        return self.model(x)

    def _get_sequential_layers(self, activation, l_constant_per_layer, linear):
        """Return the ordered list of modules: conv, scale, then (activation, conv, scale) repeated."""

        def make_conv(idx, in_ch):
            return linear(in_ch, self.channels[idx + 1], kernel_size=self.kernels[idx],
                          stride=self.strides[idx], **self.conv_parameters)

        def make_scale():
            # The scaling layer gives control over the Lipschitz constant of the network.
            return Scale(l_constant_per_layer, cuda=self.config.cuda)

        grouped_activations = ("maxout", "maxmin", "group_sort")

        layers = [make_conv(0, self.channels[0]), make_scale()]

        for i in range(1, len(self.channels) - 1):
            # Channel reduction caused by the activation that precedes this convolution.
            if activation == "maxout":
                downsampling_factor = 1.0 / self.groupings[i]
            elif activation == "maxmin":
                downsampling_factor = 2.0 / self.groupings[i]
            else:
                downsampling_factor = 1.0

            if activation in grouped_activations:
                layers.append(self.act_func(self.channels[i] // self.groupings[i], axis=1))
            else:
                layers.append(self.act_func())

            layers.append(make_conv(i, int(downsampling_factor * self.channels[i])))
            layers.append(make_scale())

        return layers
# --- lnets/models/architectures/hard_coded/resnet.py ---
def _projection_shortcut(in_planes, out_planes, stride):
    """Return the residual shortcut: empty when shapes already match, else 1x1 conv + batch norm."""
    if stride == 1 and in_planes == out_planes:
        return nn.Sequential()
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_planes),
    )


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm."""

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = _projection_shortcut(in_planes, self.expansion * planes, stride)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return F.relu(out)


class Bottleneck(nn.Module):
    """Residual block 1x1 -> 3x3 -> 1x1; the output has ``expansion * planes`` channels."""

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.shortcut = _projection_shortcut(in_planes, out_planes, stride)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    """ResNet assembled from ``block`` according to ``block_config``.

    ``block_config`` must provide 'num_blocks', 'num_channels' (same length)
    and 'pool_size'. The stem convolution expects 3 input channels. The first
    stage uses stride 1, every later stage stride 2.
    """

    def __init__(self, block, block_config, num_classes=10):
        super().__init__()

        num_blocks = block_config['num_blocks']
        channels = block_config['num_channels']
        assert len(channels) == len(num_blocks)

        # Running input width for the next block; updated by _make_layer.
        self.in_planes = channels[0]

        self.conv1 = nn.Conv2d(3, channels[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels[0])

        stages = [
            self._make_layer(block, planes, count, stride=1 if idx == 0 else 2)
            for idx, (planes, count) in enumerate(zip(channels, num_blocks))
        ]
        self.layers = nn.Sequential(*stages)
        self.avgpool = nn.AvgPool2d(block_config['pool_size'])
        self.linear = nn.Linear(channels[-1] * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the remaining ones stride 1."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.avgpool(self.layers(out))
        out = out.view(out.size(0), -1)
        return self.linear(out)


# --- lnets/models/__init__.py ---
# Maps a model name to the factory that builds it from a config.
MODEL_REGISTRY = {}


def register_model(model_name):
    """Decorator that records the decorated factory in MODEL_REGISTRY under ``model_name``."""
    def decorator(factory):
        MODEL_REGISTRY[model_name] = factory
        return factory

    return decorator


def get_model(config):
    """Instantiate the model registered under ``config['model']['name']``.

    Raises:
        ValueError: if no factory is registered under that name.
    """
    model_name = config['model']['name']
    if model_name not in MODEL_REGISTRY:
        raise ValueError("Unknown model {:s}".format(model_name))
    return MODEL_REGISTRY[model_name](config)
def _fc_net_from_config(config, input_dim, **extra):
    """Build an FCNet from the shared ``config.model`` fields; ``extra`` is forwarded to FCNet."""
    model_cfg = config.model
    return FCNet(model_cfg.layers, input_dim, model_cfg.linear.type, model_cfg.activation,
                 bias=model_cfg.linear.bias, config=config, **extra)


# Wasserstein Distance Estimation.
@register_model('dual_fc')
def load_fc_dual(config):
    """Fully connected network wrapped for the dual optimisation task."""
    return DualOptimModel(_fc_net_from_config(config, config.distrib1.dim))


@register_model("dual_fully_conv")
def load_conv_dual(config):
    """Fully convolutional network wrapped for the dual optimisation task."""
    model_cfg = config.model
    net = FullyConv2D(config.distrib1.dim, model_cfg.channels, model_cfg.kernels, model_cfg.strides,
                      linear_type=model_cfg.linear.type, activation=model_cfg.activation, config=config)
    return DualOptimModel(net)


# Classification.
@register_model('classify_fc')
def load_classify_fc(config):
    """Fully connected classifier."""
    return ClassificationModel(_fc_net_from_config(config, config.data.input_dim))


@register_model('classify_fc_dropout')
def load_classify_fc_dropout(config):
    """Fully connected classifier built with ``dropout=True``."""
    return ClassificationModel(_fc_net_from_config(config, config.data.input_dim, dropout=True))


@register_model('classify_fc_spec_jac')
def load_classify_fc_spec_jac(config):
    """Fully connected classifier with the Jacobian spectral-norm regulariser."""
    net = _fc_net_from_config(config, config.data.input_dim)
    return JacSpecClassificationModel(net, config['model']['sn_reg'], config['cuda'])


@register_model('classify_fc_margin')
def load_classify_fc_margin(config):
    """Fully connected classifier trained with the margin variant of cross-entropy."""
    return MarginClassificationModel(_fc_net_from_config(config, config.data.input_dim), config)
@register_model('classify_fc_hinge')
def load_classify_fc_hinge(config):
    """Fully connected classifier trained with the multi-class hinge loss."""
    model_cfg = config.model
    net = FCNet(model_cfg.layers, config.data.input_dim, model_cfg.linear.type, model_cfg.activation,
                bias=model_cfg.linear.bias, config=config)
    return HingeLossClassificationModel(net, config)


@register_model("lenet_classify")
def load_lenet_classify(config):
    """LeNet classifier."""
    model_cfg = config.model
    net = LeNet(config.data.in_channels, model_cfg.output_dim, model_cfg.linear.type, model_cfg.activation,
                model_cfg.dropout_on, config=config)
    return ClassificationModel(net)


def _cifar_resnet32(config, width):
    """Shared builder for the two registered CIFAR ResNet-32 variants."""
    block_config = {
        "num_blocks": [5, 5, 5],
        "num_channels": [16, 32, 64],
        "width": width,
        "pool_size": 8
    }
    return ClassificationModel(ResNet(BasicBlock, block_config, config['data']['class_count']))


@register_model('resnet32')
def CifarResNet32(config):
    """ResNet-32 style classifier (three stages of five BasicBlocks)."""
    return _cifar_resnet32(config, width=1)


@register_model('wide-resnet32')
def CifarWideResNet32(config):
    """Same layout as 'resnet32' with "width" set to 10.

    NOTE(review): the ResNet constructor shown in this file reads only
    'num_blocks', 'num_channels' and 'pool_size', so "width" appears to have
    no effect on the built network. Confirm whether widening was intended.
    """
    return _cifar_resnet32(config, width=10)
# --- lnets/utils/math/projections/linf_ball.py ---
def get_linf_projection_threshold(weight, cuda):
    """Return, for every row of ``weight``, the soft-threshold that puts the row on the unit L1 sphere.

    The threshold follows Algorithm 1 of Laurent Condat, "Fast Projection onto
    the Simplex and the L1 Ball", applied row-wise to ``|weight|``.

    Args:
        weight: 2-D tensor; rows are projected independently.
        cuda: kept for backward compatibility and no longer consulted. All
            temporaries are created on ``weight.device``, so a flag that
            disagrees with the tensor's real device cannot break the call.

    Returns:
        1-D tensor with one threshold per row.
    """
    with torch.no_grad():
        device = weight.device

        # Sort the absolute weights of every row in decreasing order.
        sorted_weights, _ = torch.abs(weight).sort(dim=1, descending=True)
        partial_sums = torch.cumsum(sorted_weights, dim=1)
        num_cols = partial_sums.shape[1]

        # ranks[j] = j + 1, the number of entries included in partial_sums[:, j].
        ranks = torch.arange(1, num_cols + 1, device=device).float()

        # Entry j qualifies when sorted_weights[j] > (partial_sums[j] - 1) / (j + 1).
        qualifies = partial_sums < 1.0 + ranks * sorted_weights

        # Adding a small increasing term (at most 0.5) makes argmax return the
        # LARGEST qualifying index instead of the first one.
        scores = qualifies.float() + (1.0 / (2 * num_cols)) * ranks
        ks = torch.argmax(scores, dim=1)

        rows = torch.arange(weight.shape[0], device=device)
        thresholds = (partial_sums[rows, ks] - 1.0) / (ks.float() + 1.0)
    return thresholds


def get_weight_signs(weight):
    """Return the element-wise sign of ``weight`` (computed without autograd tracking)."""
    with torch.no_grad():
        return torch.sign(weight)


def project_on_linf_ball(weight, cuda):
    """Soft-threshold every row of ``weight`` with the threshold from ``get_linf_projection_threshold``.

    NOTE(review): the threshold can be negative for rows whose L1 norm is
    already below 1, in which case the row is grown until its L1 norm is 1
    rather than left untouched. Confirm that this is intended.

    Args:
        weight: 2-D tensor.
        cuda: forwarded for interface compatibility; the device is taken from ``weight``.
    """
    with torch.no_grad():
        thresholds = get_linf_projection_threshold(weight, cuda)
        signs = get_weight_signs(weight)
        signs[signs == 0] = 1
        # A plain float bound works on every device, unlike a CPU tensor bound.
        shrunk = torch.clamp(torch.abs(weight) - thresholds.unsqueeze(-1), min=0.0)
        projected_weights = signs * shrunk

    return projected_weights


def get_l_inf_row_normalization_factors(weight, scale_all=True, cuda=False):
    """Return the per-row L1 norms of ``weight`` used as scaling divisors.

    Args:
        weight: 2-D tensor.
        scale_all: if False, norms below 1 are raised to 1 so that only rows
            with L1 norm above 1 are shrunk by the division.
        cuda: kept for backward compatibility; not needed any more.
    """
    with torch.no_grad():
        row_sums = torch.sum(torch.abs(weight), dim=1)
        if not scale_all:
            return torch.clamp(row_sums, min=1.0)
        return row_sums


def scale_on_linf_ball(weight, scale_all=True, cuda=False):
    """Divide every row of ``weight`` by its normalisation factor.

    NOTE(review): with ``scale_all=True`` an all-zero row is divided by zero.
    """
    with torch.no_grad():
        row_scaling_factors = get_l_inf_row_normalization_factors(weight, scale_all, cuda)
        return weight / row_scaling_factors.unsqueeze(-1)


# --- lnets/tasks/adversarial/attack/perform_attack.py ---
def cw_loss(logits, y, confidence=50):
    """Margin-based attack loss: ``-mean(relu(correct - best_wrong + confidence))``.

    Args:
        logits: (batch, classes) tensor of scores.
        y: (batch,) tensor of integer class labels.
        confidence: offset added to the margin before the ReLU (default 50,
            the value that used to be hard-coded).
    """
    target_column = y.view(-1, 1)
    is_target = torch.zeros_like(logits, dtype=torch.bool).scatter_(1, target_column, True)

    correct_logit = logits.gather(1, target_column).squeeze(1)
    worst_wrong_logit = logits[~is_target].view(logits.size(0), -1).max(1)[0]

    adv_margin = correct_logit - worst_wrong_logit
    return -F.relu(adv_margin + confidence).mean()


def tensor_clamp(x, min_x, max_x):
    """Clamp ``x`` IN PLACE between the element-wise bounds ``min_x`` and ``max_x`` and return it."""
    too_small = x < min_x
    x[too_small] = min_x[too_small]
    too_large = x > max_x
    x[too_large] = max_x[too_large]
    return x
def manual_fgs(model, x, y, eps=0.1, clamp=True):
    """One-step fast-gradient-sign attack that ascends ``cw_loss``.

    Args:
        model: callable mapping inputs to logits.
        x: input batch. NOTE: ``requires_grad`` is switched on for this very tensor.
        y: integer class labels.
        eps: step size applied to the gradient sign.
        clamp: if True, the adversarial input is clamped to [0, 1].

    Returns:
        (adversarial inputs, predictions on them, predictions on ``x``).
    """
    model.zero_grad()
    x.requires_grad = True
    clean_logits = model(x)
    loss = cw_loss(clean_logits, y)

    direction = torch.sign(grad(loss, x)[0])
    adv_x = x + direction * eps
    if clamp:
        adv_x.clamp_(0, 1)
    return adv_x, model(adv_x).argmax(1), clean_logits.argmax(1)


def manual_pgd(model, x, y, stepsize=0.01, eps=0.1, iters=100, rand_start=True, clamp=True):
    """Iterated gradient-sign attack that ascends ``cw_loss``, kept within ``eps`` of ``x`` element-wise.

    Args:
        model: callable mapping inputs to logits.
        x: clean input batch.
        y: integer class labels.
        stepsize: size of each signed-gradient step.
        eps: per-element radius of the allowed perturbation around ``x``.
        iters: number of steps.
        rand_start: if True, start from ``x`` plus uniform noise in [-eps, eps].
        clamp: if True, the allowed box is additionally intersected with [0, 1].

    Returns:
        (adversarial inputs, predictions on them, predictions on ``x``).
    """
    model.zero_grad()
    x_adv = torch.zeros_like(x)
    x_adv.copy_(x)

    # Element-wise bounds of the allowed perturbation box.
    x_min = x.detach() - eps
    x_max = x.detach() + eps
    if clamp:
        x_min.clamp_(0, 1)
        x_max.clamp_(0, 1)

    if rand_start:
        noise = torch.zeros_like(x)
        noise.uniform_(-eps, eps)
        x_adv = x_adv + noise
    x_adv.requires_grad = True

    for _ in range(iters):
        model.zero_grad()
        # The former `if x_adv.grad: x_adv.grad.zero_()` was removed: truth-testing
        # a multi-element tensor raises, and the branch could never run because
        # torch.autograd.grad() returns gradients without writing to `.grad`.
        logits = model(x_adv)
        loss = cw_loss(logits, y)
        direction = torch.sign(grad(loss, x_adv)[0])
        x_adv = x_adv + direction * stepsize
        x_adv = tensor_clamp(x_adv, x_min, x_max)
    return x_adv, model(x_adv).argmax(1), model(x).argmax(1)
def perform_attack(attack, model, input_adv, cuda=True, **attack_kwargs):
    """Run ``attack`` on one example and score the result.

    Args:
        attack: callable ``attack(input_adv, **attack_kwargs)`` returning the
            adversarial example as an array, or ``None`` on failure.
        model: object whose ``forward`` maps a batch to logits.
        input_adv: object exposing ``original_image`` and ``original_class``.
        cuda: if True, tensors are moved to the GPU before evaluation.

    Returns:
        ``(adversarial, success, mse, linf)``. ``adversarial`` is the attack's
        raw return value. ``success`` is 1.0 when the model's prediction on the
        adversarial example differs from the original class and the
        perturbation is non-zero, else 0.0. On attack failure the result is
        ``(None, 0.0, 0.0, 0.0)``.
    """
    adversarial_np = attack(input_adv, **attack_kwargs)
    if adversarial_np is None:
        # Attack failed; nothing to evaluate.
        return adversarial_np, 0.0, 0.0, 0.0

    image = torch.Tensor(input_adv.original_image)
    label = input_adv.original_class
    adversarial = torch.Tensor(adversarial_np)
    if cuda:
        adversarial = adversarial.cuda()
        image = image.cuda()

    # Size of the perturbation: mean squared error and max absolute difference.
    difference = adversarial - image
    adv_mse = torch.pow(difference, 2).mean().item()
    adv_inf = torch.max(torch.abs(difference)).item()

    # Class predicted for the adversarial example (batch of one).
    probabilities = F.softmax(model.forward(adversarial.unsqueeze(0)), dim=1)
    predicted = probabilities.max(1)[1][0].item()

    success = 1.0 if predicted != label and adv_inf > 0.0 else 0.0
    return adversarial_np, success, adv_mse, adv_inf


def batch_attack(attack, model, criterion, x, y, attack_config=None, distance=None):
    """Attack every example of the batch ``(x, y)`` one at a time.

    Args:
        attack: attack callable passed on to ``perform_attack``.
        model: wrapper passed to ``Adversarial``; its ``_model`` attribute is
            used for evaluating the adversarial examples.
        criterion, distance: forwarded to ``Adversarial``.
        x, y: batch of inputs and labels.
        attack_config: keyword arguments for the attack (default: none).

    Returns:
        Five tensors: the adversarial examples and their labels (only for
        attacks that produced an example), followed by the success flags, MSE
        values and L-inf values for ALL examples of the batch.
    """
    attack_config = {} if attack_config is None else attack_config

    adv_ex = []
    adv_targets = []
    ret_success = []
    ret_adv_mse = []
    ret_adv_inf = []

    is_cuda = x.is_cuda  # Same for every example of the batch.
    for i in range(x.shape[0]):
        input_adv = Adversarial(model, criterion, x[i].cpu().numpy(), y[i], distance=distance)
        adv, success, adv_mse, adv_inf = perform_attack(attack, model._model, input_adv, cuda=is_cuda,
                                                        **attack_config)
        if adv is not None:
            adv_targets.append(y[i])
            adv_ex.append(adv)
        ret_success.append(success)
        ret_adv_mse.append(adv_mse)
        ret_adv_inf.append(adv_inf)

    return (torch.Tensor(np.array(adv_ex)), torch.LongTensor(np.array(adv_targets)),
            torch.Tensor(ret_success), torch.Tensor(ret_adv_mse), torch.Tensor(ret_adv_inf))
# --- lnets/models/architectures/fully_connected.py ---
class FCNet(Architecture):
    """Fully connected network: linear + scale layers with an activation between consecutive linears.

    Each scale layer multiplies by ``l_constant ** (1 / number_of_linear_layers)``,
    with ``l_constant`` read from ``config.model``.
    """

    def __init__(self, layers, input_dim, linear_type, activation, bias=True, config=None, dropout=False):
        super(FCNet, self).__init__()
        self.config = config

        # Layer sizes with the input dimension prepended (on a copy of `layers`).
        self.input_dim = input_dim
        self.layer_sizes = [self.input_dim] + layers.copy()
        self.l_constant = config.model.l_constant
        self.num_layers = len(self.layer_sizes)

        # Activation function and (optional) grouping sizes.
        self.act_func = select_activation_function(activation)

        if "groupings" in self.config.model:
            # NOTE: this is the config's own list, so the insert below mutates the config in place.
            self.groupings = self.config.model.groupings
            self.groupings.insert(0, -1)  # For easier bookkeeping later on.

        # Linear layer type.
        self.linear_type = linear_type
        self.use_bias = bias
        self.linear = select_linear_layer(self.linear_type)

        # Sequence of linear + activation function layers.
        per_layer_constant = self.l_constant ** (1.0 / (self.num_layers - 1))
        modules = self._get_sequential_layers(activation=activation, l_constant_per_layer=per_layer_constant,
                                              config=config, dropout=dropout)
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        # Flatten everything but the batch dimension to `input_dim` features.
        return self.model(x.view(-1, self.input_dim))

    def _get_sequential_layers(self, activation, l_constant_per_layer, config, dropout=False):
        """Return the ordered module list; with ``dropout`` a Dropout precedes every linear layer."""

        def make_linear(in_features, out_features):
            return self.linear(in_features, out_features, bias=self.use_bias, config=config)

        def make_scale():
            # Layerwise output scaling controls the Lipschitz constant of the whole network.
            return Scale(l_constant_per_layer, cuda=self.config.cuda)

        grouped_activations = ("maxout", "maxmin", "group_sort", "norm_twist")

        layers = []
        if dropout:
            layers.append(nn.Dropout(0.2))
        layers.append(make_linear(self.layer_sizes[0], self.layer_sizes[1]))
        layers.append(make_scale())

        for i in range(1, len(self.layer_sizes) - 1):
            # Feature reduction caused by the activation that precedes this linear layer.
            if activation == "maxout":
                downsampling_factor = 1.0 / self.groupings[i]
            elif activation in ("maxmin", "norm_twist"):
                downsampling_factor = 2.0 / self.groupings[i]
            else:
                downsampling_factor = 1.0

            if activation in grouped_activations:
                layers.append(self.act_func(self.layer_sizes[i] // self.groupings[i]))
            else:
                layers.append(self.act_func())

            if dropout:
                layers.append(nn.Dropout(0.5))

            layers.append(make_linear(int(downsampling_factor * self.layer_sizes[i]), self.layer_sizes[i + 1]))
            layers.append(make_scale())

        return layers

    def project_network_weights(self, proj_config):
        """Call ``project_weights(proj_config)`` on every layer that provides it."""
        for layer in self.model:
            if hasattr(layer, 'project_weights'):
                layer.project_weights(proj_config)

    def get_activations(self, x):
        """Return detached copies of the outputs of every module that is not a linear, scale or dropout layer."""
        skipped = (DenseLinear, Scale, nn.Dropout)
        activations = []
        x = x.view(-1, self.input_dim)
        for module in self.model:
            x = module(x)
            if not isinstance(module, skipped):
                activations.append(x.detach().clone())
        return activations
# --- lnets/models/model_types/classification_model.py ---
class ClassificationModel(ExperimentModel):
    """Experiment model trained with plain cross-entropy; tracks accuracy besides the loss."""

    def _init_meters(self):
        super(ClassificationModel, self)._init_meters()
        self.meters['acc'] = tnt.meter.ClassErrorMeter(accuracy=True)

    def loss(self, sample, test=False):
        """Return ``(cross-entropy loss, {'logits': ...})`` for ``sample = (inputs, targets)``.

        With ``test=True`` the forward pass runs without autograd tracking.
        The former ``Variable(..., volatile=test)`` has had no effect since
        PyTorch 0.4, so evaluation used to build a full autograd graph.
        """
        inputs, targets = sample[0], sample[1]
        # Never re-enables gradients if the caller already disabled them.
        with torch.set_grad_enabled(torch.is_grad_enabled() and not test):
            logits = torch.squeeze(self.model.forward(inputs))
            loss = F.cross_entropy(logits, targets)

        return loss, {'logits': logits}

    def add_to_meters(self, state):
        self.meters['loss'].add(state['loss'].item())
        self.meters['acc'].add(state['output']['logits'].data, state['sample'][1])


class MarginClassificationModel(ExperimentModel):
    """Cross-entropy classifier that subtracts a margin from the true-class logit before the loss."""

    def __init__(self, model, config):
        super(MarginClassificationModel, self).__init__(model)
        # The margin is scaled by the network's Lipschitz constant.
        self.margin = config.model.margin * config.model.l_constant

    def _init_meters(self):
        super(MarginClassificationModel, self)._init_meters()
        self.meters['acc'] = tnt.meter.ClassErrorMeter(accuracy=True)

    def loss(self, sample, test=False):
        """Return ``(margin cross-entropy loss, {'logits': ...})``.

        The reported logits are a detached copy taken BEFORE the margin is
        applied. With ``test=True`` the forward pass runs without autograd
        tracking (``volatile`` has been a no-op since PyTorch 0.4).
        """
        inputs, targets = sample[0], sample[1]
        with torch.set_grad_enabled(torch.is_grad_enabled() and not test):
            o = torch.squeeze(self.model.forward(inputs))
            logits = o.detach().clone()

            # Add margin buffer to all entries except true class in each row.
            # Equivalently, subtract the margin from the correct class.
            o[torch.arange(o.size(0)), targets] -= self.margin
            loss = F.cross_entropy(o, targets)

        return loss, {'logits': logits}

    def add_to_meters(self, state):
        self.meters['loss'].add(state['loss'].item())
        self.meters['acc'].add(state['output']['logits'].data, state['sample'][1])
class HingeLossClassificationModel(ExperimentModel):
    """Classifier trained with the multi-class margin (hinge) loss."""

    def __init__(self, model, config):
        super(HingeLossClassificationModel, self).__init__(model)
        # The margin is scaled by the network's Lipschitz constant.
        self.margin = config.model.margin * config.model.l_constant

    def _init_meters(self):
        super(HingeLossClassificationModel, self)._init_meters()
        self.meters['acc'] = tnt.meter.ClassErrorMeter(accuracy=True)

    def loss(self, sample, test=False):
        """Return ``(multi-margin loss, {'logits': ...})`` for ``sample = (inputs, targets)``.

        With ``test=True`` the forward pass runs without autograd tracking.
        The former ``Variable(..., volatile=test)`` has had no effect since
        PyTorch 0.4.
        """
        inputs, targets = sample[0], sample[1]
        # Never re-enables gradients if the caller already disabled them.
        with torch.set_grad_enabled(torch.is_grad_enabled() and not test):
            logits = torch.squeeze(self.model.forward(inputs))
            loss = F.multi_margin_loss(logits, targets, margin=self.margin)

        return loss, {'logits': logits}

    def add_to_meters(self, state):
        self.meters['loss'].add(state['loss'].item())
        self.meters['acc'].add(state['output']['logits'].data, state['sample'][1])


class JacSpecClassificationModel(ExperimentModel):
    """Cross-entropy classifier regularised by an estimate of the input-output Jacobian's spectral norm."""

    def __init__(self, model, reg_scale, cuda=True):
        super(JacSpecClassificationModel, self).__init__(model)
        self.reg_scale = reg_scale
        # Vector handed to (and replaced by the result of) jac_spectral_norm on every call.
        # NOTE(review): the length 10 is hard-coded; presumably the number of classes -- confirm.
        self.u = torch.randn(10)
        if cuda:
            self.u = self.u.cuda()

    def _init_meters(self):
        super(JacSpecClassificationModel, self)._init_meters()
        self.meters['acc'] = tnt.meter.ClassErrorMeter(accuracy=True)
        self.meters['sn'] = tnt.meter.AverageValueMeter()

    def loss(self, sample, test=False):
        """Return ``(cross-entropy + reg_scale * spectral norm, {'logits': ..., 'sn': ...})``.

        ``test`` is accepted for interface compatibility but not used: the
        inputs always require gradients because the regulariser needs them.
        """
        inputs = Variable(sample[0], requires_grad=True)
        targets = Variable(sample[1])
        o = self.model.forward(inputs)
        xe = F.cross_entropy(o, targets)
        spec_norm, u = jac_spectral_norm(o, inputs, self.u)
        self.u = u

        return xe + self.reg_scale * spec_norm, {'logits': o, 'sn': spec_norm}

    def add_to_meters(self, state):
        self.meters['loss'].add(state['loss'].item())
        self.meters['acc'].add(state['output']['logits'].data, state['sample'][1])
        self.meters['sn'].add(state['output']['sn'].item())
# --- lnets/utils/saving_and_loading.py ---
def _ensure_dir(directory):
    """Create ``directory`` (and parents) if it is non-empty and does not exist yet."""
    # An empty string means "current directory"; os.makedirs('') would raise.
    if directory:
        os.makedirs(directory, exist_ok=True)


def save_imgs(tensor, fname, save_dir):
    """Save ``tensor`` as the image file ``fname`` inside ``save_dir``, creating the directory if needed."""
    _ensure_dir(save_dir)
    save_image(tensor, os.path.join(save_dir, fname))


def save_model(model, save_path):
    """Write ``model.state_dict()`` to ``save_path``, creating parent directories if needed.

    A bare file name (no directory part) is saved into the current directory.
    """
    _ensure_dir(os.path.dirname(save_path))
    torch.save(model.state_dict(), save_path)


def save_optimizer(optimizer, save_path):
    """Write ``optimizer.state_dict()`` to ``save_path``, creating parent directories if needed.

    A bare file name (no directory part) is saved into the current directory.
    """
    _ensure_dir(os.path.dirname(save_path))
    torch.save(optimizer.state_dict(), save_path)
def save_best_model_and_optimizer(state, best_value, best_path, config):
    """
    Save model that performs the best on the validation set.

    The meter named by ``config['optim']['criterion']['tag']`` is compared
    with ``best_value``; 'minmax' == 'min' means lower is better, anything
    else means higher is better. On improvement the model and optimizer are
    written to ``best_path``.

    Returns ``(best_value, new_best)`` with the possibly updated best value
    and a flag telling whether a new best was saved.
    """
    criterion = config['optim']['criterion']
    meters = state['model'].meters

    new_best = False
    if criterion['tag'] in meters:
        candidate = meters[criterion['tag']].value()[0]
        if criterion['minmax'] == 'min':
            new_best = candidate < best_value
        else:
            new_best = candidate > best_value
        if new_best:
            best_value = candidate

    if new_best:
        best_model_path = os.path.join(best_path, "best_model.pt")
        best_optimizer_path = os.path.join(best_path, "best_optimizer.pt")
        print('Saving new best model at {}. '.format(best_path))
        save_model(state['model'], best_model_path)
        save_optimizer(state['optimizer'], best_optimizer_path)

    return best_value, new_best


def save_current_model_and_optimizer(model, optimizer, model_dir, epoch):
    """Save model and optimizer state for ``epoch`` as ``model_<epoch>.pt`` / ``optimizer_<epoch>.pt``."""
    checkpoints = (
        (save_model, model, "model_{}.pt"),
        (save_optimizer, optimizer, "optimizer_{}.pt"),
    )
    for save_fn, target, name_template in checkpoints:
        save_fn(target, os.path.join(model_dir, name_template.format(epoch)))
def load_model(model, load_path):
    """Reset the model's meters, then load its state dict from ``load_path``."""
    model.reset_meters()
    print("Reading model from: {}".format(load_path))
    state = torch.load(load_path)
    model.load_state_dict(state)


def load_optimizer(optimizer, load_path):
    """Load the optimizer's state dict from ``load_path``."""
    print("Reading optimizer from: {}".format(load_path))
    state = torch.load(load_path)
    optimizer.load_state_dict(state)


def load_best_model_and_optimizer(model, optimizer, best_path):
    """Restore model and optimizer from ``best_model.pt`` / ``best_optimizer.pt`` inside ``best_path``."""
    load_model(model, os.path.join(best_path, "best_model.pt"))
    load_optimizer(optimizer, os.path.join(best_path, "best_optimizer.pt"))


def save_1_or_2_dim_dualnet_visualizations(model, figures_dir, config, epoch=None, loss=None,
                                           after_training=False):
    """Dispatch to the 1-D or 2-D dual-net visualisation according to ``config.distrib1.dim``.

    Nothing happens for any other dimensionality. During training ``epoch``
    and ``loss`` are forwarded; after training only ``after_training=True``
    is passed on.
    """
    dim = config.distrib1.dim
    if dim == 2:
        visualize = save_2d_dualnet_visualizations
    elif dim == 1:
        visualize = save_1d_dualnet_visualizations
    else:
        return

    if after_training:
        visualize(model, figures_dir, config, after_training=True)
    else:
        visualize(model, figures_dir, config, epoch, loss)
121 | if 'groupings' in model_config.model and model_config.model.groupings[0] is -1: 122 | model_config.model.groupings = model_config.model.groupings[1:] 123 | 124 | model = get_model(model_config) 125 | model.load_state_dict(torch.load(model_path)) 126 | 127 | return model, model_config 128 | 129 | 130 | -------------------------------------------------------------------------------- /lnets/tasks/gan/gan_utils.py: -------------------------------------------------------------------------------- 1 | import os, gzip, torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import scipy.misc 5 | import imageio 6 | import matplotlib.pyplot as plt 7 | from torchvision import datasets 8 | 9 | 10 | def load_mnist(dataset): 11 | data_dir = os.path.join("./data", dataset) 12 | 13 | def extract_data(filename, num_data, head_size, data_size): 14 | with gzip.open(filename) as bytestream: 15 | bytestream.read(head_size) 16 | buf = bytestream.read(data_size * num_data) 17 | data = np.frombuffer(buf, dtype=np.uint8).astype(np.float) 18 | return data 19 | 20 | data = extract_data(data_dir + '/train-images-idx3-ubyte.gz', 60000, 16, 28 * 28) 21 | trX = data.reshape((60000, 28, 28, 1)) 22 | 23 | data = extract_data(data_dir + '/train-labels-idx1-ubyte.gz', 60000, 8, 1) 24 | trY = data.reshape((60000)) 25 | 26 | data = extract_data(data_dir + '/t10k-images-idx3-ubyte.gz', 10000, 16, 28 * 28) 27 | teX = data.reshape((10000, 28, 28, 1)) 28 | 29 | data = extract_data(data_dir + '/t10k-labels-idx1-ubyte.gz', 10000, 8, 1) 30 | teY = data.reshape((10000)) 31 | 32 | trY = np.asarray(trY).astype(np.int) 33 | teY = np.asarray(teY) 34 | 35 | X = np.concatenate((trX, teX), axis=0) 36 | y = np.concatenate((trY, teY), axis=0).astype(np.int) 37 | 38 | seed = 547 39 | np.random.seed(seed) 40 | np.random.shuffle(X) 41 | np.random.seed(seed) 42 | np.random.shuffle(y) 43 | 44 | y_vec = np.zeros((len(y), 10), dtype=np.float) 45 | for i, label in enumerate(y): 46 | y_vec[i, y[i]] = 1 47 | 48 | X = 
def save_images(images, size, image_path):
    """Tile ``images`` into a ``size`` grid and write it to ``image_path``.

    Thin alias of :func:`imsave`, kept for callers that use this name.
    """
    return imsave(images, size, image_path)


def _to_uint8(image):
    """Min-max rescale ``image`` to ``uint8`` in the range 0..255.

    uint8 input is returned untouched. A constant image maps to all zeros
    (the span is treated as 1 to avoid dividing by zero).
    """
    if image.dtype == np.uint8:
        return image
    low = image.min()
    span = image.max() - low
    if span == 0:
        span = 1
    scaled = (image - low) * (255.0 / span)
    return (scaled.clip(0, 255) + 0.5).astype(np.uint8)


def imsave(images, size, path):
    """Merge a batch of images into one grid image and save it to ``path``.

    ``scipy.misc.imsave`` no longer exists in current SciPy releases, so the
    grid is written with ``imageio`` instead. The explicit uint8 conversion is
    intended to reproduce the min-max scaling the SciPy helper applied to
    float data (NOTE(review): confirm against the SciPy version last used).
    """
    image = np.squeeze(merge(images, size))
    return imageio.imwrite(path, _to_uint8(image))


def merge(images, size):
    """Lay out a batch of equally sized images on a single canvas.

    Args:
        images: Array indexed as ``(N, H, W, C)`` with ``C`` in ``{1, 3, 4}``.
        size: ``(rows, cols)`` of the grid; image ``k`` is placed at row
            ``k // cols`` and column ``k % cols``.

    Returns:
        A float array of shape ``(rows * H, cols * W, C)`` for 3/4-channel
        input, or ``(rows * H, cols * W)`` for single-channel input.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4.
    """
    h, w = images.shape[1], images.shape[2]
    channels = images.shape[3]

    if channels in (3, 4):
        canvas = np.zeros((h * size[0], w * size[1], channels))
    elif channels == 1:
        canvas = np.zeros((h * size[0], w * size[1]))
    else:
        raise ValueError('in merge(images,size) images parameter '
                         'must have dimensions: HxW or HxWx3 or HxWx4')

    for idx, image in enumerate(images):
        row, col = divmod(idx, size[1])
        # Single-channel images are stored without the trailing channel axis.
        tile = image if canvas.ndim == 3 else image[:, :, 0]
        canvas[row * h:(row + 1) * h, col * w:(col + 1) * w] = tile
    return canvas
def make_grid(tensor, nrow=8, padding=2,
              normalize=False, range=None, scale_each=False, colormap=None, pad_value=0):
    """Make a grid of images.

    This is a variant of the torchvision helper of the same name: the first
    ``nrow`` images are left untouched, while every image from index ``nrow``
    onwards is treated as a signed map when ``normalize`` is set.

    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size.
        nrow (int, optional): Number of images displayed in each row of the grid.
            The final grid size is (B / nrow, nrow). Default is 8.
        padding (int, optional): Amount of padding. Default is 2.
        normalize (bool, optional): If True, the images from index ``nrow``
            onwards are rescaled: negative values are divided by the largest
            negative magnitude, positive values by the largest positive value,
            and the result is shifted to (0, 1) so that zero maps to 0.5.
        range (tuple, optional): Only validated to be a tuple when given; the
            normalization above does not use it.
        scale_each (bool, optional): If True, scale each image separately
            instead of using one scale for all images.
        colormap (str, optional): Name of a matplotlib colormap applied to the
            first channel of the images from index ``nrow`` onwards.
        pad_value (float, optional): Value for the padded pixels.

    Returns:
        A 3 x H' x W' tensor holding the grid, or the squeezed input when the
        batch contains a single image.

    Raises:
        TypeError: If ``tensor`` is neither a tensor nor a list of tensors.
    """
    if not (torch.is_tensor(tensor) or
            (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
        raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = torch.stack(tensor, dim=0)

    if tensor.dim() == 2:  # single image H x W
        tensor = tensor.view(1, tensor.size(0), tensor.size(1))
    if tensor.dim() == 3:  # single image
        if tensor.size(0) == 1:  # if single-channel, convert to 3-channel
            tensor = torch.cat((tensor, tensor, tensor), 0)
        tensor = tensor.view(1, tensor.size(0), tensor.size(1), tensor.size(2))

    if tensor.dim() == 4 and tensor.size(1) == 1:  # single-channel images
        tensor = torch.cat((tensor, tensor, tensor), 1)

    if normalize is True:
        tensor = tensor.clone()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(range, tuple), \
                "range has to be a tuple (min, max) if specified. min and max are numbers"

        def custom_norm_range(t):
            # Scale each sign separately so that zero stays at the midpoint.
            # An all-zero or single-signed map (or an empty slice) has no
            # values on one side; skip that side instead of reducing over an
            # empty selection, which raises.
            negative = t < 0
            positive = t > 0
            if negative.any():
                neg_min = float(t[negative].min().abs()) + 1e-6
                t[negative] = t[negative] / neg_min
            if positive.any():
                pos_max = float(t[positive].max()) + 1e-6
                t[positive] = t[positive] / pos_max
            t.add_(1).div_(2.0)

        if scale_each is True:
            for t in tensor[nrow:]:  # loop over mini-batch dimension
                custom_norm_range(t)
        else:
            custom_norm_range(tensor[nrow:])
    if colormap is not None:
        tensor = tensor.detach().cpu()
        # Assuming MNIST structure here
        cmap = plt.get_cmap(colormap)
        tensor[nrow:] = torch.Tensor(cmap(tensor[nrow:, 0, :, :])).squeeze().permute(0, 3, 1, 2)[:, :3, :, :]

    if tensor.size(0) == 1:
        return tensor.squeeze()

    # make the mini-batch of images into a grid
    nmaps = tensor.size(0)
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
    grid = tensor.new(3, height * ymaps + padding, width * xmaps + padding).fill_(pad_value)
    # Images fill the grid row by row; image k lands in cell (k // xmaps, k % xmaps).
    for k, image in enumerate(tensor):
        y, x = divmod(k, xmaps)
        grid.narrow(1, y * height + padding, height - padding) \
            .narrow(2, x * width + padding, width - padding) \
            .copy_(image)
    return grid
def get_adv_gradient(model, x, adv_targets):
    """Return the gradient of the cross-entropy loss with respect to ``x``.

    Args:
        model: Module mapping ``x`` to class logits.
        x: Input tensor; it is switched to ``requires_grad`` in place, so it
            must be a leaf tensor.
        adv_targets: Class indices the loss is computed against.

    Returns:
        A detached copy of ``x.grad`` with the same shape as ``x``. Later calls
        that reuse ``x`` do not modify a previously returned gradient.
    """
    # The following is needed to backprop on the inputs.
    x.requires_grad = True

    # Clear the gradient buffers. Parameters that have never been through a
    # backward pass (e.g. a freshly loaded model) have no buffer yet.
    if x.grad is not None:
        x.grad.zero_()
    for p in model.parameters():
        if p.grad is not None:
            p.grad.zero_()

    # Take the derivative of the loss wrt. the inputs.
    out = model(x)
    loss = F.cross_entropy(out, adv_targets)
    loss.backward()

    # Copy so the result does not alias the buffer zeroed on the next call.
    return x.grad.detach().clone()
93 | for j in range(pretrained_config.data.class_count): 94 | y = j * torch.ones(x.size(0)).type(torch.long) 95 | y = to_cuda(y, cuda=config.cuda) 96 | 97 | # Compute and save the adversarial gradients. 98 | x_grad = get_adv_gradient(model, x, y) 99 | save_image(x_grad, os.path.join(output_root, 'x_{}_grad_{}.png'.format(i, j)), normalize=True, 100 | scale_each=True) 101 | break 102 | 103 | # Produce joint image. 104 | nrow = config.visualization.num_rows 105 | x_sub = to_cuda(torch.zeros(nrow, *x.size()[1:]).copy_(x[:nrow]).detach(), config.cuda) 106 | print("Size of visualization: ", x_sub.size(), "Maximum pixel value: ", x_sub.max()) 107 | tensors = [] 108 | c = 0 109 | for i, (x, y) in enumerate(data['test']): 110 | for (k, t) in enumerate(y): 111 | if t == c: 112 | c += 1 113 | tensors.append(x[k]) 114 | if len(tensors) == pretrained_config.data.class_count: 115 | break 116 | if len(tensors) == pretrained_config.data.class_count: 117 | break 118 | 119 | # Collect tensors from each class 120 | x_sub = to_cuda(torch.stack(tensors, 0), cuda=config.cuda) 121 | 122 | tensors = [x_sub] 123 | for j in range(pretrained_config.data.class_count): 124 | y = j * torch.ones(x_sub.size(0)).type(torch.long) 125 | y = to_cuda(y, cuda=config.cuda) 126 | 127 | # Compute and visualize the adversarial gradients. 128 | model.zero_grad() 129 | x_grad = get_adv_gradient(model, x_sub, y).clone().detach() 130 | tensors.append(x_grad) 131 | 132 | # Concatenate and visualize. 
def get_optimizer(config, params, momentum=None, betas=None):
    """Build the optimizer named in ``config.optim.optimizer``.

    Args:
        config: Experiment config; reads ``optim.optimizer`` (case-insensitive),
            ``optim.lr_schedule.lr_init``, ``optim.wdecay`` and, when the
            matching argument is omitted, ``optim.momentum`` / ``optim.betas``.
        params: Parameters to optimize.
        momentum: Overrides ``config.optim.momentum`` when given.
        betas: Overrides ``config.optim.betas`` when given (used by AggMo).

    Returns:
        The constructed optimizer.

    Raises:
        ValueError: If the optimizer name is not one of ``sgd``, ``nesterov``,
            ``aggmo`` or ``adam``.
    """
    optim_cfg = config.optim
    kind = optim_cfg.optimizer.lower()
    initial_lr = optim_cfg.lr_schedule.lr_init

    # Fall back to the configured values for anything the caller left unset.
    momentum = optim_cfg.momentum if momentum is None else momentum
    betas = optim_cfg.betas if betas is None else betas

    if kind in ('sgd', 'nesterov'):
        return torch.optim.SGD(params, initial_lr, momentum=momentum,
                               nesterov=(kind == 'nesterov'),
                               weight_decay=config.optim.wdecay)
    if kind == 'aggmo':
        return AggMo(params, initial_lr, momentum=betas, weight_decay=config.optim.wdecay)
    if kind == 'adam':
        # The momentum value doubles as Adam's first beta.
        return torch.optim.Adam(params, initial_lr, betas=(momentum, 0.999),
                                weight_decay=config.optim.wdecay)

    raise ValueError("The requested optimizer type is not supported. ")
") 89 | try: 90 | data_name = config.distrib1.name + '_and_' + config.distrib2.name 91 | except: 92 | print("No distribution seem to be used for the training. ") 93 | 94 | optim_name = get_optimizer_repr(config) 95 | 96 | model_name = get_model_repr(config) 97 | 98 | exp_name = "{}_{}_{}_{}_{}_{}".format(task_name, base_exp_name, data_name, optim_name, model_name, 99 | now.strftime("%Y_%m_%d_%H_%M_%S_%f")) 100 | return exp_name 101 | 102 | 103 | def get_training_dirs(config): 104 | exp_dir = os.path.join(config.output_root, get_experiment_name(config)) 105 | log_dir = os.path.join(exp_dir, 'logs') 106 | model_dir = os.path.join(exp_dir, 'checkpoints') 107 | figures_dir = os.path.join(exp_dir, 'figures') 108 | best_path = os.path.join(model_dir, "best") 109 | 110 | print("Experiment dir: {}".format(exp_dir)) 111 | print("Log dir: {}".format(log_dir)) 112 | print("Model dir: {}".format(model_dir)) 113 | print("Figures dis: {}".format(figures_dir)) 114 | print("Best model dir: {}".format(best_path)) 115 | 116 | dirs = dict(exp_dir=exp_dir, log_dir=log_dir, model_dir=model_dir, figures_dir=figures_dir, best_path=best_path) 117 | 118 | for dir_key in dirs: 119 | if not os.path.exists(dirs[dir_key]): 120 | os.makedirs(dirs[dir_key]) 121 | 122 | return Munch.fromDict(dirs) 123 | -------------------------------------------------------------------------------- /lnets/tasks/classification/mains/train_classifier.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from tqdm import tqdm 3 | 4 | from lnets.utils.config import process_config 5 | from lnets.data.load_data import load_data 6 | from lnets.trainers.trainer import Trainer 7 | from lnets.utils.logging import Logger 8 | from lnets.utils.training_getters import get_optimizer, get_scheduler 9 | from lnets.utils.saving_and_loading import * 10 | from lnets.utils.seeding import set_experiment_seed 11 | from lnets.utils.misc import * 12 | from 
def train(model, loaders, config):
    """Train a classifier, checkpointing along the way, then test the best model.

    Args:
        model: Model wrapper to train; it exposes ``meters``, ``reset_meters``,
            ``add_to_meters`` and an inner ``model`` attribute.
        loaders: Mapping with a ``'train'`` loader, a ``'validation'`` loader
            (used when ``config.data.validation`` is set) and a ``'test'``
            entry that may be ``None``.
        config: Experiment configuration.

    Returns:
        The trained model, with the best checkpoint loaded if one was written.
    """
    # Set the seed.
    set_experiment_seed(config.seed)

    # Get relevant paths.
    dirs = get_training_dirs(config)

    # Get optimizer and learning rate scheduler.
    optimizer = get_optimizer(config, model.parameters())
    scheduler = get_scheduler(config, optimizer)

    # Load pretrained model and the state of the optimizer when it was saved.
    if config.model.pretrained_best_path:
        load_best_model_and_optimizer(model, optimizer, config.model.pretrained_best_path)

    # Push model to GPU if available.
    if config.cuda:
        print('Using cuda: {}'.format("Yes"))
        model.cuda()

    # Get logger, and log the config.
    logger = Logger(dirs.log_dir)
    logger.log_config(config)

    # Instantiate the trainer.
    trainer = Trainer()

    # Initialize "best performance" statistic, to be used when saving best model.
    best_val = initialize_best_val(config.optim.criterion.minmax)

    # Define hooks. When each hook fires is decided by Trainer, which is not
    # visible here; the names below are the keys it is registered under.
    def on_sample(state):
        # Move every element of the sample to the GPU.
        if config.cuda:
            state['sample'] = [x.cuda() for x in state['sample']]

    def on_forward(state):
        state['model'].add_to_meters(state)

        # Clip gradients.
        torch.nn.utils.clip_grad_norm_(state['model'].parameters(), config.optim.max_grad_norm)

    def on_update(state):
        # Optional projection of the network weights after each update.
        if config.model.per_update_proj.turned_on:
            state['model'].model.project_network_weights(config.model.per_update_proj)

    def on_start(state):
        # Keep the raw loader so on_start_epoch can re-wrap it in a progress bar.
        state['loader'] = state['iterator']
        state['scheduler'] = scheduler

    def on_start_epoch(state):
        state['model'].reset_meters()
        state['iterator'] = tqdm(state['loader'], desc='Epoch {}'.format(state['epoch']))

        # Project the weights on the orthonormal matrix manifold if the layer type is suitable to do so.
        if config.model.per_epoch_proj.turned_on:
            if state['epoch'] % config.model.per_epoch_proj.every_n_epochs == 0 and state['epoch'] != 0:
                state['model'].model.project_network_weights(config.model.per_epoch_proj)
                # Reset optimizer is necessary. Especially useful for stateful optimizers.
                # NOTE(review): only state['optimizer'] is replaced; `scheduler` and the
                # `optimizer` saved in on_end_epoch still refer to the original instance.
                if config.model.per_epoch_proj.reset_optimizer:
                    state['optimizer'] = get_optimizer(config, model.parameters())

    def on_end_epoch(hook_state, state):
        # hook_state is the dict bound via functools.partial below; it carries
        # the best validation value seen so far across epochs.
        scheduler.step()

        print("Training loss: {:.4f}".format(state['model'].meters['loss'].value()[0]))
        print("Training acc: {:.4f}".format(state['model'].meters['acc'].value()[0]))
        logger.log_meters('train', state)

        # Periodic checkpoint of the model and optimizer.
        if state['epoch'] % config.logging.report_freq == 0:
            if config.logging.save_model:
                save_current_model_and_optimizer(model, optimizer, model_dir=dirs.model_dir, epoch=state['epoch'])

        # Do validation at the end of each epoch.
        if config.data.validation:
            # Meters are reset so the values printed below are validation-only.
            state['model'].reset_meters()
            trainer.test(model, loaders['validation'])
            print("Val loss: {:.4f}".format(state['model'].meters['loss'].value()[0]))
            print("Val acc: {:.4f}".format(state['model'].meters['acc'].value()[0]))
            logger.log_meters('val', state)

            # Check if this is the best model.
            if config.logging.save_best:
                hook_state['best_val'], new_best = save_best_model_and_optimizer(state, hook_state['best_val'],
                                                                                 dirs.best_path, config)

    trainer.hooks['on_start'] = on_start
    trainer.hooks['on_sample'] = on_sample
    trainer.hooks['on_forward'] = on_forward
    trainer.hooks['on_update'] = on_update
    trainer.hooks['on_start_epoch'] = on_start_epoch
    # The 'wait' entry is not read anywhere in this function.
    trainer.hooks['on_end_epoch'] = partial(on_end_epoch, {'best_val': best_val, 'wait': 0})

    # Enter the training loop.
    trainer.train(model, loaders['train'], maxepoch=config.optim.epochs, optimizer=optimizer)

    # Pick the best model according to validation score and test it.
    model.reset_meters()
    best_model_path = os.path.join(dirs.best_path, "best_model.pt")
    # If no best checkpoint was written, the final weights are tested instead.
    if os.path.exists(best_model_path):
        model.load_state_dict(torch.load(best_model_path))
    if loaders['test'] is not None:
        print("Testing the best model. ")
        logger.log_meters('test', trainer.test(model, loaders['test']))

    return model
23 | dirs = get_training_dirs(config) 24 | 25 | # Get optimizer and learning rate scheduler. 26 | optimizer = get_optimizer(config, model.parameters()) 27 | scheduler = get_scheduler(config, optimizer) 28 | 29 | # Load pretrained model and the state of the optimizer when it was saved. 30 | if config.model.pretrained_best_path: 31 | load_best_model_and_optimizer(model, optimizer, config.model.pretrained_best_path) 32 | 33 | # Push model to GPU if available. 34 | if config.cuda: 35 | print('Using cuda: {}'.format("Yes")) 36 | model.cuda() 37 | 38 | # Get logger, and log the config. 39 | logger = Logger(dirs.log_dir) 40 | logger.log_config(config) 41 | 42 | # Instantiate the trainer. 43 | trainer = Trainer() 44 | 45 | # Initialize "best performance" statistic, to be used when saving best model. 46 | best_val = initialize_best_val(config.optim.criterion.minmax) 47 | 48 | # Define hooks. 49 | def on_sample(state): 50 | if config.cuda: 51 | state['sample'] = [x.cuda() for x in state['sample']] 52 | # Run PGD here on the data using the model 53 | pgd_x, _, _ = manual_pgd(state['model'], state['sample'][0], state['sample'][1], eps=0.3, iters=40) 54 | state['sample'][0] = pgd_x 55 | 56 | def on_forward(state): 57 | state['model'].add_to_meters(state) 58 | 59 | # Clip gradients. 60 | torch.nn.utils.clip_grad_norm_(state['model'].parameters(), config.optim.max_grad_norm) 61 | 62 | def on_update(state): 63 | if config.model.per_update_proj.turned_on: 64 | state['model'].model.project_network_weights(config.model.per_update_proj) 65 | 66 | def on_start(state): 67 | state['loader'] = state['iterator'] 68 | state['scheduler'] = scheduler 69 | 70 | def on_start_epoch(state): 71 | state['model'].reset_meters() 72 | state['iterator'] = tqdm(state['loader'], desc='Epoch {}'.format(state['epoch'])) 73 | 74 | # Project the weights on the orthonormal matrix manifold if the layer type is suitable to do so. 
75 | if config.model.per_epoch_proj.turned_on: 76 | if state['epoch'] % config.model.per_epoch_proj.every_n_epochs == 0 and state['epoch'] != 0: 77 | state['model'].model.project_network_weights(config.model.per_epoch_proj) 78 | # Reset optimizer is necessary. Especially useful for stateful optimizers. 79 | if config.model.per_epoch_proj.reset_optimizer: 80 | state['optimizer'] = get_optimizer(config, model.parameters()) 81 | 82 | def on_end_epoch(hook_state, state): 83 | scheduler.step() 84 | 85 | print("Training loss: {:.4f}".format(state['model'].meters['loss'].value()[0])) 86 | print("Training acc: {:.4f}".format(state['model'].meters['acc'].value()[0])) 87 | logger.log_meters('train', state) 88 | 89 | if state['epoch'] % config.logging.report_freq == 0: 90 | if config.logging.save_model: 91 | save_current_model_and_optimizer(model, optimizer, model_dir=dirs.model_dir, epoch=state['epoch']) 92 | 93 | # Do validation at the end of each epoch. 94 | if config.data.validation: 95 | state['model'].reset_meters() 96 | trainer.test(model, loaders['validation']) 97 | print("Val loss: {:.4f}".format(state['model'].meters['loss'].value()[0])) 98 | print("Val acc: {:.4f}".format(state['model'].meters['acc'].value()[0])) 99 | logger.log_meters('val', state) 100 | 101 | # Check if this is the best model. 102 | if config.logging.save_best: 103 | hook_state['best_val'], new_best = save_best_model_and_optimizer(state, hook_state['best_val'], 104 | dirs.best_path, config) 105 | 106 | trainer.hooks['on_start'] = on_start 107 | trainer.hooks['on_sample'] = on_sample 108 | trainer.hooks['on_forward'] = on_forward 109 | trainer.hooks['on_update'] = on_update 110 | trainer.hooks['on_start_epoch'] = on_start_epoch 111 | trainer.hooks['on_end_epoch'] = partial(on_end_epoch, {'best_val': best_val, 'wait': 0}) 112 | 113 | # Enter the training loop. 
114 | trainer.train(model, loaders['train'], maxepoch=config.optim.epochs, optimizer=optimizer) 115 | 116 | # Pick the best model according to validation score and test it. 117 | model.reset_meters() 118 | best_model_path = os.path.join(dirs.best_path, "best_model.pt") 119 | if os.path.exists(best_model_path): 120 | model.load_state_dict(torch.load(best_model_path)) 121 | if loaders['test'] is not None: 122 | print("Testing the best model. ") 123 | logger.log_meters('test', trainer.test(model, loaders['test'])) 124 | 125 | return model 126 | 127 | 128 | if __name__ == '__main__': 129 | # Get the config, initialize the model and construct the data loader. 130 | cfg = process_config() 131 | model_initialization = get_model(cfg) 132 | print(model_initialization) 133 | data_loaders = load_data(cfg) 134 | 135 | # Train. 136 | trained_model = train(model_initialization, data_loaders, cfg) 137 | -------------------------------------------------------------------------------- /lnets/tasks/adversarial/mains/eval_adv_robustness.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluate adversarial robustness of a given classifier. 
def loader_accuracy(model, loader):
    """Compute the classification accuracy of ``model`` over ``loader``.

    Inputs are moved to the device of the model's parameters instead of being
    sent to CUDA unconditionally, so the function also works on CPU-only
    hosts. Accuracy is counted per sample, so a smaller final batch is weighted
    by its size rather than counting as much as a full batch.

    Args:
        model: Callable mapping a batch of inputs to per-class scores.
        loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        The fraction of correctly classified samples as a Python float.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    # Models without parameters (or plain callables) give no device hint;
    # in that case the batches are used where they already live.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = None

    correct = 0
    total = 0
    with torch.no_grad():  # evaluation only, no graph needed
        for x, y in loader:
            if device is not None:
                x, y = x.to(device), y.to(device)
            preds = model(x)
            correct += (preds.argmax(1) == y).sum().item()
            total += y.numel()

    if total == 0:
        raise ValueError("loader_accuracy received an empty loader")
    return float(correct) / total
62 | pretrained_config.cuda = config.cuda 63 | pretrained_config.optim.batch_size = config.data.batch_size 64 | data = load_data(pretrained_config) 65 | # print('Test Accuracy:{}'.format(loader_accuracy(model, data['test']))) 66 | 67 | n_examples = config['num_examples'] 68 | n_batches = int(math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size)) 69 | 70 | # Save the results of the computations in the following variable. 71 | adv_ex = torch.Tensor() 72 | adv_targets = torch.LongTensor() 73 | adv_mse = torch.Tensor() 74 | adv_inf = torch.Tensor() 75 | success = torch.Tensor() 76 | 77 | # Set up distance for the adversarial attack. 78 | distance_name = config.get('distance') 79 | distance = getattr(foolbox.distances, distance_name) if distance_name is not None \ 80 | else foolbox.distances.MeanSquaredDistance 81 | 82 | # Perform the attack. 83 | for sample in tqdm(islice(data['validation'], n_batches), total=n_batches): 84 | x = sample[0] 85 | y = sample[1].type(torch.LongTensor) 86 | x = to_cuda(x, cuda=config.cuda) 87 | 88 | adv, adv_t, batch_success, batch_adv_mse, batch_adv_inf = batch_attack(attack, adv_model, criterion, x, 89 | y.cpu().numpy(), 90 | config['attack_kwargs'], distance) 91 | adv_ex = torch.cat([adv_ex, adv], 0) 92 | adv_targets = torch.cat([adv_targets, adv_t], 0) 93 | success = torch.cat([success, batch_success], 0) 94 | adv_mse = torch.cat([adv_mse, batch_adv_mse], 0) 95 | adv_inf = torch.cat([adv_inf, batch_adv_inf], 0) 96 | 97 | # evaluate_adv_grad_norms(model, adv_ex, adv_targets, config.cuda) 98 | # Summarize the results. 
99 | results = { 100 | "success_rate": success.mean().item(), 101 | "defense_rate": 1 - success.mean().item(), 102 | "mean_mse": ((adv_mse * success).sum() / success.sum()).item(), 103 | "mean_inf": ((adv_inf * success).sum() / success.sum()).item(), 104 | "mse_quartiles": list(np.percentile(adv_mse[success == 1.0].numpy(), [0, 25, 50, 75, 100])) 105 | } 106 | 107 | results["median_mse"] = results["mse_quartiles"][2] 108 | 109 | print("success rate: {}".format(results["success_rate"])) 110 | print("defense rate: {}".format(results["defense_rate"])) 111 | print("mean MSE for successful attacks: {}".format(results["mean_mse"])) 112 | print("mean L_inf for successful attacks: {}".format(results["mean_inf"])) 113 | print("MSE quartiles for successful attacks: {}".format(results["mse_quartiles"])) 114 | 115 | with open(os.path.join(config['output_root'], 'results.json'), 'w') as f: 116 | json.dump(results, f, sort_keys=True, indent=4) 117 | 118 | np.save(adv_example_filepath, adv_ex) 119 | np.save(adv_targets_filepath, adv_targets) 120 | 121 | print(accuracy(model, to_cuda(adv_ex, cuda=config.cuda), to_cuda(adv_targets, cuda=config.cuda))) 122 | 123 | 124 | def eval_on_examples(model, output_root, cuda=True): 125 | adv_examples = np.load(os.path.join(output_root, 'examples.npy')) 126 | adv_targets = np.load(os.path.join(output_root, 'targets.npy')) 127 | 128 | print(adv_examples.shape) 129 | adv_ex_t = torch.Tensor(adv_examples) 130 | save_image(adv_ex_t, 'test.png') 131 | 132 | adv_examples = to_cuda(torch.Tensor(adv_examples), cuda) 133 | adv_targets = to_cuda(torch.LongTensor(adv_targets), cuda) 134 | print("Adv Accuracy: {}".format(accuracy(model, adv_examples, adv_targets).item())) 135 | 136 | 137 | def main(config): 138 | # Create the output directory. 
139 | output_root = config.output_root 140 | if not os.path.isdir(output_root): 141 | os.makedirs(output_root) 142 | 143 | # Load pretrained model 144 | pretrained_path = config.pretrained_path 145 | model, pretrained_config = load_model_from_config(pretrained_path) 146 | 147 | # Push model to GPU if available. 148 | if config.cuda: 149 | print('Using cuda: {}'.format("Yes")) 150 | to_cuda(model, cuda=config.cuda) 151 | 152 | model.eval() 153 | 154 | # model.model.project_network_weights(Munch.fromDict({'type': 'l_inf_projected'})) 155 | generate_examples(model, config, pretrained_config, output_root) 156 | 157 | # eval_on_examples(model, output_root, config.cuda) 158 | 159 | 160 | if __name__ == '__main__': 161 | cfg = process_config() 162 | 163 | main(cfg) 164 | -------------------------------------------------------------------------------- /lnets/tasks/dualnets/visualize/visualize_dualnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from itertools import product 3 | import numpy as np 4 | import torch 5 | from torch.autograd import Variable 6 | import matplotlib.pyplot as plt 7 | from matplotlib import cm 8 | from matplotlib.ticker import LinearLocator, FormatStrFormatter 9 | from mpl_toolkits.mplot3d import Axes3D 10 | import matplotlib.ticker as ticker 11 | 12 | X_MAJOR_LOCATOR_1D = 0.2 13 | Y_MAJOR_LOCATOR_1D = 0.1 14 | 15 | ALPHA_2D_PLOT = 0.75 16 | AXIS_RANGE_SLACK = 0.2 17 | NUM_SURFACES = 32 18 | Z_MAJOR_LOCATOR_2D = 10 19 | COLORBAR_ASPECT = 5 20 | COLORBAR_SHRINK = 0.5 21 | 22 | 23 | def visualize_1d_critic(model, xrange, step, cuda=False): 24 | # Create the axis on which the dualnet will be evaluated. 25 | xrange = np.arange(xrange[0], xrange[1], step=step) 26 | inputs = xrange[..., None] 27 | 28 | # Evaluate the critic at those points and reshape on a grid. 
29 | if cuda: 30 | outs = model.forward(Variable(torch.from_numpy(inputs).float()).cuda()) 31 | outs = outs.data.cpu().numpy().flatten() 32 | else: 33 | outs = model.forward(Variable(torch.from_numpy(inputs).float())) 34 | outs = outs.data.numpy().flatten() 35 | 36 | # Plot the critic landscape. 37 | plt.figure() 38 | ax = plt.subplot(111) 39 | ax.plot(xrange, outs, label="approximation") 40 | 41 | ax.set_xlabel("input") 42 | ax.set_ylabel("output") 43 | ax.grid() 44 | 45 | # ax.xaxis.set_major_locator(ticker.MultipleLocator(X_MAJOR_LOCATOR_1D)) 46 | # ax.yaxis.set_major_locator(ticker.MultipleLocator(Y_MAJOR_LOCATOR_1D)) 47 | 48 | return ax 49 | 50 | 51 | def visualize_2d_critic(model, xrange, yrange, step, fig_type, cuda=False): 52 | assert fig_type in ["contour", "contourf", "plot_surface"], "Requested 3d plot type not supported. " 53 | # Form the coordinates at which the critic will be evaluated. 54 | xrange = np.arange(xrange[0], xrange[1], step=step) 55 | yrange = np.arange(yrange[0], yrange[1], step=step) 56 | xv, yv = np.meshgrid(xrange, yrange) 57 | full_coords = np.concatenate((xv[None, :], yv[None, :]), axis=0).reshape(2, -1).T 58 | 59 | # Evaluate the critic at those points and reshape on a grid. 60 | if cuda: 61 | critic_vals = model.forward(Variable(torch.from_numpy(full_coords).float()).cuda()) 62 | landscape = critic_vals.data.cpu().numpy().reshape(xv.shape) 63 | else: 64 | critic_vals = model.forward(Variable(torch.from_numpy(full_coords).float())) 65 | landscape = critic_vals.data.numpy().reshape(xv.shape) 66 | 67 | # Plot the critic landscape. 
68 | fig = plt.figure() 69 | ax = fig.gca(projection='3d') 70 | if fig_type == "plot_surface": 71 | surf = ax.plot_surface(xv, yv, landscape, cmap=cm.coolwarm, linewidth=0, antialiased=False, 72 | alpha=ALPHA_2D_PLOT) 73 | elif fig_type == "contourf": 74 | surf = ax.contourf(xv, yv, landscape, NUM_SURFACES, cmap=cm.coolwarm, linewidth=0, antialiased=False, 75 | alpha=ALPHA_2D_PLOT) 76 | elif fig_type == "contour": 77 | surf = ax.contour(xv, yv, landscape, NUM_SURFACES, cmap=cm.coolwarm, linewidth=0, antialiased=False, 78 | alpha=ALPHA_2D_PLOT) 79 | 80 | ax.set_xlabel("x") 81 | ax.set_ylabel("y") 82 | 83 | # Customize the z axis. 84 | landscape_range = np.max(landscape) - np.min(landscape) 85 | 86 | ax.set_zlim(np.min(landscape) - AXIS_RANGE_SLACK * landscape_range, 87 | np.max(landscape) + AXIS_RANGE_SLACK * landscape_range) 88 | 89 | ax.zaxis.set_major_locator(LinearLocator(Z_MAJOR_LOCATOR_2D)) 90 | ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f')) 91 | 92 | # Add a color bar which maps values to colors. 
93 | fig.colorbar(surf, shrink=COLORBAR_SHRINK, aspect=COLORBAR_ASPECT) 94 | 95 | return ax 96 | 97 | 98 | def save_2d_dualnet_visualizations(model, figures_dir, config, epoch=None, loss=None, after_training=False): 99 | for fig_type in config.visualize_2d.fig_types: 100 | ax = visualize_2d_critic(model, config.visualize_2d.xrange, config.visualize_2d.yrange, 101 | config.visualize_2d.step, fig_type=fig_type, cuda=config.cuda) 102 | for elev, azim in product(config.visualize_2d.elev, config.visualize_2d.azim): 103 | ax.view_init(elev=elev, azim=azim) 104 | 105 | if after_training: 106 | title_text = "Model: {} - Activation: {}".format(config.model.name, config.model.activation) 107 | save_path = os.path.join(figures_dir, "visualize_2d_" + fig_type + 108 | "_elev_{}_azim{}_best_model".format(elev, azim) + ".png") 109 | else: 110 | title_text = "Model: {} - Activation: {}\nEpoch: {} - Loss: {}".format(config.model.name, 111 | config.model.activation, 112 | epoch, 113 | loss) 114 | save_path = os.path.join(figures_dir, "epoch_{:04}_visualize_2d_".format(epoch) + fig_type + 115 | "_elev_{}_azim{}".format(elev, azim) + ".png") 116 | 117 | plt.title(title_text, x=0.5, y=1.0) 118 | plt.tight_layout() 119 | plt.savefig(save_path) 120 | plt.close('all') 121 | 122 | 123 | def save_1d_dualnet_visualizations(model, figures_dir, config, epoch=None, loss=None, after_training=False): 124 | visualize_1d_critic(model, config.visualize_1d.xrange, config.visualize_1d.step, config.cuda) 125 | 126 | if after_training: 127 | title_text = "Model: {} - Activation: {}".format(config.model.name, config.model.activation) 128 | save_path = os.path.join(figures_dir, "visualize_1d_best_model.png") 129 | else: 130 | title_text = "Model: {} - Activation: {}\nEpoch: {} - Loss: {}".format(config.model.name, 131 | config.model.activation, 132 | epoch, 133 | loss) 134 | save_path = os.path.join(figures_dir, "epoch_{:04}_visualize_1d_".format(epoch)) 135 | 136 | plt.title(title_text, x=0.5, y=1.0) 137 
| plt.tight_layout() 138 | plt.legend() 139 | plt.savefig(save_path) 140 | # plt.show(block=True) 141 | plt.close('all') 142 | -------------------------------------------------------------------------------- /lnets/tasks/dualnets/mains/train_dual.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from tqdm import tqdm 3 | 4 | #mpl.use('Agg') 5 | import matplotlib.pyplot as plt 6 | plt.interactive(False) 7 | 8 | from lnets.utils.config import process_config 9 | from lnets.tasks.dualnets.distrib.load_distrib import load_distrib 10 | from lnets.trainers.trainer import Trainer 11 | from lnets.utils.logging import Logger 12 | from lnets.utils.training_getters import get_optimizer, get_scheduler 13 | from lnets.utils.saving_and_loading import * 14 | from lnets.utils.seeding import set_experiment_seed 15 | from lnets.utils.misc import * 16 | from lnets.utils.training_getters import get_training_dirs 17 | from lnets.tasks.dualnets.visualize.visualize_dualnet import * 18 | 19 | 20 | def train_dualnet(model, loaders, config): 21 | # Set the seed. 22 | set_experiment_seed(config.seed) 23 | 24 | # Get relevant paths. 25 | dirs = get_training_dirs(config) 26 | 27 | # Get optimizer and learning rate scheduler. 28 | optimizer = get_optimizer(config, model.parameters()) 29 | scheduler = get_scheduler(config, optimizer) 30 | 31 | # Load pretrained model and the state of the optimizer when it was saved. 32 | if config.model.pretrained_best_path: 33 | load_best_model_and_optimizer(model, optimizer, config.model.pretrained_best_path) 34 | 35 | # Push model to GPU if available. 36 | if config.cuda: 37 | print('Using cuda: {}'.format("Yes")) 38 | model.cuda() 39 | 40 | # Get logger, and log the config. 41 | logger = Logger(dirs.log_dir) 42 | logger.log_config(config) 43 | 44 | # Instantiate the trainer. 45 | trainer = Trainer() 46 | 47 | # Initialize "best performance" statistic, to be used when saving best model. 
48 | best_val = initialize_best_val(config.optim.criterion.minmax) 49 | 50 | # Define hooks. 51 | def on_sample(state): 52 | if config.cuda: 53 | state['sample'] = [x.cuda() for x in state['sample']] 54 | 55 | def on_forward(state): 56 | state['model'].add_to_meters(state) 57 | 58 | # Clip gradients. 59 | torch.nn.utils.clip_grad_norm_(state['model'].parameters(), config.optim.max_grad_norm) 60 | 61 | # Save the most recent loss. 62 | state['recent_losses'].append(state['loss'].item()) 63 | 64 | def on_update(state): 65 | if config.model.per_update_proj.turned_on: 66 | state['model'].model.project_network_weights(config.model.per_update_proj) 67 | 68 | def on_start(state): 69 | state['loader'] = state['iterator'] 70 | state['scheduler'] = scheduler 71 | 72 | # Keep track of the max, mean and min singular values of the second layer weights. 73 | state["max_singular"] = list() 74 | state["mean_singular"] = list() 75 | state["min_singular"] = list() 76 | state["singulars"] = list() 77 | 78 | def on_start_val(state): 79 | # Initialize a list that is to store all of the losses encountered in the recent epoch. 80 | state['recent_losses'] = list() 81 | 82 | def on_start_epoch(state): 83 | state['model'].reset_meters() 84 | state['iterator'] = tqdm(state['loader'], desc='Epoch {}'.format(state['epoch'])) 85 | 86 | # Initialize a list that is to store all of the losses encountered in the recent epoch. 87 | state['recent_losses'] = list() 88 | 89 | # Project the weights on the orthonormal matrix manifold if the layer type is suitable to do so. 90 | if config.model.per_epoch_proj.turned_on: 91 | if state['epoch'] % config.model.per_epoch_proj.every_n_epochs == 0 and state['epoch'] != 0: 92 | state['model'].model.project_network_weights(config.model.per_epoch_proj) 93 | # Reset optimizer is necessary. Especially useful for stateful optimizers. 
94 | if config.model.per_epoch_proj.reset_optimizer: 95 | state['optimizer'] = get_optimizer(config, model.parameters()) 96 | 97 | def on_end_epoch(hook_state, state): 98 | scheduler.step() 99 | 100 | print("\t\t\tTraining loss: {:.4f}".format(state['model'].meters['loss'].value()[0])) 101 | logger.log_meters('train', state) 102 | 103 | if state['epoch'] % config.logging.report_freq == 0: 104 | if config.logging.save_model: 105 | save_current_model_and_optimizer(model, optimizer, model_dir=dirs.model_dir, epoch=state['epoch']) 106 | 107 | # Visualize the learned critic landscape. 108 | if config.visualize: 109 | save_1_or_2_dim_dualnet_visualizations(model, dirs.figures_dir, config, 110 | state['epoch'], state['loss']) 111 | 112 | # Check if this is the best model. 113 | if config.logging.save_best: 114 | hook_state['best_val'], new_best = save_best_model_and_optimizer(state, hook_state['best_val'], 115 | dirs.best_path, config) 116 | 117 | # Validate the model. 118 | if loaders['validation'] is not None: 119 | valid_state = trainer.test(model, loaders['validation']) 120 | logger.log_meters('validation', valid_state) 121 | 122 | def on_end_val(state): 123 | print("Averaged validation loss: {}".format(np.array(state['recent_losses']).mean())) 124 | 125 | trainer.hooks['on_start'] = on_start 126 | trainer.hooks['on_start_val'] = on_start_val 127 | trainer.hooks['on_sample'] = on_sample 128 | trainer.hooks['on_forward'] = on_forward 129 | trainer.hooks['on_update'] = on_update 130 | trainer.hooks['on_start_epoch'] = on_start_epoch 131 | trainer.hooks['on_end_epoch'] = partial(on_end_epoch, {'best_val': best_val, 'wait': 0}) 132 | trainer.hooks['on_end_val'] = on_end_val 133 | 134 | # Enter the training loop. 135 | training_state = trainer.train(model, loaders['train'], maxepoch=config.optim.epochs, optimizer=optimizer) 136 | 137 | # Save the singular value statistics. 
138 | singulars = dict() 139 | singulars['max_singulars'] = training_state['max_singular'] 140 | singulars['mean_singulars'] = training_state['mean_singular'] 141 | singulars['min_singulars'] = training_state['min_singular'] 142 | singulars['singulars'] = training_state['singulars'] 143 | 144 | import pickle 145 | pickle.dump(singulars, open(os.path.join(dirs.log_dir, "singular_vals_dict.pkl"), "wb")) 146 | 147 | # Pick the best model according to validation score and test it. 148 | model.reset_meters() 149 | best_model_path = os.path.join(dirs.best_path, "best_model.pt") 150 | if os.path.exists(dirs.best_path): 151 | model.load_state_dict(torch.load(best_model_path)) 152 | if loaders['test'] is not None: 153 | print("Testing best model. ") 154 | test_state = trainer.test(model, loaders['test']) 155 | logger.log_meters('test', test_state) 156 | else: 157 | raise RuntimeError("The trained models must be tested with a testing distribution. ") 158 | 159 | # Visualize the learned critic landscape. 160 | if config.visualize: 161 | save_1_or_2_dim_dualnet_visualizations(model, dirs.figures_dir, config, 162 | after_training=False) 163 | 164 | return test_state 165 | 166 | 167 | if __name__ == '__main__': 168 | print("test") 169 | # Get the config, initialize the model and construct the distribution loader. 170 | cfg = process_config() 171 | dual_model = get_model(cfg) 172 | print(dual_model) 173 | distrib_loaders = load_distrib(cfg) 174 | 175 | # Train. 176 | final_state = train_dualnet(dual_model, distrib_loaders, cfg) 177 | --------------------------------------------------------------------------------