├── dl_spectral_normalization ├── __init__.py ├── models │ ├── __init__.py │ ├── softmax.py │ ├── elunet.py │ ├── mlp.py │ ├── vgg.py │ ├── resnet.py │ ├── inception.py │ └── alexnet.py ├── sn.py ├── adversarial.py └── dl_utils.py ├── setup.cfg ├── .gitignore ├── requirements.txt ├── notebooks_figures ├── README.md ├── analysis.py └── results_effect_of_SN.ipynb ├── setup.py ├── README.md ├── get_cifar10.py └── train_network_template.ipynb /dl_spectral_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | notebook_figures/.ipynb_checkpoints/ 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | pandas 4 | seaborn 5 | tensorflow 6 | IPython 7 | sklearn 8 | -------------------------------------------------------------------------------- /dl_spectral_normalization/models/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "alexnet", 3 | "inception", 4 | "mlp", 5 | "vgg", 6 | "elunet" 7 | ] 8 | -------------------------------------------------------------------------------- /notebooks_figures/README.md: -------------------------------------------------------------------------------- 1 | # Code for generating the figures in the paper. 2 | 3 | For training the networks, please see `train_network_template.ipynb`. -------------------------------------------------------------------------------- /dl_spectral_normalization/models/softmax.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .. import sn 4 | 5 | def softmax_sn(input_data, num_classes=10, wd=0, beta=1, update_collection=None, reuse=None, training=False): 6 | """Tensorflow implementation of softmax regression (one-layer NN)""" 7 | 8 | fc = sn.linear(input_data, num_classes, scope_name='fc', update_collection=update_collection, 9 | beta=beta, reuse=reuse) 10 | 11 | return fc -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Following http://devarea.com/deploying-a-new-python-package-to-pypi/#.W_IJBJNKjOR 2 | from distutils.core import setup 3 | setup( 4 | name = 'dl_spectral_normalization', 5 | packages = ['dl_spectral_normalization'], # this must be the same as the name above 6 | version = '0.1', 7 | description = 'Library for building neural networks in TensorFlow with spectrally normalized layers', 8 | author = 'Jesse Zhang, Farzan Farnia', 9 | author_email = 'jessez@stanford.edu', 10 | url = 'https://github.com/dev-area/dl_spectral_normalization', 11 | download_url = 'https://github.com/dev-area/dl_spectral_normalization/tarball/0.1', 12 | keywords = ['deep-learning', 'neural-network', 'adversarial-attacks', 'spectral-normalization', 'regularization'], 13 | classifiers = [], 14 | ) 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spectral Normalization for Deep Convolutional Networks 2 | 3 | The code in this repository accompanies the experiments performed in the paper [Generalizable Adversarial Training via Spectral Normalization](https://arxiv.org/abs/1811.07457) by Farnia*, Zhang*, and Tse (*equal contributors), which will be presented as a poster at ICLR 2019. 4 | 5 | The repository contains the following: 6 | - [dl_spectral_normalization](https://github.com/jessemzhang/dl_spectral_normalization/tree/master/dl_spectral_normalization): Python deep learning module with spectral normalization code, code for building and training neural networks using TensorFlow, code for adversarially training networks, and example neural network architectures 7 | - [notebooks_figures](https://github.com/jessemzhang/dl_spectral_normalization/tree/master/notebooks_figures): Contains scripts for generating all figures in the main text of the paper 8 | - [get_cifar10.py](https://github.com/jessemzhang/dl_spectral_normalization/blob/master/get_cifar10.py): Code for downloading and preprocessing datasets as described by [Zhang et al. 2017](https://arxiv.org/pdf/1611.03530.pdf) 9 | - [train_network_template.ipynb](https://github.com/jessemzhang/dl_spectral_normalization/blob/master/train_network_template.ipynb): Example notebook for training a neural network using the [dl_spectral_normalization](https://github.com/jessemzhang/dl_spectral_normalization/tree/master/dl_spectral_normalization) module 10 | 11 | ## Installation 12 | 13 | The dl_spectral_normalization package can be installed via pip: 14 | 15 | ``` 16 | pip install dl_spectral_normalization 17 | ``` 18 | 19 | An example approach for accessing package contents is as follows: 20 | 21 | ```python 22 | # Imports utilities for building and training networks 23 | from dl_spectral_normalization import dl_utils 24 | 25 | # Import one of the provided neural network architectures: AlexNet 26 | from dl_spectral_normalization.models import alexnet 27 | 28 | # Import adversarial training methods 29 | from dl_spectral_normalization import adversarial as ad 30 | ``` 31 | 32 | For a more detailed tutorial, please refer to [train_network_template.ipynb](https://github.com/jessemzhang/dl_spectral_normalization/blob/master/train_network_template.ipynb). For references on visualizing results, we provide several examples in [notebooks_figures](https://github.com/jessemzhang/dl_spectral_normalization/tree/master/notebooks_figures). We were able to run all of our experiments in an [nvidia-docker image](https://github.com/NVIDIA/nvidia-docker) (tensorflow/tensorflow:latest-gpu running TensorFlow version 1.10.1). 33 | -------------------------------------------------------------------------------- /dl_spectral_normalization/models/elunet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .. import sn 4 | 5 | def elunet(input_data, num_classes=10, nb_filters=64, wd=0, beta=1, update_collection=None, reuse=None, training=False): 6 | """Simple network for MNIST dataset 7 | (as described in https://arxiv.org/pdf/1710.10571.pdf) 8 | """ 9 | if wd != 0: 10 | l2_norm = True 11 | else: 12 | l2_norm = False 13 | 14 | conv1 = tf.nn.elu(sn.conv2d(input_data, [8, 8, 1, nb_filters], stride=2, padding="SAME", wd=wd, 15 | scope_name='conv1', spectral_norm=False, xavier=True, reuse=reuse, l2_norm=l2_norm)) 16 | 17 | conv2 = tf.nn.elu(sn.conv2d(conv1, [6, 6, nb_filters, 2*nb_filters], stride=2, padding="VALID", 18 | scope_name='conv2', spectral_norm=False, xavier=True, reuse=reuse, l2_norm=l2_norm)) 19 | 20 | conv3 = tf.nn.elu(sn.conv2d(conv2, [5, 5, 2*nb_filters, 2*nb_filters], stride=1, padding="VALID", 21 | scope_name='conv3', spectral_norm=False, xavier=True, reuse=reuse, l2_norm=l2_norm)) 22 | 23 | reshape = tf.reshape(conv3, [-1, 128]) 24 | 25 | fc = sn.linear(reshape, num_classes, scope_name='fc', spectral_norm=False, xavier=True, reuse=reuse, l2_norm=l2_norm) 26 | 27 | return fc 28 | 29 | 30 | def elunet_sn(input_data, num_classes=10, nb_filters=64, wd=0, beta=1, update_collection=None, reuse=None, training=False): 31 | """Simple network for MNIST dataset with spectral normalization on all layers 32 | (as described in https://arxiv.org/pdf/1710.10571.pdf) 33 | """ 34 | 35 | conv1 = tf.nn.elu(sn.conv2d(input_data, [8, 8, 1, nb_filters], stride=2, padding="SAME", 36 | scope_name='conv1', update_collection=update_collection, 37 | beta=beta, reuse=reuse)) 38 | 39 | conv2 = tf.nn.elu(sn.conv2d(conv1, [6, 6, nb_filters, 2*nb_filters], stride=2, padding="VALID", 40 | scope_name='conv2', update_collection=update_collection, 41 | beta=beta, reuse=reuse)) 42 | 43 | conv3 = tf.nn.elu(sn.conv2d(conv2, [5, 5, 2*nb_filters, 2*nb_filters], stride=1, padding="VALID", 44 | scope_name='conv3', update_collection=update_collection, 45 | beta=beta, reuse=reuse)) 46 | 47 | reshape = tf.reshape(conv3, [-1, 128]) 48 | 49 | fc = sn.linear(reshape, num_classes, scope_name='fc', update_collection=update_collection, 50 | beta=beta, reuse=reuse) 51 | 52 | return fc -------------------------------------------------------------------------------- /dl_spectral_normalization/models/mlp.py: -------------------------------------------------------------------------------- 1 | # https://github.com/rharish101/DLGeneralization/ 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | from .. import sn 6 | 7 | 8 | def mlp1_relu(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 9 | """1-hidden-layer Multilayer Perceptron architecture""" 10 | 11 | hidden = tf.nn.relu(sn.linear(input_data, 512, scope_name='hidden', xavier=True, 12 | spectral_norm=False, reuse=reuse)) 13 | fc = sn.linear(hidden, num_classes, scope_name='fc', xavier=True, 14 | spectral_norm=False, reuse=reuse) 15 | 16 | return fc 17 | 18 | 19 | def mlp1_relu_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 20 | """1-hidden-layer Multilayer Perceptron architecture with spectral normalization on all layers""" 21 | 22 | hidden = tf.nn.relu(sn.linear(input_data, 512, scope_name='hidden', xavier=True, 23 | update_collection=update_collection, beta=beta, reuse=reuse)) 24 | fc = sn.linear(hidden, num_classes, scope_name='fc', xavier=True, 25 | update_collection=update_collection, beta=beta, reuse=reuse) 26 | 27 | return fc 28 | 29 | 30 | def mlp1_elu(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 31 | """1-hidden-layer Multilayer Perceptron architecture""" 32 | 33 | hidden = tf.nn.elu(sn.linear(input_data, 512, scope_name='hidden', xavier=True, 34 | spectral_norm=False, reuse=reuse)) 35 | fc = sn.linear(hidden, num_classes, scope_name='fc', xavier=True, 36 | spectral_norm=False, reuse=reuse) 37 | 38 | return fc 39 | 40 | 41 | def mlp1_elu_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 42 | """1-hidden-layer Multilayer Perceptron architecture with spectral normalization on all layers""" 43 | 44 | hidden = tf.nn.elu(sn.linear(input_data, 512, scope_name='hidden', xavier=True, 45 | update_collection=update_collection, beta=beta, reuse=reuse)) 46 | fc = sn.linear(hidden, num_classes, scope_name='fc', xavier=True, 47 | update_collection=update_collection, beta=beta, reuse=reuse) 48 | 49 | return fc 50 | 51 | 52 | def mlp2_relu(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 53 | """2-hidden-layer Multilayer Perceptron architecture with spectral normalization on all layers""" 54 | 55 | hidden1 = tf.nn.relu(sn.linear(input_data, 512, scope_name='hidden1', xavier=True, 56 | spectral_norm=False, reuse=reuse)) 57 | hidden2 = tf.nn.relu(sn.linear(hidden1, 512, scope_name='hidden2', xavier=True, 58 | spectral_norm=False, reuse=reuse)) 59 | fc = sn.linear(hidden2, num_classes, scope_name='fc', xavier=True, 60 | spectral_norm=False, reuse=reuse) 61 | 62 | return fc 63 | 64 | 65 | def mlp2_relu_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 66 | """2-hidden-layer Multilayer Perceptron architecture with spectral normalization on all layers""" 67 | 68 | hidden1 = tf.nn.relu(sn.linear(input_data, 512, scope_name='hidden1', xavier=True, 69 | update_collection=update_collection, beta=beta, reuse=reuse)) 70 | hidden2 = tf.nn.relu(sn.linear(hidden1, 512, scope_name='hidden2', xavier=True, 71 | update_collection=update_collection, beta=beta, reuse=reuse)) 72 | fc = sn.linear(hidden2, num_classes, scope_name='fc', xavier=True, 73 | update_collection=update_collection, beta=beta, reuse=reuse) 74 | 75 | return fc 76 | 77 | 78 | def mlp2_elu(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 79 | """2-hidden-layer Multilayer Perceptron architecture with spectral normalization on all layers""" 80 | 81 | hidden1 = tf.nn.elu(sn.linear(input_data, 512, scope_name='hidden1', xavier=True, 82 | spectral_norm=False, reuse=reuse)) 83 | hidden2 = tf.nn.elu(sn.linear(hidden1, 512, scope_name='hidden2', xavier=True, 84 | spectral_norm=False, reuse=reuse)) 85 | fc = sn.linear(hidden2, num_classes, scope_name='fc', xavier=True, 86 | spectral_norm=False, reuse=reuse) 87 | 88 | return fc 89 | 90 | 91 | def mlp2_elu_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 92 | """2-hidden-layer Multilayer Perceptron architecture with spectral normalization on all layers""" 93 | 94 | hidden1 = tf.nn.elu(sn.linear(input_data, 512, scope_name='hidden1', xavier=True, 95 | update_collection=update_collection, beta=beta, reuse=reuse)) 96 | hidden2 = tf.nn.elu(sn.linear(hidden1, 512, scope_name='hidden2', xavier=True, 97 | update_collection=update_collection, beta=beta, reuse=reuse)) 98 | fc = sn.linear(hidden2, num_classes, scope_name='fc', xavier=True, 99 | update_collection=update_collection, beta=beta, reuse=reuse) 100 | 101 | return fc 102 | -------------------------------------------------------------------------------- /dl_spectral_normalization/models/vgg.py: -------------------------------------------------------------------------------- 1 | # Basing code off the code here: 2 | # https://github.com/huyng/tensorflow-vgg/blob/master/vgg.py 3 | # 4 | # Using filter sizes from "Exploring Generalization in Deep Learning" 5 | # https://arxiv.org/abs/1706.08947 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from .. import sn 10 | 11 | def vgg(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 12 | """VGG architecture""" 13 | 14 | snconv_kwargs = {'bn':True, 'xavier':True, 'spectral_norm':False, 'reuse':reuse, 'training':training} 15 | 16 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 64], scope_name='conv1', **snconv_kwargs)) 17 | layer2 = tf.nn.relu(sn.conv2d(layer1, [3, 3, 64, 64], scope_name='conv2', **snconv_kwargs)) 18 | layer3 = tf.nn.max_pool(layer2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool3') 19 | # layer3 = tf.nn.dropout(layer3, 0.5, name='dropout3') 20 | 21 | layer4 = tf.nn.relu(sn.conv2d(layer3, [3, 3, 64, 128], scope_name='conv4', **snconv_kwargs)) 22 | layer5 = tf.nn.relu(sn.conv2d(layer4, [3, 3, 128, 128], scope_name='conv5', **snconv_kwargs)) 23 | layer6 = tf.nn.max_pool(layer5, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool6') 24 | # layer6 = tf.nn.dropout(layer6, 0.5, name='dropout6') 25 | 26 | layer7 = tf.nn.relu(sn.conv2d(layer6, [3, 3, 128, 256], scope_name='conv7', **snconv_kwargs)) 27 | layer8 = tf.nn.relu(sn.conv2d(layer7, [3, 3, 256, 256], scope_name='conv8', **snconv_kwargs)) 28 | layer9 = tf.nn.max_pool(layer8, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool9') 29 | # layer9 = tf.nn.dropout(layer9, 0.5, name='dropout9') 30 | 31 | layer10 = tf.nn.relu(sn.conv2d(layer9, [3, 3, 256, 512], scope_name='conv10', **snconv_kwargs)) 32 | layer11 = tf.nn.relu(sn.conv2d(layer10, [3, 3, 512, 512], scope_name='conv11', **snconv_kwargs)) 33 | layer12 = tf.nn.max_pool(layer11, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool12') 34 | # layer12 = tf.nn.dropout(layer12, 0.5, name='dropout12') 35 | 36 | layer13 = tf.nn.pool(layer12, window_shape=[4, 4], pooling_type='AVG', 37 | padding='SAME', strides=[1, 1], name='mean_pool13') 38 | 39 | layer14 = sn.linear(layer13, 512, scope_name='linear14', xavier=True, spectral_norm=False, reuse=reuse) 40 | fc = sn.linear(layer14, num_classes, scope_name='fc', xavier=True, spectral_norm=False, reuse=reuse) 41 | 42 | return fc 43 | 44 | 45 | def vgg_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 46 | """VGG architecture with spectral normalization on all layers""" 47 | 48 | snconv_kwargs = {'bn':True, 'xavier':True, 'spectral_norm':True, 'reuse': reuse, 49 | 'beta':beta, 'update_collection':update_collection, 'training':training} 50 | 51 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 64], scope_name='conv1', **snconv_kwargs)) 52 | layer2 = tf.nn.relu(sn.conv2d(layer1, [3, 3, 64, 64], scope_name='conv2', **snconv_kwargs)) 53 | layer3 = tf.nn.max_pool(layer2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool3') 54 | # layer3 = tf.nn.dropout(layer3, 0.5, name='dropout3') 55 | 56 | layer4 = tf.nn.relu(sn.conv2d(layer3, [3, 3, 64, 128], scope_name='conv4', **snconv_kwargs)) 57 | layer5 = tf.nn.relu(sn.conv2d(layer4, [3, 3, 128, 128], scope_name='conv5', **snconv_kwargs)) 58 | layer6 = tf.nn.max_pool(layer5, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool6') 59 | # layer6 = tf.nn.dropout(layer6, 0.5, name='dropout6') 60 | 61 | layer7 = tf.nn.relu(sn.conv2d(layer6, [3, 3, 128, 256], scope_name='conv7', **snconv_kwargs)) 62 | layer8 = tf.nn.relu(sn.conv2d(layer7, [3, 3, 256, 256], scope_name='conv8', **snconv_kwargs)) 63 | layer9 = tf.nn.max_pool(layer8, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool9') 64 | # layer9 = tf.nn.dropout(layer9, 0.5, name='dropout9') 65 | 66 | layer10 = tf.nn.relu(sn.conv2d(layer9, [3, 3, 256, 512], scope_name='conv10', **snconv_kwargs)) 67 | layer11 = tf.nn.relu(sn.conv2d(layer10, [3, 3, 512, 512], scope_name='conv11', **snconv_kwargs)) 68 | layer12 = tf.nn.max_pool(layer11, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool12') 69 | # layer12 = tf.nn.dropout(layer12, 0.5, name='dropout12') 70 | 71 | layer13 = tf.nn.pool(layer12, window_shape=[4, 4], pooling_type='AVG', 72 | padding='SAME', strides=[1, 1], name='mean_pool13') 73 | 74 | layer14 = sn.linear(layer13, 512, scope_name='linear14', xavier=True, 75 | update_collection=update_collection, beta=beta, reuse=reuse) 76 | fc = sn.linear(layer14, num_classes, scope_name='fc', xavier=True, 77 | update_collection=update_collection, beta=beta, reuse=reuse) 78 | 79 | return fc 80 | 81 | 82 | def vgg_snl2(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 83 | """VGG architecture with spectral normalization on all layers except last one, which 84 | can be L2 regularized 85 | """ 86 | 87 | snconv_kwargs = {'bn':True, 'xavier':True, 'spectral_norm':True, 'reuse':reuse, 88 | 'beta':beta, 'update_collection':update_collection} 89 | 90 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 64], scope_name='conv1', **snconv_kwargs)) 91 | layer2 = tf.nn.relu(sn.conv2d(layer1, [3, 3, 64, 64], scope_name='conv2', **snconv_kwargs)) 92 | layer3 = tf.nn.max_pool(layer2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool3') 93 | # layer3 = tf.nn.dropout(layer3, 0.5, name='dropout3') 94 | 95 | layer4 = tf.nn.relu(sn.conv2d(layer3, [3, 3, 64, 128], scope_name='conv4', **snconv_kwargs)) 96 | layer5 = tf.nn.relu(sn.conv2d(layer4, [3, 3, 128, 128], scope_name='conv5', **snconv_kwargs)) 97 | layer6 = tf.nn.max_pool(layer5, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool6') 98 | # layer6 = tf.nn.dropout(layer6, 0.5, name='dropout6') 99 | 100 | layer7 = tf.nn.relu(sn.conv2d(layer6, [3, 3, 128, 256], scope_name='conv7', **snconv_kwargs)) 101 | layer8 = tf.nn.relu(sn.conv2d(layer7, [3, 3, 256, 256], scope_name='conv8', **snconv_kwargs)) 102 | layer9 = tf.nn.max_pool(layer8, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool9') 103 | # layer9 = tf.nn.dropout(layer9, 0.5, name='dropout9') 104 | 105 | layer10 = tf.nn.relu(sn.conv2d(layer9, [3, 3, 256, 512], scope_name='conv10', **snconv_kwargs)) 106 | layer11 = tf.nn.relu(sn.conv2d(layer10, [3, 3, 512, 512], scope_name='conv11', **snconv_kwargs)) 107 | layer12 = tf.nn.max_pool(layer11, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool12') 108 | # layer12 = tf.nn.dropout(layer12, 0.5, name='dropout12') 109 | 110 | layer13 = tf.nn.pool(layer12, window_shape=[4, 4], pooling_type='AVG', 111 | padding='SAME', strides=[1, 1], name='mean_pool13') 112 | 113 | layer14 = sn.linear(layer13, 512, scope_name='linear14', xavier=True, 114 | update_collection=update_collection, beta=beta, reuse=reuse) 115 | fc = sn.linear(layer14, num_classes, scope_name='fc', spectral_norm=False, 116 | xavier=True, wd=wd, l2_norm=True, reuse=reuse) 117 | 118 | return fc 119 | -------------------------------------------------------------------------------- /dl_spectral_normalization/sn.py: -------------------------------------------------------------------------------- 1 | ### Code adapted from https://github.com/nnUyi/SNGAN, which is similar to https://github.com/minhnhat93/tf-SNDCGAN 2 | ### Note that if update_collection is not None, then need to run the update ops during training: 3 | # for iter in range(max_iters): 4 | # # Training goes here 5 | # ... 6 | # # Update ops at the end 7 | # for update_op in spectral_norm_update_ops: 8 | # sess.run(update_op) 9 | # 10 | # Setting update_collection is important for grabbing weights from the graph!! 11 | 12 | import tensorflow as tf 13 | from operator import mul 14 | 15 | def l2_norm(input_x, epsilon=1e-12): 16 | """normalize input to unit norm""" 17 | input_x_norm = input_x/(tf.reduce_sum(input_x**2)**0.5 + epsilon) 18 | return input_x_norm 19 | 20 | 21 | def conv2d(input_x, kernel_size, scope_name='conv2d', 22 | xavier=True, variance_scaling=False, stride=1, padding='SAME', use_bias=True, 23 | beta=1., spectral_norm=True, tighter_sn=True, 24 | update_collection=None, reuse=None, 25 | l2_norm=False, wd=0, 26 | bn=False, training=False): 27 | """2D convolution layer with spectral normalization option""" 28 | 29 | shape = input_x.get_shape().as_list() 30 | assert shape[1] == shape[2] 31 | u_width = shape[1] 32 | 33 | output_len = kernel_size[3] 34 | with tf.variable_scope(scope_name, reuse=reuse): 35 | if xavier: 36 | weights = tf.get_variable('weights', kernel_size, tf.float32, 37 | initializer=tf.contrib.layers.xavier_initializer()) 38 | elif variance_scaling: 39 | weights = tf.get_variable('weights', kernel_size, tf.float32, 40 | initializer=tf.variance_scaling_initializer()) 41 | else: 42 | weights = tf.get_variable('weights', kernel_size, tf.float32, 43 | initializer=tf.random_normal_initializer(stddev=0.02)) 44 | if spectral_norm: 45 | weights = weights_spectral_norm(weights, update_collection=update_collection, 46 | tighter_sn=tighter_sn, u_width=u_width, beta=beta, 47 | u_depth=kernel_size[-2], stride=stride, padding=padding) 48 | elif l2_norm: 49 | weight_decay = tf.multiply(tf.nn.l2_loss(weights), wd, name='weight_loss') 50 | tf.add_to_collection('losses', weight_decay) 51 | conv = tf.nn.conv2d(input_x, weights, strides=[1, stride, stride, 1], padding=padding) 52 | if use_bias: 53 | bias = tf.get_variable('bias', output_len, tf.float32, initializer=tf.constant_initializer(0)) 54 | conv = tf.nn.bias_add(conv, bias) 55 | if bn: 56 | conv = tf.layers.batch_normalization(conv, training=training) 57 | return conv 58 | 59 | 60 | def linear(input_x, output_size, scope_name='linear', spectral_norm=True, 61 | update_collection=None, l2_norm=False, wd=0, xavier=True, beta=1., reuse=None): 62 | """Fully connected linear layer with spectral normalization and weight decay options""" 63 | 64 | shape = input_x.get_shape().as_list() 65 | 66 | if len(shape) > 2: 67 | flat_x = tf.reshape(input_x, [-1, reduce(mul, shape[1:])]) 68 | else: 69 | flat_x = input_x 70 | 71 | shape = flat_x.get_shape() 72 | input_size = shape[1] 73 | 74 | with tf.variable_scope(scope_name, reuse=reuse): 75 | if xavier: 76 | weights = tf.get_variable('weights', [input_size, output_size], tf.float32, 77 | initializer=tf.contrib.layers.xavier_initializer()) 78 | else: 79 | weights = tf.get_variable('weights', [input_size, output_size], tf.float32, 80 | initializer=tf.random_normal_initializer(stddev=0.02)) 81 | bias = tf.get_variable('bias', output_size, tf.float32, initializer=tf.constant_initializer(0)) 82 | if spectral_norm: 83 | weights = weights_spectral_norm(weights, update_collection=update_collection, beta=beta) 84 | elif l2_norm: 85 | weight_decay = tf.multiply(tf.nn.l2_loss(weights), wd, name='weight_loss') 86 | tf.add_to_collection('losses', weight_decay) 87 | output = tf.matmul(flat_x, weights) + bias 88 | return output 89 | 90 | 91 | def weights_spectral_norm(weights, u=None, Ip=1, update_collection=None, 92 | reuse=False, name='weights_SN', beta=1., 93 | tighter_sn=False, u_width=28, u_depth=3, stride=1, padding='SAME'): 94 | """Perform spectral normalization""" 95 | 96 | def power_iteration(u, w_mat, Ip): 97 | u_ = u 98 | for _ in range(Ip): 99 | v_ = l2_norm(tf.matmul(u_, tf.transpose(w_mat))) 100 | u_ = l2_norm(tf.matmul(v_, w_mat)) 101 | return u_, v_ 102 | 103 | def power_iteration_conv(u, w_mat, Ip): 104 | u_ = u 105 | for _ in range(Ip): 106 | v_ = l2_norm(tf.nn.conv2d(u_, w_mat, strides=[1, stride, stride, 1], padding=padding)) 107 | u_ = l2_norm(tf.nn.conv2d_transpose(v_, w_mat, [1, u_width, u_width, u_depth], 108 | strides=[1, stride, stride, 1], padding=padding)) 109 | return u_, v_ 110 | 111 | with tf.variable_scope(name) as scope: 112 | if reuse: 113 | scope.reuse_variables() 114 | 115 | w_shape = weights.get_shape().as_list() 116 | 117 | # The tighter spectral normalization approach breaks the [f_in, f_out, d_in, d_out] filters 118 | # into a set of f_in*f_out subfilters each of size d_in*d_out. 119 | # ONLY USE THIS FOR conv2d LAYERS. Original sn works better for fully-connected layers 120 | if tighter_sn: 121 | if u is None: 122 | # Initialize u (our "eigenimage") 123 | u = tf.get_variable('u', shape=[1, u_width, u_width, u_depth], 124 | initializer=tf.truncated_normal_initializer(), trainable=False) 125 | 126 | u_hat, v_hat = power_iteration_conv(u, weights, Ip) 127 | z = tf.nn.conv2d(u_hat, weights, strides=[1, stride, stride, 1], padding=padding) 128 | sigma = tf.maximum(tf.reduce_sum(tf.multiply(z, v_hat))/beta, 1) 129 | 130 | if update_collection is None: 131 | with tf.control_dependencies([u.assign(u_hat)]): 132 | w_norm = weights/sigma 133 | else: 134 | tf.add_to_collection(update_collection, u.assign(u_hat)) 135 | w_norm = weights/sigma 136 | 137 | # Use the spectral normalization proposed in SN-GAN paper 138 | else: 139 | if u is None: 140 | u = tf.get_variable('u', shape=[1, w_shape[-1]], 141 | initializer=tf.truncated_normal_initializer(), trainable=False) 142 | 143 | w_mat = tf.reshape(weights, [-1, w_shape[-1]]) 144 | u_hat, v_hat = power_iteration(u, w_mat, Ip) 145 | sigma = tf.maximum(tf.matmul(tf.matmul(v_hat, w_mat), tf.transpose(u_hat))/beta, 1) 146 | 147 | w_mat = w_mat/sigma 148 | 149 | if update_collection is None: 150 | with tf.control_dependencies([u.assign(u_hat)]): 151 | w_norm = tf.reshape(w_mat, w_shape) 152 | else: 153 | tf.add_to_collection(update_collection, u.assign(u_hat)) 154 | w_norm = tf.reshape(w_mat, w_shape) 155 | 156 | tf.add_to_collection('w_after_sn', w_norm) 157 | 158 | return w_norm -------------------------------------------------------------------------------- /dl_spectral_normalization/models/resnet.py: -------------------------------------------------------------------------------- 1 | # Based off the code found here (using the same architecture): 2 | # https://github.com/tensorflow/models/blob/master/official/resnet/resnet_model.py 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from .. import sn 7 | 8 | 9 | def batch_norm(inputs, training): 10 | """Performs a batch normalization using a standard set of parameters.""" 11 | # We set fused=True for a significant performance boost. See 12 | # https://www.tensorflow.org/performance/performance_guide#common_fused_ops 13 | return tf.layers.batch_normalization( 14 | inputs=inputs, axis=3, momentum=0.997, epsilon=1e-5, center=True, 15 | scale=True, training=training, fused=True) 16 | 17 | 18 | def fixed_padding(inputs, kernel_size): 19 | pad_total = kernel_size - 1 20 | pad_beg = pad_total // 2 21 | pad_end = pad_total - pad_beg 22 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 23 | [pad_beg, pad_end], [0, 0]]) 24 | return padded_inputs 25 | 26 | 27 | def conv2d_fixed_padding(inputs, filters, kernel_size, strides, name='conv2d', **kwargs): 28 | """Strided 2-D convolution with explicit padding.""" 29 | # The padding is consistent and is based only on `kernel_size`, not on the 30 | # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). 31 | if strides > 1: 32 | inputs = fixed_padding(inputs, kernel_size) 33 | in_size = inputs.get_shape().as_list()[-1] 34 | return sn.conv2d(inputs, [kernel_size, kernel_size, in_size, filters], stride=strides, 35 | padding=('SAME' if strides == 1 else 'VALID'), use_bias=False, 36 | xavier=False, variance_scaling=True, bn=False, 37 | scope_name=name, **kwargs) 38 | 39 | 40 | def block_fn(inputs, filters, training, projection_shortcut, strides, name, bn=True, **kwargs): 41 | with tf.variable_scope(name, reuse=tf.AUTO_REUSE): 42 | shortcut = inputs 43 | if bn: 44 | inputs = tf.layers.batch_normalization(inputs, training=training) 45 | inputs = tf.nn.relu(inputs) 46 | 47 | if projection_shortcut is not None: 48 | shortcut = projection_shortcut(inputs) 49 | 50 | inputs = conv2d_fixed_padding(inputs, filters, 3, strides, name='conv1', **kwargs) 51 | if bn: 52 | inputs = batch_norm(inputs, training) 53 | inputs = tf.nn.relu(inputs) 54 | inputs = conv2d_fixed_padding(inputs, filters, 3, 1, name='conv2', **kwargs) 55 | 56 | return inputs + shortcut 57 | 58 | 59 | def block_layer(inputs, filters, block_fn, blocks, strides, 60 | training, name, bn=False, **kwargs): 61 | """Creates one layer of blocks for the ResNet model.""" 62 | 63 | with tf.variable_scope(name, reuse=tf.AUTO_REUSE): 64 | 65 | def projection_shortcut(inputs): 66 | return conv2d_fixed_padding(inputs, filters, 1, strides, 67 | name='projection_shortcut', **kwargs) 68 | 69 | # Only the first block per block_layer uses projection_shortcut and strides 70 | inputs = block_fn(inputs, filters, training, projection_shortcut, strides, 'block0', bn=bn, **kwargs) 71 | 72 | for i in range(1, blocks): 73 | inputs = block_fn(inputs, filters, training, None, 1, 'block{}'.format(i), bn=bn, **kwargs) 74 | 75 | return inputs 76 | 77 | 78 | def resnet(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 79 | 80 | snconv_kwargs = dict(beta=beta, spectral_norm=False, 81 | update_collection=update_collection, reuse=reuse) 82 | 83 | num_filters = 16 84 | kernel_size = 3 85 | conv_stride = 1 86 | resnet_size = 32 87 | num_blocks = (resnet_size - 2) // 6 88 | block_sizes = [num_blocks] * 3 89 | block_strides = [1, 2, 2] 90 | 91 | # inputs: A Tensor representing a batch of input images. 92 | inputs = conv2d_fixed_padding(input_data, num_filters, kernel_size, conv_stride, **snconv_kwargs) 93 | inputs = tf.identity(inputs, 'initial_conv') 94 | 95 | for i, num_blocks in enumerate(block_sizes): 96 | num_filters = num_filters * (2**i) 97 | inputs = block_layer(inputs, num_filters, block_fn, num_blocks, block_strides[i], training, 98 | name='block_layer{}'.format(i + 1), bn=False, **snconv_kwargs) 99 | 100 | inputs = tf.nn.relu(inputs) 101 | 102 | axes = [1, 2] 103 | inputs = tf.reduce_mean(inputs, axes, keepdims=True) 104 | inputs = tf.identity(inputs, 'final_reduce_mean') 105 | 106 | tf.add_to_collection('debug', inputs) 107 | 108 | fc = sn.linear(inputs, num_classes, scope_name='fc', xavier=True, 109 | spectral_norm=False, reuse=reuse) 110 | 111 | return fc 112 | 113 | 114 | def resnet_bn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 115 | 116 | snconv_kwargs = dict(beta=beta, spectral_norm=False, 117 | update_collection=update_collection, reuse=reuse) 118 | 119 | num_filters = 16 120 | kernel_size = 3 121 | conv_stride = 1 122 | resnet_size = 32 123 | num_blocks = (resnet_size - 2) // 6 124 | block_sizes = [num_blocks] * 3 125 | block_strides = [1, 2, 2] 126 | 127 | # inputs: A Tensor representing a batch of input images. 128 | inputs = conv2d_fixed_padding(input_data, num_filters, kernel_size, conv_stride, **snconv_kwargs) 129 | inputs = tf.identity(inputs, 'initial_conv') 130 | 131 | for i, num_blocks in enumerate(block_sizes): 132 | num_filters = num_filters * (2**i) 133 | inputs = block_layer(inputs, num_filters, block_fn, num_blocks, block_strides[i], training, 134 | name='block_layer{}'.format(i + 1), bn=True, **snconv_kwargs) 135 | 136 | inputs = batch_norm(inputs, training) 137 | inputs = tf.nn.relu(inputs) 138 | 139 | axes = [1, 2] 140 | inputs = tf.reduce_mean(inputs, axes, keepdims=True) 141 | inputs = tf.identity(inputs, 'final_reduce_mean') 142 | 143 | tf.add_to_collection('debug', inputs) 144 | 145 | fc = sn.linear(inputs, num_classes, scope_name='fc', xavier=True, 146 | spectral_norm=False, reuse=reuse) 147 | 148 | return fc 149 | 150 | 151 | def resnet_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 152 | 153 | snconv_kwargs = dict(beta=beta, spectral_norm=True, 154 | update_collection=update_collection, reuse=reuse) 155 | 156 | num_filters = 16 157 | kernel_size = 3 158 | conv_stride = 1 159 | resnet_size = 32 160 | num_blocks = (resnet_size - 2) // 6 161 | block_sizes = [num_blocks] * 3 162 | block_strides = [1, 2, 2] 163 | 164 | # inputs: A Tensor representing a batch of input images. 165 | inputs = conv2d_fixed_padding(input_data, num_filters, kernel_size, conv_stride, **snconv_kwargs) 166 | inputs = tf.identity(inputs, 'initial_conv') 167 | 168 | for i, num_blocks in enumerate(block_sizes): 169 | num_filters = num_filters * (2**i) 170 | inputs = block_layer(inputs, num_filters, block_fn, num_blocks, block_strides[i], training, 171 | name='block_layer{}'.format(i + 1), bn=False, **snconv_kwargs) 172 | 173 | inputs = tf.nn.relu(inputs) 174 | 175 | axes = [1, 2] 176 | inputs = tf.reduce_mean(inputs, axes, keepdims=True) 177 | inputs = tf.identity(inputs, 'final_reduce_mean') 178 | 179 | tf.add_to_collection('debug', inputs) 180 | 181 | fc = sn.linear(inputs, num_classes, scope_name='fc', xavier=True, 182 | update_collection=update_collection, beta=beta, reuse=reuse) 183 | 184 | return fc 185 | 186 | 187 | def resnet_sn_large(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 188 | 189 | # For imagenet. Architecture based on 190 | # https://github.com/tensorflow/models/blob/master/official/resnet/imagenet_main.py 191 | 192 | snconv_kwargs = dict(beta=beta, spectral_norm=True, 193 | update_collection=update_collection, reuse=reuse) 194 | 195 | num_filters = 64 196 | kernel_size = 7 197 | conv_stride = 2 198 | resnet_size = 50 199 | num_blocks = (resnet_size - 2) // 6 200 | block_sizes = [num_blocks] * 3 201 | block_strides = [1, 2, 2, 2] 202 | 203 | # inputs: A Tensor representing a batch of input images. 204 | inputs = conv2d_fixed_padding(input_data, num_filters, kernel_size, conv_stride, **snconv_kwargs) 205 | inputs = tf.identity(inputs, 'initial_conv') 206 | 207 | for i, num_blocks in enumerate(block_sizes): 208 | num_filters = num_filters * (2**i) 209 | inputs = block_layer(inputs, num_filters, block_fn, num_blocks, block_strides[i], training, 210 | name='block_layer{}'.format(i + 1), bn=False, **snconv_kwargs) 211 | 212 | inputs = tf.nn.relu(inputs) 213 | 214 | axes = [1, 2] 215 | inputs = tf.reduce_mean(inputs, axes, keepdims=True) 216 | inputs = tf.identity(inputs, 'final_reduce_mean') 217 | 218 | tf.add_to_collection('debug', inputs) 219 | 220 | fc = sn.linear(inputs, num_classes, scope_name='fc', xavier=True, 221 | update_collection=update_collection, beta=beta, reuse=reuse) 222 | 223 | return fc -------------------------------------------------------------------------------- /dl_spectral_normalization/models/inception.py: -------------------------------------------------------------------------------- 1 | # Using the filter sizes found here: 2 | # https://github.com/rharish101/DLGeneralization/blob/master/Mini%20Inception/cifar10_tf_inception.py 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from .. import sn 7 | 8 | def incept(input_x, input_filters, ch1_filters, ch3_filters, spectral_norm=True, tighter_sn=True, 9 | scope_name='incept', update_collection=None, beta=1., bn=True, reuse=None, training=False): 10 | """Inception module""" 11 | 12 | with tf.variable_scope(scope_name, reuse=reuse): 13 | ch1_output = tf.nn.relu(sn.conv2d(input_x, [1, 1, input_filters, ch1_filters], 14 | scope_name='conv_ch1', spectral_norm=spectral_norm, 15 | xavier=True, bn=bn, beta=beta, tighter_sn=tighter_sn, 16 | update_collection=update_collection, reuse=reuse, training=training)) 17 | ch3_output = tf.nn.relu(sn.conv2d(input_x, [3, 3, input_filters, ch3_filters], 18 | scope_name='conv_ch3', spectral_norm=spectral_norm, 19 | xavier=True, bn=bn, beta=beta, tighter_sn=tighter_sn, 20 | update_collection=update_collection, reuse=reuse, training=training)) 21 | return tf.concat([ch1_output, ch3_output], axis=-1) 22 | 23 | 24 | def downsample(input_x, input_filters, ch3_filters, spectral_norm=True, tighter_sn=True, 25 | scope_name='downsamp', update_collection=None, beta=1., bn=True, reuse=None, training=False): 26 | """Downsample module""" 27 | 28 | with tf.variable_scope(scope_name, reuse=reuse): 29 | ch3_output = tf.nn.relu(sn.conv2d(input_x, [3, 3, input_filters, ch3_filters], tighter_sn=tighter_sn, 30 | scope_name='conv_ch3', spectral_norm=spectral_norm, 31 | xavier=True, bn=bn, stride=2, beta=beta, reuse=reuse, 32 | update_collection=update_collection, training=training)) 33 | pool_output = tf.nn.max_pool(input_x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 34 | padding='SAME', name='pool') 35 | return tf.concat([ch3_output, pool_output], axis=-1) 36 | 37 | 38 | def inception(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 39 | """Mini-inception architecture (note that we do batch norm in absence of spectral norm)""" 40 | 41 | snconv_kwargs = dict(spectral_norm=False, reuse=reuse, training=training, bn=True) 42 | 43 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 96], scope_name='conv1', **snconv_kwargs)) 44 | layer2 = incept(layer1, 96, 32, 32, scope_name='incept2', **snconv_kwargs) 45 | layer3 = incept(layer2, 32+32, 32, 48, scope_name='incept3', **snconv_kwargs) 46 | layer4 = downsample(layer3, 32+48, 80, scope_name='downsamp4', **snconv_kwargs) 47 | layer5 = incept(layer4, 80+32+48, 112, 48, scope_name='incept5', **snconv_kwargs) 48 | layer6 = incept(layer5, 112+48, 96, 64, scope_name='incept6', **snconv_kwargs) 49 | layer7 = incept(layer6, 96+64, 80, 80, scope_name='incept7', **snconv_kwargs) 50 | layer8 = incept(layer7, 80+80, 48, 96, scope_name='incept8', **snconv_kwargs) 51 | layer9 = downsample(layer8, 48+96, 96, scope_name='downsamp9', **snconv_kwargs) 52 | layer10 = incept(layer9, 96+48+96, 176, 160, scope_name='incept10', **snconv_kwargs) 53 | layer11 = incept(layer10, 176+160, 176, 160, scope_name='incept11', **snconv_kwargs) 54 | layer12 = tf.nn.pool(layer11, window_shape=[7, 7], pooling_type='AVG', 55 | padding='SAME', strides=[1, 1], name='mean_pool12') 56 | 57 | fc = sn.linear(layer12, num_classes, scope_name='fc', spectral_norm=False, xavier=True, reuse=reuse) 58 | 59 | return fc 60 | 61 | 62 | def inception_miyato_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False, bn=True): 63 | """Mini-inception architecture with spectral normalization on all layers""" 64 | 65 | snconv_kwargs = dict(update_collection=update_collection, beta=beta, 66 | reuse=reuse, training=training, bn=False, tighter_sn=False) 67 | 68 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 96], scope_name='conv1', **snconv_kwargs)) 69 | layer2 = incept(layer1, 96, 32, 32, scope_name='incept2', **snconv_kwargs) 70 | layer3 = incept(layer2, 32+32, 32, 48, scope_name='incept3', **snconv_kwargs) 71 | layer4 = downsample(layer3, 32+48, 80, scope_name='downsamp4', **snconv_kwargs) 72 | layer5 = incept(layer4, 80+32+48, 112, 48, scope_name='incept5', **snconv_kwargs) 73 | layer6 = incept(layer5, 112+48, 96, 64, scope_name='incept6', **snconv_kwargs) 74 | layer7 = incept(layer6, 96+64, 80, 80, scope_name='incept7', **snconv_kwargs) 75 | layer8 = incept(layer7, 80+80, 48, 96, scope_name='incept8', **snconv_kwargs) 76 | layer9 = downsample(layer8, 48+96, 96, scope_name='downsamp9', **snconv_kwargs) 77 | layer10 = incept(layer9, 96+48+96, 176, 160, scope_name='incept10', **snconv_kwargs) 78 | layer11 = incept(layer10, 176+160, 176, 160, scope_name='incept11', **snconv_kwargs) 79 | layer12 = tf.nn.pool(layer11, window_shape=[7, 7], pooling_type='AVG', 80 | padding='SAME', strides=[1, 1], name='mean_pool12') 81 | 82 | fc = sn.linear(layer12, num_classes, scope_name='fc', xavier=True, 83 | update_collection=update_collection, beta=beta, reuse=reuse) 84 | 85 | return fc 86 | 87 | 88 | def inception_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False, bn=True): 89 | """Mini-inception architecture with spectral normalization on all layers""" 90 | 91 | snconv_kwargs = dict(update_collection=update_collection, beta=beta, 92 | reuse=reuse, training=training, bn=False) 93 | 94 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 96], scope_name='conv1', **snconv_kwargs)) 95 | layer2 = incept(layer1, 96, 32, 32, scope_name='incept2', **snconv_kwargs) 96 | layer3 = incept(layer2, 32+32, 32, 48, scope_name='incept3', **snconv_kwargs) 97 | layer4 = downsample(layer3, 32+48, 80, scope_name='downsamp4', **snconv_kwargs) 98 | layer5 = incept(layer4, 80+32+48, 112, 48, scope_name='incept5', **snconv_kwargs) 99 | layer6 = incept(layer5, 112+48, 96, 64, scope_name='incept6', **snconv_kwargs) 100 | layer7 = incept(layer6, 96+64, 80, 80, scope_name='incept7', **snconv_kwargs) 101 | layer8 = incept(layer7, 80+80, 48, 96, scope_name='incept8', **snconv_kwargs) 102 | layer9 = downsample(layer8, 48+96, 96, scope_name='downsamp9', **snconv_kwargs) 103 | layer10 = incept(layer9, 96+48+96, 176, 160, scope_name='incept10', **snconv_kwargs) 104 | layer11 = incept(layer10, 176+160, 176, 160, scope_name='incept11', **snconv_kwargs) 105 | layer12 = tf.nn.pool(layer11, window_shape=[7, 7], pooling_type='AVG', 106 | padding='SAME', strides=[1, 1], name='mean_pool12') 107 | 108 | fc = sn.linear(layer12, num_classes, scope_name='fc', xavier=True, 109 | update_collection=update_collection, beta=beta, reuse=reuse) 110 | 111 | return fc 112 | 113 | 114 | def inception_snl2(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 115 | """Mini-inception architecture with spectral normalization on all layers except last one, 116 | which can be L2 regularized 117 | """ 118 | 119 | layer1 = tf.nn.relu(sn.conv2d(input_data, [3, 3, 3, 96], scope_name='conv1', 120 | update_collection=update_collection, beta=beta, reuse=reuse)) 121 | layer2 = incept(layer1, 96, 32, 32, scope_name='incept2', 122 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 123 | layer3 = incept(layer2, 32+32, 32, 48, scope_name='incept3', 124 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 125 | layer4 = downsample(layer3, 32+48, 80, scope_name='downsamp4', 126 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 127 | layer5 = incept(layer4, 80+32+48, 112, 48, scope_name='incept5', 128 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 129 | layer6 = incept(layer5, 112+48, 96, 64, scope_name='incept6', 130 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 131 | layer7 = incept(layer6, 96+64, 80, 80, scope_name='incept7', 132 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 133 | layer8 = incept(layer7, 80+80, 48, 96, scope_name='incept8', 134 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 135 | layer9 = downsample(layer8, 48+96, 96, scope_name='downsamp9', 136 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 137 | layer10 = incept(layer9, 96+48+96, 176, 160, scope_name='incept10', 138 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 139 | layer11 = incept(layer10, 176+160, 176, 160, scope_name='incept11', 140 | update_collection=update_collection, beta=beta, reuse=reuse, training=training) 141 | layer12 = tf.nn.pool(layer11, window_shape=[7, 7], pooling_type='AVG', 142 | padding='SAME', strides=[1, 1], name='mean_pool12') 143 | 144 | fc = sn.linear(layer12, num_classes, scope_name='fc', spectral_norm=False, 145 | xavier=True, wd=wd, l2_norm=True, reuse=reuse) 146 | 147 | return fc 148 | -------------------------------------------------------------------------------- /get_cifar10.py: -------------------------------------------------------------------------------- 1 | # Requires tflearn master version: 2 | # pip install git+https://github.com/tflearn/tflearn.git 3 | # much of the cifar10 fetching code is from Chiyuan Zhang at MIT 4 | 5 | import os 6 | import re 7 | import numpy as np 8 | import tensorflow as tf 9 | import tflearn 10 | 11 | from tflearn.data_preprocessing import ImagePreprocessing 12 | from tflearn.data_augmentation import ImageAugmentation 13 | from scipy.io import loadmat 14 | 15 | def unpickle(file): 16 | import cPickle 17 | with open(file, 'rb') as fo: 18 | dict = cPickle.load(fo) 19 | return dict 20 | 21 | 22 | class dataset(): 23 | 24 | def __init__(self,FLAGS): 25 | self.FLAGS = FLAGS 26 | 27 | def per_image_whitening(self,images): 28 | "Mimic tensorflow per_image_whitening" 29 | orig_shape = images.shape 30 | images = images.reshape((images.shape[0], -1)) 31 | img_means = np.mean(images, axis=1, keepdims=True) 32 | img_stds = np.std(images, axis=1, keepdims=True) 33 | adj_stds = np.maximum(img_stds, 1.0 / np.sqrt(images.shape[1])) 34 | whiten_imgs = (images - img_means) / adj_stds 35 | return whiten_imgs.reshape(orig_shape) 36 | 37 | 38 | def crop_datasets(self, datasets, do_whitening=True): 39 | datasets_cropped = [] 40 | for dset_x, dset_y in datasets: 41 | new_x = dset_x.reshape((-1, 32, 32, 3)) 42 | if do_whitening: 43 | new_x = self.per_image_whitening(new_x) 44 | new_x = new_x[:, 2:30, 2:30, :] 45 | datasets_cropped.append((new_x, dset_y)) 46 | return datasets_cropped 47 | 48 | 49 | def corrupt_dataset(self, dataset, percent_corrupt): 50 | # set random seed so that we get the same corrupted dataset 51 | if not self.FLAGS.rand_seed >= 0: 52 | np.random.seed(percent_corrupt) 53 | 54 | p_corrupt = percent_corrupt / 100.0 55 | dset_x, dset_y = dataset 56 | 57 | b_corrupt = np.random.rand(len(dset_y)) <= p_corrupt 58 | rand_y = np.random.choice(self.FLAGS.n_classes, len(dset_y)) 59 | new_y = np.copy(dset_y) 60 | new_y[b_corrupt] = rand_y[b_corrupt] 61 | 62 | return dset_x, new_y 63 | 64 | 65 | def get_datasets(self): 66 | commands = self.FLAGS.dataset.split('|') 67 | name = commands[0] 68 | commands = commands[1:] 69 | if name == 'cifar10': 70 | datasets = list(tflearn.datasets.cifar10.load_data()) 71 | elif name == 'cifar100': 72 | dataset_tr = unpickle('./cifar-100/train') 73 | dataset_tt = unpickle('./cifar-100/test') 74 | datasets = [(np.transpose(dataset_tr['data'].reshape(-1,3,32,32).astype(float)/255,(0,2,3,1)), 75 | np.array(dataset_tr['fine_labels'])), 76 | (np.transpose(dataset_tt['data'].reshape(-1,3,32,32).astype(float)/255,(0,2,3,1)), 77 | np.array(dataset_tt['fine_labels']))] 78 | elif name == 'cifar20': 79 | dataset_tr = unpickle('./cifar-100/train') 80 | dataset_tt = unpickle('./cifar-100/test') 81 | datasets = [(np.transpose(dataset_tr['data'].reshape(-1,3,32,32).astype(float)/255,(0,2,3,1)), 82 | np.array(dataset_tr['coarse_labels'])), 83 | (np.transpose(dataset_tt['data'].reshape(-1,3,32,32).astype(float)/255,(0,2,3,1)), 84 | np.array(dataset_tt['coarse_labels']))] 85 | elif name == 'svhn': 86 | if not os.path.isdir('./svhn'): 87 | os.system('mkdir svhn') 88 | if not os.path.isfile('./svhn/train_32x32.mat'): 89 | print('Downloading SVHN train set..') 90 | os.system('wget -O ./svhn/train_32x32.mat http://ufldl.stanford.edu/housenumbers/train_32x32.mat') 91 | if not os.path.isfile('./svhn/test_32x32.mat'): 92 | print('Downloading SVHN test set..') 93 | os.system('wget -O ./svhn/test_32x32.mat http://ufldl.stanford.edu/housenumbers/test_32x32.mat') 94 | dataset_tr = loadmat('./svhn/train_32x32.mat') 95 | dataset_tt = loadmat('./svhn/test_32x32.mat') 96 | y_tr = dataset_tr['y'].reshape(-1).astype(int) 97 | y_tr[y_tr == 10] = 0 98 | y_tt = dataset_tt['y'].reshape(-1).astype(int) 99 | y_tt[y_tt == 10] = 0 100 | datasets = [(np.transpose(dataset_tr['X'].astype(float)/255, axes=[3, 0, 1, 2]), y_tr), 101 | (np.transpose(dataset_tt['X'].astype(float)/255, axes=[3, 0, 1, 2]), y_tt)] 102 | elif name == 'mnist': 103 | def rgb_and_pad_mnist(im): 104 | im_ = np.zeros((32,32)) 105 | im_[2:30,2:30] = im 106 | return np.repeat(im_[:, :, np.newaxis],3,axis=2) 107 | a = list(tflearn.datasets.mnist.load_data()) 108 | datasets = [(np.array(map(rgb_and_pad_mnist,a[0].reshape(-1,28,28))),a[1]), 109 | (np.array(map(rgb_and_pad_mnist,a[2].reshape(-1,28,28))),a[3])] 110 | elif name.startswith('data-gen:'): 111 | ret = re.match(r'data-gen:(.*)', name) 112 | dset_fn = os.path.join(self.FLAGS.logdir, 'data-gen', DATAGEN_MAPPING[ret.group(1)]) 113 | data = np.load(dset_fn) 114 | datasets = [(data['x_tr'], data['y_tr']), (data['x_tt'], data['y_tt'])] 115 | 116 | def get_dset_idx(pattern): 117 | if pattern == 'tr': 118 | return [0] 119 | if pattern == 'tt': 120 | return [1] 121 | if pattern == 'trtt': 122 | return [0, 1] 123 | 124 | for cmd in commands: 125 | if cmd.startswith('SubS:'): 126 | # take subset, e.g. take a random subset of 5000 samples for training 127 | # SubS:tr:5000 128 | ret = re.match(r'SubS:([^:]*):(.*)', cmd) 129 | 130 | subset_count = int(ret.group(2)) 131 | # set random seed for reproducibility 132 | if not self.FLAGS.rand_seed >= 0: 133 | np.random.seed(subset_count) 134 | 135 | for ds_idx in get_dset_idx(ret.group(1)): 136 | dset_x, dset_y = datasets[ds_idx] 137 | dset_count = dset_x.shape[0] 138 | assert dset_count >= subset_count 139 | subset_idx = np.random.choice(dset_count, subset_count, replace=False) 140 | datasets[ds_idx] = (dset_x[subset_idx], dset_y[subset_idx]) 141 | 142 | if cmd.startswith('RndL:'): 143 | # randomly corrupt labels, e.g. randomly corrupt the train and test set labels with 20% probability 144 | # RndL:trtt:20 145 | ret = re.match(r'RndL:([^:]*):(.*)', cmd) 146 | p_corrupt = int(ret.group(2)) 147 | 148 | for ds_idx in get_dset_idx(ret.group(1)): 149 | datasets[ds_idx] = self.corrupt_dataset(datasets[ds_idx], p_corrupt) 150 | 151 | if cmd.startswith('Cls:'): 152 | # take only samples belong to the specificed classes here 153 | ret = re.match(r'Cls:([^:]*):(.*)', cmd) 154 | classes = [int(x) for x in ret.group(2)] 155 | 156 | for ds_idx in get_dset_idx(ret.group(1)): 157 | dset_x, dset_y = datasets[ds_idx] 158 | dset_y = np.array(dset_y) 159 | idx_sel = np.zeros(dset_y.shape, dtype=bool) 160 | for c in classes: 161 | idx_sel += np.equal(dset_y, c) 162 | 163 | datasets[ds_idx] = (dset_x[idx_sel], dset_y[idx_sel]) 164 | 165 | return datasets 166 | 167 | 168 | def prepare_inputs(self): 169 | datasets = self.crop_datasets(self.get_datasets(), do_whitening=self.FLAGS.per_image_whitening) 170 | return datasets 171 | 172 | 173 | # default parameters 174 | class cifar10_parameters(): 175 | # Name of the dataset 176 | dataset = 'cifar10' 177 | # Whether to perform tf style per image whitening 178 | per_image_whitening = True 179 | # Number of classes 180 | n_classes = 10 181 | # Use this random seed if non-negative 182 | rand_seed = -1 183 | 184 | class cifar20_parameters(): 185 | dataset = 'cifar20' 186 | per_image_whitening = True 187 | n_classes = 20 188 | rand_seed = -1 189 | 190 | class cifar100_parameters(): 191 | dataset = 'cifar100' 192 | per_image_whitening = True 193 | n_classes = 100 194 | rand_seed = -1 195 | 196 | class svhn_parameters(): 197 | dataset = 'svhn' 198 | per_image_whitening = True 199 | n_classes = 10 200 | rand_seed = -1 201 | 202 | class mnist_parameters(): 203 | dataset = 'mnist' 204 | per_image_whitening = True 205 | n_classes = 10 206 | rand_seed = -1 207 | 208 | def cifar10_one_hot(i): 209 | v = np.zeros(10) 210 | v[i] = 1 211 | return v 212 | 213 | 214 | def get_cifar10_dataset(p_corrupt_label,n_samps=50000,rand_seed=None,onehot=False,whiten=True): 215 | class params(cifar10_parameters): 216 | def __init__(self,p,rand_seed,n_samp=50000,whiten=True): 217 | self.dataset = 'cifar10|SubS:tr:%s|RndL:trtt:%s'%(int(n_samp),int(p)) 218 | self.rand_seed = rand_seed 219 | self.per_image_whitening=whiten 220 | 221 | p = params(p_corrupt_label,rand_seed,n_samp=n_samps,whiten=whiten) 222 | c = dataset(p) 223 | datasets = c.prepare_inputs() 224 | 225 | if onehot: 226 | return datasets[0][0],np.array(map(cifar10_one_hot,datasets[0][1])), \ 227 | datasets[1][0],np.array(map(cifar10_one_hot,datasets[1][1])) 228 | 229 | return datasets[0][0],datasets[0][1],datasets[1][0],datasets[1][1] 230 | 231 | 232 | def get_cifar100_dataset(p_corrupt_label,n_samps=50000,rand_seed=None): 233 | class params(cifar100_parameters): 234 | def __init__(self,p,rand_seed,n_samp=50000): 235 | self.dataset = 'cifar100|SubS:tr:%s|RndL:trtt:%s'%(int(n_samp),int(p)) 236 | self.rand_seed = rand_seed 237 | 238 | p = params(p_corrupt_label,rand_seed,n_samp=n_samps) 239 | c = dataset(p) 240 | datasets = c.prepare_inputs() 241 | 242 | return datasets[0][0],datasets[0][1],datasets[1][0],datasets[1][1] 243 | 244 | 245 | # Same as cifar100 except with coarser labels 246 | def get_cifar20_dataset(p_corrupt_label,n_samps=50000,rand_seed=None): 247 | class params(cifar20_parameters): 248 | def __init__(self,p,rand_seed,n_samp=50000): 249 | self.dataset = 'cifar20|SubS:tr:%s|RndL:trtt:%s'%(int(n_samp),int(p)) 250 | self.rand_seed = rand_seed 251 | 252 | p = params(p_corrupt_label,rand_seed,n_samp=n_samps) 253 | c = dataset(p) 254 | datasets = c.prepare_inputs() 255 | 256 | return datasets[0][0],datasets[0][1],datasets[1][0],datasets[1][1] 257 | 258 | 259 | def get_svhn_dataset(p_corrupt_label,n_samps=73257,rand_seed=None): 260 | class params(svhn_parameters): 261 | def __init__(self,p,rand_seed,n_samp=73257): 262 | self.dataset = 'svhn|SubS:tr:%s|RndL:trtt:%s'%(int(n_samp),int(p)) 263 | self.rand_seed = rand_seed 264 | 265 | p = params(p_corrupt_label,rand_seed,n_samp=n_samps) 266 | c = dataset(p) 267 | datasets = c.prepare_inputs() 268 | 269 | return datasets[0][0],datasets[0][1],datasets[1][0],datasets[1][1] 270 | 271 | 272 | def get_mnist_dataset(p_corrupt_label,n_samps=50000,rand_seed=None): 273 | class params(mnist_parameters): 274 | def __init__(self,p,rand_seed,n_samp=50000): 275 | self.dataset = 'mnist|SubS:tr:%s|RndL:trtt:%s'%(int(n_samp),int(p)) 276 | self.rand_seed = rand_seed 277 | 278 | p = params(p_corrupt_label,rand_seed,n_samp=n_samps) 279 | c = dataset(p) 280 | datasets = c.prepare_inputs() 281 | 282 | return datasets[0][0],datasets[0][1],datasets[1][0],datasets[1][1] 283 | -------------------------------------------------------------------------------- /dl_spectral_normalization/adversarial.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import numpy as np 4 | import tensorflow as tf 5 | import dl_utils 6 | 7 | def l1_norm_tf(input_x, epsilon=1e-24): 8 | """get L1 norm""" 9 | reduc_ind = list(xrange(1, len(input_x.get_shape()))) 10 | return tf.reduce_sum(tf.abs(input_x), 11 | reduction_indices=reduc_ind, 12 | keep_dims=True) + epsilon 13 | 14 | 15 | def l2_norm_tf(input_x, epsilon=1e-24): 16 | """get L2 norm""" 17 | reduc_ind = list(xrange(1, len(input_x.get_shape()))) 18 | return tf.sqrt(tf.reduce_sum(tf.square(input_x), 19 | reduction_indices=reduc_ind, 20 | keep_dims=True)) + epsilon 21 | 22 | 23 | def project_back_onto_unit_ball(x_adv, x, eps=0.3, order=2): 24 | """Projects x_adv back to eps-ball around x""" 25 | 26 | delta = x_adv-x 27 | 28 | if order == 1: 29 | norms = l1_norm_tf(delta) 30 | elif order == 2: 31 | norms = l2_norm_tf(delta) 32 | 33 | adj_norms = tf.maximum(tf.ones_like(norms), norms/eps) 34 | return x+delta/adj_norms 35 | 36 | 37 | def fgm(x, preds, y=None, eps=0.3, order=2, clip_min=None, clip_max=None, 38 | **kwargs): 39 | """ 40 | TensorFlow implementation of the Fast Gradient Method. Code adapted from 41 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/attacks_tf.py 42 | :param x: the input placeholder 43 | :param preds: the model's output tensor 44 | :param y: (optional) A placeholder for the model labels. Only provide 45 | this parameter if you'd like to use true labels when crafting 46 | adversarial samples. Otherwise, model predictions are used as 47 | labels to avoid the "label leaking" effect (explained in this 48 | paper: https://arxiv.org/abs/1611.01236). Default is None. 49 | Labels should be one-hot-encoded. 50 | :param eps: the epsilon (input variation parameter) 51 | :param ord: (optional) Order of the norm (mimics Numpy). 52 | Possible values: np.inf, 1 or 2. 53 | :param clip_min: Minimum float value for adversarial example components 54 | :param clip_max: Maximum float value for adversarial example components 55 | :return: a tensor for the adversarial example 56 | """ 57 | 58 | if y is None: 59 | # Using model predictions as ground truth to avoid label leaking 60 | y = tf.argmax(preds, 1) 61 | 62 | # Compute loss (without taking the mean across samples) 63 | loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=preds) 64 | 65 | # Define gradient of loss wrt input 66 | grad, = tf.gradients(loss_, x) 67 | 68 | if order == np.inf: 69 | # Take sign of gradient 70 | signed_grad = tf.sign(grad) 71 | 72 | elif order == 1: 73 | signed_grad = grad / l1_norm_tf(grad) 74 | 75 | elif order == 2: 76 | signed_grad = grad / l2_norm_tf(grad) 77 | 78 | else: 79 | raise NotImplementedError("Only L-inf, L1 and L2 norms are " 80 | "currently implemented.") 81 | 82 | # Multiply by constant epsilon 83 | scaled_signed_grad = eps * signed_grad 84 | 85 | # Add perturbation to original example to obtain adversarial example 86 | adv_x = tf.stop_gradient(x + scaled_signed_grad) 87 | 88 | # If clipping is needed, reset all values outside of [clip_min, clip_max] 89 | if (clip_min is not None) and (clip_max is not None): 90 | adv_x = tf.clip_by_value(adv_x, clip_min, clip_max) 91 | 92 | return adv_x 93 | 94 | 95 | def wrm(x, preds, y=None, eps=0.3, order=2, model=None, k=15, 96 | reuse=True, update_collection='_', graph_beta=1.0, num_classes=10, training=False): 97 | 98 | """ 99 | TensorFlow implementation of the Wasserstein distributionally 100 | adversarial training method. Code adapted from 101 | https://github.com/duchi-lab/certifiable-distributional-robustness/blob/master/attacks_tf.py 102 | :param x: the input placeholder 103 | :param preds: the model's output tensor 104 | :param y: (optional) A placeholder for the model labels. Only provide 105 | this parameter if you'd like to use true labels when crafting 106 | adversarial samples. Otherwise, model predictions are used as 107 | labels to avoid the "label leaking" effect (explained in this 108 | paper: https://arxiv.org/abs/1611.01236). Default is None. 109 | Labels should be one-hot-encoded. 110 | :param eps: .5 / gamma (Lagrange dual parameter) 111 | in the ICLR paper (see link above) 112 | :param model: TF graph model (**kwargs goes to this) 113 | :param k: how many gradient ascent steps to take 114 | when finding adversarial example 115 | :return: a tensor for the adversarial example 116 | """ 117 | 118 | if y is None: 119 | # Using model predictions as ground truth to avoid label leaking 120 | y = tf.argmax(preds, 1) 121 | 122 | # Compute loss 123 | loss_ = dl_utils.loss(preds, y, mean=False) 124 | 125 | grad, = tf.gradients(eps*loss_, x) 126 | x_adv = tf.stop_gradient(x+grad) 127 | x = tf.stop_gradient(x) 128 | 129 | for t in xrange(k): 130 | loss_ = dl_utils.loss(model(x_adv, reuse=True, beta=graph_beta, 131 | update_collection=update_collection, 132 | num_classes=num_classes), y, mean=False) 133 | grad, = tf.gradients(eps*loss_, x_adv) 134 | grad2, = tf.gradients(tf.nn.l2_loss(x_adv-x), x_adv) 135 | grad = grad - grad2 136 | x_adv = tf.stop_gradient(x_adv+1./np.sqrt(t+2)*grad) 137 | 138 | return x_adv 139 | 140 | 141 | def pgm(x, preds, y=None, eps=0.3, order=2, model=None, a=None, k=15, 142 | reuse=True, update_collection='_', graph_beta=1., num_classes=10, training=False): 143 | """ 144 | TensorFlow implementation of the Projected Gradient Method. 145 | :param x: the input placeholder 146 | :param preds: the model's output tensor 147 | :param y: (optional) A placeholder for the model labels. Only provide 148 | this parameter if you'd like to use true labels when crafting 149 | adversarial samples. Otherwise, model predictions are used as 150 | labels to avoid the "label leaking" effect (explained in this 151 | paper: https://arxiv.org/abs/1611.01236). Default is None. 152 | Labels should be one-hot-encoded. 153 | :param eps: the epsilon (input variation parameter) 154 | :param k: number of steps to take, each of size a 155 | :param a: size of each step 156 | :param model: TF graph model (**kwargs goes to this) 157 | :param ord: (optional) Order of the norm (mimics Numpy). 158 | Possible values: 1 or 2. 159 | :return: a tensor for the adversarial example 160 | """ 161 | 162 | if a is None: 163 | a = 2.*eps/k 164 | 165 | if y is None: 166 | y = tf.argmax(preds, 1) 167 | 168 | x_adv = x 169 | 170 | for t in xrange(k): 171 | loss_ = dl_utils.loss(model(x_adv, reuse=reuse, beta=graph_beta, 172 | update_collection=update_collection, 173 | num_classes=num_classes), y, mean=False) 174 | grad, = tf.gradients(loss_, x_adv) 175 | 176 | if order == 1: 177 | scaled_grad = grad / l1_norm_tf(grad) 178 | 179 | elif order == 2: 180 | scaled_grad = grad / l2_norm_tf(grad) 181 | 182 | elif order == np.inf: 183 | scaled_grad = tf.sign(grad) 184 | 185 | x_adv = tf.stop_gradient(x_adv + a*scaled_grad) 186 | 187 | if order in [1, 2]: 188 | x_adv = project_back_onto_unit_ball(x_adv, x, eps=eps, order=order) 189 | 190 | elif order == np.inf: 191 | x_adv = tf.clip_by_value(x_adv, x-eps, x+eps) 192 | 193 | return x_adv 194 | 195 | 196 | def gen_adv_examples_in_sess(X, graph, sess, batch_size=100, method=fgm, num_classes=10, **kwargs): 197 | """Use trained model to generate adversarial examples from X within a session""" 198 | 199 | adv_tensor = method(graph['input_data'], graph['fc_out'], num_classes=num_classes, **kwargs) 200 | 201 | adv_x = np.zeros(np.shape(X)) 202 | for i in range(0, len(X), batch_size): 203 | adv_x[i:i+batch_size] = sess.run(adv_tensor, feed_dict={graph['input_data']: X[i:i+batch_size]}) 204 | 205 | return adv_x 206 | 207 | 208 | def build_graph_and_gen_adv_examples(X, arch, load_dir, num_classes=10, beta=1, num_channels=3, 209 | gpu_prop=0.2, gpu_id=0, load_epoch=None, method=fgm, **kwargs): 210 | """Build a tensorflow graph and generate adversarial examples""" 211 | 212 | if load_epoch is None: 213 | load_epoch = dl_utils.latest_epoch(load_dir) 214 | else: 215 | load_epoch = np.min((dl_utils.latest_epoch(load_dir), load_epoch)) 216 | 217 | tf.reset_default_graph() 218 | with tf.device("/gpu:%s"%(gpu_id)): 219 | graph = dl_utils.graph_builder_wrapper(arch, num_classes=num_classes, save_dir=load_dir, beta=beta, 220 | num_channels=num_channels, update_collection='_') 221 | 222 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 223 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 224 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 225 | return gen_adv_examples_in_sess(X, graph, sess, method=method, model=arch, graph_beta=beta, 226 | num_classes=num_classes, **kwargs) 227 | 228 | 229 | def test_net_against_adv_examples(X, Y, load_dir, arch, d=None, beta=1., num_channels=3, 230 | verbose=True, gpu_id=0, gpu_prop=0.2, load_epoch=None, 231 | fix_adv=False, num_classes=10, method=fgm, opt='momentum', **kwargs): 232 | """For a trained network, generate and get accuracy for adversarially-perturbed samples""" 233 | 234 | start = time.time() 235 | 236 | # Use previously fitted network which had achieved 100% training accuracy 237 | tf.reset_default_graph() 238 | with tf.device("/gpu:%s"%(gpu_id)): 239 | graph = dl_utils.graph_builder_wrapper(arch, num_classes=num_classes, save_dir=load_dir, beta=beta, 240 | num_channels=num_channels, update_collection='_', opt=opt) 241 | 242 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 243 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 244 | if d is None: 245 | if load_epoch is None: 246 | load_epoch = dl_utils.latest_epoch(load_dir) 247 | else: 248 | load_epoch = np.min((dl_utils.latest_epoch(load_dir), load_epoch)) 249 | 250 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 251 | 252 | else: 253 | for v in tf.trainable_variables(): 254 | sess.run(v.assign(d[v.name])) 255 | 256 | # Generate adversarial samples and predict 257 | X_adv = gen_adv_examples_in_sess(X, graph, sess, method=method, model=arch, graph_beta=beta, 258 | num_classes=num_classes, **kwargs) 259 | 260 | # Gradients for some examples will sometimes be zero.. ignore this 261 | if fix_adv: 262 | reduc_ind = tuple(xrange(1, len(X.shape))) 263 | mag_delta = np.sqrt(np.sum(np.square(X_adv-X), axis=reduc_ind)) 264 | keep_inds = mag_delta > 1e-4 265 | if np.sum(keep_inds) > 0: 266 | X_adv, Y = X_adv[keep_inds], Y[keep_inds] 267 | 268 | Yhat_adv = dl_utils.predict_labels_in_sess(X_adv, graph, sess) 269 | 270 | accs_adv = np.sum(Yhat_adv == Y)/float(len(Y)) 271 | 272 | if verbose: 273 | print('Acc on adv examples: %.4f (%.3f s elapsed)' \ 274 | %(accs_adv, time.time()-start)) 275 | 276 | return accs_adv 277 | -------------------------------------------------------------------------------- /train_network_template.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pickle\n", 20 | "import time\n", 21 | "import tensorflow as tf\n", 22 | "import numpy as np\n", 23 | "\n", 24 | "from dl_spectral_normalization import dl_utils\n", 25 | "from dl_spectral_normalization import adversarial as ad\n", 26 | "\n", 27 | "%matplotlib inline" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# Load dataset\n", 35 | "\n", 36 | "We provide the code for downloading and loading one of three types of datasets:\n", 37 | "- CIFAR10\n", 38 | "- MNIST\n", 39 | "- SVHN" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# CIFAR10\n", 49 | "from get_cifar10 import get_cifar10_dataset\n", 50 | "Xtr, Ytr, Xtt, Ytt = get_cifar10_dataset(0, n_samps=50000)\n", 51 | "val_set = {'X': Xtt[:500], 'Y': Ytt[:500]}\n", 52 | "Xtt, Ytt = Xtt[500:], Ytt[500:]" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# MNIST\n", 62 | "# NOTE: If you use MNIST, a lot of the dl_spectral_normalization functions \n", 63 | "# will require you to set num_channels=1\n", 64 | "from tensorflow.examples.tutorials.mnist import input_data\n", 65 | "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=False)\n", 66 | "Xtr = mnist.train.images.reshape(-1, 28, 28, 1)\n", 67 | "Ytr = mnist.train.labels.astype(float)\n", 68 | "Xtt = mnist.test.images.reshape(-1, 28, 28, 1)\n", 69 | "Ytt = mnist.test.labels.astype(float)\n", 70 | "val_set = {'X': Xtt[:500], 'Y': Ytt[:500]}\n", 71 | "Xtt, Ytt = Xtt[500:], Ytt[500:]" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# SVHN\n", 81 | "from get_cifar10 import get_svhn_dataset\n", 82 | "Xtr, Ytr, Xtt, Ytt = get_svhn_dataset(0)\n", 83 | "val_set = {'X': Xtt[:500], 'Y': Ytt[:500]}\n", 84 | "Xtt, Ytt = Xtt[500:], Ytt[500:]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Select network\n", 92 | "\n", 93 | "Please see `spectral_adversarial_regularization/models` for the full list of models provided. We give examples of networks trained in the paper. " 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "from dl_spectral_normalization.models import alexnet as model\n", 103 | "arch = model.alexnet_sn" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# Train network" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 5, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "def train_network(Xtr, Ytr, val_set, arch, save_dir, \n", 120 | " beta=1,\n", 121 | " adv=None, order=2, eps=0.3,\n", 122 | " opt='momentum', lr_initial=0.01,\n", 123 | " num_epochs=200, save_every=25,\n", 124 | " gpu_prop=0.3, retrain=False):\n", 125 | " \"\"\"\n", 126 | " Wrapper for training a network using dl_spectral_normalization\n", 127 | " \n", 128 | " Inputs\n", 129 | " ----------------------------------------------------------------------\n", 130 | " Xtr: Training data (channels last format)\n", 131 | " Ytr: Training labels (not one-hot encoded)\n", 132 | " val_set: Dict with keys 'X' and 'Y' for the validation\n", 133 | " data and labels\n", 134 | " arch: One of the architectures from\n", 135 | " dl_spectral_normalization/models\n", 136 | " save_dir: Directory to save weights and TensorBoard logs\n", 137 | " beta: Amount of spectral normalization (max spectral \n", 138 | " norm of a layer). Use beta=np.inf for no\n", 139 | " spectral normalization\n", 140 | " adv: String representing adversarial training scheme \n", 141 | " ('erm', 'fgm', 'pgm', or 'wrm')\n", 142 | " order: Order of attack (np.inf, 1, or 2) for FGM or PGM\n", 143 | " eps: Magnitude of attack during training\n", 144 | " opt: Optimizer type ('adam' or 'momentum')\n", 145 | " lr_initial: Initial learning rate\n", 146 | " num_epochs: Number of epochs to train for\n", 147 | " save_every: Save weights every this many epochs\n", 148 | " gpu_prop: Proportion of GPU to allocate for training process\n", 149 | " retrain: Whether or not to delete the existing weights and\n", 150 | " retrain the network\n", 151 | " \"\"\"\n", 152 | " \n", 153 | " if os.path.isdir(save_dir): \n", 154 | " if retrain: os.system('rm -rf %s'%(save_dir))\n", 155 | " else: return\n", 156 | "\n", 157 | " print('eps = %.4f, saving weights to %s'%(eps, save_dir))\n", 158 | " _ = dl_utils.build_graph_and_train(Xtr, Ytr, save_dir, arch,\n", 159 | " val_set=val_set,\n", 160 | " num_channels=Xtr.shape[-1],\n", 161 | " beta=beta,\n", 162 | " adv=adv, order=order, eps=eps,\n", 163 | " opt=opt, lr_initial=lr_initial,\n", 164 | " num_epochs=num_epochs, save_every=save_every,\n", 165 | " gpu_prop=gpu_prop, \n", 166 | " batch_size=128,\n", 167 | " early_stop_acc=0.999,\n", 168 | " early_stop_acc_num=5)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 6, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "# Directory in which we save weights\n", 178 | "dirname = '/data/save_weights_tf1.10.1/cifar10/alexnet/'\n", 179 | "\n", 180 | "# List of betas to sweep through (np.inf means no spectral normalization)\n", 181 | "beta_list = np.array([np.inf, 1.0, 1.3, 1.6, 2.0, 4.0])\n", 182 | "\n", 183 | "# Specify the amount of perturbation to use during training\n", 184 | "C2 = np.mean([np.sqrt(np.sum(np.square(i))) for i in Xtr])\n", 185 | "gamma = 0.002*C2 # for MNIST, use 0.04*C2\n", 186 | "eps_wrm = 1./(2*gamma)\n", 187 | "eps = 0.05*C2" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 7, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# ERM\n", 197 | "for beta in beta_list:\n", 198 | " save_dir = os.path.join(dirname, 'erm_beta%s'%(beta))\n", 199 | " train_network(Xtr, Ytr, val_set, arch, save_dir, adv='erm', beta=beta)\n", 200 | " \n", 201 | "# FGM\n", 202 | "for beta in beta_list:\n", 203 | " save_dir = os.path.join(dirname, 'fgm_beta%s'%(beta))\n", 204 | " train_network(Xtr, Ytr, val_set, arch, save_dir, adv='fgm', beta=beta, eps=eps)\n", 205 | " \n", 206 | "# PGM\n", 207 | "for beta in beta_list:\n", 208 | " save_dir = os.path.join(dirname, 'pgm_beta%s'%(beta))\n", 209 | " train_network(Xtr, Ytr, val_set, arch, save_dir, adv='pgm', beta=beta, eps=eps)\n", 210 | " \n", 211 | "# WRM\n", 212 | "for beta in beta_list:\n", 213 | " save_dir = os.path.join(dirname, 'wrm_beta%s'%(beta))\n", 214 | " train_network(Xtr, Ytr, val_set, arch, save_dir, adv='wrm', beta=beta, eps=eps_wrm)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "# Test trained networks with various-magnitude attacks" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 8, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "def generate_adv_attack_curves(Xtt, Ytt, arch, eps_list, defense, attack, \n", 231 | " resultsfile, beta_list, dirname,\n", 232 | " load_epoch=None, order=2, opt='momentum'):\n", 233 | " \"\"\"\n", 234 | " Wrapper for testing a set of trained networks (with various amounts of\n", 235 | " spectral normalization) against various-magnitude adversarial attacks\n", 236 | " \n", 237 | " Inputs\n", 238 | " ----------------------------------------------------------------------\n", 239 | " Xtt: Test data (channels last format)\n", 240 | " Ytt: Training labels (not one-hot encoded)\n", 241 | " arch: One of the architectures from\n", 242 | " dl_spectral_normalization/models. Need to match \n", 243 | " architecture used for training to properly load\n", 244 | " weights\n", 245 | " defense: String representing adversarial training scheme \n", 246 | " ('erm', 'fgm', 'pgm', or 'wrm')\n", 247 | " attack: One of the attacks from \n", 248 | " dl_spectral_normalization/adversarial: \n", 249 | " (ad.fgm, ad.pgm, ad.wrm)\n", 250 | " resultsfile: Pickled file to save results to\n", 251 | " beta_list: List of betas to test (trained networks)\n", 252 | " dirname: Directory where trained networks are saved\n", 253 | " load_epoch: Epoch corresponding to weights to load. If None,\n", 254 | " loads the latest epoch\n", 255 | " order: Order of attack (np.inf, 1, or 2) for FGM or PGM\n", 256 | " eps: Magnitude of attack during training\n", 257 | " opt: Optimizer type ('adam' or 'momentum'). Need to \n", 258 | " match optimizer used to training to properly load\n", 259 | " weights\n", 260 | " \n", 261 | " Outputs\n", 262 | " ----------------------------------------------------------------------\n", 263 | " adv_results: Dict with tested betas as keys. For each beta, the\n", 264 | " value is a len(eps_list) array indicating the \n", 265 | " adversarial performance for each eps in eps_list\n", 266 | " \"\"\"\n", 267 | " \n", 268 | " num_channels = Xtt.shape[-1]\n", 269 | " if os.path.isfile(resultsfile):\n", 270 | " adv_results = pickle.load(file(resultsfile, 'rb'))\n", 271 | " else:\n", 272 | " adv_results = {}\n", 273 | " \n", 274 | " for beta in beta_list:\n", 275 | " if beta in adv_results: continue\n", 276 | " save_dir = os.path.join(dirname, '%s_beta%s'%(defense, beta))\n", 277 | " \n", 278 | " adv_accs = np.zeros(len(eps_list))\n", 279 | " for i, eps in enumerate(eps_list):\n", 280 | " adv_accs[i] = ad.test_net_against_adv_examples(Xtt, Ytt, save_dir, arch, \n", 281 | " beta=beta, method=attack,\n", 282 | " load_epoch=load_epoch,\n", 283 | " num_channels=num_channels,\n", 284 | " order=order,\n", 285 | " opt=opt, eps=eps)\n", 286 | " adv_results[beta] = adv_accs\n", 287 | " pickle.dump(adv_results, file(resultsfile, 'wb'))\n", 288 | " \n", 289 | " return adv_results" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 9, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "# Eps attack values to sweep over\n", 299 | "eps_list = np.linspace(0, 5, 6)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 10, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "resultsfile = os.path.join(dirname, 'erm_defense_pgm_attacks_testset.pickle')\n", 309 | "adv_results = generate_adv_attack_curves(Xtt, Ytt, arch, eps_list, 'erm', ad.pgm,\n", 310 | " resultsfile, beta_list, dirname)\n", 311 | "\n", 312 | "resultsfile = os.path.join(dirname, 'fgm_defense_fgm_attacks_testset.pickle')\n", 313 | "adv_results = generate_adv_attack_curves(Xtt, Ytt, arch, eps_list, 'fgm', ad.fgm,\n", 314 | " resultsfile, beta_list, dirname)\n", 315 | "\n", 316 | "resultsfile = os.path.join(dirname, 'pgm_defense_pgm_attacks_testset.pickle')\n", 317 | "adv_results = generate_adv_attack_curves(Xtt, Ytt, arch, eps_list, 'pgm', ad.pgm,\n", 318 | " resultsfile, beta_list, dirname)\n", 319 | "\n", 320 | "resultsfile = os.path.join(dirname, 'wrm_defense_wrm_attacks_testset.pickle')\n", 321 | "adv_results = generate_adv_attack_curves(Xtt, Ytt, arch, eps_list, 'wrm', ad.wrm,\n", 322 | " resultsfile, beta_list, dirname)" 323 | ] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 2", 329 | "language": "python", 330 | "name": "python2" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 2 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython2", 342 | "version": "2.7.12" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 2 347 | } 348 | -------------------------------------------------------------------------------- /dl_spectral_normalization/models/alexnet.py: -------------------------------------------------------------------------------- 1 | # Using the filter sizes found here: 2 | # https://github.com/rharish101/DLGeneralization/blob/master/Mini%20Alexnet/cifar10_alexnet.py 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from .. import sn 7 | 8 | def alexnet(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 9 | """AlexNet architecture 10 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 11 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 12 | Finally a NUM_CLASSES-way linear layer is used for prediction 13 | """ 14 | 15 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', spectral_norm=False, reuse=reuse) 16 | conv1 = tf.nn.relu(conv, name='conv1_relu') 17 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 18 | padding='VALID', name='pool1') 19 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 20 | 21 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', spectral_norm=False, reuse=reuse) 22 | conv2 = tf.nn.relu(conv, name='conv2_relu') 23 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 24 | padding='VALID', name='pool2') 25 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 26 | 27 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 28 | lin = sn.linear(reshape, 384, scope_name='linear1', spectral_norm=False, reuse=reuse) 29 | lin1 = tf.nn.relu(lin, name='linear1_relu') 30 | 31 | lin = sn.linear(lin1, 192, scope_name='linear2', spectral_norm=False, reuse=reuse) 32 | lin2 = tf.nn.relu(lin, name='linear2_relu') 33 | 34 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, reuse=reuse) 35 | 36 | return fc 37 | 38 | 39 | def alexnet_bn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 40 | """AlexNet architecture with batch normalization 41 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 42 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 43 | Finally a NUM_CLASSES-way linear layer is used for prediction 44 | """ 45 | 46 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', spectral_norm=False, reuse=reuse, 47 | bn=True, training=training) 48 | conv1 = tf.nn.relu(conv, name='conv1_relu') 49 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 50 | padding='VALID', name='pool1') 51 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 52 | 53 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', spectral_norm=False, reuse=reuse, 54 | bn=True, training=training) 55 | conv2 = tf.nn.relu(conv, name='conv2_relu') 56 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 57 | padding='VALID', name='pool2') 58 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 59 | 60 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 61 | lin = sn.linear(reshape, 384, scope_name='linear1', spectral_norm=False, reuse=reuse) 62 | lin1 = tf.nn.relu(lin, name='linear1_relu') 63 | 64 | lin = sn.linear(lin1, 192, scope_name='linear2', spectral_norm=False, reuse=reuse) 65 | lin2 = tf.nn.relu(lin, name='linear2_relu') 66 | 67 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, reuse=reuse) 68 | 69 | return fc 70 | 71 | 72 | def alexnet_dropout(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 73 | """AlexNet architecture with dropout layers 74 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 75 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 76 | Finally a NUM_CLASSES-way linear layer is used for prediction 77 | """ 78 | 79 | dropout = 0.8 80 | 81 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', spectral_norm=False, reuse=reuse) 82 | conv1 = tf.nn.relu(conv, name='conv1_relu') 83 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 84 | padding='VALID', name='pool1') 85 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 86 | norm1 = tf.nn.dropout(norm1, dropout) 87 | 88 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', spectral_norm=False, reuse=reuse) 89 | conv2 = tf.nn.relu(conv, name='conv2_relu') 90 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 91 | padding='VALID', name='pool2') 92 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 93 | norm2 = tf.nn.dropout(norm2, dropout) 94 | 95 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 96 | lin = sn.linear(reshape, 384, scope_name='linear1', spectral_norm=False, reuse=reuse) 97 | lin1 = tf.nn.relu(lin, name='linear1_relu') 98 | 99 | lin = sn.linear(lin1, 192, scope_name='linear2', spectral_norm=False, reuse=reuse) 100 | lin2 = tf.nn.relu(lin, name='linear2_relu') 101 | 102 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, reuse=reuse) 103 | 104 | return fc 105 | 106 | 107 | def alexnet_wd(input_data, num_classes=10, wd=5e-4, update_collection=None, beta=1., reuse=None, training=False): 108 | """AlexNet architecture with weight decay 109 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 110 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 111 | Finally a NUM_CLASSES-way linear layer is used for prediction 112 | """ 113 | 114 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', spectral_norm=False, reuse=reuse, 115 | l2_norm=True, wd=wd) 116 | conv1 = tf.nn.relu(conv, name='conv1_relu') 117 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 118 | padding='VALID', name='pool1') 119 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 120 | 121 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', spectral_norm=False, reuse=reuse, 122 | l2_norm=True, wd=wd) 123 | conv2 = tf.nn.relu(conv, name='conv2_relu') 124 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 125 | padding='VALID', name='pool2') 126 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 127 | 128 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 129 | lin = sn.linear(reshape, 384, scope_name='linear1', spectral_norm=False, reuse=reuse, 130 | l2_norm=True, wd=wd) 131 | lin1 = tf.nn.relu(lin, name='linear1_relu') 132 | 133 | lin = sn.linear(lin1, 192, scope_name='linear2', spectral_norm=False, reuse=reuse, 134 | l2_norm=True, wd=wd) 135 | lin2 = tf.nn.relu(lin, name='linear2_relu') 136 | 137 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, reuse=reuse, 138 | l2_norm=True, wd=wd) 139 | 140 | return fc 141 | 142 | 143 | def alexnet_nolrn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 144 | """AlexNet architecture 145 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 146 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 147 | Finally a NUM_CLASSES-way linear layer is used for prediction 148 | """ 149 | 150 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', spectral_norm=False, reuse=reuse) 151 | conv1 = tf.nn.relu(conv, name='conv1_relu') 152 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 153 | padding='VALID', name='pool1') 154 | 155 | conv = sn.conv2d(pool1, [5, 5, 96, 256], scope_name='conv2', spectral_norm=False, reuse=reuse) 156 | conv2 = tf.nn.relu(conv, name='conv2_relu') 157 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 158 | padding='VALID', name='pool2') 159 | 160 | reshape = tf.reshape(pool2, [-1, 6*6*256]) 161 | lin = sn.linear(reshape, 384, scope_name='linear1', spectral_norm=False, reuse=reuse) 162 | lin1 = tf.nn.relu(lin, name='linear1_relu') 163 | 164 | lin = sn.linear(lin1, 192, scope_name='linear2', spectral_norm=False, reuse=reuse) 165 | lin2 = tf.nn.relu(lin, name='linear2_relu') 166 | 167 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, reuse=reuse) 168 | 169 | return fc 170 | 171 | 172 | def alexnet_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 173 | """AlexNet architecture with spectral normalization on all layers 174 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 175 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 176 | Finally a NUM_CLASSES-way linear layer is used for prediction 177 | """ 178 | 179 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', 180 | update_collection=update_collection, beta=beta, reuse=reuse) 181 | conv1 = tf.nn.relu(conv, name='conv1_relu') 182 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 183 | padding='VALID', name='pool1') 184 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 185 | 186 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', 187 | update_collection=update_collection, beta=beta, reuse=reuse) 188 | conv2 = tf.nn.relu(conv, name='conv2_relu') 189 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 190 | padding='VALID', name='pool2') 191 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 192 | 193 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 194 | lin = sn.linear(reshape, 384, scope_name='linear1', 195 | update_collection=update_collection, beta=beta, reuse=reuse) 196 | lin1 = tf.nn.relu(lin, name='linear1_relu') 197 | 198 | lin = sn.linear(lin1, 192, scope_name='linear2', 199 | update_collection=update_collection, beta=beta, reuse=reuse) 200 | lin2 = tf.nn.relu(lin, name='linear2_relu') 201 | 202 | fc = sn.linear(lin2, num_classes, scope_name='fc', 203 | update_collection=update_collection, beta=beta, reuse=reuse) 204 | 205 | return fc 206 | 207 | 208 | def alexnet_miyato_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 209 | """AlexNet architecture with spectral normalization on all layers 210 | SN is performed using Miyato's strategy (normalizing the convolutional kernel only) 211 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 212 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 213 | Finally a NUM_CLASSES-way linear layer is used for prediction 214 | """ 215 | 216 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', tighter_sn=False, 217 | update_collection=update_collection, beta=beta, reuse=reuse) 218 | conv1 = tf.nn.relu(conv, name='conv1_relu') 219 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 220 | padding='VALID', name='pool1') 221 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 222 | 223 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', tighter_sn=False, 224 | update_collection=update_collection, beta=beta, reuse=reuse) 225 | conv2 = tf.nn.relu(conv, name='conv2_relu') 226 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 227 | padding='VALID', name='pool2') 228 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 229 | 230 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 231 | lin = sn.linear(reshape, 384, scope_name='linear1', 232 | update_collection=update_collection, beta=beta, reuse=reuse) 233 | lin1 = tf.nn.relu(lin, name='linear1_relu') 234 | 235 | lin = sn.linear(lin1, 192, scope_name='linear2', 236 | update_collection=update_collection, beta=beta, reuse=reuse) 237 | lin2 = tf.nn.relu(lin, name='linear2_relu') 238 | 239 | fc = sn.linear(lin2, num_classes, scope_name='fc', 240 | update_collection=update_collection, beta=beta, reuse=reuse) 241 | 242 | return fc 243 | 244 | 245 | def alexnet_elu(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 246 | """AlexNet architecture with spectral normalization on all layers 247 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 248 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 249 | Finally a NUM_CLASSES-way linear layer is used for prediction 250 | """ 251 | 252 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', spectral_norm=False, reuse=reuse) 253 | conv1 = tf.nn.elu(conv, name='conv1_elu') 254 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 255 | padding='VALID', name='pool1') 256 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 257 | 258 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', spectral_norm=False, reuse=reuse) 259 | conv2 = tf.nn.elu(conv, name='conv2_elu') 260 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 261 | padding='VALID', name='pool2') 262 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 263 | 264 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 265 | lin = sn.linear(reshape, 384, scope_name='linear1', spectral_norm=False, reuse=reuse) 266 | lin1 = tf.nn.elu(lin, name='linear1_elu') 267 | 268 | lin = sn.linear(lin1, 192, scope_name='linear2', spectral_norm=False, reuse=reuse) 269 | lin2 = tf.nn.elu(lin, name='linear2_elu') 270 | 271 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, reuse=reuse) 272 | 273 | return fc 274 | 275 | 276 | def alexnet_elu_sn(input_data, num_classes=10, wd=0, update_collection=None, beta=1., reuse=None, training=False): 277 | """AlexNet architecture with spectral normalization on all layers 278 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 279 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 280 | Finally a NUM_CLASSES-way linear layer is used for prediction 281 | """ 282 | 283 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', 284 | update_collection=update_collection, beta=beta, reuse=reuse) 285 | conv1 = tf.nn.elu(conv, name='conv1_elu') 286 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 287 | padding='VALID', name='pool1') 288 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 289 | 290 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', 291 | update_collection=update_collection, beta=beta, reuse=reuse) 292 | conv2 = tf.nn.elu(conv, name='conv2_elu') 293 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 294 | padding='VALID', name='pool2') 295 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 296 | 297 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 298 | lin = sn.linear(reshape, 384, scope_name='linear1', 299 | update_collection=update_collection, beta=beta, reuse=reuse) 300 | lin1 = tf.nn.elu(lin, name='linear1_elu') 301 | 302 | lin = sn.linear(lin1, 192, scope_name='linear2', 303 | update_collection=update_collection, beta=beta, reuse=reuse) 304 | lin2 = tf.nn.elu(lin, name='linear2_elu') 305 | 306 | fc = sn.linear(lin2, num_classes, scope_name='fc', 307 | update_collection=update_collection, beta=beta, reuse=reuse) 308 | 309 | return fc 310 | 311 | 312 | def alexnet_snl2(input_data, num_classes, wd=0, update_collection=None, beta=1., reuse=None, training=False): 313 | """AlexNet architecture with spectral normalization on all layers except last one, which 314 | can be L2 regularized 315 | two [convolution 5x5 -> max-pool 3x3 -> local-response-normalization] modules 316 | followed by two fully connected layers with 384 and 192 hidden units, respectively. 317 | Finally a NUM_CLASSES-way linear layer is used for prediction 318 | """ 319 | 320 | conv = sn.conv2d(input_data, [5, 5, 3, 96], scope_name='conv1', 321 | update_collection=update_collection, beta=beta, reuse=reuse) 322 | conv1 = tf.nn.relu(conv, name='conv1_relu') 323 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 324 | padding='VALID', name='pool1') 325 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1') 326 | 327 | conv = sn.conv2d(norm1, [5, 5, 96, 256], scope_name='conv2', 328 | update_collection=update_collection, beta=beta, reuse=reuse) 329 | conv2 = tf.nn.relu(conv, name='conv2_relu') 330 | pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 331 | padding='VALID', name='pool2') 332 | norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2') 333 | 334 | reshape = tf.reshape(norm2, [-1, 6*6*256]) 335 | lin = sn.linear(reshape, 384, scope_name='linear1', 336 | update_collection=update_collection, beta=beta, reuse=reuse) 337 | lin1 = tf.nn.relu(lin, name='linear1_relu') 338 | 339 | lin = sn.linear(lin1, 192, scope_name='linear2', update_collection=update_collection, beta=beta) 340 | lin2 = tf.nn.relu(lin, name='linear2_relu') 341 | 342 | fc = sn.linear(lin2, num_classes, scope_name='fc', spectral_norm=False, wd=wd, l2_norm=True, reuse=reuse) 343 | 344 | return fc 345 | -------------------------------------------------------------------------------- /notebooks_figures/analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import itertools 3 | import pickle 4 | import time 5 | import tensorflow as tf 6 | import numpy as np 7 | import pandas as pd 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | 11 | 12 | import sys 13 | sys.path.insert(0, '../') 14 | from dl_spectral_normalization import dl_utils 15 | 16 | 17 | def plot_adv_attack_curves(data, eps_list, eps_train=None, lw=3, marker=True, 18 | xlim=None, ylim=None, title=None, legend=True): 19 | """Generate plot of error after sweeping through various magnitude attacks""" 20 | markers = ['s', 'o', 'v', 'D', '+', '.', '^', '*'] 21 | colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] 22 | for i, k in enumerate(data): 23 | plt.plot(eps_list, 1-data[k], label=k, lw=lw, ms=7, c = colors[i], 24 | marker=markers[i] if marker else None, 25 | markeredgecolor='k' if marker else None) 26 | if eps_train is not None: 27 | plt.axvline(x=eps_train, color='k', linestyle='--') 28 | if legend: plt.legend() 29 | plt.xlabel(r'$\epsilon$') 30 | plt.ylabel('error') 31 | if ylim is not None: plt.ylim(ylim) 32 | if xlim is not None: plt.xlim(xlim) 33 | if title is not None: plt.title(title) 34 | 35 | 36 | def get_l2_of_grads(X, Y, beta, save_dir, defense, arch, gpu_prop=0.2, load_epoch=None, batch_size=100): 37 | """Get the L2 norm of gradients through the network""" 38 | 39 | tf.reset_default_graph() 40 | g = dl_utils.graph_builder_wrapper(arch, adv='erm', save_dir=save_dir, update_collection='_', beta=beta) 41 | grads = tf.gradients(g['total_loss'], g['input_data']) 42 | if load_epoch is None: 43 | load_epoch = dl_utils.latest_epoch(save_dir) 44 | else: 45 | load_epoch = np.min((dl_utils.latest_epoch(save_dir), load_epoch)) 46 | 47 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 48 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 49 | g['saver'].restore(sess, os.path.join(save_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 50 | 51 | grads_eval = np.zeros(np.shape(X)) 52 | for i in range(0, len(X), batch_size): 53 | feed_dict = {g['input_data']: X[i:i+batch_size], g['input_labels']: Y[i:i+batch_size]} 54 | grads_eval[i:i+batch_size] = sess.run(grads, feed_dict=feed_dict)[0] * batch_size 55 | 56 | l2_norms = np.zeros(len(grads_eval)) 57 | for i, grad in enumerate(grads_eval): 58 | l2_norms[i] = np.sqrt(np.sum(np.square((grad)))) 59 | 60 | return l2_norms 61 | 62 | 63 | def make_kappa_plots(X, Y, beta_list, dirname, defense, resultsfile, arch, plot_stuff=True): 64 | """For all betas in a directory, get the l2_norms of the gradients""" 65 | if os.path.isfile(resultsfile): 66 | results = pickle.load(file(resultsfile, 'rb')) 67 | else: 68 | results = {} 69 | if plot_stuff: 70 | plt.figure(figsize=(16, 6)) 71 | for i, beta in enumerate(beta_list): 72 | if beta not in results: 73 | save_dir = os.path.join(dirname, '%s_beta%s'%(defense, beta)) 74 | l2_norms = get_l2_of_grads(X, Y, beta, save_dir, defense, arch, load_epoch=None) 75 | results[beta] = l2_norms 76 | pickle.dump(results, file(resultsfile, 'wb')) 77 | else: 78 | l2_norms = results[beta] 79 | if plot_stuff: 80 | plt.subplot(2, 3, i+1) 81 | plt.hist(l2_norms, density=True, bins=100) 82 | plt.xlabel(r'$\kappa$') 83 | plt.title(r'%s with $\beta = %s$'%(defense.upper(), beta)) 84 | if plot_stuff: 85 | plt.tight_layout() 86 | plt.show() 87 | return results 88 | 89 | 90 | def plot_hists(ratios, title=None, value_name=None, legend=True, add_markers=False): 91 | """Plots multiple histograms, one for each key: value pair in ratios 92 | (key = legend label, value = array of values to make hist of) 93 | """ 94 | colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] 95 | markers = ['s', 'D', 'v', 'o', '*', '^', '+', '.'] 96 | df = pd.DataFrame.from_dict(ratios) 97 | for i, beta in enumerate(df.columns): 98 | ax = sns.distplot(df[beta], rug=False) 99 | if add_markers: 100 | x = ax.lines[2*i].get_xdata() 101 | y = ax.lines[2*i].get_ydata() 102 | plt.plot(x[np.argmax(y)], np.max(y), marker=markers[i], markeredgecolor='k', 103 | color=colors[i], label=r'$\beta$=%s'%(beta)) 104 | plt.xlabel('data' if value_name is None else value_name) 105 | if title is not None: 106 | plt.title(title) 107 | if legend: plt.legend() 108 | 109 | 110 | def plot_training_curves(beta, defense, dirname, compare_inf=True, ylim=None, lw=2, 111 | num_batches_per_epoch=1, legend=True, ylabel=True, printfinal=False): 112 | """Plot training and validation curves for paper""" 113 | markers = ['s', 'D', 'v', 'o', '+', '.', '^', '*'] 114 | 115 | def smoothen(x, c=10): 116 | x_new = np.array(x) 117 | for i in range(len(x)): 118 | if i >= c-1: 119 | x_new[i] = np.mean(x[np.max((c-1, i-c+1)):i+1]) 120 | return x_new 121 | 122 | if compare_inf: 123 | save_dir = os.path.join(dirname, '%s_betainf'%(defense)) 124 | curves1 = dl_utils.extract_train_valid_tensorboard(save_dir, curve='accuracy', 125 | show_plot=False, only_final_value=False) 126 | tr_acc1, tt_acc1 = smoothen(curves1[0]), smoothen(curves1[1]) 127 | xaxis = np.arange(len(tr_acc1))*num_batches_per_epoch 128 | plt.plot(xaxis, tr_acc1, lw=lw, c='navy') 129 | plt.plot(xaxis, tt_acc1, lw=lw, c='royalblue') 130 | 131 | save_dir = os.path.join(dirname, '%s_beta%s'%(defense, beta)) 132 | curves2 = dl_utils.extract_train_valid_tensorboard(save_dir, curve='accuracy', 133 | show_plot=False, only_final_value=False) 134 | tr_acc2, tt_acc2 = smoothen(curves2[0]), smoothen(curves2[1]) 135 | xaxis = np.arange(len(tr_acc2))*num_batches_per_epoch 136 | plt.plot(xaxis, tr_acc2, lw=lw, c='orangered') 137 | plt.plot(xaxis, tt_acc2, lw=lw, c='orange') 138 | if len(tr_acc1) < len(tr_acc2): 139 | plt.plot([len(tr_acc1)*num_batches_per_epoch, len(tr_acc2)*num_batches_per_epoch], 140 | [tr_acc1[-1], tr_acc1[-1]], '--', lw=lw, c='navy') 141 | plt.plot([len(tt_acc1)*num_batches_per_epoch, len(tr_acc2)*num_batches_per_epoch], 142 | [tt_acc1[-1], tt_acc1[-1]], '--', lw=lw, c='royalblue') 143 | elif len(tr_acc2) < len(tr_acc1): 144 | plt.plot([len(tr_acc2)*num_batches_per_epoch, len(tr_acc1)*num_batches_per_epoch], 145 | [tr_acc2[-1], tr_acc2[-1]], '--', lw=lw, c='orangered') 146 | plt.plot([len(curves2[1])*num_batches_per_epoch, len(curves1[1])*num_batches_per_epoch], 147 | [tt_acc2[-1], tt_acc2[-1]], '--', lw=lw, c='orange') 148 | 149 | xmax = int(np.max([[len(tr_acc1), len(tr_acc2)]])) 150 | xm = xmax/2 - int(0.075*xmax) 151 | plt.plot(xm*num_batches_per_epoch, tr_acc1[xm] if xm < len(tr_acc1) else tr_acc1[-1], 152 | marker=markers[0], label=r'train', lw=lw, ms=10, markeredgecolor='k', c='navy') 153 | xm = xmax/2 - int(0.0375*xmax) 154 | plt.plot(xm*num_batches_per_epoch, tt_acc1[xm] if xm < len(tt_acc1) else tt_acc1[-1], 155 | marker=markers[1], label=r'valid', lw=lw, ms=10, markeredgecolor='k', c='royalblue' ) 156 | xm = xmax/2 + int(0.0375*xmax) 157 | plt.plot(xm*num_batches_per_epoch, tr_acc2[xm] if xm < len(tr_acc2) else tr_acc2[-1], 158 | marker=markers[2], label=r'train (SN)', lw=lw, ms=10, markeredgecolor='k', c='orangered' ) 159 | xm = xmax/2 + int(0.075*xmax) 160 | plt.plot(xm*num_batches_per_epoch, tt_acc2[xm] if xm < len(tt_acc2) else tt_acc2[-1], 161 | marker=markers[3], label=r'valid (SN)', lw=lw, ms=10, markeredgecolor='k', c='orange' ) 162 | 163 | if printfinal: 164 | if compare_inf: 165 | print('Final train acc (beta = %s): %.4f'%(np.inf, curves1[0][-1])) 166 | print('Final test acc (beta = %s): %.4f'%(np.inf, curves1[1][-1])) 167 | print('Final train acc (beta = %s): %.4f'%(beta, curves2[0][-1])) 168 | print('Final test acc (beta = %s): %.4f'%(beta, curves2[1][-1])) 169 | 170 | if num_batches_per_epoch != 1: 171 | plt.xlabel('training steps') 172 | else: 173 | plt.xlabel('epoch') 174 | if ylabel: plt.ylabel('accuracy') 175 | if ylim is not None: plt.ylim(ylim) 176 | plt.title('%s training'%(defense.upper())) 177 | # plt.grid() 178 | if legend: plt.legend() 179 | 180 | 181 | def plot_training_curves_onenet(save_dir, ylim=None, lw=2, title=None, printfinal=False, 182 | num_batches_per_epoch=1, legend=True, ylabel=True): 183 | """Plot training and validation curves for paper (just for a particular network)""" 184 | markers = ['s', 'D', 'v', 'o', '+', '.', '^', '*'] 185 | colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] 186 | 187 | def smoothen(x, c=10): 188 | x_new = np.array(x) 189 | for i in range(len(x)): 190 | if i >= c-1: 191 | x_new[i] = np.mean(x[np.max((c-1, i-c+1)):i+1]) 192 | return x_new 193 | 194 | curves = dl_utils.extract_train_valid_tensorboard(save_dir, curve='accuracy', 195 | show_plot=False, only_final_value=False) 196 | tr_acc, tt_acc = smoothen(curves[0]), smoothen(curves[1]) 197 | xaxis = np.arange(len(tr_acc))*num_batches_per_epoch 198 | plt.plot(xaxis, tr_acc, lw=lw, c=colors[0]) 199 | plt.plot(xaxis, tt_acc, lw=lw, c=colors[1]) 200 | 201 | xmax = len(tr_acc) 202 | xm = xmax/2 - int(0.0375*xmax) 203 | plt.plot(xm*num_batches_per_epoch, tr_acc[xm], 204 | marker=markers[0], label=r'train', lw=lw, ms=10, markeredgecolor='k', c=colors[0]) 205 | xm = xmax/2 + int(0.0375*xmax) 206 | plt.plot(xm*num_batches_per_epoch, tt_acc[xm], 207 | marker=markers[1], label=r'valid', lw=lw, ms=10, markeredgecolor='k', c=colors[1]) 208 | 209 | if printfinal: 210 | print('Final train acc: %.4f'%(curves[0][-1])) 211 | print('Final test acc: %.4f'%(curves[1][-1])) 212 | 213 | if num_batches_per_epoch != 1: 214 | plt.xlabel('training steps') 215 | else: 216 | plt.xlabel('epoch') 217 | if ylabel: plt.ylabel('accuracy') 218 | if ylim is not None: plt.ylim(ylim) 219 | if title is not None: plt.title(title) 220 | # plt.grid() 221 | if legend: plt.legend(loc=3) 222 | 223 | 224 | def get_margins(X, Y, save_dir, beta, arch, adv='erm', eps=0.3): 225 | """Get margins from trained network 226 | For a (X, Y) pair, the margin is the difference between 227 | 1) the fully-connected-layer-entry corresponding to the correct prediction 228 | 2) the highest other fully-connected-layer-entry 229 | """ 230 | embeddings = dl_utils.get_embedding(X, save_dir, arch, beta=beta, adv=adv, eps=eps) 231 | print('Sanity check: accuracy is %.5f.' 232 | %(np.sum(np.argmax(embeddings, 1) == Y)/float(len(Y)))) 233 | margins = np.zeros(len(embeddings)) 234 | for i, e in enumerate(embeddings): 235 | margins[i] = e[Y[i]]-np.max(np.delete(e, Y[i])) 236 | return margins 237 | 238 | 239 | def compute_margins(X, Y, dirname, beta_list, defense, eps=0.3, mode=1, extra_correction=False): 240 | """For AlexNet, get the margins and normalize using the bounds discussed in the paper""" 241 | key_order = [ 242 | 'conv1/weights:0', 243 | 'conv2/weights:0', 244 | 'linear1/weights:0', 245 | 'linear2/weights:0', 246 | 'fc/weights:0' 247 | ] 248 | conv_input_shapes = { 249 | 'conv1/weights:0': (28, 28, 3), 250 | 'conv2/weights:0': (13, 13, 96) 251 | } 252 | 253 | all_margins = [] 254 | all_gammas = [] 255 | for beta in beta_list: 256 | save_dir = os.path.join(dirname, '%s_beta%s'%(defense, beta)) 257 | margins = get_margins(X, Y, save_dir, beta, arch, adv=defense, eps=eps) 258 | if mode == 0: 259 | gamma = 1. 260 | elif mode == 1: 261 | gamma = dl_utils.get_overall_sn(save_dir, arch, return_snorms=False, beta=beta) 262 | else: 263 | snorms = dl_utils.get_overall_sn(save_dir, arch, return_snorms=True, beta=beta) 264 | c = 1. 265 | for i in range(len(key_order)): 266 | c += np.prod([snorms[key_order[j]] for j in range(i)]) 267 | gamma = np.prod(snorms.values())*c 268 | # correct by sum of frobenius norms divided by spectral norms 269 | if extra_correction: 270 | d = dl_utils.get_weights(save_dir, arch, beta=beta) 271 | snorms = dl_utils.get_overall_sn(save_dir, arch, return_snorms=True, beta=beta) 272 | c = 0. 273 | for k in key_order: 274 | fnorm = np.sum(np.square(d[k])) 275 | if 'conv' in k: 276 | fnorm *= np.prod(conv_input_shapes[k])/2**2 # Divide by stride length squared 277 | c += fnorm/snorms[k]**2 278 | gamma *= np.sqrt(c) 279 | all_gammas.append(gamma) 280 | all_margins.append(margins/gamma) 281 | return all_margins, all_gammas 282 | 283 | 284 | def smoothen(x, c=10): 285 | """Smoothen curves for plotting""" 286 | x_new = np.array(x) 287 | for i in range(len(x)): 288 | if i >= c-1: 289 | x_new[i] = np.mean(x[np.max((c-1, i-c+1)):i+1]) 290 | return x_new 291 | 292 | 293 | def plot_curve_set(curve_set, num_batches_per_epoch=1, lw=2, xm=None, 294 | left_marker_pos=0.2, right_marker_pos=0.8, legend=True): 295 | """Plot set of curves in curve_set, which is a dictionary with 296 | keys being legend labels and values being arrays to plot 297 | """ 298 | colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] 299 | markers = ['s', 'D', 'v', 'o', '+', '.', '^', '*'] 300 | 301 | num_points = np.max([len(curve_set[i]) for i in curve_set]) 302 | if xm is None or len(xm) != len(curve_set): 303 | xm = np.random.choice(range(int(num_points*left_marker_pos), 304 | int(num_points*right_marker_pos)), 4) 305 | 306 | for i, k in enumerate(sorted(curve_set)): 307 | xaxis = np.arange(len(curve_set[k]))*num_batches_per_epoch 308 | plt.plot(xaxis, curve_set[k], lw=lw, c=colors[i]) 309 | 310 | if len(curve_set[k]) < num_points: 311 | plt.plot([len(curve_set[k])*num_batches_per_epoch, num_points*num_batches_per_epoch], 312 | [curve_set[k][-1], curve_set[k][-1]], '--', lw=lw, c=colors[i]) 313 | 314 | if xm[i] > len(curve_set[k]): 315 | y_dots = [curve_set[k][-1], curve_set[k][-1]] 316 | else: 317 | y_dots = [curve_set[k][xm[i]], curve_set[k][xm[i]]] 318 | plt.plot([xm[i]*num_batches_per_epoch, xm[i]*num_batches_per_epoch], 319 | y_dots, 320 | lw=lw, c=colors[i], label=k, 321 | marker=markers[i], ms=10, markeredgecolor='k') 322 | 323 | if legend: plt.legend() 324 | 325 | 326 | def print_best_beta(tr_accs, va_accs, tt_accs, verbose=False, printinf=True, return_beta=False): 327 | """Choose the value of beta that achieved the highest validation accuracy""" 328 | best_beta = sorted(va_accs.items(), key=lambda x:x[1])[-1][0] 329 | if verbose: 330 | if printinf: 331 | print('beta = inf:\ttrain acc %.4f\tvalidation acc %.4f\ttest acc %.4f'\ 332 | %(tr_accs[np.inf], va_accs[np.inf], tt_accs[np.inf])) 333 | print('beta = %s:\ttrain acc %.4f\tvalidation acc %.4f\ttest acc %.4f'\ 334 | %(best_beta, tr_accs[best_beta], va_accs[best_beta], tt_accs[best_beta])) 335 | if return_beta: 336 | return best_beta 337 | 338 | 339 | def get_table_results(Xtr, Ytr, Xva, Yva, Xtt, Ytt, arch, attacks_dict, results_file, dirname, 340 | load_epoch=None, printinf=True, opt='momentum', order=2): 341 | """Get final train, validation, test accuracies for all trained networks in a directory""" 342 | num_channels = Xtr.shape[-1] 343 | if os.path.isfile(results_file): 344 | table_results = pickle.load(file(results_file, 'rb')) 345 | else: 346 | table_results = {} 347 | 348 | for adv in attacks_dict.keys(): 349 | print('%s training'%(adv.upper())) 350 | eps = attacks_dict[adv] 351 | if adv in table_results: 352 | tr_accs, va_accs, tt_accs = table_results[adv] 353 | else: 354 | tr_accs, va_accs, tt_accs = {}, {}, {} 355 | for f in os.listdir(dirname): 356 | if adv not in f or 'pickle' in f or 'beta' not in f or 'rand' in f: continue 357 | save_dir = os.path.join(dirname, f) 358 | beta = float(f.split('beta')[1]) 359 | if beta in tr_accs and beta in va_accs and beta in tt_accs: continue 360 | print('processing file %s...'%(f)) 361 | tr_accs[beta] = dl_utils.build_graph_and_get_acc(Xtr, Ytr, arch, adv=adv, eps=eps, 362 | save_dir=save_dir, beta=beta, 363 | num_channels=num_channels, order=order, 364 | load_epoch=load_epoch, opt=opt) 365 | va_accs[beta] = dl_utils.build_graph_and_get_acc(Xva, Yva, arch, adv=adv, eps=eps, 366 | save_dir=save_dir, beta=beta, 367 | num_channels=num_channels, order=order, 368 | load_epoch=load_epoch, opt=opt) 369 | tt_accs[beta] = dl_utils.build_graph_and_get_acc(Xtt, Ytt, arch, adv=adv, eps=eps, 370 | save_dir=save_dir, beta=beta, 371 | num_channels=num_channels, order=order, 372 | load_epoch=load_epoch, opt=opt) 373 | table_results[adv] = (tr_accs, va_accs, tt_accs) 374 | pickle.dump(table_results, file(results_file, 'wb')) 375 | print_best_beta(tr_accs, va_accs, tt_accs, verbose=True, printinf=printinf) 376 | return table_results 377 | 378 | 379 | def compute_output_input_norm_ratios(X, save_dir, arch, beta=1.0): 380 | """Ratio of L2 norm of embedded layer v L2 norm of input""" 381 | input_norms = np.sqrt(np.sum(np.square(X), axis=(1, 2, 3))) 382 | output_norms = np.linalg.norm(dl_utils.get_embedding(X, save_dir, arch, adv='erm', 383 | beta=beta, num_channels=X.shape[-1]), 384 | axis=1) 385 | return output_norms/input_norms -------------------------------------------------------------------------------- /dl_spectral_normalization/dl_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import time 4 | import os 5 | import tensorflow as tf 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | import adversarial as ad 10 | from sklearn.utils import shuffle 11 | 12 | 13 | def loss(g, Y, mean=True, add_other_losses=True): 14 | """Cross-entropy loss between labels and output of linear activation function""" 15 | out = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=g) 16 | 17 | if mean: 18 | out = tf.reduce_mean(out) 19 | 20 | if add_other_losses: 21 | tf.add_to_collection('losses', out) 22 | return tf.add_n(tf.get_collection('losses')) 23 | 24 | return out 25 | 26 | 27 | def acc(g, Y): 28 | """Accuracy""" 29 | correct_prediction = tf.equal(Y, tf.argmax(g, 1)) 30 | return tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 31 | 32 | 33 | def graph_builder_wrapper(arch, 34 | num_classes=10, 35 | adv='erm', 36 | eps=0.3, 37 | save_dir=None, 38 | wd=0, 39 | update_collection=None, 40 | beta=1., 41 | save_histograms=False, 42 | num_channels=3, 43 | max_save=200, 44 | training=False, 45 | loss=loss, 46 | order=2, 47 | opt='momentum'): 48 | """Wrapper for building graph and accessing all relevant ops/placeholders""" 49 | 50 | assert isinstance(adv, str) 51 | 52 | input_data = tf.placeholder(tf.float32, shape=[None, 28, 28, num_channels], name='in_data') 53 | input_labels = tf.placeholder(tf.int64, shape=[None], name='in_labels') 54 | 55 | fc_out = arch(input_data, num_classes=num_classes, wd=wd, training=training, 56 | beta=beta, update_collection=update_collection) 57 | 58 | # Loss and optimizer (with adversarial training options) 59 | learning_rate = tf.Variable(0.01, name='learning_rate', trainable=False) 60 | 61 | if adv in ['wrm', 'fgm', 'pgm']: 62 | if adv == 'wrm': 63 | adv_x = ad.wrm(input_data, fc_out, eps=eps, order=order, model=arch, k=15, 64 | num_classes=num_classes, graph_beta=beta, training=training) 65 | elif adv == 'fgm': 66 | adv_x = ad.fgm(input_data, fc_out, eps=eps, order=order, training=training) 67 | 68 | elif adv == 'pgm': 69 | adv_x = ad.pgm(input_data, fc_out, eps=eps, order=order, model=arch, k=15, 70 | num_classes=num_classes, graph_beta=beta, training=training) 71 | 72 | fc_out_adv = arch(adv_x, num_classes=num_classes, wd=wd, 73 | beta=beta, update_collection=update_collection, reuse=True, training=training) 74 | 75 | else: 76 | fc_out_adv = fc_out 77 | 78 | total_loss = loss(fc_out_adv, input_labels) 79 | total_acc = acc(fc_out_adv, input_labels) 80 | 81 | with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): 82 | if num_channels == 1 or opt == 'adam': # For MNIST dataset 83 | opt_step = tf.train.AdamOptimizer(0.001).minimize(total_loss) 84 | else: 85 | opt_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(total_loss) 86 | 87 | # Output dictionary to useful tf ops in the graph 88 | graph = dict( 89 | input_data = input_data, 90 | input_labels = input_labels, 91 | total_loss = total_loss, 92 | total_acc = total_acc, 93 | fc_out = fc_out, 94 | fc_out_adv = fc_out_adv, 95 | opt_step = opt_step, 96 | learning_rate = learning_rate 97 | ) 98 | 99 | # Saving weights and useful information to tensorboard 100 | if save_dir is not None: 101 | saver = tf.train.Saver(max_to_keep=max_save) 102 | graph['saver'] = saver 103 | 104 | if not os.path.isdir(save_dir): 105 | tf.summary.scalar('loss', total_loss) 106 | tf.summary.scalar('accuracy', total_acc) 107 | 108 | # Add histograms for trainable variables (really slows down training though) 109 | if save_histograms: 110 | for var in tf.trainable_variables(): 111 | tf.summary.histogram(var.op.name, var) 112 | 113 | # Merge all the summaries and write them out to save_dir 114 | merged = tf.summary.merge_all() 115 | train_writer = tf.summary.FileWriter(os.path.join(save_dir, 'train')) 116 | graph_writer = tf.summary.FileWriter(os.path.join(save_dir, 'graph'), graph=tf.get_default_graph()) 117 | valid_writer = tf.summary.FileWriter(os.path.join(save_dir, 'validation')) 118 | 119 | graph['merged'] = merged 120 | graph['train_writer'] = train_writer 121 | graph['graph_writer'] = graph_writer 122 | graph['valid_writer'] = valid_writer 123 | 124 | return graph 125 | 126 | 127 | def train(Xtr, Ytr, graph, save_dir, 128 | val_set=None, 129 | lr_initial=0.01, 130 | seed=0, 131 | num_epochs=100, 132 | batch_size=100, 133 | write_every=1, 134 | save_every=None, 135 | verbose=True, 136 | load_epoch=-1, 137 | early_stop_acc=None, 138 | early_stop_acc_num=10, 139 | gpu_prop=0.2, 140 | shuffle_data=True): 141 | """Train the graph""" 142 | 143 | np.random.seed(seed) 144 | tf.set_random_seed(seed) 145 | 146 | if save_every is None: 147 | if num_epochs > 100: 148 | save_every = num_epochs/100 149 | else: 150 | save_every = 1 151 | 152 | start = time.time() 153 | training_losses, training_accs = [], [] 154 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 155 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 156 | 157 | sess.run(tf.global_variables_initializer()) 158 | 159 | if load_epoch > -1: 160 | if verbose: 161 | print('Continuing training starting at epoch %s+1'%(load_epoch)) 162 | if save_dir is not None: 163 | restore_weights_file = os.path.join(save_dir, 'checkpoints', 'epoch%s'%(load_epoch)) 164 | if 'saver' in graph: 165 | graph['saver'].restore(sess, restore_weights_file) 166 | 167 | else: 168 | if save_dir is not None and not os.path.exists(os.path.join(save_dir, 'checkpoints')): 169 | os.mkdir(os.path.join(save_dir, 'checkpoints')) 170 | if 'saver' in graph and save_dir is not None: 171 | graph['saver'].save(sess, os.path.join(save_dir, 'checkpoints', 'epoch0')) 172 | 173 | for epoch in range(load_epoch+2, load_epoch+num_epochs+2): 174 | 175 | lr = lr_initial*0.95**(epoch/390.) # initial lr * decay rate ^(step/decay_steps) 176 | sess.run(graph['learning_rate'].assign(lr)) 177 | 178 | t = time.time() 179 | training_loss = 0. 180 | training_acc = 0. 181 | steps = 0 182 | if shuffle_data: 183 | Xtr_, Ytr_ = shuffle(Xtr, Ytr) 184 | else: 185 | Xtr_, Ytr_ = Xtr, Ytr 186 | 187 | if len(Xtr_)%batch_size == 0: 188 | end = len(Xtr_) 189 | else: 190 | end = len(Xtr_)-batch_size 191 | for i in range(0, end, batch_size): 192 | 193 | x, y = Xtr_[i:i+batch_size], Ytr_[i:i+batch_size] 194 | 195 | feed_dict = {graph['input_data']: x, graph['input_labels']: y} 196 | training_loss_, training_acc_, _ = \ 197 | sess.run([graph['total_loss'], graph['total_acc'], graph['opt_step']], 198 | feed_dict=feed_dict) 199 | training_loss += training_loss_ 200 | training_acc += training_acc_ 201 | steps += 1 202 | 203 | if verbose: 204 | print('\rEpoch %s/%s (%.3f s), batch %s/%s (%.3f s): loss %.3f, acc %.3f' 205 | %(epoch, load_epoch+num_epochs+1, time.time()-start, steps, 206 | len(Xtr_)/batch_size, time.time()-t, training_loss_, training_acc_), 207 | end='') 208 | 209 | if 'saver' in graph and epoch%write_every == 0: # writing to tensorboard 210 | summary = sess.run(graph['merged'], feed_dict=feed_dict) 211 | graph['train_writer'].add_summary(summary, epoch) 212 | 213 | if val_set is not None: # make sure to keep the val_set small 214 | feed_dict = {graph['input_data']: val_set['X'], 215 | graph['input_labels']: val_set['Y']} 216 | summary = sess.run(graph['merged'], feed_dict=feed_dict) 217 | graph['valid_writer'].add_summary(summary, epoch) 218 | 219 | if 'saver' in graph and save_dir is not None and epoch%save_every == 0: 220 | graph['saver'].save(sess, os.path.join(save_dir, 'checkpoints', 'epoch%s'%(epoch))) 221 | 222 | training_losses.append(training_loss/float(steps)) 223 | training_accs.append(training_acc/float(steps)) 224 | 225 | if early_stop_acc is not None and np.mean(training_accs[-early_stop_acc_num:]) >= early_stop_acc: 226 | if verbose: 227 | print('\rMean acc >= %s for last %s epochs. Stopping training after epoch %s/%s.' 228 | %(early_stop_acc, early_stop_acc_num, epoch, load_epoch+num_epochs+1), end='') 229 | break 230 | 231 | if verbose: print('\nDONE: Trained for %s epochs.'%(epoch)) 232 | if 'saver' in graph and save_dir is not None and not os.path.exists(os.path.join(save_dir, 'checkpoints', 'epoch%s'%(epoch))): 233 | graph['saver'].save(sess, os.path.join(save_dir, 'checkpoints', 'epoch%s'%(epoch))) 234 | 235 | return training_losses, training_accs 236 | 237 | 238 | def build_graph_and_train(Xtr, Ytr, save_dir, arch, 239 | num_classes=10, 240 | num_channels=3, 241 | adv=None, 242 | eps=0.3, 243 | wd=0, 244 | gpu_id=0, 245 | verbose=True, 246 | beta=1., 247 | order=2, 248 | opt='momentum', 249 | get_train_time=False, 250 | **kwargs): 251 | """Build tensorflow graph and train""" 252 | 253 | tf.reset_default_graph() 254 | 255 | if verbose: start = time.time() 256 | with tf.device("/gpu:%s"%(gpu_id)): 257 | if save_dir is None or not os.path.exists(save_dir) or 'checkpoints' not in os.listdir(save_dir): 258 | graph = graph_builder_wrapper(arch, adv=adv, eps=eps, 259 | num_classes=num_classes, save_dir=save_dir, 260 | wd=wd, beta=beta, num_channels=num_channels, 261 | order=order, training=True, opt=opt) 262 | if get_train_time: 263 | start = time.time() 264 | tr_losses, tr_accs = train(Xtr, Ytr, graph, save_dir, **kwargs) 265 | if get_train_time: 266 | train_time = time.time()-start 267 | else: 268 | 269 | graph = graph_builder_wrapper(arch, num_classes=num_classes, save_dir=save_dir, 270 | wd=wd, beta=beta, num_channels=num_channels, 271 | order=order, update_collection='_', opt=opt) 272 | if verbose: 273 | print('Model already exists.. loading trained model..') 274 | 275 | if 'gpu_prop' in kwargs: 276 | gpu_prop = kwargs.get('gpu_prop', "default value") 277 | if save_dir is None: 278 | train_acc = np.nan 279 | if verbose: 280 | print('save_dir set to None.. returning NaN since weights not saved') 281 | else: 282 | Ytrhat = predict_labels(Xtr, graph, save_dir, gpu_prop=gpu_prop) 283 | train_acc = np.sum(Ytrhat == Ytr)/float(len(Ytr)) 284 | 285 | if verbose: 286 | print('Train acc: %.2f (%.1f s elapsed)'%(train_acc, time.time()-start)) 287 | 288 | if get_train_time: 289 | return train_acc, train_time 290 | 291 | return train_acc 292 | 293 | 294 | def predict_labels_in_sess(X, graph, sess, batch_size=100): 295 | """Predict labels within a session""" 296 | 297 | labels = np.zeros(len(X)) 298 | for i in range(0, len(X), batch_size): 299 | g_ = sess.run(graph['fc_out'], feed_dict = {graph['input_data']:X[i:i+batch_size]}) 300 | labels[i:i+batch_size] = np.argmax(g_, 1) 301 | return labels 302 | 303 | 304 | def latest_epoch(save_dir): 305 | """Grabs int corresponding to last epoch of weights saved in save_dir""" 306 | 307 | return max([int(f.split('epoch')[1].split('.')[0]) 308 | for f in os.listdir(os.path.join(save_dir, 'checkpoints')) if 'epoch' in f]) 309 | 310 | 311 | def predict_labels(X, graph, load_dir, 312 | batch_size=100, 313 | load_epoch=None, 314 | gpu_prop=0.2): 315 | """Use trained model to predict""" 316 | 317 | # Load from checkpoint corresponding to latest epoch if none given 318 | if load_epoch is None: 319 | load_epoch = latest_epoch(load_dir) 320 | else: 321 | load_epoch = np.min((latest_epoch(load_dir), load_epoch)) 322 | 323 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 324 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 325 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 326 | return predict_labels_in_sess(X, graph, sess, batch_size=batch_size) 327 | 328 | 329 | def build_graph_and_predict(X, load_dir, arch, 330 | Y=None, 331 | num_classes=10, 332 | gpu_id=0, 333 | beta=1., 334 | num_channels=3, 335 | load_epoch=None, 336 | gpu_prop=0.2, 337 | order=2, 338 | opt='momentum'): 339 | """Build a tensorflow graph and predict labels""" 340 | 341 | tf.reset_default_graph() 342 | with tf.device("/gpu:%s"%(gpu_id)): 343 | graph = graph_builder_wrapper(arch, num_classes=num_classes, save_dir=load_dir, 344 | order=order, beta=beta, opt=opt, 345 | update_collection='_', num_channels=num_channels) 346 | Yhat = predict_labels(X, graph, load_dir, load_epoch=load_epoch, gpu_prop=gpu_prop) 347 | if Y is None: 348 | return Yhat 349 | return np.sum(Yhat == Y)/float(len(Y)) 350 | 351 | 352 | def build_graph_and_get_acc(X, Y, arch, adv='erm', eps=0.3, save_dir=None, beta=1., order=2, 353 | batch_size=100, gpu_prop=0.2, load_epoch=None, num_channels=3, opt='momentum'): 354 | """Build a tensorflow graph and gets accuracy""" 355 | 356 | tf.reset_default_graph() 357 | graph = graph_builder_wrapper(arch, adv=adv, eps=eps, save_dir=save_dir, 358 | update_collection='_', beta=beta, opt=opt, 359 | order=order, num_channels=num_channels) 360 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 361 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 362 | load_file = tf.train.latest_checkpoint(os.path.join(save_dir, 'checkpoints')) 363 | if load_epoch is not None: 364 | load_file = load_file.replace(load_file.split('epoch')[1], str(load_epoch)) 365 | graph['saver'].restore(sess, load_file) 366 | 367 | num_correct = 0 368 | num_total_samples = 0 369 | for i in range(0, len(X), batch_size): 370 | x, y = X[i:i+batch_size], Y[i:i+batch_size] 371 | num_batch_samples = len(x) 372 | feed_dict = {graph['input_data']: x, graph['input_labels']: y} 373 | num_correct += sess.run(graph['total_acc'], feed_dict=feed_dict)*num_batch_samples 374 | num_total_samples += num_batch_samples 375 | 376 | return num_correct/num_total_samples 377 | 378 | 379 | def recover_curve(X, Y, load_dir, 380 | num_classes=10, 381 | gpu_id=0, 382 | verbose=True, 383 | keyword='epoch'): 384 | """Evaluate performance on a dataset during training""" 385 | 386 | list_epochs = np.unique([int(f.split(keyword)[1].split('.')[0]) \ 387 | for f in os.listdir(os.path.join(load_dir, 'checkpoints')) if keyword in f]) 388 | accs = np.zeros(len(list_epochs)) 389 | 390 | if verbose: start = time.time() 391 | for i, epoch in enumerate(list_epochs): 392 | accs[i] = build_graph_and_predict(X, load_dir, 393 | Y=Y, 394 | num_classes=num_classes, 395 | gpu_id=gpu_id, 396 | load_epoch=epoch) 397 | if verbose: 398 | print('\rRecovered accuracy for %s %s/%s: %.2f (%.2f s elapsed)' 399 | %(keyword, i+1, len(list_epochs), accs[i], time.time()-start), end='') 400 | if verbose: 401 | print('') 402 | return accs 403 | 404 | 405 | def recover_train_and_test_curves(Xtr, Ytr, Xtt, Ytt, load_dir, 406 | num_classes=10, 407 | gpu_id=0, 408 | verbose=True): 409 | """Recover training and test curves""" 410 | 411 | train_accs = recover_curve(Xtr, Ytr, load_dir, 412 | num_classes=num_classes, 413 | gpu_id=gpu_id, 414 | verbose=verbose) 415 | test_accs = recover_curve(Xtt, Ytt, load_dir, 416 | num_classes=num_classes, 417 | gpu_id=gpu_id, 418 | verbose=verbose) 419 | return train_accs,test_accs 420 | 421 | 422 | def get_embedding_in_sess(X, graph, sess, batch_size=100): 423 | """Gets embedding (last layer output) within a session""" 424 | 425 | num_classes = graph['fc_out_adv'].shape.as_list()[1] 426 | embedding = np.zeros((len(X), num_classes)) 427 | for i in range(0, len(X), batch_size): 428 | embedding_ = sess.run(graph['fc_out_adv'], feed_dict = {graph['input_data']:X[i:i+batch_size]}) 429 | embedding[i:i+batch_size] = embedding_ 430 | return embedding 431 | 432 | 433 | def get_embedding(X, load_dir, arch, num_classes=10, num_channels=3, beta=1., 434 | adv='erm', eps=0.3, order=2, 435 | batch_size=100, sn_fc=False, load_epoch=None, gpu_prop=0.2): 436 | """recovers the representation of the data at the layer before the softmax layer 437 | Use sn_fc to indicate that last layer (should be named 'fc/weights:0') needs to be 438 | spectrally normalized. 439 | """ 440 | 441 | tf.reset_default_graph() 442 | graph = graph_builder_wrapper(arch, num_classes=num_classes, num_channels=num_channels, 443 | save_dir=load_dir, beta=beta, update_collection='_', 444 | order=order, adv=adv, eps=eps) 445 | 446 | if load_epoch is None: 447 | load_epoch = latest_epoch(load_dir) 448 | else: 449 | load_epoch = np.min((latest_epoch(load_dir), load_epoch)) 450 | 451 | if sn_fc: 452 | assert 'fc/weights:0' in [v.name for v in tf.global_variables()] 453 | W_fc_tensor = [v for v in tf.global_variables() if v.name == 'fc/weights:0'][0] 454 | b_fc_tensor = [v for v in tf.global_variables() if v.name == 'fc/bias:0'][0] 455 | 456 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 457 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 458 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 459 | 460 | # spectral normalization on last layer (fully connected) 461 | if sn_fc: 462 | W_fc, b_fc = sess.run([W_fc_tensor, b_fc_tensor]) 463 | sigma = np.linalg.svd(W_fc.T)[1][0] 464 | sess.run([W_fc_tensor.assign(W_fc/sigma), b_fc_tensor.assign(b_fc/sigma)]) 465 | 466 | return get_embedding_in_sess(X, graph, sess, batch_size=batch_size) 467 | 468 | 469 | def get_grads_wrt_samples(X, Y, load_dir, arch, num_classes=10, num_channels=3, beta=1., 470 | batch_size=100, load_epoch=None, gpu_prop=0.2): 471 | """Computes gradients with respect to samples""" 472 | 473 | if load_epoch is None: 474 | load_epoch = latest_epoch(load_dir) 475 | else: 476 | load_epoch = np.min((latest_epoch(load_dir), load_epoch)) 477 | 478 | tf.reset_default_graph() 479 | graph = graph_builder_wrapper(arch, num_classes=num_classes, num_channels=num_channels, 480 | save_dir=load_dir, beta=beta, update_collection='_') 481 | 482 | grad, = tf.gradients(graph['total_loss'], graph['input_data']) 483 | 484 | g = np.zeros(np.shape(X)) 485 | 486 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 487 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 488 | 489 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 490 | 491 | for i in range(0, len(X), batch_size): 492 | g_ = sess.run(grad, feed_dict={graph['input_data']: X[i:i+batch_size], 493 | graph['input_labels']: Y[i:i+batch_size]}) 494 | g[i:i+batch_size] = g_ 495 | 496 | return g 497 | 498 | 499 | def check_weights_svs(load_dir, arch, num_classes=10, n=2, load_epoch=None, beta=1.): 500 | """Check singular value of all weights""" 501 | 502 | tf.reset_default_graph() 503 | graph = graph_builder_wrapper(arch, num_classes=num_classes, save_dir=load_dir, 504 | update_collection='_', beta=beta) 505 | 506 | if load_epoch is None: 507 | load_epoch = latest_epoch(load_dir) 508 | else: 509 | load_epoch = np.min((latest_epoch(load_dir), load_epoch)) 510 | 511 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 512 | 513 | # Grab all weights 514 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 515 | 516 | for tfvar in tf.get_collection('w_after_sn'): 517 | if 'weights' in tfvar.name: 518 | W = tfvar.eval(session=sess) 519 | print('%30s with shape %15s and top %s sv(s): %s' \ 520 | %(tfvar.name, np.shape(W), n, 521 | ', '.join(['%.2f'%(i) for i in np.linalg.svd(W.reshape(-1, np.shape(W)[-1]))[1][:n]]))) 522 | 523 | 524 | def print_total_number_of_trainable_params(): 525 | """prints total number of trainable parameters according to default graph""" 526 | total_parameters = 0 527 | for variable in tf.trainable_variables(): 528 | # shape is an array of tf.Dimension 529 | shape = variable.get_shape() 530 | variable_parameters = 1 531 | for dim in shape: 532 | variable_parameters *= dim.value 533 | total_parameters += variable_parameters 534 | print(total_parameters) 535 | 536 | 537 | def extract_curve_tensorboard(tb_log_file, curve='loss'): 538 | """Given the name of a tensorboard event file, returns the desired curve""" 539 | 540 | values = [] 541 | for e in tf.train.summary_iterator(tb_log_file): 542 | for v in e.summary.value: 543 | if v.tag == curve: 544 | values.append(v.simple_value) 545 | return np.array(values) 546 | 547 | 548 | def extract_train_valid_tensorboard(load_dir, curve='accuracy', show_plot=False, only_final_value=False): 549 | """For a particular model, grab the tfevents training and validation curves""" 550 | 551 | # get train 552 | event_file = sorted(os.listdir(os.path.join(load_dir, 'train')))[0] 553 | tb_log_file = os.path.join(load_dir, 'train', event_file) 554 | train_values = extract_curve_tensorboard(tb_log_file, curve=curve) 555 | 556 | # get validation 557 | event_file = sorted(os.listdir(os.path.join(load_dir, 'validation')))[0] 558 | tb_log_file = os.path.join(load_dir, 'validation', event_file) 559 | valid_values = extract_curve_tensorboard(tb_log_file, curve=curve) 560 | 561 | if show_plot: 562 | plt.figure() 563 | plt.plot(train_values, label='training %s'%(curve)) 564 | plt.plot(valid_values, label='validation %s'%(curve)) 565 | plt.grid() 566 | plt.legend() 567 | plt.xlabel('epoch') 568 | plt.ylabel(curve) 569 | plt.show() 570 | 571 | if only_final_value: 572 | return train_values[-1], valid_values[-1] 573 | 574 | return train_values, valid_values 575 | 576 | 577 | def plot_stacked_hist(v0, v1, labels=None, bins=20): 578 | """Plots two histograms on top of one another""" 579 | if labels is None: 580 | labels = ['0', '1'] 581 | bins = np.histogram(np.hstack((v0, v1)), bins=bins)[1] 582 | data = [v0, v1] 583 | plt.hist(data, bins, label=labels, alpha=0.8, color=['r','g'], 584 | normed=True, edgecolor='none') 585 | plt.legend() 586 | 587 | 588 | def get_margins(X, Y, load_dir, arch, sn_fc=True, beta=1.): 589 | """Compute margins for X (margin = last layer difference between true label and 590 | highest value that's not the true label) 591 | """ 592 | 593 | num_classes = len(np.unique(Y)) 594 | embeddings = get_embedding(X, load_dir, arch, num_classes=10, beta=beta, sn_fc=sn_fc) 595 | # embeddings = np.exp(embeddings) 596 | # embeddings /= np.sum(embeddings, 1).reshape(-1, 1) 597 | margins = np.zeros(len(embeddings)) 598 | 599 | print('Sanity check: accuracy is %.5f.' 600 | %(np.sum(np.argmax(embeddings, 1) == Y)/float(len(Y)))) 601 | 602 | for i in range(len(embeddings)): 603 | if Y[i] == 0: 604 | margins[i] = np.max(embeddings[i][1:]) 605 | elif Y[i] == len(embeddings[0])-1: 606 | margins[i] = np.max(embeddings[i][:-1]) 607 | else: 608 | margins[i] = np.max([np.max(embeddings[i][:int(Y[i])]), 609 | np.max(embeddings[i][int(Y[i])+1:])]) 610 | 611 | return margins 612 | 613 | 614 | def get_weights(load_dir, arch, num_classes=10, beta=1., num_channels=3, 615 | load_epoch=None, verbose=False, gpu_prop=0.2): 616 | """Grab all weights from graph (also works for spectrally-normalized models)""" 617 | 618 | if load_epoch is None: 619 | load_epoch = latest_epoch(load_dir) 620 | else: 621 | load_epoch = np.min((latest_epoch(load_dir), load_epoch)) 622 | 623 | tf.reset_default_graph() 624 | graph = graph_builder_wrapper(arch, save_dir=load_dir, num_classes=num_classes, beta=beta, 625 | update_collection='_', num_channels=num_channels) 626 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 627 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 628 | graph['saver'].restore(sess, os.path.join(load_dir, 'checkpoints', 'epoch%s'%(load_epoch))) 629 | d = {v.name:sess.run(v) for v in tf.trainable_variables()} 630 | for v in tf.get_collection('w_after_sn'): 631 | key = v.name.split('_SN')[0]+':0' 632 | d[key] = sess.run(v) 633 | if verbose: 634 | dim = d[key].shape[-1] 635 | print('%30s with shape %15s and top 2 sv(s): %s' \ 636 | %(key, np.shape(d[key]), 637 | ', '.join(['%.2f'%(i) for i in np.linalg.svd(d[key].reshape(-1, dim))[1][:2]]))) 638 | 639 | return d 640 | 641 | 642 | def l2_norm(input_x, epsilon=1e-12): 643 | """normalize input to unit norm""" 644 | input_x_norm = input_x/(tf.reduce_sum(input_x**2)**0.5 + epsilon) 645 | return input_x_norm 646 | 647 | 648 | def power_iteration_tf(W, Ip=20, seed=0): 649 | """Power method for computing top singular value of a matrix W 650 | NOTE: resets tensorflow graph 651 | """ 652 | 653 | def power_iteration(u, w_mat, Ip): 654 | u_ = u 655 | for _ in range(Ip): 656 | v_ = l2_norm(tf.matmul(u_, tf.transpose(w_mat))) 657 | u_ = l2_norm(tf.matmul(v_, w_mat)) 658 | return u_, v_ 659 | 660 | tf.reset_default_graph() 661 | if seed is not None: 662 | tf.set_random_seed(seed) 663 | 664 | u = tf.get_variable('u', shape=[1, W.shape[-1]], 665 | initializer=tf.truncated_normal_initializer(), trainable=False) 666 | 667 | w_mat = tf.Variable(W) 668 | u_hat, v_hat = power_iteration(u, w_mat, Ip) 669 | sigma = tf.matmul(tf.matmul(v_hat, w_mat), tf.transpose(u_hat)) 670 | 671 | with tf.Session() as sess: 672 | sess.run(tf.global_variables_initializer()) 673 | return sess.run(sigma).reshape(-1) 674 | 675 | 676 | def power_iteration_conv_tf(W, length=28, width=28, stride=1, Ip=20, seed=0, padding='SAME'): 677 | """Power method for computing top singular value of a convolution operation using W. 678 | NOTE: resets tensorflow graph 679 | Also, note that if you set stride to 1 when the network is trained with stride = 2, 680 | the output will be twice as large as expected 681 | """ 682 | 683 | u_dims = [1, length, width, W.shape[-2]] 684 | 685 | def power_iteration_conv(u, w_mat, Ip): 686 | u_ = u 687 | for _ in range(Ip): 688 | v_ = l2_norm(tf.nn.conv2d(u_, w_mat, strides=[1, stride, stride, 1], padding=padding)) 689 | u_ = l2_norm(tf.nn.conv2d_transpose(v_, w_mat, u_dims, 690 | strides=[1, stride, stride, 1], padding=padding)) 691 | return u_, v_ 692 | 693 | tf.reset_default_graph() 694 | if seed is not None: 695 | tf.set_random_seed(seed) 696 | 697 | # Initialize u (our "eigenimage") 698 | u = tf.get_variable('u', shape=u_dims, 699 | initializer=tf.truncated_normal_initializer(), trainable=False) 700 | 701 | w_mat = tf.Variable(W) 702 | u_hat, v_hat = power_iteration_conv(u, w_mat, Ip) 703 | z = tf.nn.conv2d(u_hat, w_mat, strides=[1, stride, stride, 1], padding=padding) 704 | sigma = tf.reduce_sum(tf.multiply(z, v_hat)) 705 | 706 | with tf.Session() as sess: 707 | sess.run(tf.global_variables_initializer()) 708 | return sess.run(sigma).reshape(-1) 709 | 710 | 711 | def get_overall_sn(load_dir, arch, num_classes=10, verbose=True, return_snorms=False, 712 | num_channels=3, seed=0, load_epoch=None, beta=1., gpu_prop=0.2): 713 | """Gets the overall spectral norm of a network with specified weights""" 714 | 715 | d = get_weights(load_dir, arch, num_classes=num_classes, gpu_prop=gpu_prop, 716 | num_channels=num_channels, load_epoch=load_epoch, beta=beta) 717 | 718 | s_norms = {} 719 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 720 | gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=gpu_prop))) as sess: 721 | 722 | conv_ops_dict = {'/'.join(i.name.split('/')[:-1]): {'stride':int(i.get_attr('strides')[1]), 723 | 'padding':i.get_attr('padding'), 724 | 'length':i.inputs[0].get_shape().as_list()[1], 725 | 'width':i.inputs[0].get_shape().as_list()[2], 726 | 'seed':seed} 727 | for i in sess.graph.get_operations() 728 | if 'Conv2D' in i.name and 'gradients' not in i.name} 729 | 730 | for i in sorted(d.keys()): 731 | if 'weights' in i: 732 | if 'conv' in i: 733 | key = '/'.join(i.split('/')[:-1]) 734 | s_norms[i] = power_iteration_conv_tf(d[i], **conv_ops_dict[key])[0] 735 | else: 736 | s_norms[i] = power_iteration_tf(d[i], seed=seed)[0] 737 | 738 | if verbose: 739 | print('%20s with spectral norm %.4f'%(i, s_norms[i])) 740 | 741 | if return_snorms: 742 | return s_norms 743 | 744 | return(np.prod(s_norms.values())) 745 | 746 | 747 | from IPython.display import clear_output, Image, display, HTML 748 | 749 | def strip_consts(graph_def, max_const_size=32): 750 | """Strip large constant values from graph_def.""" 751 | strip_def = tf.GraphDef() 752 | for n0 in graph_def.node: 753 | n = strip_def.node.add() 754 | n.MergeFrom(n0) 755 | if n.op == 'Const': 756 | tensor = n.attr['value'].tensor 757 | size = len(tensor.tensor_content) 758 | if size > max_const_size: 759 | tensor.tensor_content = ""%size 760 | return strip_def 761 | 762 | def show_graph(graph_def, max_const_size=32): 763 | """Visualize TensorFlow graph.""" 764 | if hasattr(graph_def, 'as_graph_def'): 765 | graph_def = graph_def.as_graph_def() 766 | strip_def = strip_consts(graph_def, max_const_size=max_const_size) 767 | code = """ 768 | 773 | 774 |
775 | 776 |
777 | """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand())) 778 | 779 | iframe = """ 780 | 781 | """.format(code.replace('"', '"')) 782 | display(HTML(iframe)) -------------------------------------------------------------------------------- /notebooks_figures/results_effect_of_SN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pickle\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "\n", 23 | "from analysis import *\n", 24 | "\n", 25 | "import sys\n", 26 | "sys.path.insert(0, '../') \n", 27 | "from dl_spectral_normalization.models import alexnet as model\n", 28 | "from get_cifar10 import get_cifar10_dataset\n", 29 | "\n", 30 | "%matplotlib inline" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "Xtr, Ytr, Xtt, Ytt = get_cifar10_dataset(0, n_samps=50000)\n", 40 | "_, Ytr_rand, _, Ytt_rand = get_cifar10_dataset(100, n_samps=50000)\n", 41 | "val_set = {'X': Xtt[:500], 'Y': Ytt[:500]}\n", 42 | "Xtt, Ytt = Xtt[500:], Ytt[500:]\n", 43 | "beta_list = np.array([np.inf, 1.0, 1.3, 1.6, 2.0, 4.0])\n", 44 | "arch = model.alexnet_sn\n", 45 | "C2 = np.mean([np.sqrt(np.sum(np.square(i))) for i in Xtr])\n", 46 | "gamma = 0.002*C2\n", 47 | "eps_wrm = 1./(2*gamma)\n", 48 | "eps = 0.05*C2" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "# Distribution of magnitude of gradients" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": { 62 | "scrolled": false 63 | }, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUMAAAD0CAYAAAABi0R+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xd4HNW5+PHvu7vqvVdbsuSGKy6AbcAFU4yB0HvvBEJIckNy0/O73JuE5F4ChFBCCL13AiF02xjcjY17k6tk2ZJs9S6d3x8zMmshyyrb9X6eZx9pdsq+szp6Z+bMnHPEGINSSg10Dn8HoJRSgUCToVJKoclQKaUATYZKKQVoMlRKKUCToVJKAZoMlVIK0GT4LSKyQ0QaRKTW7fWQiFwnIm32dLWIrBaRs93WyxcRIyJfddpeqog0i8gOn++MCgpHKHPZIhIuIr8WkU0iUicixSLyvoic3mndZhFJ7bTNr+zymO/r/QlWmgy7do4xJtbt9T37/UXGmFggEXgYeElEEjutGy0iY9ymrwC2+yBmFdw6l7kS4DXgXOAaIAkYAjwAnNVp3e3A5R0TIjIWiPZN2KFDk2EfGGPagWeBGGBYp9nPAte6TV8DPOOj0FSIEJFTgdOAc40xS4wxzfbr38aYuzot/ixWOetwLVrmek2TYR+IiBO4HmgBdnaa/RxwmYg4RWQUEAss8XGIKvidCiwxxuzpwbKLgXgROcYum5dhlUPVCy5/BxCg3hKRVrfpu7ES3xQRqcQ6I2wFrjLG7O+07h5gE1ZhnoV11FbqaNzL3DygHCjtmCkiyUARIECEMSay0/odZ4fzgQ1AsbcDDjV6Zti184wxiW6vx+33FxtjErHqb94BTj7C+s8A12HV42gyVD3hXubOAyqArI6ZxpgDdtmbBER0sf6zWPXT16GXyH2iybAPjDG1wHeBq0VkQheLvI5VyV1kjNnl0+BUqPgEOE5EcnuysDFmJ9aNlLnAG94MLFRpMuwjY8wB4O/Ar7uYVwecAtzk67hUaDDGfAh8hnX5fIL9mE0YMKWb1W4ETrHLn+olrTPs2j9FpM1t+iPg7S6Wux/YJiLjgGr3GcaY5V6MTw0M5wM/x7oZkgMcANYAZ3S1sDFmm+9CCz2inbsqpZReJiulFKDJUCmlAE2GSikFaDJUSimgl3eTU1NTTX5+vpdCUaFkxYoV5caYtP5sQ8ub6ilPlLdeJcP8/HyWL9cnRtTRiUjnNtu9puVN9ZQnypteJiulFD5Mhi8s2cULS7RlmvIdLXOqN3x+ZqgPeSt/2F5ex/zNZdQ0tvg7FBWgvN4cr+PI3Nzazicb9nHPu+s5Z3wWf7xovLc/WikA7nh+Je+t2QtAZJiD2SMzeOiKCYiInyNTgcRnZ4Yfb9jH51vLiQp38sryPbyybLevPloNYOU1TXywrpThGbFcOzWfwcnRvLdmL7c+u4L65tajb0ANGD5LhhtLqxmeEcsPZg9jaHosv3x7LetKqnz18WqA+mjDPlxO4YKJuYzIjOPaqfnMHZvFxxv2cfnjSzhY1+zvEFWA8EkyPFjXTHltM8PS43A5HVwyeRDJ0eHc/vxKquq1Dkd5R7sxbNlfw+jsBOIjwwAQEU4amsoVx+exrriKK/+uCVFZfJIMt+6vBWBYeiwAsREu/nrlREoqG/j+S1/R2tbuizDUAFNS2UBjSzuFabHfmjcqO56rpuSxtayWG55eRkNzWxdbUAOJT5Lhlv01JESFkRb3TW/lm0prmDs2i/mby/jJ61/T3q53mZVnbSuz+jgtTIvpcv7wjDgumpjLql2V3PniStq0DA5oPkmGxZUN5KVEf+vu3QlDUjj1mHTeWFnMBY98SXOrniEqzykqqyU9LoI4+xK5K2NyEjhrXBYfb9jPH/+90YfRqUDj9WTY1m6orG8hJaarMWxg1oh0Th+VwardlVz9xBLKapq8HZIKcS8s2cVzi3eys6KegiOcFbqbWpDCCUOSeWxBET9+dbUPIlSByOvJsLK+GQMkx3R9dBYRZo5I55LJuazaXck5f1nIqt2V3g5Lhbjqhhaa29rJiO88oua3iQhnj8umIDWGt74qZsXOgz6IUAUaryfDA/XWnbqkmPBulzt2UBKvf3caTodw4SNf8vM31ng7NBXCymqtK4zU2K6vSDpzOoQrjh9MfFQYtzyznN0H6r0ZngpA3k+G9mMLydHdJ0OAr/dUcd20fHISo3hx6S5eWa4PZqu+KberW9J6mAwBoiNcXDs1n5a2dq57cqk+cjPAeD0ZHqxrxukQ4qOOXIntLibCxQ0nDmFoeiw/ff1rTYiqT8pqmwl3OYiL7F2L07S4CC49bjA7K+q56Znl+sjNAOKDy+QWkqLDcPSiHWi4y8FVU/I4aWgqP3nta+15RPVaeW0TabERfWp/PCQ1hksmD2LlzoOc//AX+hzsAOGTM8OkHlwidxbmdPD4NZOZNSKNn7+5hgc/2aI93qgeK69pIjW29+Wuw5icBM4Zn83G0hp+9fY6LXsDgE/qDJOPcvPkSN5YWcyskelMGJTIfR9t5qJHF9GiR2l1FC1t7VQ2tBz2kH9fTClIYcbwNF5cuouH5+n47KHOq8mwqqGFhpa2Pp0ZdnA5HFw0KZdZI9JZsfMgNzy1TPukU90q7+Wd5O6cPiqDc4/N5k8fbOLtVcX93p4KXF5NhvuqGwFIiO7ZzZMjERFOG5XBhRNz+HJbBZc+tvjQtpXqrNLu/KOvVyTuRIRJg5PIT4nm7le/Zun2A/3epgpMPkmG8d00h+qNSXnJPHHtZHZW1HHBw1+ytli7AFPfVmk/25rYjysSdy6ng6tOyCM3KYqbn1nOln01HtmuCixeTYb7q63Lld4+3tCdkspGrj9xCO3GcOEjX/Lsoh3ayYM6TGVDCy6HEBPu9Ng2oyNcXDAxl7Z2wzX/WEpxZYPHtq0Cg3eTYY3nkyFAdmIU1584hMHJ0fzq7XVc8tgibcKnDqmsbyEhKszj3fonx4Rz3bR8aptaufxvi9lbpQkxlHg5GTYS7nIQ4fLcEbpDbISL66blc++FY9lRUcd5f/2CuQ98zv9+sMnjn6WCS1VDC4n9rKc+kuzEKK46IY991Y3MfeBzdpTXeeVzlO95/TI53sNnhe5EhLZ2uGPmUE49Jp2i8loe+mwrNzy1jIVbyvXyeYCqrG8mMcoz9YVdGZQczY0nDaGptZ2LHv2Slbu0Y4dQ4PUzw+76kvOUiDAnp4zM4O7TRzL7mHRW767kqieWMPu++TyxcDt1TTrwz0DR0tZOTWNrv59gOJrcpGhunV5ITISLy/62mDe/2uPVz1Pe5/U6Q0/XF3YnKtzJ7JEZfH/2MC6eZFV23/Puek7+42c8+cV2fWB7ACitasQAiT1sC98faXERXHVCHjmJUfzw5dX87l8btLfsIOa1ZGiMYX91E3ERvkuGHcKcDiYMTuK2GYXcNqOQpOgw/t8/13PyvZ/p4zghrsS+y+upx2qOpqNjkSkFyfxtQRHXPbn00KM9Krh4LRnWNrXS0NLW495qvGVwcjQ3nDiEK08YTF1zK995aCF3PL9S25qGqBL7Dq8vzgw7OB3Cd8bncMGEHBYXVXDuX79gY2m1zz5feYbXkuE+Lzxj2FciwujsBH4wezgjMuN5b81efvLa13rZHIJKKu0H/f1wEJ6cn8yNJw7hQF0z5//1S15bofWIwcRryXB/jVUofXEDpaeiwp1cdcJgThmZzqsr9nDNE0sPdT6rQkNxZQMx4U7CXT4Z6+xbBqfEcMesoYzLTeDHr67mRy+volrb0gcFr5WYjoGd/FFn2B0R4dRjMrh4Ui7LdhzgrAc/5/01e/UxnBBRUtngs/rCI4mPDOPscdmcMjKdt1YVc+b9n/P5ljK/xqSOzmvJsLTKf5crPTFhcBK3TC8gISqM7z6/klP+bx4PfrJFH6INciWVDSQEQJlzOqyD7q3TC2lqbefqJ5bysze+1rPEAOa9ZFjdSEy4k8gwz7c+8ZTcpGjevfMk/nzpeNLjI/nzR5uZ+b/zuOLxxdo7SRAyxlB8sMFrrU/6YlByNHeeMpSTh6Xy8rLdnH7fAv61Zq/ewAtAXryB0khGwtGHafS3V5bvoaG5nfOOzeEnc0Zy+qgM1hRXcclji/jRy6uoqtcjebCobmylrrnNp3eSeyLM6eDMMVncOr0QgNufX8kljy1i3qb9Wj0TQLxWoVda1UhmD8asDSQJUWHMHJHOtMJU5m8u453VJSwuquCByydwXH6yv8NTR9HxjGGCn+sMj2RQcjR3zBrK8p0H+Gzjfq57chn5KdFcPHkQF03K7dEYz8p7vFpnGGzJsEO4y8FpozK4ZXoBja3tXPrYIu7990YaW3SktEB26IHrADszdOd0CCcMSeHHp4/g4km5APzpg01M/f0n3PT0Mj7duE9bsfiJV84M29sN+2uaguIyuTu5SdHcOWsom/bV8Mi8bXy4rpT/vXg8EwYn+Ts01YXiQ61PAjcZdnDZraQmDE6ivLaJFTsPsrjoAB9v2E9OYhRXT83jsuMG+f3O+EDilTPD8romWtsNWUGeDMHqBGJcbiLXTcunvLaZix5dxP0fb9ajdwAqrmwg3OkgJsAe5zqa1NgIzhidyU/mjODy4wcT7nLwh/c3cvz/fMJv31nH7gP1/g5xQPBKqdlXZT1jmBEfSUVtaDzUPDwjju+fMox/fl3C/R9vYUnRAR64/FjS44I/4YeKkspGshIjezVGdyBxORyMzUlgbE4Ce6saWLilnOcW7+TZxTs5fVQGlx8/mBOHpuJ0BOf+BTqvJMNSe+yTzBBKhmC1YLl4Ui6FaTG8t2Yvcx9YyJ8vHc/Jw9L8HZoCig/Wk50Q5e8wPCIrIYqLJw/i9NGZLNpWwaKiCt5fW0pyTDgzhqdx4tBUpg9P1YOxB3k3GSZEsq4ktBqsiwiT8pLJTYrmxaW7uOYfS7llegH/cdoIvzUBU5ai8jrOHJPl7zA8KiEqjDljMpl9TDqbSmtYv7eaD9aV8uZX1rClg5OjuWNWIecemxPQz/QGAy9dJjfidIhHxq0NVBnxkdw+cyjvrdnLY/OL+GJrOfdfOoGh6bH+Dm1AOlDXTGV9C4VpMf4OxSvCnA7G5CQwJieBdmMorWpk074aVu+u5Kevr+FPH2zi+hOHcNWUvIBogROMvHIqs7eqkbTYiJCv2wh3OTh/Qg5XnTCY4oMNnP2Xz3lu8U5tXeAHRWW1ABSEaDJ05xAhOzGKWSPSuWv2MG48aQhJ0eH86YNNTPu9ddNluzYr7TWvnBluL68lLyXaG5sOSKOyE8hNjub1FXv45VtrWVxUwb0Xjgu6u5rBrKjM+ucvSI2l1L6BNxCICIVpsRSmxVJS2cDCreU8u2gnT3+5g2Oy4vnTxeMYnZ3g7zCDgsfPDI0xbN1fO+AuF+Mjw7h2Wj5njMrgva/3ctp989lv150q79tWXkuYU8hNCo0bKH2RnRjFJZMHcfecEcwckUZReS1nPbiQH7+6moragXOA6CuPJ8Oy2iaqG1sHXDIE6/Jlxoh0rplqPZN4wSNfsrNCL1d8oaisjryUGFxOvYkVHxnGaaMyufv0kUwflsqbK4uZfd983vqqWKtwuuHxkrN1v1V3MxCTYYcRmXHcdPIQaptaufjRRWwqrfF3SCGvqKyWgtTQry/sjahwJ3PGZPG9U4YSF+HiBy+v4rvPrdQOjY/A48lwm113U5g2cJMhWE35rpmaT0NLG5c8toglRRX+DilktbS1s+tAPQUDvMwdSUZ8JLfOKGTO6Ew+2rCP6X/8jF+/vdbfYQUczyfD/bXEhDtDoilef2XGR3Lr9EJSYsK58u9LeGLhdu2yyQvWFFfR0mYYm6M3Co7EIcL04WncPrOQ2AgXzyzayQ9fXnVoeA7lpcvkwvRYJEibRHlackw4V03JY1h6LPe8u57L/rZYR07zsEXbrLPuqYUpfo4k8GUlRHH7zEJmjUjn3a9LmPWnedz34SbttxMPJ0NjDJv21TBUL1cOExnm5KopeVwwIYc1xVWcef/n/OyNNXpU9pAvt5UzMjOO5Bjt4aUnXE6ri7o7TxlGfmoMD366leN+9zG//9eGQ8N1DEQefRBu1e5KymqaOHFoqic3GxJEhMn5yYzKjueTjft5dflu3l5VzE0nDeGm6QXEB9AogsGksaWN5TsOcuUJef4OJeikxkZw5Ql57K1qYP7mMh7/vIgnFm5nzphMLpqUy0lDUwfU3XmPJsP3vt5LuNPBqaMyPLnZkBId7uKccdlMK0jhw/X7ePDTrTy9aCc3nDiEq6fm6dlNLy3ZfoCm1nam6SVyn2UlRHHZcYM5MKqZRdvKWbC5jHe/3ktidBinjEzn9FEZTB+eRnR4aDci8Njetbcb/rVmL9OHp2rbyB5IiY3g8uMHM/1gA59u3MefP97MX+dtZc7oTM4Zn81JQ1OJCteG991pbzfc99Fm0uMi9GrEA5JjwjlrXDZnjM5k874a1pZU8/6aUt5YWUyEy8G0whRmjUxnWmEKhWmhd1/AY8nwkfnbKKlq5CdzRnpqkwNCTlIUV0/NZ191I4uLKliwxRp7JdzpYGyu1bfd8Iw48lOiyUqMIiM+IuSP0D1hjOHpRTtYvbuSP100Tg8cHuRyOhiVncCo7ATa2g07KupYv7ea1Xuq+GyTNf5zUnQYY3MTOSYzjsK0WHKTo8iMjyQtLoLYCFdQJsp+/1c9vqCID9eXsmzHQb4zPptzxmd7Iq4BJyM+knOPzaG1vZ0d5fVs2V/Dzop6nl+yk5a2wx/HiQpzkhQdRkJ0OPGRLmIjXESGO4lwOQhzOHA4BIeACAhi/+RQAXV/v0NPGiZ0LN+xWkSYg7vP8O3Bb29VAw9+soU1xVWsLa5mWmEKF07M9WkMA4nT8U3b53PGWb0DFZXVsvNAPZtLa1i8rYLmtvbD1gl3OUiODicu0kV0uJMIl5NwlwOXU3A5HIQ5BZfTgcshuByC0yGICE6H9QiQQ+RbZbSjzE0pSPFaNZz0pnmOiJQBO3v5GalAeS/XCVShtC/g3f3JM8b0q9fbPpY3CMy/UyDGBIEZV19i6n9583ZbRRFZboyZ7NUP8ZFQ2hcIvf3pEIj7FYgxQWDG5a+YBs59c6WU6oYmQ6WUwjfJ8G8++AxfCaV9gdDbnw6BuF+BGBMEZlx+icnrdYZKKRUM9DJZKaXQZKiUUoCHkqGIzBGRTSKyVUT+s4v5ESLysj1/iYjke+JzvaUH+3OdiJSJyCr7dZM/4uwJEfmHiOwXkS578xTLg/a+fi0iE30dY18FYrnrQUw/EpH19nf9iYh4vYeJo8XkttyFImJExCePtfQkLhG5xP6+1onIC14NyBjTrxfgBLYBBUA4sBoY1WmZ24FH7d8vA17u7+d669XD/bkOeMjfsfZwf6YDE4G1R5g/F3gf6yH/KcASf8fswb+TT8tdD2OaBUTbv383EGKyl4sDFgCLgckB8vcbBnwFJNnT6d6MyRNnhscDW40xRcaYZuAl4NxOy5wLPG3//howWwK38WJP9idoGGMWAAe6WeRc4BljWQwkikiWb6Lrl0Asd0eNyRjzmTGm3p5cDHi7LWFPy/M9wL2Arzo07ElcNwN/NcYcBDDG7PdmQJ5IhjnAbrfpPfZ7XS5jjGkFqoBA7XOpJ/sDcKF9qfOaiAzyTWhe0dP9DTSBWO56+13eiHVW7k1HjcmuGhlkjHnPy7H0Ki5gODBcRL4QkcUiMsebAWn3J33zT+BFY0yTiNyKdfZxip9jUkFERK4CJgMz/ByHA7gPq+on0LiwLpVnYp1BLxCRscaYSm98mCfODIsB9zOjXPu9LpcREReQAATqcHFH3R9jTIUxpmNU7r8Dk3wUmzf05O8XiAKx3PXouxSRU4FfAN9xK0f+iikOGAPME5EdWPXG7/jgJkpPvqs9wDvGmBZjzHZgM1Zy9A4PVIS6gCJgCN9UhI7utMwdHF6R/Yq3K2i9vD9Zbr+fDyz2d9xH2ad8jnwD5SwOv4Gy1N/xevDv5NNy18OYJmDdOBgWKN9Tp+Xn4ZsbKD35ruYAT9u/p2JdVqd4LSYP7dhcrKy9DfiF/d5/YR35ACKBV4GtwFKgwBcFwYv783tgnf0H/AwY6e+Yu9mXF4G9QAvWkfZG4DbgNnu+AH+193WNL/4RfPh38nm560FMHwP7gFX26x1/x9RpWZ8kwx5+V4J1Cb/eLpuXeTMebY6nlFJoCxSllAI0GSqlFKDJUCmlAE2GSikFaDJUSilAk6FSSgGaDJVSCgiRZCgi80RkpojM6+d2YkTkIRGZYk9Hich8EXG6LfN/IrJaRP7SzXa+tV4Xy4SLyAK7mdi3pj25X8rztMyFnpBIhp3ZHUHutTuN7HjViMj/HGXV27BaLZxkT98AvGGMabO3WwicaIwZb4y5s5vtHLZeV4zVbdEnwKVdTavgomUu+IVkMsTqPOFZY8xQY8xQrMbdpcATR1lvDrAJq5kUwJXA2wAiMgKrqVKeiHwlIjHdbMd9vdvt3oNvE5EEESmx/3EigLfsZTt0nlbBQ8tckAvVZPgMcKnb6f9MYIcxpuhIK4hIJFbvuxOB+SISjtWWdQeAMWYTVlddvzLGTDDG1B1hO4etBzyCdfT9A9Y/TDpwrbF6K1kLHOe2eudpFTy0zAW5kEyGxpgKYBFwtv3WjViFojvDsArmRmNMC1YvGZ37TRuL1TlDdw5bz1iNv2+0t30RcK8xZrk9rw1oFpG4rqY7E5HzRORxscb1OP0ocSgf0jIX/EIyGdoeB24UkQSscUDeFJGf2T3mnmPX76S5LZ+G1bPuA/Z0A1ZdjrvRwFoRecqu1L5FRLaJSLqItItI5hHWSwIi7N87d6kfweFdrXeePsQY85Yx5maseiat5wk8WuaCWCgnw0+xCtp/AK/alcUdYzy8BtxojClzWz4beB1wiEiSscZdcNqXMthHzhZjTANWPc5J9kuAi4HNxpjSLtYLA54CyoGHgetFZK49LwUot88KvjXdjV9idbulAouWuSAWssnQvlR4CqtH4Y7LFRdWz77NQGLHsnY9z0QgE3gU6Lgj9yHf3OUbg1W/AlbBnAAci1VB/mP7PbpY71fAeOB7WP8km4DHRSQRa6Q093EnOk8fRiz3Au8bY1Z2+wUon9MyF+R80Ymjt19YhWImMK/T+zHAGLfpe7E615yO1f37oKNsdyLWHcKu5u3AKkTTAYNbx5PdrddpG28Aw7uZPmy/gO8DK7D+eW7z9/c+kF9a5kLvFdIDQhnr7ttat+mfus0+6ihpxpiVIvKZiDhNp+e3jDH5bpPS0/UOrWDdAXzLGLO5q+kjxPMg8ODR4lb+o2UueIVET9cich32Ec0Y85Rfg/GgUN2vUBCqf5tQ3a+eCIlkqJRS/RWyN1CUUqo3NBkqpRSaDJVSCtBkqJRSgCZDpZQCNBkqpRSgyVAppQBNhkopBWgyVEopQJOhUkoBmgyVUgrQZKiUUoAmw28RkR0i0iAitSKyz+5uPdaed5rdTVKNiFSIyCoR+albD8O/tUclu6vTNu+y3/+tH3ZJBSB7OID3O7235QjvXWaXnzq7XBaLyH1y+NjK8+xlxnda/037/Zle3aEQoMmwa+cYY2KxOsycDPxSRC7G6rr9BSDPGJOCNSZELjDIbd3NwDWdtnet/b5SHRYA0zoSmohkAWHAhE7vDbWXBRhvl8sZWGXvhk7bPKzs2V36TwXKUEelybAbxphi4H2sEcruA/7LGPO4MeaAPX+TMeZOY8wWt9WWAdEiMhrA/hlpv69Uh2VYye9Ye/pk4DOsLvrd39tmjClxX9EYsxX4wm25Ds9jDVfaccZ4OfAm1pAD6ig0GXZDRAYBc4F6rDPA13u46rN8c4S+1p5W6hBjDRa1BKsLf+yfnwMLO723oPO6IjISK1Fu7TSrBFgPdAzpeQ3WeM6qBzQZdu0tEanEKpjz+abb89KOBUTkJRGpFJF6Ebm60/rPAZfbo5RdZk8r1dl8vkl8J2Mlw887vTffbfmVIlIHbMDqjfrhLrb5DHCNnTATjTGLvBB3SNJk2LXzjDGJxpg8Y8ztWEMugtv4s8aYy4wxicBKrMG6cZu3C+uo/TtgizFmt4/iVsFlAXCSiCQDaXZ1y5dYdYnJWKPjuZ8ZTgRiseoLT8AafKqzN4BTsEbG0yuSXtBk2DObgGLggl6s8wzWMI16maKOZBGQANyMVQeIMaYa63L3ZqDEGLPdfQVjecVe99edN2iMqceq5/4umgx7RZNhDxhj2rES229E5GYRSbLHkx0GZBxhtZex6m5e8VWcKrgYa3D45cCPsC6POyy03/tWfaGbPwA3i0hmF/N+DswwxuzwUKgDgibDHjLGvAxcAlwF7Ma6dH4F+BvwahfLNxhjPrYLvFJHMh9Ix0qAHT633ztiMjTGrLHn393FvBJjzMJvr6W6o6PjKaUUemaolFKAJkOllAI0GSqlFKDJUCmlAHD1ZuHU1FSTn5/vpVBUKFmxYkW5MSatP9vQ8qZ6yhPlrVfJMD8/n+XLl/fn89QAISI7+7sNLW+qpzxR3vQyWSml6OWZYX+8sGQXAFecMNhXH6nUIcYY7v33JkqrGnEIXDllMONyEwlz6vmAsvgsGQK0trfzvRdWUl7bxNC0OO45bzQi4ssQ1ADScQCeMyaTn7y2mo837D80742viomLcDEyK45phan88LTh/gpTBQifJsOFW8r5cP0+wpzC4qIDlFY3cN+lxxIfGebLMNQAc+0/lrJ+bzVzRmcyNjcBY6AwLYaPNuzjn6tLWL7jIPuqG/nZ3GNIiNKyOFD57BqhobmNBVvKGJkZx6/PHs3cMZnM21TGhQ9/yf6aRl+FoQaYLftqWFNcxawRaUwfnkZSdDjJMeEcrG9hcl4yPz1jJCcOTeXVFXs4488LWLil/OgbVSHJZ8lwyfYKGlvaOW1UBk6HcNKwNK6dls+OijrOfnAhf/+8yFehqAFkwZYykqLDmD6s66cuoiNczB2bxa3TC2hrN1z1xBJ+9dZaaptafRyp8jevJ8MXluzihSW72F5eR1ZCJFkJUYfmFabFcs3UfCrqmnl52W7a2rXTCOU5Ta1t7KioZ0xOAq6j3CgZx7o4AAAW3ElEQVTJTYrmjllDueHEITy3ZCdn/HkBX2zVs8SBxCdnhsYYiisbyE6M+ta8wrRYvjMumy37a3ngky1drK1U32wvq6Ot3TAsPa5Hy4e7HAxNj+WWkwtoam3nqr8v4aanl6M9Ow0MPkmGlQ0t1De3kdNFMgQ4bkgyEwcn8tCnW1i244AvQlIDwOb9NYQ5hfyU6F6tl5cSw/dmDWX8oEQ+3rCPX7+9jna9agl5PkmGxQet/k2PlAwBzhmXTW5SND94aRXVjS2+CEuFuC37ailMiz3qJXJXwl0OLp6Uy8nDUnl28U6ufmKJFyJUgcQ3ybCyAYdAZkLkEZeJCHNy/2XHUlrdyG/eXueLsFQIq2lsoaKumcHJvTsrdCcizBmdydTCFL7YVsETC7cffSUVtHySDEsqG8iIjzzq0/4b99Ywc3gab35VzBsr9/giNBWitpXVAZAeF9Gv7YgIZ43NYlRWPP/z3nrmbdp/9JVUUPJJMiyraSIj/shnhe5mjkgnPyWa/3x9DQ98rDdUVN9s3V8LQFpcz8pddxwiXDw5l+EZcdz54ldsK6vt9zZV4PF6MmxrN1Q1tJAU3bMn+50O4dLjBuNyCs8v2anPe6lee2HJLt5ZVYJThOSYcI9sM8Ll5O/XTibM6eDmZ5ZT1aD12qHG68mwuqEFAyRG97xQJkSFcfnxgymraeJHL6/S5w9Vr5XVNJISG47T4bm27ws2l3PhxFx2VdRz10tfabkMMV5PhgcbmgFI6kUyBOv5w7PGZfHh+n3893vr9Vkv1Sv7a5pI62d9YVeGpMZw9rhs5m0q47p/LPX49pX/eD0ZVtZblxOJPbxMdjetMJUTC1N48osd+kC26rHWtnYO1DX3++bJkRw/JJkpBcl8vrWc11bojb5Q4fVeaw7WW2eGfe0N5MyxWTS2tHP/x1twiPD92cM8GZ4KQRV1zRg8c/PkSM4am83+miZ+/sYahqRGMykv2WufpXzDJ2eGcRGuPnei6RDh/Ik5TBycxH0fbeZ/P9ikl8yqWwfqrANwiodunnTF6RCuOG4wcZEurvnHMnZV1Hvts5Rv+CAZNvfpEtmdQ4QLJuZwXH4SD322ld/9a4MmRHVElQ19r5rpjegIF9dMzae93XDtk0spr23y6ucp7/LJmWFv7iQfiUOE847NYUpBCo9/vp3/fk8ToupaZX0zLocQE+H9vovT4iK4Zmoee6sauPxvi7VvziDm1WTY3m6o7MUzhkcjIpwzLouphSk8sXA79/5bL5nVt1XWt5AQFYbDR0NK5KXE8OR1x7PnYAMXPbKIIn0oOyh5NRmW1zbR1m5I8MCZYQcR4eyxWVx5wmAenb+Nh+dt89i2VWjwRNVMb20vr+O6aflU1DZx1oMLWa69LwUdrybD0mrrkiHBw2OciAjHZMUzPjeBP32wiVeW7fbo9lVwq2poITHKezdPjmRQcjS3zSgkOtzJFX9fwr/Xlvo8BtV3Xk2G+6qtCuX4KM/X3ThEuHBSLsPSY/n5m2t07AoFQHNrOzWNrT4/M+yQEhvBd2cUkhEXwXefW8EPXlrllzhU73k5GVpnht4a/c7lcHD58YNJjY3gpmeWaV2NorSq0W7+6b9R7qIjXNx4UgHDMmJ5a1Ux//fhJu0cNgh4PRkKePWuXmSYk6un5uEQ4aZnllOjHcMOaMWVVkfCnniCoT/CXQ6unpLPpLwk/vLpVm59bgVV9Vo2A5nXk2FspMujjeW7khQdzhXHD2ZnRT0/emW1HoUHsEPJMADGP3Y6hAsm5PDrs0fx2cb9zH3wc5YUVfg7LHUEXq8z9NUA8QVpscwZnclH6/dx/8ebffKZKvCU2MkwPgCSIVg3+yLDnNx8cgENLW1c9rfF/P5fG2hqbfN3aKoTr58Zxkd6/8HXDtMKU5iUl8SDn27VO8wDVEllA7H9aP7pLYOSo7nzlKFMzk/msQVFnPvQF2zZV+PvsJQbryfDOB8eocVupXLysFR++sbXPL9kp88+WwWGkqpGv9486U6Ey8n5E3J44trJlNU0cc5DC3l1uR60A4XXkmFTaxsH61t8dpncwekQTj0mg+HpcfzizbXc+eJXh+qRVOgrqWzocw9JvrKvuolbpheQnRjF3a99zSWPLaKlrd3fYQ14XkuG+zueMfThZXKHMKeDq6bk8ePTh/PvtXuZ8cfPuPmZ5bz1VTGVdpdiKvQYY9hb2RAQN0+OJi4yjBtOHML0Yaks3X6Aa55YysE6LZv+5LVMdegZQz8VTKdDSI6J4IenDufLbRUsKargo/X7cDmE00ZlcPvMoYzNTfBLbMo7qhtaqWtuC/gzww4OEeaMySIjPpI3vipm9n3zeeaG4xmTo+XSH7yWDDua4sX54czQXWJ0OHPHZjFnTCbFBxtYU1zFvE1lvL+2lMl5Sfzj+uN8fimvvKOkyqoO8WRbeF+YMDiJlJhwXli6iwse/pLbZhRw64xCn/S6o77htcvkvZVWMvRHG9GuOEQYlBzN3LFZ3H3GCE4elsqKnQc5488L+EzHwg0JJQH0jGFvDU6J4XunDOOYrDge/HQrU3//Cb95ey2Liyp04Ckf8dqhp9h+xCEyLLAecQCr1cqZY7IYk53Axxv2cf2TyzhlZDrfnz2MYwcl+js81UclVXbHIEGYDAFiI1xcetxgphXWs/tgPS8t283Ti3YSG+Himql5XDctn/Qejj+ues+ryTAnMQrxUZ9yfTEoOZqrp+TxxbYK5m/ez6cb95ObFMUds4ZyzvhsYvUyJaiUVDYQ5hRi/Vw101+DkqMZlBzNpLwkNpXWsHpPFY/O38aTX+zg1hkF3DajkMgwp7/DDDneS4YHG8hODPyjmMvpYMbwNKYMSWbFroMs3X6An72xhnveXc9ZY7O4cFIux+cn4/Byk0LVfyWVDWTER/qsU1dvi3A5GZebyLjcRCpqm/hw/T7u/3gLb35VzH+dO4YZw9P8HWJI8VoyLKlqYGJe8FxyRoQ5mVaYytSCFPYcbGDZjgO8vbqEV1fsISM+gtNGZTBrRDrTClOJCtejciAqqWwgOzHK32F4RUpsBJcfP5jj9tfy9qpirv3HUk4flcHP5h7DkNQYf4cXErySDOuaWqmsbyEnMdobm/cqsW+0DEqO5uxx2WzYW83akipeXrab5xbvIswpDM+I467Zw5h9TIbXO6FQPbejop5ZI0L7bGloeix3zR7Gwq3lfLG1nFPvm8/5E3K4fWYhBWmx/g4vqHklGXa0+MhJiqK2sdUbH+ET4S4H4wclMn5QIi1t7ewor2NDaQ1ri6u45dkV5CRGccv0Ai49bpDW4fhZTWMLZTVN5A+AsySX08HMEelMykti/uYy/rm6hNdX7uHMMZncNqOQcbnBc0UWSLybDBMj2VQaGh2uhjkdDMuIY1hGHGeNzWJjaTULt5Tzm3fW8ci8bdw5eyiXTB4UcB0EDBQ7yq1xiwtSYzhQNzD6DYyLDOPscdnMGJ7Gl9sq+HTjfv61ppShabHcc94YphQkB/QNzEDjlf/c4oMdyTD4LpN7wukQRmcncMv0Am48aQgRLge/eHMtZ9y/gE837vN3eANSUbl10B2SOvAuFeMiwzhjdCY/OWMkc0ZnUlrdyOWPL+bSxxazdLsOTNVT3kmG9iMO6XER3th8wBARCtNiuWV6AVdPyaO6oZUbnlrOTU8vP/QAsPKNHeX1iEBeSmgegHsiMszJ9OFp3H3GCM4Zn83G0moueWwRZz34OXurtDwejVeS4e4D9WQlRA2Yx1E6Ruu7a/YwzhyTyfzN+znl/+bx9qpif4c2YGwvryU7IUrrbrGqdKYWpPCj00Zw6jEZbCqt4fT7FvDq8t06zng3vJIMN+ytZkRmnDc2HdCcDuHkYWncNXs46XGR3PXSKn751hrt1dgHtpfX6SMmnYS7HJwyMp0fnDqclNhw7n7ta257bgUVtU3+Di0geTwZ1je3UlRex+jseE9vOmgkx4Rz88kFnDw0lecW7+KSRxex+0C9v8MKWcYYijQZHlFyTDg3nVzAmWMy+WxjGafeN5/XVuzRs8ROPJ4MN5bWYAyMyhq4yRCss8Qzx2Zx1QmDKSqr44z7F/D0lzto1U48Pa64soGaxlaGpg+8myc95RDrquW7MwuJjXDx41dXc/Gji1hbXOXv0AKGx5PhupJqAEYN4DNDd6OyE/j3D6czKS+J37yzjml/+JRfvrlGj8oetKTIumN6/JBkP0cS+DLiI7l1RiEXTMhhw95qzvnLQn77zjodYhcvJMP1JdUkRIWRE6LNovpi/qYy5ozO5IrjB9NuDM8t2cV3HvqCD9eV6rCmHrBkewWJ0WGMyBh49dR94RBhcn4yPzptBCcUJPP0oh2cdt8CPlhX6u/Q/MoLybCKUVnx+rBnJyLCmJwE7po9nAsn5lBc2cAtz67g9PsX8PySndQ3B29LHX9bXHSA47QzjV6LCnfynfE53Da9EIBbn13BTU8vZ1fFwKzf9mgy3F/TyLqSaiYM1uZAR+J0CJPykvnhqcO5ZHIu9c2t/OLNtUz53Sfc8+56tpfX+TvEoFJS2cCuA/VMKUjxdyhBa1ByNHfMGsqc0Zks2FzGqX+ezz3vrme/3Vv9QOHR5nivLNtNa7vhokm5ntxsSHI6hGMHJTE+N5FdB+pZVFTBk19s54mF25lSkMx5x+Yw+5gM0kL8wfX+emd1CWCNma36zukQpg9PY/ygRLbur+XJL7bzzKIdnHpMBnPGZHLi0FRSY0O7LHosGba1G15cupsTh6Zo7xm9ICLkpcSQlxJD9dgWVuw8yMqdB/nPN9YgsoaRmfEcl5/EuNxERmfHMzQ9Vts/2w7WNfPXz7Yyc0Qaxwzwpxc8JSEqjEl5SeSnRLO4qILlOw/y/lqrLrEwLYaJg5OYMDiJ8YMSGJ4RF1Jlsd/J0BjD7gMN/Pd76ymubODX54zyRFwDUnxkGLNGpDNzeBp7qxrZtK+GorJaXlq2m2cW7QQgzGk1ASxMi2VwSjSZ8ZEkx4STEBVGTISL6HAn0eFOosKdRLichDsdOBwgCB3VuIJ1JhCs9bqtbe1s3lfLPe+up66plZ+deYy/Qwo5KbERnDUumzONofhgA0Vltew8UM97a/by6oo9gPVQ9/CMWIanx5GXEkN2YiQZ8ZEkRYcTH+UiOtwa9iPM6bDKGxwqcw4h4Mpfv5PhFY8vYVFRBQ6B354zitNHZXgirgFNRMhOjCI7MYpZI9JpN4bymib2VjWyt6qRfdWNLC6q4P21e+nPzWgRcIrgsAuqQ75JmEd68sc9ocZGuljy81P7HkAfbNlXw9wHP6elzRAb4eJ3548dkK2dfMXh1r8nWCc/B+qa2XOwgeLKBkqrG/lk436qGvr2aI7TIVZiRKxCZbMSp/3+od/h+hOH8OMzRvRzr7omvXneTUTKgJ29/IxUoLyX6/iLxuo5ecaYfvW02sfyBoH/3UBwxAjBEWcqENPv8ubth39FZLkxZrJXP8RDNNbQEAzfTTDECMERp6diDJ3aT6WU6gdNhkophW+S4d988BmeorGGhmD4boIhRgiOOD0So9frDJVSKhjoZbJSSqHJUCmlgH4kQxGZIyKbRGSriPxnF/MjRORle/4SEcl3m/cz+/1NInJGX2PwYKw/EpH1IvK1iHwiInlu89pEZJX9esfbsfYw3utEpMwtrpvc5l0rIlvs17W+iNdXgqXMBUN5C5Yy1oM4/+wW42YRqXSb17vv0hjT6xfgBLYBBUA4sBoY1WmZ24FH7d8vA162fx9lLx8BDLG34+xLHB6MdRYQbf/+3Y5Y7elab8XWj3ivAx7qYt1koMj+mWT/nuTL+P38vfi9zAVDeQuWMtaTODstfyfwj75+l309Mzwe2GqMKTLGNAMvAed2WuZc4Gn799eA2WI1RjwXeMkY02SM2Q5stbfnLUeN1RjzmTGmoxO3xYA/u93pyXd7JGcAHxljDhhjDgIfAXO8FKevBUuZC4byFixlrLdxXg682NcP62syzAF2u03vsd/rchljTCtQBaT0cF1P6u3n3Qi87zYdKSLLRWSxiJznjQA76Wm8F9qXWa+JyKBerhuMgqXMBUN5C5Yy1uPPsqsahgCfur3dq+/So/0ZBjsRuQqYDMxwezvPGFMsIgXApyKyxhizzT8RHvJP4EVjTJOI3Ip1NnSKn2NSvRTg5S3YythlwGvGGPdxeXv1Xfb1zLAYGOQ2nWu/1+UyIuICEoCKHq7rST36PBE5FfgF8B1jzKGBZY0xxfbPImAeMMGLsUIP4jXGVLjF+HdgUk/XDWLBUuaCobwFSxnrzWddRqdL5F5/l32s2HRhVZwO4ZuKzdGdlrmDwyuzX7F/H83hldlFePcGSk9inYBVUTus0/tJQIT9eyqwhW4qcH0Yb5bb7+cDi803ldvb7biT7N+TvRmvr17BUuaCobwFSxnrSZz2ciOBHdiNSPr6XfYn0LnAZvuP+gv7vf/COtIBRAKvYlVWLwUK3Nb9hb3eJuBMb3yRvYz1Y2AfsMp+vWO/Pw1YY/8R1gA3ejvWHsb7e2CdHddnwEi3dW+wv/OtwPW+iNdXr2Apc8FQ3oKljB0tTnv6t8AfOq3X6+9Sm+MppRTaAkUppQBNhkopBWgyVEopQJOhUkoBmgyVUgrQZKiUUoAmQ6WUAkIkGYrIPBGZKSLz+rmdGBF5SESm2NNRIjJfRJxuy/yfiKwWkb90s51vrdfFMuEissBuNvataU/ul/I8LXOhJySSYWcisk5E9todQna8akTkf46y6m1YrRhOsqdvAN4wduNvESkETjTGjDfG3NnNdg5bryvG6pLoE+DSrqZVcNEyF/xCMhliNSx/1hgz1BgzFBgGlAJPHGW9OVjNtVbZ01cCbwOIyAisxt55IvKViMR0sx339W4XESMit4lIgoiU2P84EcBb9rIdOk+r4KFlLsiFajJ8BrjU7fR/JrDDWL1XdElEIrF61p0IzBeRcKy2rTsAjDGbsLox+pUxZoIxpu4I2zlsPeARrKPvH7D+YdKBa43VI8ha4Di31TtPq+ChZS7IhWQyNMZUAIuAs+23bsQqFN0ZhlUwNxpjWrB6uqjstMxYrIbf3TlsPWM1/r7R3vZFwL3GmOX2vDagWUTiupruTETOE5HHxRrn4/SjxKF8SMtc8AvJZGh7HLhRRBKA6cCbYg0KtFhEzrHrd9Lclk8DhgMP2NMNWHU57kYDa0XkKbtS+xYR2SYi6SLSLiKZR1gvCav7KICsTvMigMZupg8xxrxljLkZq55J63kCj5a5IBbKyfBTrIL2H8CrdmXxvVh/9NewuvQpc1s+G3gdcIhIkrHGd3DalzLYR84WY0wDVj3OSfZLgIuBzcaY0i7WCwOeAsqBh4HrRWSuPS8FKLfPCr413Y1fAn/t8zejvEXLXBAL2WRoXyo8hdWPXcfliguIA5qBxI5l7XqeiUAm8CjQcUfuQ765yzcGq34Fvuk191isCvIf2+/RxXq/AsYD38P6J9kEPC4iiVijpL3ntl7n6cOI5V7gfWPMym6/AOVzWuaCnDc7ZvTVC6tQzATmdXo/BhjjNn0vVsea07G6gx90lO1OxLpD2NW8HViFaDpggMt6sl6nbbwBDO9m+rD9Ar4PrMD657nN39/7QH5pmQu9V0gPCGWsu29r3aZ/6jY7pQfrrxSRz0TEaTo9v2WMyXeblJ6ud2gF6w7gW8aYzV1NHyGeB4EHjxa38h8tc8ErJHq6FpHrsI9oxpin/BqMB4XqfoWCUP3bhOp+9URIJEOllOqvkL2BopRSvaHJUCml0GSolFKAJkOllAI0GSqlFKDJUCmlAPj/DxvQ8pqjnLQAAAAASUVORK5CYII=\n", 68 | "text/plain": [ 69 | "
" 70 | ] 71 | }, 72 | "metadata": {}, 73 | "output_type": "display_data" 74 | } 75 | ], 76 | "source": [ 77 | "beta_ignore = [1.0, 1.6, 2.0, 4.0, np.inf]\n", 78 | "xlabel=r'$\\Vert\\nabla_{\\mathbf{x}}\\, \\ell(f_{\\mathbf{w}}(\\mathbf{x}))\\Vert_2$'\n", 79 | "\n", 80 | "plt.figure(figsize=(4.5, 3.5))\n", 81 | "dirname = '/data/save_weights_tf1.10.1/cifar10/alexnet/'\n", 82 | "defense = 'erm'\n", 83 | "resultsfile = '/data/save_weights_tf1.10.1/results/kappahist_results_cifar10_alexnet_%s'%(defense)\n", 84 | "kappa_plots = make_kappa_plots(Xtr, Ytr, beta_list, dirname, defense, resultsfile, arch, plot_stuff=False)\n", 85 | "for k in beta_ignore:\n", 86 | " if k in kappa_plots: kappa_plots.pop(k)\n", 87 | "plt.subplot(2, 2, 1)\n", 88 | "plot_hists(kappa_plots, value_name=xlabel, legend=False)\n", 89 | "plt.title('ERM')\n", 90 | "plt.yticks([])\n", 91 | "\n", 92 | "defense = 'fgm'\n", 93 | "resultsfile = '/data/save_weights_tf1.10.1/results/kappahist_results_cifar10_alexnet_%s'%(defense)\n", 94 | "kappa_plots = make_kappa_plots(Xtr, Ytr, beta_list, dirname, defense, resultsfile, arch, plot_stuff=False)\n", 95 | "for k in beta_ignore:\n", 96 | " if k in kappa_plots: kappa_plots.pop(k)\n", 97 | "plt.subplot(2, 2, 2)\n", 98 | "plot_hists(kappa_plots, value_name=xlabel, legend=False)\n", 99 | "plt.title('FGM')\n", 100 | "plt.yticks([])\n", 101 | "\n", 102 | "defense = 'pgm'\n", 103 | "resultsfile = '/data/save_weights_tf1.10.1/results/kappahist_results_cifar10_alexnet_%s'%(defense)\n", 104 | "kappa_plots = make_kappa_plots(Xtr, Ytr, beta_list, dirname, defense, resultsfile, arch, plot_stuff=False)\n", 105 | "for k in beta_ignore:\n", 106 | " if k in kappa_plots: kappa_plots.pop(k)\n", 107 | "plt.subplot(2, 2, 3)\n", 108 | "plot_hists(kappa_plots, value_name=xlabel, legend=False)\n", 109 | "plt.title('PGM')\n", 110 | "plt.yticks([])\n", 111 | "\n", 112 | "defense = 'wrm'\n", 113 | "resultsfile = '/data/save_weights_tf1.10.1/results/kappahist_results_cifar10_alexnet_%s'%(defense)\n", 114 | "kappa_plots = make_kappa_plots(Xtr, Ytr, beta_list, dirname, defense, resultsfile, arch, plot_stuff=False)\n", 115 | "for k in beta_ignore:\n", 116 | " if k in kappa_plots: kappa_plots.pop(k)\n", 117 | "plt.subplot(2, 2, 4)\n", 118 | "plot_hists(kappa_plots, value_name=xlabel, legend=False)\n", 119 | "plt.title('WRM')\n", 120 | "plt.yticks([])\n", 121 | "\n", 122 | "plt.tight_layout()\n", 123 | "plt.savefig('/data/Figures/cifar10_alexnet_kappadist.pdf', format='pdf', dpi=500, bbox_inches='tight')\n", 124 | "plt.show()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "# Distribution of norm gains" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "INFO:tensorflow:Restoring parameters from /data/save_weights_tf1.10.1/cifar10/alexnet/erm_betainf/checkpoints/epoch46\n", 144 | "INFO:tensorflow:Restoring parameters from /data/save_weights_tf1.10.1/cifar10/alexnet/erm_beta1.0/checkpoints/epoch200\n", 145 | "INFO:tensorflow:Restoring parameters from /data/save_weights_tf1.10.1/cifar10/alexnet/erm_beta1.3/checkpoints/epoch200\n", 146 | "INFO:tensorflow:Restoring parameters from /data/save_weights_tf1.10.1/cifar10/alexnet/erm_beta1.6/checkpoints/epoch200\n", 147 | "INFO:tensorflow:Restoring parameters from /data/save_weights_tf1.10.1/cifar10/alexnet/erm_beta2.0/checkpoints/epoch46\n", 148 | "INFO:tensorflow:Restoring parameters from /data/save_weights_tf1.10.1/cifar10/alexnet/erm_beta4.0/checkpoints/epoch19\n" 149 | ] 150 | }, 151 | { 152 | "data": { 153 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAASwAAAD/CAYAAABLoOtAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztvXmcHWWV//8+VXfp2/uSDr1mIyQkARIIO+oQAUFEEVFAmO+w6IjCb746znfUcRgFB5RhFEXAmVFcZhAQGGUZhIwCiSxCWJJmSwgBEkhn7e6kO73cter5/VF1u29v6U4v1d3p83697it1q5566txK6pNzTj3PecQYg6IoylTAmmgDFEVRhosKlqIoUwYVLEVRpgwqWIqiTBlUsBRFmTKoYCmKMmVQwZqEiMivROT6ibZjuiIil4jIHybaDqU/KlgTiIisFpG9IhIdx/4TIlKfs+90EdkyzPOvFZFfD9Fmi4jsFpGCnH2fF5HVI7V7tIhIRES+JSIbRaRTRLaJyGMi8pHhnG+MucsYM6y2SrCoYE0QIjIH+CBggE+M46U6gX8ax/4BbODLo+1EPMbi3+R/A+cCfwWUAXOBW4CPjUHfygSigjVx/BXwPPAr4NL9NRSRc0SkQURaReTPInKUv/9QEdkjIsf432tEpElETs05/cfAZ0Xk0EH6rhGR3/rnbRaR/+vvPwv4JnChiHSIyCv7MfFfgf8nIqWDXONkEXlRRNr8P0/OObZaRG4QkWeBLmCev+96/7d2iMj/iEiFiNwlIvv8PuYMcq3TgTOAc40xa4wxKf+z0hjz5Zx23xCRd0SkXUTWi8h5OccuE5Fncr4bEfmiiGzy/w5uFxHZz/1QxgkVrInjr4C7/M+ZInLIQI1E5GjgF8CVQAXwH8DDIhI1xrwDfB34tYjkA78E/tMYszqni23Az4DrBujbAv4HeAWoBU4DviIiZxpjVgLfBe41xhQaY5bu57e8BKwG/t8A1ygHfo8nnBXAzcDvRaQip9n/Ab4AFAHv+fsu8vfXAocCz/m/rxzYAHx7EFtOB9YYYxr3Yy/AO3gebgnevfm1iFTvp/05wHHAUcAFwJlD9K+MAypYE4CIfACYDdxnjHkZ7+G5eJDmXwD+w/cWHGPMfwJJ4EQAY8zPgLeBNUA18I8D9PE94OMisqTP/uOASmPMd3wv5F08cbtoBD/rW8DfiEhln/0fAzYZY+40xmSMMfcAbwIfz2nzK2PMG/7xtL/vl8aYd4wxbcBjwDvGmMeNMRngfuDoQeyYAezMfhGRct8rahORRHa/MeZ+Y8x2Y4xrjLkX2AQcv5/fd6MxptUY8z6wClg21A1Rxh4VrInhUuAPxphm//vdDB4Wzgb+zn/oWkWkFagHanLa/Aw4ArjVGJPs24Expgm4DfjOAH3X9On7m8CA3t7+MMa8DjwCfKPPoRp6vKYs7+F5Tlm2DtDlrpzt+ADfCwcxpQVPuLN27THGlALLge6XGyLyVzlhdive/ZsxSJ+QI4J4oetg11fGkdBEGzDdEJEYXkhhi0j2IYgCpSKy1BjTN1e0FbjBGHPDIP0VAj8Cfg5cKyK/NcbsGaDpvwLvAi/06XuzMeawQcw90FIe3wbWAj/I2bcdTxhzmQWsHMV19scTeJ5e3WBhoYjMxhP504DnjDGOiDQAmpea5KiHFTyfBBxgMV5YsQxYBDyNl9fqy8+AL4rICf5btAIR+ZiIFPnHbwFeMsZ8Hi9X9O8DXdQY04onJF/L2f0C0C4iXxeRmIjYInKEiBznH98FzBnumztjzNvAvcD/zdn9KLBARC4WkZCIXOj/9keG0+eBYoz5A17I9qB/zyIiEsYPoX0K8ESyCUBELsfzsJRJjgpW8FyKl5953xizM/vBC9kuEZFeXq8x5iXgr/3je/HyVZcBiMi5wFnAl/zmXwWOEZFLBrn2LXhime3bwUsmLwM2A83AHXiJaPByRQAtIrJ2mL/vO3iCkL1Gi3+Nv8ML174GnJMTDo8H5+EJ4q+BVrzfdgl+otwYsx5PvJ/DE+UjgWfH0R5ljBAt4KcoylRBPSxFUaYMKliKokwZVLAURZkyqGApijJlUMFSFGXKcEADR2fMmGHmzJkzTqYoijJdefnll5uNMX2ndfXjgARrzpw5vPTSSyO3SlEUZQBEpO/0rQHRkFBRlCmDCpaiKFMGFSxFUaYMWq1BUcaZdDpNY2MjiURi6MYHOXl5edTV1REOh0d0vgqWoowzjY2NFBUVMWfOHKZzZWVjDC0tLTQ2NjJ37twR9aEhoaKMM4lEgoqKimktVgAiQkVFxag8TRUsRQmA6S5WWUZ7H1SwFEWZMqhgKYoyZVDBUpRpxLe+9S2OPPJIFixYwE9/+tNhnXPFFVcwc+ZMjjhi/1WkV65cycKFC5k/fz433njjWJjbj0AFq7puFiLS71NdNytIMxRl0jKez8j//u//sm7dOhoaGvjtb3/Lgw8+OKzzLrvsMlauXLnfNo7jcPXVV/PYY4+xfv167rnnHtavXz9qm/sS6LCGndu2Mvvr/dceeO9fzgnSDEWZtIznM/Lwww9z2WWXkU6nue222zj//POHdd6HPvQhtmzZst82L7zwAvPnz2fevHkAXHTRRTz00EMsXrx4tGb3QsdhKUqAXPc/b7B++74RnXvhfzw34P7FNcV8++N918jtz8svv8xxxx1HRUUFc+bM4Yc//CEAH/zgB2lvb+/X/vvf/z6nn376sGzbtm0b9fX13d/r6upYs2bNsM49EFSwFGUa4LoujY2NXHbZZVx44YVceeWV3HzzzVxzzTU8/fTTE23esFHBUpQAGcoTuu+Lgx+798qTRnzdjRs3cthh3nq5sViMU045hZ07vXV8x8LDqq2tZevWngW8Gxsbqa2t3c8ZI0MFS1GmAevWrSOZTOI4DplMhrvvvpsf//jHAGPiYR133HFs2rSJzZs3U1tby29+8xvuvvvuUffbl0AFq6q2fsDkYVVt/QCtFWX6MV7PSENDA/F4nEMPPZQZM2Zw1VVXsXTp0mGd+9nPfpbVq1fT3NxMXV0d1113HZ/73OcAOPvss7njjjuoqanhtttu48wzz8RxHK644gqWLBk6r3agHNBCqscee6wZbcXRHz3+Fj96fBNl+WHWfesjo+pLUaYCGzZsYNGiRRNqwxlnnMEPf/jDIcdSBcFA90NEXjbGHDvUuYEPHI2nnV5/Kooy/rz55pscfvjhE23GqAk8h5VIeUKVTLu4rsGydFKooow3uQnxqUzgHlbKcQEwQHsiE/TlFUWZwgQuWMm02729L5EO+vKKokxhghesTI9gqYelKMqBMKGC1ZVSwVIUZfhMgGD1vB3sTOmbQkVRhk/ggpV2XPLC3mU7k+phKYoyfIJ/S5hxiYVtABI6FktRlANgQoY19AiWO0RrRVGUHibEw8rzBUtHuyvKwKxatYrFh81h1apVY9rveJVITiQSHH/88SxdupQlS5bw7W9/e6xM7sUE5LBMt2BpSKgo/Vm1ahUXnHcOl8/ZyQXnnTNmojWeJZKj0ShPPvkkr7zyCg0NDaxcuZLnn39+LMzuxYR4WNGQ1b2tKEoPWbG6/5Pw96dEuf+TjJlojaZEcnl5+X7biAiFhYUApNNp0un0uKzFGPhcwrTjYltCyBISGfWwlGnGY9+Ana8NeGjVhhYu+MlL3P/pKKfO8R7NU+eEuP+TGT7z8TO476pjWbGoov+JVUfCR4depWY8SySDtxDF8uXLefvtt7n66qs54YQThn3ucJmQYQ22JYRs6TVNR1GmO1ff+SpfOynULVZZTp0T4msnhbj6zldH3HduieTm5maWL1/OzTffDHgF/BoaGvp9DkSsAGzbpqGhgcbGRl544QVef/31Eds7GBPgYRnfw7J6jXpXlGnBfjyh2+d44eBxtZleorV6S4abXrS574FHYMWKEV12vEsk51JaWsqKFStYuXLlmNffmpCQMGQJtiWaw1KUHFasWMF9DzzCZ847h/s/6YnW6i0ZPvMg3PfAI6wYoVjB+JdIbmpqIhwOU1paSjwe549//CNf//rXR91vXyYkJLT8HFbaUcFSlFy6RetB+Ndnk2MiVtC7RPIpp5zCpZdeekAlkk866SQ2btxIXV0dP//5z7uPnX322Wzfvp0dO3awYsUKjjrqKI477jjOOOMMzjln7NcbDdTDcl2Da8AW9bAUZTCyonX1Fy7nvgd+OWqxAs/DuvPOO0cUot1zzz2DHnv00UcBqKmpYd26dSO2b7gEKlhp1xOobNI9pR6WogzIihUrWL9py5j1pyWSR0Da8Ra8sC3BFg0JFSUotETyCMj4AmWJYFuWCpaiKAdEoILVy8Oyer4riqIMh0AFy3F9wfKT7hn1sBRFOQAC9rD8kNDyRCulHpaiKAfAhHhYlu9haQ5LUZQDIdike1awLMHSkFBRlAMkYMHKeUso0i1giqIowyHgYQ09SXfPw1LBUhRl+ExQDgvfw9KQUFGCZLxKJAO0trby6U9/msMPP5xFixbx3HPPjYXJvZjQHJaOw1KU3iw/cTki0u+z/MTlo+57PEskA3z5y1/mrLPO4s033+SVV15h0aJFozW5H4FOzen1llDQpLui9OHE409kR9kOKi7qqSza8psWTlpw0qj7Hk2J5C1btuy3TVtbG0899RS/+tWvAIhEIkQikVFa3J9ABasn6e6Ndk9r0l2ZZvzLC//Cm3veHPR414ldNP2iieKzigmXhkm3pml+ppmm/9PE5SsvH/Ccw8sP5+vHD117ajxLJG/evJnKykouv/xyXnnlFZYvX84tt9xCQUHBsM4fLoGGhNmUlSWCJdLtcSmK4pFfns/80+fT8mgLAC2PtnDo6YeSX54/qn7Hu0RyJpNh7dq1fOlLX2LdunUUFBRw441D15k/UCbGw/JzWI5rMMaMy+oaijIZGY4ntGPpDuYfPp/4KXHia+KsenMVVVVVo7rueJdIrquro66urnvhiU9/+tNTX7By3xJavkhlXEPYVsFSlCzV1dVcdull/PSmn3Ll564ctVjB+JdIrqqqor6+no0bN7Jw4UKeeOIJFi9ePOp++zJxU3Ok9z5FUXq45h+u4ZSTTuGaf7hmTPob7xLJALfeeiuXXHIJRx11FA0NDXzzm98cE9tzmbC3hJbV42EpitKb6upqVv9x9Zj1N94lkgGWLVvGSy+9NCL7hsuEjMOS3JDQcVn1719j8WFzxmxJbkVRenOwlEgO9i2h6V0PC/ylub92C5fP2TlmS3IritKbrVu3EgoFvqrfmDMhcwmzHlbivVf564s/xf2fCvH3p0S5/5OoaCmKMijBJt1Nz9ScbetfpOPh6/jv8+he5fbUOSEVLUVRBmXC3hI+8/Nr+fbJ9FqSG7zvXzvO4eovDDyqV1GU6cuEVWv4iy9cx3V/htVbMr3arN6S4aYXbW7/6S+DNE1RlCnAhAiWiDDvyBMo/MS3+cxv092itXpLZsyW5lYU5eBjwlbNsUTIm30Ud3z5TD5zf5x/fTapYqUoyn6ZIA8LbP/Kf3FoPvd9OsYvG9Lcd+fPVawURRmUYEe6m/4j3SXdxYq5IdZfXQjHLAjSHEVRphgTWiIZQDJdILbXINEWpDmKMu040BLJW7duZcWKFSxevJglS5Zwyy23DNp25cqVLFy4kPnz549LpQaYwKS7neNhkVfiNUi0BmmOokxK7r7rLhbPnYttWSyeO5e777prTPodSYnkUCjED37wA9avX8/zzz/P7bffzvr16/u1cxyHq6++mscee4z169dzzz33DNhutEych+ULlpWJ5wiWeljK9Obuu+7iG1+6iq85DusOW8DXHIdvfOmqMRGtkZRIrq6u5phjjgGgqKiIRYsWsW3btn7tXnjhBebPn8+8efOIRCJcdNFFPPTQQ6O2uS+BV2sQPA8rO/nZysSh0K/3o4KlHOTs/O53SW4YvETytQ89yHdKSzkh3ystfEJ+Ad8Brv3Slzhl5f8OeE500eFUDaOUy2hLJG/ZsoV169Z1F+nLZdu2bdTX13d/r6urY82aNUPadKAEnnTPClXWw7KdOEQKQSxI9r9pijKdeKe9nWOqa3rtOyaWzztbt46q39wSyRdeeCFXXnklN998M9dcc82wCvh1dHRw/vnn86Mf/Yji4uJR2TIaAhUs1zVYfhDaHRI6SbDDEC1SwVIOeobyhBY+8zRr413dHhbA2ngXC+fMYfad/zXi646mRHI6neb888/nkksu4VOf+tSA/dfW1rI1R1QbGxupra0dsb2DEXBN95767Z5gGUJOwhesYhUsZdpzzfXX840vXcV38DyrtfEuvtXaxo3/9pNR9TvSEsnGGD73uc+xaNEivvrVrw7a7rjjjmPTpk1s3ryZ2tpafvOb33D33XePyuaBCDzp7jtW2JYQJe1/8T0szWEp05yLL7mEG//tJ9xk2xy96S1usm1u/LefcPEll4yq35GWSH722We58847efLJJ1m2bBnLli3rVWU0WyI5FApx2223ceaZZ7Jo0SIuuOAClixZMiqbByLYkDA3hyVClJR3wApDugtSnUGaoyiTkosvuWTUAtWXkZZI/sAHPoAxg5cx7yteZ5999ohtHA6Bl0jODQnzcj0sO6qCpSjjxMFSIjnwpLudGxJKjocVikKqI0hzFGXasHWUbxknC5PIw4qoYCmKsl8CXqq+J+luiRDr62ElVbAURRmcwD0sK2dZ+nyrj4eVjgdpjqIoU4wJGNbQI1gF4pdHzgpWJg6uG6RJiqJMIYIXrJwr5mU9LCvkhYTgDW9QFEUZgAkNCQukT0gIGhYqijIoga/8nCtYsaxgWbmCpWOxFEUZmAkY1tDzvfstoXpYiqIMgwkY1jCAh2WHvQ9oDktRxpEDLZGcxXEcjj76aM4555xB2xx0JZIzrttHsJLehh3p8bBSKliKsmPHDk7/8BndJWDGgpGUSM5yyy23sGjRokGPH7QlknPfEsYkjYt4i1B0e1gaEirK9264keefW8N3r//emPU5khLJ4NW2+v3vf8/nP//5QdsctCWSe3lYpEgSJSaSk8NSD0s5eHn6vrdo3jr4jI6LvnEyqYwXeXzj/P/gxtuv5Nbbf0wkFOU3N/55wHNm1BfywQuGXiJvpCWSv/KVr3DTTTcN2CZLUCWSJ6weFkCepEjie1YqWIrCT775ECuWn0NV2SzqZsynqmwWK479OP/2zYdH1W9uieTm5maWL1/OzTffDHgF/BoaGvp9Tj/9dB555BFmzpzJ8uXLx+LnjZrAK4729rASJPEHjGpIqEwDhuMJNTT/nhfe+BPX3HMB6VSKI06YyxXXfWRU1x1pieRnn32Whx9+mEcffZREIsG+ffv4y7/8S37961/3antQlkjuGxLmkSLeLVg6rEFRAJqbW/jc56/gis9fzi/u+CW7dzeNus+Rlkg+/fTT+d73vDza6tWr+f73v99PrCC4EsmBC1YolONhmQRxfKFSwVIUAO65t2cNwltu/dGY9JlbInnGjBlcddVVwyqRPBRnn302d9xxBzU1Nd0lkh3H4Yorrpj6JZId01MPCyBKkq6sh2XZ3lJfmsNSlDFnpCWSczn11FM59dRTe+07qEskO67pXt4LII8kcRPtaaAlZhRlXNASySMg4/TJYZkEXaasp4Ed0bmEijIOaInkEdBvWINJ0G7yenbYYUgngjRJUZQpxISWl4maBB29BMsv4qcoijIAAXtYbq+pOVE3QSd5ZLJFRu2w5rAURRmUwEPC7FtCcdOESdNloqS7BUuT7oqiDE7wbwl9wQo5njDFiZJys4sVRnRYg6IogzJhBfxCGU+YOskjlRsSankZRVEGYcJWzcl6WF0mr8fDssKadFcUZVCCFSyTK1ieJ9VFlEyvkFCHNSiKMjCBCZbrGoyh+y1heLCQMKOCpSjjxUhLJE8WAhOsjGsA+ifdTU/SffkNzyDfbEREen2Wnzg5avEoSlB4JZI/PGlKJE8WApua4/iClR3WYGdzWERJOV6bEw+tYMccoeKSmu7zWn7TwkkLTgrKTEWZFHz3hht47s/P8t0brufHt942Jn2OtETyZCI4wTKeYGWn5oQcL/SLE+n2sK45dym/+rsHKP5YJeHSMOnWNPue28c1v7wmKDMVZVxZ9aufsvu9d/fbZm97B7+442dc+aHj+enP7mBJFEqLCgdtP3P2PFZc9oUhrz3SEsmPPfYYN910Ex/4wAf48Ic/zLXXXsuxxx7LD37wgyGvOdYEFhI6Tlaw+oaEPSPdq2eUcMHSMM2PNgPQtrKNyy69jKqqqqDMVJQJ5+FnnufY2bXUlpWwfHYNDz3z/Kj7HGmJZIAnnniCVatWUVRUxA9/+EMef/xxli5dOi6r4gxFYB5WxvVUKeth2b08LH+5LyvEqacXc+fNeyg9pZTWP7eqd6UcVAzlCe3YsYO//tfb+NsPnwjAqQvn8aNVa/jFg4+M6j/ukZZIPv300zF+dFReXk5HRweO42CM6d4fJIGHhDLgSPfs+oRhts/Ip/QUly03beHkT5ys3pUyrfjuDTewfHYNxTGvKEBxLI9jZlWPOpc10hLJ4JVJPu2001i2bBnXXnstH/nIR1i6dCmXXnrpiO0ZKYEn3XPfEjrYuFikcwaObgmHOPK8w3hl10bqPlEXlHmKMil4Yc3zvLB+E6vXb+q1vyu/eFT9jqZE8kc/+lE++tGPdn9/6qmnRmXLaAguJMzmsPysme0mcSxvpZyeuYRh3guHmVVWwewbP8Gbe97EcR1syw7KTEWZUNa8+NK49DsWJZInA8ENHDV9hzUkccUTrGy1hoxY7AjZVIbyqS+qpzPdyTtt7wRloqIctBwsJZKDe0vo9n9L6PbxsJrFwRWhXCLUF3mryK7btS4oExXloGXr1q2EQoFWRB8XJkCwvO+2m8QR7wZmp+bsNGkAKqwIpdFSCsOFvLz75aBMVBRlkhOcYPULCRMYKxsSevt2uSkAKiSMiFBXVMf6luDHeiiKMjkJ3MOyc8ZhGcv3sPypOS3GE6wyvEZVBVW8t+89OnUlHUVRmADB6vaw3BTGCiGYbg+r2U1iG0OJHyLOjM0EYMu+LUGZqSjjwkQMspyMjPY+TFjS3XYTuBImJKY7h7XHSVLmuITdDABled6ahVvbD4411ZTpSV5eHi0tLdNetIwxtLS0kJeXN3TjQQh84GjP5OckyXAxIenxsFrcBBWOg+V4yfesYDW2NwZlpqKMOXV1dTQ2NtLU1DTRpkw4eXl51NWNfED4hJWXsdwkroQIW4a0/x/PXidBueti+4IVsSPEQjF2dOwIykxFGXPC4TBz586daDMOCgJ/S9gz+TmJkZAXEjrezlYnTonrYjup7vOKI8Xs6toVlJmKokxiAiyR7F8wO3DUTeJaIT8k9I7tSndQ4oLtZLrPU8FSFCVLgCWSc8rLGIPleCGhl3QX0sYhYTIUG9MdEgIUR4vZ1amCpSjKBM0ltEwGCxfXChOyPA+rza+PVWykV0hYFClib3IvqZx9iqJMTwIc1uBf0BIsx6t/lfWw0q7Q5q+WU2Sk+y0hQFG4CICmeBN7770vKHMVRZmEBChYPSGh7Rfs68lhCa1+Qb9CpFdIWBTxBGt31+6gTFUUZZISuIclIt2C5b0lhKQLrX5IWIjVT7DanmvlU8vPZsZnL2Lx3LncfdddQZmtKMokIvhVc/CGNEBPSJhxhbashyV2rxzWG394ncSdu7ipoopjDlvA2ngX3/jSVQBcfMklQZmvKMokYAJCQsH2qzJ0J91Nj4dVQG/Beuy2B7ipoooT8gsIi3BCfgHfKS3h+mt0cQpFmW5MSNI9u2KOKzZ2NoeViWMjhCwbO9MTEm7d1sQxsfxefR0Ty2fje+8FZbqiKJOEAAeOZoc19CTdjRXqnvzc5iSIWWHP68rxsOprK1kb7+rV19p4Fwtnzw7KdEVRJgkTMDVHuld97q7W4HhvCfOtMI4V6hUSnvuVC7lm107WdHWSNoY1XZ380+4mrrn++qBMVxRlkjABk5/B6s5h+Ul3P4cVsyI4toud6RGs05YdTn1FJd9s282uxhSHFhTwldmz+OzFFwdluqIok4TABMsdzMOy/IGjTsL3sBxsNwPGBbEoWrOBjxUX8/A3y2iY8Y+kX3yZfb//Pelt24nU1QZlvqIok4Dg5hI6PdUa+g4cTblepYaYFSbjl03OellFa99mb2WMvUXCXjoJ+7V0Eq+9GpTpiqJMEgKfS2hJ7lvCsD/S3dCa6clhAYQySSSdIX/D+7TWlQDQZroIzZwJtk3ijTeCMl1RlEnCBNR0z/GwsnMJSeBgyLciZGzfw3JS5L29HXFcuqpLAU+wJBQidMghxF97PSjTFUWZJEzIW8Luke5WGFsMru0NWyjo5WGlyH/LK43sHFIBQCteu3BNDYn16zHZIluKokwLJmgcVgJXbBAhJAbL9pbxiuV4WKFMkthb28gUxojmexOg20yPYLnt7aR08KiiTCsCLODX+y2hK94iqmHLIL5g5eaw7EyS2NvbSM8sJd+EsRBas4KVTby/qol3RZlOBOphCT0hoZsN/cQgoQ4ACq1I91vCcEcn0R17SM0owUIoIo82vAnSocpKrIICuhoagjJfUZRJQKAelnQv8RXH9ZepD+cIVoEVwfFDwvAWb0mkdKX3hjDqWN0hoVgWsaVHEX/llaDMVxRlEhBo0r1nEdVkd0gYsgxidxCVMLZY3R5W5P09QI9g5ZsI+3wPC4BwhOSbG3E7dRl7RZkuBCdYTo5gOYl+IWFMogA9wxq2teHEorj53iqxhSbc7WEBROrrwXV1eIOiTCOC9bD8q4WcOI4V8bbFIKF28vAEy/FDRWtHO+mKou7z802kO+kOEK71puXENfGuKNOGQAeOCjkeVs5bQivUTkxiXjvLxjUgu+Jkyou7zy90I7TSicmO58rPx66o0DyWokwjAhUsy1/2OeQkupPuWQ8rihf6IUIyHkHSLumywu7zC02EDC6dJLv3hWtqdIqOokwjghWsAd4SYqUQK0WUWHfbeIefzyrvCQkLjRdC7jU9SfZwTQ2ZnTvJNDWNs/WKokwGAh3W0Cvp7oeESfGGNEToKYOc3Ocdy5TmelgapdjQAAAYjUlEQVSeiGWn50BOHku9LEWZFkyYh5VNrifE85jCpkewUu0hyBPcWLR7X6HreVitOR5WqKoKRDQsVJRpQqAelm0JGJeQ25PDygpWyC3obpveZxEqNL3OL/Y9rD05gmVFo9gzZpDQoQ2KMi0IdJmvXtVG/WENneKFeLbb42Fl9gnh4t6CVWgiiIEW09Frf7i2lvhrr3W/PVQU5eAl0IqjnmB5ApUNCbvoxBgbfMGSZAbTKYSLnF7n21jkmwh76T2yPVJbi9PSQnrb9gB+haIoE0nAwxrA9ld4zibdO4ljMoVkXBuAaLMnaJHCTL8+ik2UFtPea1+4vh6A+Cs6EVpRDnYCf0sYygqWHxJ20IXJFJF2PMGK7PYEK68w2a+PYjfaLyQMzZyJRCLE164bT/MVRZkEBPyWUAhlPMHKhoT7TBycAlKuZ0qkyQv58gpT/fooMVGa+wiW2LaXx1qngqUoBzuBCVbacft4WJ5gtZo44hR0e1jRpi7Ig3A4jZjeJZCL3TxaTHv3ghZZwrNmkdiwAadDKzcoysFMwMMa6BUSOsalgwTiFJDKhoRNXVDkbdsm3auPEpOHK4Y99PayIrNmgTEkXn8tgF+iKMpEEeBbQhfb6vGwHCtMu0lgAMspIOVmBasTU+hth0zvsLDM9eYb7jb7eu3vHvHeoBOhFeVgZuKS7hLpnmYTMjFSjo0VzxBuT+EW+bWyTO/Ee6nrzTfcbdp67bdiMewZM7TUjKIc5AQ/Divjj8OyI7S6/qISbj7JjN2dcM8U+bWy+nlYnmDt6CNYoJUbFGU6EFzS3e0dEroS6Z5mEzH5pByb6C7ve7rIC/3CfTysfMIUuGG2mT39+g9XV5PZtUsrNyjKQUygHpYnWF24YmEsmz2mExuLCBFSrk10dydGIFnoeVJ9Q0KASreArQMJVk0NAIkNG0Zs444dOzj9w2ewc+fOEfehKMr4EeiwBtvPYWUHje4xnZRJPhHLIZkJEd3ViZMfJm371Ujd/oI10y3gPbe/FxWqqgIgsX79iG284Z+/x/PPreGfr7thxH0oijJ+BCtYtpfDcsQTrGbTQQiLsOX4IWEH6ZI8HEK4RgibRL9+qp0i9tDZq8wMgJWXR3j2rAPOY6WTDnnRPESE2//tVv7mY9/nJ/9+GyJCJJzH6rs38trqRpob2zGuTrBWlIkkFNSFUhkvhxV2OnFtT7C2u63UWWVYVoZ0GiK7O+k8rAIQ0oQHFizXq0L6jrub5fbcXsdiRx5F14svDtumd9bu5pn7N/GtC+7ksYZf8vb2N6ibMZ+q8lksmruUT33wC2x8fgeZlDeANa8wzKzF5Zx22eLucs+KogRHYIKVdgwh/y2hY0VImjQJ0hRKlLTlUN25B8sxpIu9uldpIkTceL9+ahxvYYp3zW6W01uwjOOQ2bWL9I4dhKurB7Ulk3J46t632PDsDgrLovzFuct4o6uatZue4R/v+gzpdJqa2TNZ8amjMcaQ7Mywd1cX2ze18tYLu9jXHOf0yxdTUpk/6DUURRl7AhQsF8sSwpkOHKtnTmARebTbGebu88rDZEo9wUoSIWL6C1aRiVLoRnjH7O53LDJnDgCdf36O0vM/NaAd6/74Hq//aRv7mhPMWlLOnKNmYFlCa+tezjvnAs772IU88Pt72bvXS+yLCHmFYaoLS6iaV8zuLe2829DE/d97iTOuWMLsIypGfW8URRkegQiW4xoyriFkC+FMJ44Vocn1ysQUSh4JK0Nd2y6M0O1hpYhQ4g48N7DaLWKz21+wQoccQqiykvZVTw4oWO17Eqz7w/skuzIs+WANlbN6Frm46dpbu7f/4cvXDXhdEeGQucUUV+bx+lPbeeT2Vzj5vPksO6MeEQ0RFWW8CSTpnsp4OaCQZfmCFaXJZAUrStjKMK9tO/GiGNieSUmiRN0uGKCSaJVTxBbT1G8StIhQfPZH6Vi1mszevb2O7WuO88AP1pJKOBz14bpeYnWgxAojHPORWRx6dCV//t3b/PEX60knvYKD3tCID+vQCEUZBwIWLCGcaSdjx2jxx2BFCRG2HA5t20Z7ac/CqUnysHEGTLzXOEXESbPD7O13TAoKwXFoe+DB7n1tTXEeuHktqXiGpafVUzpz9LknO2RRd3gZc5fOYNOLu7j/ey/S3NjBd2+4gef+/CzfveH6UV9DUZTeBCJYyYznfYQtl0imA8f2CvEVEkVEKIp3MCOxj9ayku5zEv7Cqnlue7/+avw3hW+bXf2OhQ85hHBdHW0P/A5jDG1NXTx481rSSYdzv3I0xRV5Y/KbXCdDe0sjsYJtVM3dyp7Gp/jJV2/gF3fcwZUfOp5f/vwXPPXgb3n/9VfZs30bmVT/+l6KohwYgeSwkr6HVUC2UkOUFreZAvHyVdVNXoK7qbScQjyBivsLq8bcdtqZ2au/aqcIywhvuTv4C3tRv+vFli1j3yOP8PLP/0TDegvHMSw7rY7d7+3r13a4pBKd7GncSEvjm7TueIeOPdsxfep1Pd6wnuWzaqgtK+Houir+6et/zyeWLe4+XnJIFRW19VTOnsvMOfM4ZN5hFFfO1PyXogyTQAQrkfY8rGL/zWAmlM8e00G1VQrAzF17cBF2l5Yzt69gOf1FJoxNtVvEejPwwhN5ixez68k1NLwUx4nms+y0OgrLDtyzSsU72PnOWt5reILO1l2AwbJD5BdXUlG/iLzCUsJ5hYQjMdq64qx9+An+7oxTAFhx+Dx+8Idn+eIVV1NWZBNvb6OztZXdm99h87qXulf5KSgrp2bB4cxaspS5Ry+nZGbVAdupKNOFgATL80RK/ER7px2hgyQF/oj3yp1tNBZW0hHuyS05hEiZMPlu/8oMAHMypay1t5ExLiHpHdnG7SIajvkqTsZh2Vk1ByRWxhhaGt+k8Y2n2fVuA8Z1iMSKmDFrMUUVNcSKZiBW/0j6vx+6n+WzaymOedcqjuVxzOw6fnbX03z181cx92iX7GlOJkPH3hb2Ne2mbddOtr7xGpvW/BmA8po6Fpx4CgtO/AAzZs1R70tRcghGsPwcVpFfeG+X/yawgCgYQ8X2Np6qWErcRHqd10U++c7AgjXPKedZ3meT2ckiqek5J2nxxLoSMmHD0S9+n4JlV0LFiUPa6GTSbN+4hrfXPEyyqw07FKGsej5l1YcSLSgdUjg2vvc2b255l6c2vtNr//wa4b31IZoaXQ5b7lBcYbBDIUoqD6Gk8hDqFx+JMYautlZaGt8n3t7Gmgfu5/nf3UtZdS0LTvwAC048hcrZc1W8lGlPIILVlfJDQtcTrJ2WARcKJEKsNUm0K83b82uJ01+wygcRrPkZb8DmOncLiyxPsOJJ4cmGYrpSFofXdlDwQhvyh/swSwcXrEwqzvuvP8WWhsdJde0jr7CM2sNPorhyNpZtD/s3/vBrg78V7GzbS/O2Yl5ZFeKQ2S6zj3CIxnqOiwgFpWUUlJYBMHfZseza/C67N7/NmgfvY80D91JQWsbso46mfvGRVM1fQHltHZY1fPsU5WAgEMGKp7w1BosdbxjCbssB1xs0Wr7VE6Qt5VUUmN4eRBcF1Jgd2CbVPWE6S7GJUu0U8qL1LhdzMvGk8ERDCV1Jm4W1cQrzIX3Uh4g+9whm02tw2JG9zk/FO3jv1SfZvO6PuJkUBWXVVB92HAWlh4y5J1NQkiJW2MKeXQXsfj+f3Vstqua61B7mECvs3z4Sy6d+8RHULz6CVLyLaGEh773awLvrXmL9U08CEM6LMWPWbCpnzaFy1lwqZ8+lcvYcIjGdLqQcvATqYRVl9uJKiN0ksBFiRCjfug/HFlqLC3FM79WeOykAoMDZy77QIf36XZSZyVP2FnbHUzz9chXpjLCwLk5RvtdPetHxhBtWI4/8GvO3/wJAomMvWxoeZ+sbT+OkkxTNqKdy1hJixeM7xcayDTNqOiipiLN3dwE7381jxzsW5VWGqnkO5VUGGWCQSSSWj3FcZi05ivrFR9LV1krb7l3sa9pF+54W1j+1ikxqpddYhPKaOqoPW0jNgkXULzmS0kOqNZRUDhoCEayOpO9hZZpIhYvYbdopl0IsEWZsbqWzIkYRcVoo730envtRlGkZULCWpapZ58R5fEMZtiMsrO+iKJYz1CAcIX3UB4mueYx9655ic8u77HhrDRioXnAc85afxd7tb4/fDx+AcNRhZv0+yqs62Ncco605xp6dYeyQQ/WhMHO2Q0HxwOfmho41Cw4H8Cdnd9De0kx7SzNtTbt46/lneGP14wAUVVQy64ilzD5yKbOOXNYddirKVCQQwWpPeIJVlNxFKlTEDreVKCFirQkKW+JsX1TBDNp408wmYyxC4olOnBgZY1PotAzYb2nboXxq2yfoslMcOnsPRZHeT7oxhl31s9jZUkfzs3chlk1Z9WFU1B1OJFYYuFjlEgq7lFd3UlbVSee+KO0tMRrfitC40Sa/xGVGjaGsyqWo3LA/B8mbnF1EXmERlbO96hXZJP6e7Y3s2d7Ixuef5o0/eQJWXlNH3eIjqFmwiKpDF1BeUzvgW09FmYwEIlht8TQhSyiKN7InVsMu08ZCq4rqDc3e8aoC6s1uHGw2mUNYJDv8M4V2iijO9K4w6ro2TbsPo3XPbCLhDh5afBv5EYtr9n2REDaZTIpdzW/x/rYG0uk4oYIYC3Y0UbTwWBKHHRvETx42IlBYkqSwJEkmbdHRGiWdLOT9DRbvb7Cxw4aSGYaSSpfSSkNB6f4FzOuzxxOrX3wkxnXZ19JENL+AxvWv8eazf+LVx70wMpwXo6Kunoq6WZTX1FEys4rSQ6ooLK8gv7hExUyZVAQiWHs7U1RF4uQnm/hT+WFk2MsMKWT2y5vZW1NIqiDCArOVqEmxxpnHImtH97mtlDLbeR/bpEibPNpaa9nTPI9MJo/SsveJRDtY0XEkv614nD90Psbc7fm07NmCaxzyokWUz1hAYUEFNfvWEHv6cdrb2uhaeATJ6jqGfPIDJhR2Ka2MA3HKq4Su9gjxjgjtLRH27PCXPgsbSmYaSme6lM50iRUO/TPEsiip9ELq+cedxKHLT6CzdS/7mnezr7mJzr172LTmz6TiXb3Os+wQBaVl5JeUUlBaSn5JmS+EpRSUlVPgf88vLSWSFxvo0ooypgQiWLvak5wcfhvS8Kco2Fic0OCFg+8d7T1IETIsNO/zqluHayBb0LPJzCSazqe9sZZtnUfiumFisb0UFjeC20TH3lZmNLZxSecsbHcHO22hrPAQSooOIS/aU5Fh+5LjqTRC8WsvU/zqi6QqZtJy2jmkquuCuAUHjB0yFJUlKSrz6tpn0hbxjgjx9ghtuyO0bPP+6sJRQ2GpIb/EECswRGKGUARCYS/Rb9n0fCxP3MSyKCyvoLC8gpoFPVObMqkU8fZ9xPe1kejqJNnZQbKrk1Q8zu4tm0nF3yCViA9YQSMUjRIrKia/uKQ7RM0rLCJWWEheYRHRgkLy/E+0oIBofgHhWIxwNA87FNIXA8qwCESwtjR3cpk8w+MFxaxOb+PcTfkcvXILHRUxWmsKMQaMsTnKbaSZObyWWkqVkyGeKiGRLuZ510HcVqLWSgztdHW10ZpKds/ls0N55OWV8lp9My/M2c7cRILj90aoShoKMzEibggrFGL3spNoXryc/J3vMWPDK1Td/0u65i0kNbMap6CIdHkFmdIK3LyY92S7fgJ/EoRFobBLUVmCorIExkA6ZZPoiJDoDNPZFqJ1dwhjhnroDXYYwhEIRwzhPIjkGSJ5nvCFInmEwlGihZXESrMC54egkvXkXDKpBOlEF6lEJ+lEF+lknFSii3QiQToRp3Xndm87mSCdSg0ocLlYto0dCmOHw9ihEJYdwgrZ2HbI2xcOEwpHvD8jEexwhFB2X8T7MxSJEIpECYXD3vFIBCsU8vqzbMSysCzLE0b/TxELsaT7mGXZiG1j2TaWZWPZFmLZ3nnZj0jv7WwfOdsg3j7/pqkYjx1ihvjHlMuxxx5rXnrppWG13fDMav74s9tJp1MYJ03IBceK9Gll+nz2j4VLcThJWSROebSLqlg7ldE4TyQ+TqcpwWB4tWIrz9a8RSKU7nVuyLWwsHBxyVgusYThM88aTtrgUtGnIIQB7x+1L1jG8pYlS9bUs/u8vxzW7w8aY8DJWGTSFq5j4TqCcQVjBNfF23YF17VwHMFJW357G9fxFWkskN45NmMMmCTGJMD1/zQpjEmCSQFpjEmDcQDvY4dcyqoyGMfFdR1cx8F1MjiZnm3vz+z+DMZ1BzFoEiK+mIH/H0HP/fc2e8SuH/7zanKfl6GeYb+f7j6Fnuvndj2MZ7C7y4H+vfjXqT5sIZ+55sDKK4nIy8aYIRPMByRYItIEvHdAlsAMoPkAzwmCyWiX2jQ81KbhMZVsmm2MqRzq5AMSrJEgIi8NRzmDZjLapTYND7VpeByMNk18ckZRFGWYqGApijJlCEKwfhrANUbCZLRLbRoeatPwOOhsGvcclqIoylihIaGiKFOGMRMsETlLRDaKyNsi8o0BjkdF5F7/+BoRmTNW1x6FTZeJSJOINPifzwdg0y9EZLeIvD7IcRGRH/s2vyoix0wCm04Vkbac+/StAGyqF5FVIrJeRN4QkS8P0CbQezVMmwK9VyKSJyIviMgrvk39VgEO+tkbpk0je/aMMaP+ADbwDjAPiACvAIv7tLkK+Hd/+yLg3rG49ihtugy4bTztGMCuDwHHAK8Pcvxs4DG8kYQnAmsmgU2nAo8EfJ+qgWP87SLgrQH+/gK9V8O0KdB75f/2Qn87DKwBTuzTJuhnbzg2jejZGysP63jgbWPMu8aYFPAb4Nw+bc4F/tPf/m/gNBnfOQvDsSlwjDFPAXv20+Rc4L+Mx/NAqYhUT7BNgWOM2WGMWetvtwMbgNo+zQK9V8O0KVD8397hfw37n76J6UCfvWHaNCLGSrBqga053xvp/xfZ3cYYkwHagPEs8zkcmwDO98OJ/xaR+nG0Z7gM1+6gOcl38R8TkSVBXtgPYY7G+586lwm7V/uxCQK+VyJii0gDsBv4ozFm0PsU0LM3HJtgBM/edE+6/w8wxxhzFPBHev4XUnqzFm/qxFLgVuDBoC4sIoXAb4GvGGNGvhLuGDKETYHfK2OMY4xZBtQBx4vIEeN9zTGwaUTP3lgJ1jYgVyHr/H0DthGREFACDFxKNCCbjDEtxpik//UOYPk42jNchnMvA8UYsy/r4htjHgXCIjJjvK8rImE8YbjLGPO7AZoEfq+Gsmmi7pV/vVZgFXBWn0NBP3tD2jTSZ2+sBOtF4DARmSsiEbzE3sN92jwMXOpvfxp40vjZt3FiSJv65Ds+gZeTmGgeBv7KfwN2ItBmjNkx1EnjiYhUZXMeInI83r+bcf0H71/v58AGY8zNgzQL9F4Nx6ag75WIVIpIqb8dA84A3uzTLNBnbzg2jfjZG8M3A2fjvTV5B/hHf993gE/423nA/cDbwAvAvPF6S3EANn0PeAPvDeIq4PAAbLoH2AGk8XIunwO+CHzR9Lxhud23+TXg2Elg0/+Xc5+eB04OwKYP4CVqXwUa/M/ZE3mvhmlToPcKOApY59v0OvCtAf6dB/rsDdOmET17OtJdUZQpw3RPuiuKMoVQwVIUZcqggqUoypRBBUtRlCmDCpaiKFMGFSxFUaYMKliKokwZVLCmISKyOlsTKXfb//4Df+LurSISE5E/iYi9n74iIvKUP+VjoP76fv93ETllKDuGsnu45ygHFypYSjcicihwijFmqTHmb4ArgN8ZY5zBzjFe6Z4ngAuHeZkT8UaAK8oBo4KlACAiC4HVwGwRWSciBcAlwEM5bVaJyBn+9vUicqt/6EG/7VDXWAS8tT8BHOI6yjQnNNEGKJMDY8xGEflPYIsx5g5/wvg8Y8yWnGbfBr4jIjPxakF9wt//OnDcMC7zUWDlMNoNdh1lmqMelpLLkXiTUcFbUrw196DxKpMK8FXgoqyn5P+ZEpGiIfo/k2EI1mDXURQVLCWXJXjeEkAcb5Z/NyJyJF5d85TxSgTnEgUSg3UsIvlAqTFm+1BGDHEdZRqjgqUA4HtHaWNMHMAYsxewRSTPP14N3IVXH7xDRM7KObcCaDbGpPdziRV4ZUSGsmPQ6yiKCpaS5Qh6vKssfwA+4HtHvwP+zhizAfhnvDxTlhXA74fofzj5q6Guo0xzNOmuAGCMeQ74TJ/dtwN/a4x5HDgpp+1Tud+Bi4F+6z724WTgb4do02WM2d91lGmOeljKoBhvSatVQw0cBR40xrw1RF/HDBEyKsqQqIc1PfkVPW8Ac7f7YYz5xf468geO/tcgfQ/V/7DtGEHfykGIlkhWFGXKoCGhoihTBhUsRVGmDCpYiqJMGVSwFEWZMqhgKYoyZVDBUhRlyqCCpSjKlOH/Bw0gqoeWADVsAAAAAElFTkSuQmCC\n", 154 | "text/plain": [ 155 | "
" 156 | ] 157 | }, 158 | "metadata": {}, 159 | "output_type": "display_data" 160 | } 161 | ], 162 | "source": [ 163 | "def compute_output_input_norm_ratios(X, save_dir, arch, beta=1.0):\n", 164 | " input_norms = np.sqrt(np.sum(np.square(X), axis=(1, 2, 3)))\n", 165 | " output_norms = np.linalg.norm(dl_utils.get_embedding(X, save_dir, arch, \n", 166 | " beta=beta, num_channels=X.shape[-1]),\n", 167 | " axis=1)\n", 168 | " return output_norms/input_norms\n", 169 | "\n", 170 | "plt.figure(figsize=(5, 3.5))\n", 171 | "\n", 172 | "resultsfile = '/data/save_weights_tf1.10.1/results/normratios_results_cifar10_alexnet_erm_redo.pickle'\n", 173 | "if os.path.isfile(resultsfile):\n", 174 | " ratios = pickle.load(file(resultsfile, 'rb'))\n", 175 | "else:\n", 176 | " ratios = {} \n", 177 | "for beta in beta_list:\n", 178 | " if beta not in ratios:\n", 179 | " save_dir = os.path.join(dirname, 'erm_beta%s'%(beta))\n", 180 | " ratios[beta] = compute_output_input_norm_ratios(Xtt, save_dir, arch, beta=beta)\n", 181 | " pickle.dump(ratios, file(resultsfile, 'wb'))\n", 182 | "\n", 183 | "ratios[r'$\\infty$'] = ratios.pop(np.inf)\n", 184 | "\n", 185 | "plot_hists(ratios, value_name=r'$||f(x)||/||x||$', add_markers=True)\n", 186 | "plt.yticks([])\n", 187 | "plt.title('AlexNet Norm Gain')\n", 188 | "plt.savefig('/data/Figures/cifar10_alexnet_normgain.pdf', format='pdf', dpi=500, bbox_inches='tight')\n", 189 | "plt.show()" 190 | ] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "Python 2", 196 | "language": "python", 197 | "name": "python2" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 2 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython2", 209 | "version": "2.7.12" 210 | } 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | --------------------------------------------------------------------------------