├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── examples
│   ├── eval.py
│   ├── language
│   │   ├── README.md
│   │   ├── config.py
│   │   ├── data
│   │   │   ├── character_substitution_enkey_sub1.json
│   │   │   ├── sst_binary_character_vocabulary_sorted.txt
│   │   │   └── sst_binary_character_vocabulary_sorted_pad.txt
│   │   ├── exhaustive_verification.py
│   │   ├── interactive_example.py
│   │   ├── models.py
│   │   ├── robust_model.py
│   │   ├── robust_train.py
│   │   └── utils.py
│   └── train.py
├── interval_bound_propagation
│   ├── __init__.py
│   ├── src
│   │   ├── __init__.py
│   │   ├── attacks.py
│   │   ├── bounds.py
│   │   ├── crown.py
│   │   ├── fastlin.py
│   │   ├── layer_utils.py
│   │   ├── layers.py
│   │   ├── loss.py
│   │   ├── model.py
│   │   ├── relative_bounds.py
│   │   ├── simplex_bounds.py
│   │   ├── specification.py
│   │   ├── utils.py
│   │   └── verifiable_wrapper.py
│   └── tests
│       ├── attacks_test.py
│       ├── bounds_test.py
│       ├── crown_test.py
│       ├── fastlin_test.py
│       ├── layers_test.py
│       ├── loss_test.py
│       ├── model_test.py
│       ├── relative_bounds_test.py
│       ├── simplex_bounds_test.py
│       └── specification_test.py
└── setup.py

/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# How to Contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.

## Community Guidelines

This project follows
[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Interval Bound Propagation for Training Verifiably Robust Models

This repository contains a simple implementation of Interval Bound Propagation
(IBP) using TensorFlow:
[https://arxiv.org/abs/1810.12715](https://arxiv.org/abs/1810.12715).
It also contains an implementation of CROWN-IBP:
[https://arxiv.org/abs/1906.06316](https://arxiv.org/abs/1906.06316),
as well as a sentiment analysis example under [`examples/language`](https://github.com/deepmind/interval-bound-propagation/tree/master/examples/language)
for [https://arxiv.org/abs/1909.01492](https://arxiv.org/abs/1909.01492).

This is not an official Google product.

## Installation

IBP can be installed with the following command:

```bash
pip install git+https://github.com/deepmind/interval-bound-propagation
```

IBP works with both the CPU and GPU versions of TensorFlow and dm-sonnet. To
allow for that, it does not list TensorFlow as a requirement, so you need to
install TensorFlow and Sonnet separately if you haven't already done so.

## Usage

The following command trains a small model on MNIST with epsilon set to 0.3:

```bash
cd examples
python train.py --model=small --output_dir=/tmp/small_model
```

## Pretrained Models

Models trained using IBP and CROWN-IBP can be downloaded
[here](https://drive.google.com/open?id=1lovI-fUabgs3swMgIe7MLRvHB9KtjzNT).

### IBP models:

| Dataset  | Test epsilon | Model path                 | Clean accuracy | Verified accuracy | Accuracy under attack |
|----------|--------------|----------------------------|----------------|-------------------|-----------------------|
| MNIST    | 0.1          | ibp/mnist_0.2_medium       | 98.94%         | 97.08%            | 97.99%                |
| MNIST    | 0.2          | ibp/mnist_0.4_large_200    | 98.34%         | 95.47%            | 97.06%                |
| MNIST    | 0.3          | ibp/mnist_0.4_large_200    | 98.34%         | 91.79%            | 96.03%                |
| MNIST    | 0.4          | ibp/mnist_0.4_large_200    | 98.34%         | 84.99%            | 94.56%                |
| CIFAR-10 | 2/255        | ibp/cifar_2-255_large_200  | 70.21%         | 44.12%            | 56.53%                |
| CIFAR-10 | 8/255        | ibp/cifar_8-255_large      | 49.49%         | 31.56%            | 39.53%                |

### CROWN-IBP models:

| Dataset  | Test epsilon | Model path                   | Clean accuracy | Verified accuracy | Accuracy under attack |
|----------|--------------|------------------------------|----------------|-------------------|-----------------------|
| MNIST    | 0.1          | crown-ibp/mnist_0.2_large    | 99.03%         | 97.75%            | 98.34%                |
| MNIST    | 0.2          | crown-ibp/mnist_0.4_large    | 98.38%         | 96.13%            | 97.28%                |
| MNIST    | 0.3          | crown-ibp/mnist_0.4_large    | 98.38%         | 93.32%            | 96.38%                |
| MNIST    | 0.4          | crown-ibp/mnist_0.4_large    | 98.38%         | 87.51%            | 94.95%                |
| CIFAR-10 | 2/255        | crown-ibp/cifar_2-255_large  | 71.52%         | 53.97%            | 59.72%                |
| CIFAR-10 | 8/255        | crown-ibp/cifar_8-255_large  | 47.14%         | 33.30%            | 36.81%                |
| CIFAR-10 | 16/255       | crown-ibp/cifar_16-255_large | 34.19%         | 23.08%            | 26.55%                |

In these tables, we evaluated the verified accuracy using IBP only.
We evaluated the accuracy under attack using a 20-step untargeted PGD attack.
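Under the hood, `eval.py` builds this evaluation graph from a handful of
library calls. The following simplified sketch shows the main steps; the
placeholder inputs and the `tiny` architecture here are illustrative stand-ins
for the real data pipeline and the pretrained models above:

```python
import interval_bound_propagation as ibp
import tensorflow.compat.v1 as tf

num_classes = 10
epsilon = 0.3
# Placeholders stand in for the dataset iterator used by eval.py.
image = tf.placeholder(tf.float32, [200, 28, 28, 1])
label = tf.placeholder(tf.int64, [200])

# Wrap the network so that bounds can be propagated through it.
predictor = ibp.VerifiableModelWrapper(
    ibp.DNN(num_classes, (('linear', 100), ('activation', 'relu'))))
predictor(image, override=True, is_training=False)  # Builds the network.

# Propagate the input box [image - epsilon, image + epsilon], clipped to
# the valid pixel range [0, 1].
predictor.propagate_bounds(ibp.IntervalBounds(
    tf.maximum(image - epsilon, 0.), tf.minimum(image + epsilon, 1.)))

# Nominal, verified and PGD-attack metrics, as computed in eval.py.
specification = ibp.ClassificationSpecification(label, num_classes)
attack = ibp.UntargetedPGDAttack(predictor, specification, epsilon,
                                 input_bounds=(0., 1.),
                                 optimizer_builder=ibp.UnrolledAdam)
losses = ibp.Losses(predictor, specification, attack)
losses(label)  # After this, losses.scalar_metrics holds the accuracies.
```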
You can evaluate these models yourself using `eval.py`, for example:

```bash
cd examples
python eval.py --model_dir pretrained_models/ibp/mnist_0.4_large_200/ \
  --epsilon 0.3
```

Note that we evaluated the CIFAR-10 2/255 CROWN-IBP model using CROWN-IBP
(instead of pure IBP). You can do so yourself by setting the flag
`--bound_method=crown-ibp`:

```bash
python eval.py --model_dir pretrained_models/crown-ibp/cifar_2-255_large/ \
  --epsilon 0.00784313725490196 --bound_method=crown-ibp
```

## Giving credit

If you use this code in your work, we ask that you cite this paper:

Sven Gowal, Krishnamurthy Dvijotham, Robert Stanforth, Rudy Bunel, Chongli Qin,
Jonathan Uesato, Relja Arandjelovic, Timothy Mann, and Pushmeet Kohli.
"On the Effectiveness of Interval Bound Propagation for Training Verifiably
Robust Models." _arXiv preprint arXiv:1810.12715 (2018)_.

If you use CROWN-IBP, we also ask that you cite:

Huan Zhang, Hongge Chen, Chaowei Xiao, Sven Gowal, Robert Stanforth, Bo Li,
Duane Boning, Cho-Jui Hsieh.
"Towards Stable and Efficient Training of Verifiably Robust Neural Networks."
_arXiv preprint arXiv:1906.06316 (2019)_.

If you use the sentiment analysis example, please cite:

Po-Sen Huang, Robert Stanforth, Johannes Welbl, Chris Dyer, Dani Yogatama, Sven Gowal, Krishnamurthy Dvijotham, Pushmeet Kohli.
"Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation."
_EMNLP 2019_.


## Acknowledgements

In addition to the people involved in the original IBP publication, we would
like to thank Huan Zhang, Sumanth Dathathri and Johannes Welbl for their
contributions.

--------------------------------------------------------------------------------
/examples/eval.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Evaluates a verifiable model on MNIST or CIFAR-10."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags
from absl import logging
import interval_bound_propagation as ibp
import tensorflow.compat.v1 as tf


FLAGS = flags.FLAGS
flags.DEFINE_enum('dataset', 'auto', ['auto', 'mnist', 'cifar10'], 'Dataset '
                  '("auto", "mnist" or "cifar10"). When set to "auto", '
                  'the dataset is inferred from the model directory path.')
flags.DEFINE_enum('model', 'auto', ['auto', 'tiny', 'small', 'medium',
                                    'large_200', 'large'], 'Model size. '
                  'When set to "auto", the model name is inferred from the '
                  'model directory path.')
flags.DEFINE_string('model_dir', None, 'Model checkpoint directory.')
flags.DEFINE_enum('bound_method', 'ibp', ['ibp', 'crown-ibp'],
                  'Bound propagation method. For models trained with CROWN-IBP '
                  'and beta_final=1 (e.g., CIFAR 2/255), use "crown-ibp". '
                  'Otherwise use "ibp".')
flags.DEFINE_integer('batch_size', 200, 'Batch size.')
flags.DEFINE_float('epsilon', .3, 'Target epsilon.')


def layers(model_size):
  """Returns the layer specification for a given model name."""
  if model_size == 'tiny':
    return (
        ('linear', 100),
        ('activation', 'relu'))
  elif model_size == 'small':
    return (
        ('conv2d', (4, 4), 16, 'VALID', 2),
        ('activation', 'relu'),
        ('conv2d', (4, 4), 32, 'VALID', 1),
        ('activation', 'relu'),
        ('linear', 100),
        ('activation', 'relu'))
  elif model_size == 'medium':
    return (
        ('conv2d', (3, 3), 32, 'VALID', 1),
        ('activation', 'relu'),
        ('conv2d', (4, 4), 32, 'VALID', 2),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 64, 'VALID', 1),
        ('activation', 'relu'),
        ('conv2d', (4, 4), 64, 'VALID', 2),
        ('activation', 'relu'),
        ('linear', 512),
        ('activation', 'relu'),
        ('linear', 512),
        ('activation', 'relu'))
  elif model_size == 'large_200':
    # Some old large checkpoints have 200 hidden neurons in the last linear
    # layer.
    return (
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 2),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('linear', 200),
        ('activation', 'relu'))
  elif model_size == 'large':
    return (
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 64, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 2),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('conv2d', (3, 3), 128, 'SAME', 1),
        ('activation', 'relu'),
        ('linear', 512),
        ('activation', 'relu'))
  else:
    raise ValueError('Unknown model: "{}"'.format(model_size))


def show_metrics(metric_values, bound_method='ibp'):
  """Prints the accuracy metrics for the given bound method."""
  if bound_method == 'crown-ibp':
    verified_accuracy = metric_values.crown_ibp_verified_accuracy
  else:
    verified_accuracy = metric_values.verified_accuracy
  print('nominal accuracy = {:.2f}%, '
        'verified accuracy = {:.2f}%, '
        'accuracy under PGD attack = {:.2f}%'.format(
            metric_values.nominal_accuracy * 100.,
            verified_accuracy * 100.,
            metric_values.attack_accuracy * 100.))


def main(unused_args):
  dataset = FLAGS.dataset
  if FLAGS.dataset == 'auto':
    if 'mnist' in FLAGS.model_dir:
      dataset = 'mnist'
    elif 'cifar' in FLAGS.model_dir:
      dataset = 'cifar10'
    else:
      raise ValueError('Cannot guess the dataset name. Please specify '
                       '--dataset manually.')

  model_name = FLAGS.model
  if FLAGS.model == 'auto':
    model_names = ['large_200', 'large', 'medium', 'small', 'tiny']
    for name in model_names:
      if name in FLAGS.model_dir:
        model_name = name
        logging.info('Using guessed model name "%s".', model_name)
        break
    if model_name == 'auto':
      raise ValueError('Cannot guess the model name. Please specify --model '
                       'manually.')

  checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)
  if checkpoint_path is None:
    raise OSError('Cannot find a valid checkpoint in {}.'.format(
        FLAGS.model_dir))

  # Dataset.
  input_bounds = (0., 1.)
  num_classes = 10
  if dataset == 'mnist':
    data_train, data_test = tf.keras.datasets.mnist.load_data()
  else:
    assert dataset == 'cifar10', (
        'Unknown dataset "{}"'.format(dataset))
    data_train, data_test = tf.keras.datasets.cifar10.load_data()
    data_train = (data_train[0], data_train[1].flatten())
    data_test = (data_test[0], data_test[1].flatten())

  # Base predictor network.
  original_predictor = ibp.DNN(num_classes, layers(model_name))
  predictor = original_predictor
  if dataset == 'cifar10':
    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)
    predictor = ibp.add_image_normalization(original_predictor, mean, std)
  if FLAGS.bound_method == 'crown-ibp':
    predictor = ibp.crown.VerifiableModelWrapper(predictor)
  else:
    predictor = ibp.VerifiableModelWrapper(predictor)

  # Test using while loop.
  def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack):
    """Returns the test metrics."""
    num_test_batches = len(data_test[0]) // batch_size
    assert len(data_test[0]) % batch_size == 0, (
        'Test data is not a multiple of batch size.')

    def cond(i, *unused_args):
      return i < num_test_batches

    def body(i, metrics):
      """Compute the sum of all metrics."""
      test_data = ibp.build_dataset(data_test, batch_size=batch_size,
                                    sequential=True)
      predictor(test_data.image, override=True, is_training=False)
      input_interval_bounds = ibp.IntervalBounds(
          tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]),
          tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1]))
      predictor.propagate_bounds(input_interval_bounds)
      test_specification = ibp.ClassificationSpecification(
          test_data.label, num_classes)
      test_attack = attack_builder(predictor, test_specification, FLAGS.epsilon,
                                   input_bounds=input_bounds,
                                   optimizer_builder=ibp.UnrolledAdam)

      # Use CROWN-IBP bound or IBP bound.
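      # CROWN-IBP mixes a tighter (but more expensive) CROWN-style backward
      # bound with the IBP bound; fixing crown_bound_schedule at 1 puts full
      # weight on the CROWN-style bound when computing the verified metric
      # (cf. the crown_bound_init/crown_bound_final flags in train.py).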
      if FLAGS.bound_method == 'crown-ibp':
        test_losses = ibp.crown.Losses(predictor, test_specification,
                                       test_attack, use_crown_ibp=True,
                                       crown_bound_schedule=tf.constant(1.))
      else:
        test_losses = ibp.Losses(predictor, test_specification, test_attack)

      test_losses(test_data.label)
      new_metrics = []
      for m, n in zip(metrics, test_losses.scalar_metrics):
        new_metrics.append(m + n)
      return i + 1, new_metrics

    if FLAGS.bound_method == 'crown-ibp':
      metrics = ibp.crown.ScalarMetrics
    else:
      metrics = ibp.ScalarMetrics
    total_count = tf.constant(0, dtype=tf.int32)
    total_metrics = [tf.constant(0, dtype=tf.float32)
                     for _ in range(len(metrics._fields))]
    total_count, total_metrics = tf.while_loop(
        cond,
        body,
        loop_vars=[total_count, total_metrics],
        back_prop=False,
        parallel_iterations=1)
    total_count = tf.cast(total_count, tf.float32)
    test_metrics = []
    for m in total_metrics:
      test_metrics.append(m / total_count)
    return metrics(*test_metrics)

  test_metrics = get_test_metrics(
      FLAGS.batch_size, ibp.UntargetedPGDAttack)

  # Prepare to load the pretrained model.
  saver = tf.train.Saver(original_predictor.get_variables())

  # Run everything.
  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True
  with tf.train.SingularMonitoredSession(config=tf_config) as sess:
    logging.info('Restoring from checkpoint "%s".', checkpoint_path)
    saver.restore(sess, checkpoint_path)
    logging.info('Evaluating at epsilon = %f.', FLAGS.epsilon)
    metric_values = sess.run(test_metrics)
    show_metrics(metric_values, FLAGS.bound_method)


if __name__ == '__main__':
  flags.mark_flag_as_required('model_dir')
  app.run(main)

--------------------------------------------------------------------------------
/examples/language/README.md:
--------------------------------------------------------------------------------
# Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation

This directory contains an implementation of
[Achieving Verified Robustness to Symbol Substitutions via Interval Bound
Propagation](https://arxiv.org/abs/1909.01492).

## Installation

The installation can be done with the following commands:

```bash
pip3 install "tensorflow-gpu<2" "dm-sonnet<2" "tensorflow-probability==0.7.0" "tensorflow-datasets" "absl-py"
pip3 install git+https://github.com/deepmind/interval-bound-propagation
```


## Usage

The following command reproduces the [SST](https://nlp.stanford.edu/sentiment/)
character-level experiments using a perturbation radius of 3:

```bash
cd examples/language
python3 robust_train.py
```

You should expect to see the following at the end of training
(note that we use the SST dev set only for evaluation here).

```bash
step: 149900, train loss: 0.392112, verifiable train loss: 0.826042,
train accuracy: 0.850000, dev accuracy: 0.747619, test accuracy: 0.747619,
Train Bound = -0.42432, train verified: 0.800,
dev verified: 0.695, test verified: 0.695
best dev acc 0.780952 best test acc 0.780952
best verified dev acc 0.716667 best verified test acc 0.716667
```

We can verify the model in
`config['model_location']='/tmp/robust_model/checkpoint/final'` using IBP.

For example, after changing `config['delta']=1.`, we can evaluate the IBP
verified accuracy with a perturbation radius of 1:

```bash
python3 robust_train.py --analysis --batch_size=1
```

We expect to see results like the following:

```bash
test final correct: 0.748, verified: 0.722
{'datasplit': 'test', 'nominal': 0.7477064220183486,
 'verify': 0.7224770642201835, 'delta': 1.0,
 'num_perturbations': 268,
 'model_location': '/tmp/robust_model/checkpoint/final', 'final': True}
```

We can also exhaustively search over all valid perturbations to verify the
models exhaustively:

```bash
python3 exhaustive_verification.py --num_examples=0
```

We should expect the following results:

```bash
verified_proportion: 0.7350917431192661
{'delta': 1, 'character_level': True, 'mode': 'validation', 'checkpoint_path': '/tmp/robust_model/checkpoint/final', 'verified_proportion': 0.7350917431192661}
```

The IBP verified accuracy `0.7224770642201835` is a lower bound of the
exhaustive verification result, `0.7350917431192661`.

Furthermore, we can also align the predictions between the IBP verification
and exhaustive verification. There should not be cases where IBP can verify
(no attack can change the predictions) and exhaustive verification cannot
verify (there exists an attack that can change the predictions), since IBP
provides a lower bound on the true robust accuracy (which exhaustive search
computes exactly).


## Reference

If you use this code in your work, please cite the accompanying paper:

```
@inproceedings{huang-2019-achieving,
    title = "Achieving Verified Robustness to Symbol Substitutions via Interval Bound Propagation",
    author = "Po-Sen Huang and
      Robert Stanforth and
      Johannes Welbl and
      Chris Dyer and
      Dani Yogatama and
      Sven Gowal and
      Krishnamurthy Dvijotham and
      Pushmeet Kohli",
    booktitle = "Empirical Methods in Natural Language Processing (EMNLP)",
    year = "2019",
    pages = "4081--4091",
}
```

## Disclaimer

This is not an official Google product.

--------------------------------------------------------------------------------
/examples/language/config.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Configuration parameters for sentence representation models."""


def get_config():
  """Returns the default configuration as a dict."""

  config = {}

  config['dataset'] = 'sst'
  # Convolutional architecture.
  # Format: Tuple/List for a Conv layer (filters, kernel_size, pooling_size).
  # Otherwise, nonlinearity.
  config['conv_architecture'] = ((100, 5, 1), 'relu')

  # Fully connected layer 1 hidden size (0 means no layer).
  config['conv_fc1'] = 0

  # Fully connected layer 2 hidden size (0 means no layer).
  config['conv_fc2'] = 0

  # Number of allowable perturbations.
  # (delta specifies the budget, i.e., how many may be used at once.)
  config['delta'] = 3.0

  # Allow each character to be changed to another character.
  config['synonym_filepath'] = 'data/character_substitution_enkey_sub1.json'
  config['max_padded_length'] = 268
  # (~1*268) Max num_perturbations:
  # seqlen * max_number_synonyms (total number of elementary perturbations).
  config['num_perturbations'] = 268

  config['vocab_filename'] = 'data/sst_binary_character_vocabulary_sorted.txt'
  # Need to add pad for analysis (which is what is used after
  # utils.get_merged_vocabulary_file).
  config['vocab_filename_pad'] = (
      'data/sst_binary_character_vocabulary_sorted_pad.txt')

  config['embedding_dim'] = 150

  config['delta_schedule'] = True
  config['verifiable_loss_schedule'] = True

  # Ratio between the task loss and verifiable loss.
  config['verifiable_loss_ratio'] = 0.75

  # Aggregated loss of the verifiable training objective
  # (among softmax, mean, max).
  config['verifiable_training_aggregation'] = 'softmax'

  config['data_id'] = 1

  config['model_location'] = '/tmp/robust_model/checkpoint/final'

  return config

--------------------------------------------------------------------------------
/examples/language/data/character_substitution_enkey_sub1.json:
--------------------------------------------------------------------------------
{"z": ["x"], "y": ["t"], "x": ["s"], "w": ["d"], "v": ["c"], "u": ["8"], "t": ["f"], "s": ["e"], "r": ["g"], "q": ["s"], "p": [";"], "o": ["k"], "n": ["m"], "m": ["j"], "l": ["p"], "k": ["."], "j": ["i"], "i": ["u"], "h": ["n"], "g": ["v"], "f": ["c"], "e": ["r"], "d": ["f"], "c": ["d"], "b": ["g"], "a": ["x"]}

--------------------------------------------------------------------------------
/examples/language/data/sst_binary_character_vocabulary_sorted.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-deepmind/interval-bound-propagation/217a14d12686e08ebb5cfea1f2748cce58a55913/examples/language/data/sst_binary_character_vocabulary_sorted.txt
--------------------------------------------------------------------------------
/examples/language/data/sst_binary_character_vocabulary_sorted_pad.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-deepmind/interval-bound-propagation/217a14d12686e08ebb5cfea1f2748cce58a55913/examples/language/data/sst_binary_character_vocabulary_sorted_pad.txt
--------------------------------------------------------------------------------
/examples/language/interactive_example.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Minimal code to interact with a pretrained Stanford Sentiment Treebank model.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import numpy as np
from six.moves import range
import tensorflow.compat.v1 as tf

import robust_model


SparseTensorValue = collections.namedtuple(
    'SparseTensorValue', ['indices', 'values', 'dense_shape'])


class InteractiveSentimentPredictor(object):
  """Can be used to interact with a trained sentiment analysis model."""

  def __init__(self, config_dict, model_location, max_padded_length=0,
               num_perturbations=0):
    self.graph_tensor_producer = robust_model.RobustModel(**config_dict)

    self.batch_size = self.graph_tensor_producer.batch_size
    if max_padded_length:
      self.graph_tensor_producer.config.max_padded_length = max_padded_length
    if num_perturbations:
      self.graph_tensor_producer.config.num_perturbations = num_perturbations
    self.graph_tensors = self.graph_tensor_producer()

    network_saver = tf.train.Saver(self.graph_tensor_producer.variables)
    self.open_session = tf.Session()
    self.open_session.run(tf.tables_initializer())
    network_saver.restore(self.open_session, model_location)

  def batch_predict_sentiment(self, list_of_sentences, is_tokenised=True):
    """Computes sentiment predictions for a batch of sentences.

    Note: the model batch size is usually hard-coded in the model (e.g. at 64).
    We require that len(list_of_sentences) == self.batch_size.
    If padding is necessary to reach that many sentences, it should happen
    outside of this function.

    Important: we assume that each sentence has the same number of tokens.

    Args:
      list_of_sentences: List[str] in case is_tokenised is False, or
        List[List[str]] in case is_tokenised is True. Holds inputs whose
        sentiment is to be classified.
      is_tokenised: bool. Whether sentences are already tokenised. If not,
        naive whitespace splitting tokenisation is applied.

    Returns:
      batch_label_predictions: np.array of shape [self.batch_size] holding
        integers, representing model predictions for each input.
      logits: np.array of shape [self.batch_size, n_labels] holding the raw
        model outputs.
    """

    # Prepare inputs.
    tokenised_sentence_list = []
    for sentence in list_of_sentences:
      if not is_tokenised:
        tokenised_sentence = sentence.lower().split(' ')
      else:
        tokenised_sentence = sentence
      tokenised_sentence_list.append(tokenised_sentence)
    length = len(tokenised_sentence_list[0])
    assert all([len(x) == length for x in tokenised_sentence_list])
    assert len(tokenised_sentence_list) == self.batch_size

    # Construct sparse tensor holding token information.
    indices = np.zeros([self.batch_size*length, 2])
    dense_shape = [self.batch_size, length]
    # Loop over words. All sentences have the same length.
    for j, _ in enumerate(tokenised_sentence_list[0]):
      for i in range(self.batch_size):  # Loop over samples.
        offset = i*length + j
        indices[offset, 0] = i
        indices[offset, 1] = j

    # Define sparse tensor values.
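    # Flatten the batch of token lists into a single list so that values[k]
    # pairs up positionally with indices[k] in the SparseTensorValue below.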
    tokenised_sentence_list = [word for sentence in tokenised_sentence_list  # pylint:disable=g-complex-comprehension
                               for word in sentence]
    values = np.array(tokenised_sentence_list)
    mb_tokens = SparseTensorValue(indices=indices, values=values,
                                  dense_shape=dense_shape)
    mb_num_tokens = np.array([length]*self.batch_size)

    # Fill feed_dict with input token information.
    feed_dict = {}
    feed_dict[self.graph_tensors['dev']['tokens']] = mb_tokens
    feed_dict[self.graph_tensors['dev']['num_tokens']] = mb_num_tokens

    # Generate model predictions [batch_size x n_labels].
    logits = self.open_session.run(self.graph_tensors['dev']['predictions'],
                                   feed_dict)
    batch_label_predictions = np.argmax(logits, axis=1)

    return batch_label_predictions, logits

  def predict_sentiment(self, sentence, tokenised=False):
    """Computes the sentiment of a sentence."""
    # Create inputs to tensorflow graph.
    if tokenised:
      inputstring_tokenised = sentence
    else:
      assert isinstance(sentence, str)
      # Simple tokenisation.
      inputstring_tokenised = sentence.lower().split(' ')
    length = len(inputstring_tokenised)

    # Construct inputs to sparse tensor holding token information.
    indices = np.zeros([self.batch_size*length, 2])
    dense_shape = [self.batch_size, length]
    for j, _ in enumerate(inputstring_tokenised):
      for i in range(self.batch_size):
        offset = i*length + j
        indices[offset, 0] = i
        indices[offset, 1] = j
    values = inputstring_tokenised*self.batch_size
    mb_tokens = SparseTensorValue(indices=indices, values=np.array(values),
                                  dense_shape=dense_shape)
    mb_num_tokens = np.array([length]*self.batch_size)

    # Fill feed_dict with input token information.
    feed_dict = {}
    feed_dict[self.graph_tensors['dev']['tokens']] = mb_tokens
    feed_dict[self.graph_tensors['dev']['num_tokens']] = mb_num_tokens
    # Generate predictions.
    logits = self.open_session.run(self.graph_tensors['dev']['predictions'],
                                   feed_dict)
    predicted_label = np.argmax(logits, axis=1)
    final_prediction = predicted_label[0]
    # Check that the prediction is the same everywhere (we had a batch of
    # identical inputs).
    assert np.all(predicted_label == final_prediction)
    return final_prediction, logits

--------------------------------------------------------------------------------
/examples/language/models.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Models for sentence representation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sonnet as snt
import tensorflow.compat.v1 as tf


def _max_pool_1d(x, pool_size=2, name='max_pool_1d'):
  with tf.name_scope(name, 'MaxPool1D', [x, pool_size]):
    return tf.squeeze(
        tf.nn.max_pool(tf.expand_dims(x, 1),
                       [1, 1, pool_size, 1],
                       [1, 1, pool_size, 1],
                       'VALID'),
        axis=1)


class SentenceRepresenterConv(snt.AbstractModule):
  """Use stacks of 1D Convolutions to build a sentence representation."""

  def __init__(self,
               config,
               keep_prob=1.,
               pooling='max',
               name='sentence_rep_conv'):
    super(SentenceRepresenterConv, self).__init__(name=name)
    self._config = config
    self._pooling = pooling
    self._keep_prob = keep_prob

  def _build(self, padded_word_embeddings, length):
    x = padded_word_embeddings
    for layer in self._config['conv_architecture']:
      if isinstance(layer, (tuple, list)):
        filters, kernel_size, pooling_size = layer
        conv = snt.Conv1D(
            output_channels=filters,
            kernel_shape=kernel_size)
        x = conv(x)
        if pooling_size and pooling_size > 1:
          x = _max_pool_1d(x, pooling_size)
      elif layer == 'relu':
        x = tf.nn.relu(x)
        if self._keep_prob < 1:
          x = tf.nn.dropout(x, keep_prob=self._keep_prob)
      else:
        raise RuntimeError('Bad layer type {} in conv'.format(layer))
    # Final layer pools over the remaining sequence length to get a
    # fixed-size vector.
    if self._pooling == 'max':
      x = tf.reduce_max(x, axis=1)
    elif self._pooling == 'average':
      x = tf.reduce_sum(x, axis=1)
      lengths = tf.expand_dims(tf.cast(length, tf.float32), axis=1)
      x = x / lengths

    if self._config['conv_fc1']:
      fc1_layer = snt.Linear(output_size=self._config['conv_fc1'])
      x = tf.nn.relu(fc1_layer(x))
      if self._keep_prob < 1:
        x = tf.nn.dropout(x, keep_prob=self._keep_prob)
    if self._config['conv_fc2']:
      fc2_layer = snt.Linear(output_size=self._config['conv_fc2'])
      x = tf.nn.relu(fc2_layer(x))
      if self._keep_prob < 1:
        x = tf.nn.dropout(x, keep_prob=self._keep_prob)

    return x

--------------------------------------------------------------------------------
/examples/language/utils.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utilities for sentence representation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tempfile

from absl import logging
import sonnet as snt
import tensorflow as tf
from tensorflow.contrib import lookup as contrib_lookup


def get_padded_embeddings(embeddings,
                          vocabulary_table,
                          tokens, batch_size,
                          token_indexes=None):
  """Reshapes and pads 'raw' word embeddings.

  Say we have a batch of B tokenized sentences, of variable length, with a
  total of W tokens. For example, B = 2 and W = 3 + 4 = 7:
    [['The', 'cat', 'eats'],
     [ 'A', 'black', 'cat', 'jumps']]

  Since rows have variable length, this cannot be represented as a tf.Tensor.
  It is represented as a tf.SparseTensor, with 7 values & indexes:
    indices: [[0,0], [0,1], [0,2], [1,0], [1,1], [1,2], [1,3]]
    values: ['The', 'cat', 'eats', 'A', 'black', 'cat', 'jumps']

  We have also built a vocabulary table:
    vocabulary table: ['cat', 'The', 'A', 'black', 'eats', 'jumps']

  We also have the embeddings, a VxD matrix of floats (V being the size of the
  vocabulary table) representing each word in the vocabulary table as a normal
  tf.Tensor.

  For example, with D=3, embeddings could be:
    [[0.4, 0.5, -0.6],   # This is the embedding for word 0 = 'cat'
     [0.1, -0.3, 0.6],   # This is the embedding for word 1 = 'The'
     [0.7, 0.8, -0.9],   # This is the embedding for word 2 = 'A'
     [-0.1, 0.9, 0.7],   # This is the embedding for word 3 = 'black'
     [-0.2, 0.4, 0.7],   # This is the embedding for word 4 = 'eats'
     [0.3, -0.5, 0.2]]   # This is the embedding for word 5 = 'jumps'

  This function builds a normal tf.Tensor containing the embeddings for the
  tokens provided, in the correct order, with appropriate 0 padding.

  In our example, the returned tensor would be:
    [[[0.1, -0.3, 0.6], [0.4, 0.5, -0.6], [-0.2, 0.4, 0.7], [0.0, 0.0, 0.0]],
     [[0.7, 0.8, -0.9], [-0.1, 0.9, 0.7], [0.4, 0.5, -0.6], [0.3, -0.5, 0.2]]]

  Note that since the first sentence has only 3 words, the 4th embedding gets
  replaced by a D-dimensional vector of 0.

  Args:
    embeddings: [V, D] Tensor of floats, containing the embeddings,
      initialized with the same vocabulary file as vocabulary_table.
    vocabulary_table: a tf.contrib.lookup.LookupInterface,
      containing the vocabulary, initialized with the same vocabulary file as
      embeddings.
    tokens: [B, ?] SparseTensor of strings, the tokens.
    batch_size: Python integer.
    token_indexes: Optional Tensor of precomputed vocabulary indices for the
      tokens. If None, the indices are looked up from `tokens.values` using
      vocabulary_table.

  Returns:
    [B, L, D] Tensor of floats: the embeddings in the correct order,
    appropriately padded with 0.0, where L = max(num_tokens) and B = batch_size
  """
  embedding_dim = embeddings.get_shape()[1].value  # D in docstring above.
  num_tokens_in_batch = tf.shape(tokens.indices)[0]  # W in the docstring above.
  max_length = tokens.dense_shape[1]  # This is L in the docstring above.

  # Get indices of tokens in vocabulary_table.
  if token_indexes is not None:
    indexes = token_indexes
  else:
    indexes = vocabulary_table.lookup(tokens.values)

  # Get word embeddings.
  tokens_embeddings = tf.gather(embeddings, indexes)

  # Shape of the return tensor.
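  # In the docstring's notation this is [B, L, D]: batch size, padded
  # sequence length and embedding dimension.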
  new_shape = tf.cast(
      tf.stack([batch_size, max_length, embedding_dim], axis=0), tf.int32)

  # Build the vector of indices for the return Tensor.
  # In the example above, indices_final would be:
  # [[[0,0,0], [0,0,1], [0,0,2]],
  #  [[0,1,0], [0,1,1], [0,1,2]],
  #  [[0,2,0], [0,2,1], [0,2,2]],
  #  [[1,0,0], [1,0,1], [1,0,2]],
  #  [[1,1,0], [1,1,1], [1,1,2]],
  #  [[1,2,0], [1,2,1], [1,2,2]],
  #  [[1,3,0], [1,3,1], [1,3,2]]]
  tiled = tf.tile(tokens.indices, [1, embedding_dim])
  indices_tiled = tf.cast(
      tf.reshape(tiled, [num_tokens_in_batch * embedding_dim, 2]), tf.int32)
  indices_linear = tf.expand_dims(
      tf.tile(tf.range(0, embedding_dim), [num_tokens_in_batch]), axis=1)
  indices_final = tf.concat([indices_tiled, indices_linear], axis=1)

  # Build the dense Tensor.
  embeddings_padded = tf.sparse_to_dense(
      sparse_indices=indices_final,
      output_shape=new_shape,
      sparse_values=tf.reshape(tokens_embeddings,
                               [num_tokens_in_batch * embedding_dim]))
  embeddings_padded.set_shape((batch_size, None, embedding_dim))

  return embeddings_padded


def get_padded_indexes(vocabulary_table,
                       tokens, batch_size,
                       token_indexes=None):
  """Gets the indices of tokens from the vocabulary table.

  Args:
    vocabulary_table: a tf.contrib.lookup.LookupInterface,
      containing the vocabulary, initialized with the same vocabulary file as
      embeddings.
    tokens: [B, ?] SparseTensor of strings, the tokens.
    batch_size: Python integer.
    token_indexes: Optional Tensor of precomputed vocabulary indices for the
      tokens. If None, the indices are looked up from `tokens.values` using
      vocabulary_table.

  Returns:
    [B, L] Tensor of integers: indices of tokens in the correct order,
    appropriately padded with 0, where L = max(num_tokens) and B = batch_size
  """
  num_tokens_in_batch = tf.shape(tokens.indices)[0]
  max_length = tokens.dense_shape[1]

  # Get indices of tokens in vocabulary_table.
  if token_indexes is not None:
    indexes = token_indexes
  else:
    indexes = vocabulary_table.lookup(tokens.values)

  # Build the dense Tensor.
  indexes_padded = tf.sparse_to_dense(
      sparse_indices=tokens.indices,
      output_shape=[batch_size, max_length],
      sparse_values=tf.reshape(indexes,
                               [num_tokens_in_batch]))
  indexes_padded.set_shape((batch_size, None))

  return indexes_padded


class EmbedAndPad(snt.AbstractModule):
  """Embeds and pads tokenized words.

  This class's primary functionality is similar to get_padded_embeddings.
  It stores references to the embeddings and vocabulary table for convenience,
  so that the user does not have to keep and pass them around.
  """

  def __init__(self,
               batch_size,
               vocabularies,
               embedding_dim,
               num_oov_buckets=1000,
               fine_tune_embeddings=False,
               padded_token=None,
               name='embed_and_pad'):
    super(EmbedAndPad, self).__init__(name=name)
    self._batch_size = batch_size
    vocab_file, vocab_size = get_merged_vocabulary_file(vocabularies,
                                                        padded_token)
    self._vocab_size = vocab_size
    self._num_oov_buckets = num_oov_buckets

    # Load vocabulary table for index lookup.
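    # Out-of-vocabulary tokens are hashed into one of `num_oov_buckets` extra
    # indices beyond the merged vocabulary, which is why the embeddings matrix
    # below has vocab_size + num_oov_buckets rows.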
    self._vocabulary_table = contrib_lookup.index_table_from_file(
        vocabulary_file=vocab_file,
        num_oov_buckets=num_oov_buckets,
        vocab_size=self._vocab_size)

    def create_initializer(initializer_range=0.02):
      """Creates a `truncated_normal_initializer` with the given range."""
      # The default value is chosen from language/bert/modeling.py.
      return tf.truncated_normal_initializer(stddev=initializer_range)

    self._embeddings = tf.get_variable('embeddings_matrix',
                                       [self._vocab_size + num_oov_buckets,
                                        embedding_dim],
                                       trainable=fine_tune_embeddings,
                                       initializer=create_initializer())

  def _build(self, tokens):
    padded_embeddings = get_padded_embeddings(
        self._embeddings, self._vocabulary_table, tokens, self._batch_size)
    return padded_embeddings

  @property
  def vocab_table(self):
    return self._vocabulary_table

  @property
  def vocab_size(self):
    return self._vocab_size + self._num_oov_buckets


def get_accuracy(logits, labels):
  """Top-1 accuracy from logits and labels."""
  return tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float32))


def get_num_correct_predictions(logits, labels):
  """Gets the number of correct predictions over a batch."""
  predictions = tf.cast(tf.argmax(logits, axis=1), tf.int64)
  evals = tf.equal(predictions, labels)
  num_correct = tf.reduce_sum(tf.cast(evals, tf.float64))
  return num_correct


def get_merged_vocabulary_file(vocabularies, padded_token=None):
  """Merges several vocabulary files into one temporary file.

  The TF object that loads the embedding expects a vocabulary file, to know
  which embeddings it should load.
  See tf.contrib.embedding.load_embedding_initializer.

  When we want to train/test on several datasets simultaneously, we need to
  merge their vocabulary files into a single file.

  Args:
    vocabularies: Iterable of vocabularies. Each vocabulary should be
      a list of tokens.
    padded_token: If not None, add the padded_token at the first index.
  Returns:
    outfilename: Name of the merged file. Contains the union of all tokens in
      filenames, without duplicates, one token per line.
    vocabulary_size: Count of tokens in the merged file.
  """
  uniques = [set(vocabulary) for vocabulary in vocabularies]
  unique_merged = frozenset().union(*uniques)
  unique_merged_sorted = sorted(unique_merged)
  if padded_token is not None:
    # Add padded token as 0 index.
    unique_merged_sorted = [padded_token] + unique_merged_sorted
  vocabulary_size = len(unique_merged_sorted)
  outfile = tempfile.NamedTemporaryFile(delete=False)
  outfile.write(b'\n'.join(unique_merged_sorted))
  outfilename = outfile.name
  logging.info('Merged vocabulary file with %d tokens: %s', vocabulary_size,
               outfilename)
  outfile.close()
  return outfilename, vocabulary_size

--------------------------------------------------------------------------------
/examples/train.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Trains a verifiable model on MNIST or CIFAR-10.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | 24 | from absl import app 25 | from absl import flags 26 | from absl import logging 27 | import interval_bound_propagation as ibp 28 | import tensorflow.compat.v1 as tf 29 | 30 | 31 | FLAGS = flags.FLAGS 32 | flags.DEFINE_enum('dataset', 'mnist', ['mnist', 'cifar10'], 33 | 'Dataset (either "mnist" or "cifar10").') 34 | flags.DEFINE_enum('model', 'tiny', ['tiny', 'small', 'medium', 'large'], 35 | 'Model size.') 36 | flags.DEFINE_string('output_dir', '/tmp/ibp_model', 'Output directory.') 37 | 38 | # Options. 39 | flags.DEFINE_integer('steps', 60001, 'Number of steps in total.') 40 | flags.DEFINE_integer('test_every_n', 2000, 41 | 'Number of steps between testing iterations.') 42 | flags.DEFINE_integer('warmup_steps', 2000, 'Number of warm-up steps.') 43 | flags.DEFINE_integer('rampup_steps', 10000, 'Number of ramp-up steps.') 44 | flags.DEFINE_integer('batch_size', 200, 'Batch size.') 45 | flags.DEFINE_float('epsilon', .3, 'Target epsilon.') 46 | flags.DEFINE_float('epsilon_train', .33, 'Train epsilon.') 47 | flags.DEFINE_string('learning_rate', '1e-3,1e-4@15000,1e-5@25000', 48 | 'Learning rate schedule of the form: ' 49 | 'initial_learning_rate[,learning_rate@steps]*. 
E.g., "1e-3" or ' 50 | '"1e-3,1e-4@15000,1e-5@25000".') 51 | flags.DEFINE_float('nominal_xent_init', 1., 52 | 'Initial weight for the nominal cross-entropy.') 53 | flags.DEFINE_float('nominal_xent_final', .5, 54 | 'Final weight for the nominal cross-entropy.') 55 | flags.DEFINE_float('verified_xent_init', 0., 56 | 'Initial weight for the verified cross-entropy.') 57 | flags.DEFINE_float('verified_xent_final', .5, 58 | 'Final weight for the verified cross-entropy.') 59 | flags.DEFINE_float('crown_bound_init', 0., 60 | 'Initial weight for mixing the CROWN bound with the IBP ' 61 | 'bound in the verified cross-entropy.') 62 | flags.DEFINE_float('crown_bound_final', 0., 63 | 'Final weight for mixing the CROWN bound with the IBP ' 64 | 'bound in the verified cross-entropy.') 65 | flags.DEFINE_float('attack_xent_init', 0., 66 | 'Initial weight for the attack cross-entropy.') 67 | flags.DEFINE_float('attack_xent_final', 0., 68 | 'Initial weight for the attack cross-entropy.') 69 | 70 | 71 | def show_metrics(step_value, metric_values, loss_value=None): 72 | print('{}: {}nominal accuracy = {:.2f}%, ' 73 | 'verified = {:.2f}%, attack = {:.2f}%'.format( 74 | step_value, 75 | 'loss = {}, '.format(loss_value) if loss_value is not None else '', 76 | metric_values.nominal_accuracy * 100., 77 | metric_values.verified_accuracy * 100., 78 | metric_values.attack_accuracy * 100.)) 79 | 80 | 81 | def layers(model_size): 82 | """Returns the layer specification for a given model name.""" 83 | if model_size == 'tiny': 84 | return ( 85 | ('linear', 100), 86 | ('activation', 'relu')) 87 | elif model_size == 'small': 88 | return ( 89 | ('conv2d', (4, 4), 16, 'VALID', 2), 90 | ('activation', 'relu'), 91 | ('conv2d', (4, 4), 32, 'VALID', 1), 92 | ('activation', 'relu'), 93 | ('linear', 100), 94 | ('activation', 'relu')) 95 | elif model_size == 'medium': 96 | return ( 97 | ('conv2d', (3, 3), 32, 'VALID', 1), 98 | ('activation', 'relu'), 99 | ('conv2d', (4, 4), 32, 'VALID', 2), 100 | ('activation', 'relu'), 101 | ('conv2d', (3, 3), 64, 'VALID', 1), 102 | ('activation', 'relu'), 103 | ('conv2d', (4, 4), 64, 'VALID', 2), 104 | ('activation', 'relu'), 105 | ('linear', 512), 106 | ('activation', 'relu'), 107 | ('linear', 512), 108 | ('activation', 'relu')) 109 | elif model_size == 'large': 110 | return ( 111 | ('conv2d', (3, 3), 64, 'SAME', 1), 112 | ('activation', 'relu'), 113 | ('conv2d', (3, 3), 64, 'SAME', 1), 114 | ('activation', 'relu'), 115 | ('conv2d', (3, 3), 128, 'SAME', 2), 116 | ('activation', 'relu'), 117 | ('conv2d', (3, 3), 128, 'SAME', 1), 118 | ('activation', 'relu'), 119 | ('conv2d', (3, 3), 128, 'SAME', 1), 120 | ('activation', 'relu'), 121 | ('linear', 512), 122 | ('activation', 'relu')) 123 | else: 124 | raise ValueError('Unknown model: "{}"'.format(model_size)) 125 | 126 | 127 | def main(unused_args): 128 | logging.info('Training IBP on %s...', FLAGS.dataset.upper()) 129 | step = tf.train.get_or_create_global_step() 130 | 131 | # Learning rate. 132 | learning_rate = ibp.parse_learning_rate(step, FLAGS.learning_rate) 133 | 134 | # Dataset. 135 | input_bounds = (0., 1.) 
136 | num_classes = 10 137 | if FLAGS.dataset == 'mnist': 138 | data_train, data_test = tf.keras.datasets.mnist.load_data() 139 | else: 140 | assert FLAGS.dataset == 'cifar10', ( 141 | 'Unknown dataset "{}"'.format(FLAGS.dataset)) 142 | data_train, data_test = tf.keras.datasets.cifar10.load_data() 143 | data_train = (data_train[0], data_train[1].flatten()) 144 | data_test = (data_test[0], data_test[1].flatten()) 145 | data = ibp.build_dataset(data_train, batch_size=FLAGS.batch_size, 146 | sequential=False) 147 | if FLAGS.dataset == 'cifar10': 148 | data = data._replace(image=ibp.randomize( 149 | data.image, (32, 32, 3), expand_shape=(40, 40, 3), 150 | crop_shape=(32, 32, 3), vertical_flip=True)) 151 | 152 | # Base predictor network. 153 | original_predictor = ibp.DNN(num_classes, layers(FLAGS.model)) 154 | predictor = original_predictor 155 | if FLAGS.dataset == 'cifar10': 156 | mean = (0.4914, 0.4822, 0.4465) 157 | std = (0.2023, 0.1994, 0.2010) 158 | predictor = ibp.add_image_normalization(original_predictor, mean, std) 159 | if FLAGS.crown_bound_init > 0 or FLAGS.crown_bound_final > 0: 160 | logging.info('Using CROWN-IBP loss.') 161 | model_wrapper = ibp.crown.VerifiableModelWrapper 162 | loss_helper = ibp.crown.create_classification_losses 163 | else: 164 | model_wrapper = ibp.VerifiableModelWrapper 165 | loss_helper = ibp.create_classification_losses 166 | predictor = model_wrapper(predictor) 167 | 168 | # Training. 169 | train_losses, train_loss, _ = loss_helper( 170 | step, 171 | data.image, 172 | data.label, 173 | predictor, 174 | FLAGS.epsilon_train, 175 | loss_weights={ 176 | 'nominal': { 177 | 'init': FLAGS.nominal_xent_init, 178 | 'final': FLAGS.nominal_xent_final, 179 | 'warmup': FLAGS.verified_xent_init + FLAGS.nominal_xent_init 180 | }, 181 | 'attack': { 182 | 'init': FLAGS.attack_xent_init, 183 | 'final': FLAGS.attack_xent_final 184 | }, 185 | 'verified': { 186 | 'init': FLAGS.verified_xent_init, 187 | 'final': FLAGS.verified_xent_final, 188 | 'warmup': 0. 189 | }, 190 | 'crown_bound': { 191 | 'init': FLAGS.crown_bound_init, 192 | 'final': FLAGS.crown_bound_final, 193 | 'warmup': 0. 194 | }, 195 | }, 196 | warmup_steps=FLAGS.warmup_steps, 197 | rampup_steps=FLAGS.rampup_steps, 198 | input_bounds=input_bounds) 199 | saver = tf.train.Saver(original_predictor.get_variables()) 200 | optimizer = tf.train.AdamOptimizer(learning_rate) 201 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 202 | with tf.control_dependencies(update_ops): 203 | train_op = optimizer.minimize(train_loss, step) 204 | 205 | # Test using while loop. 
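  # The test set is processed with a tf.while_loop so that a single
  # session.run evaluates every batch; per-batch metrics are summed in the
  # loop body and averaged once the loop finishes.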
206 | def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack): 207 | """Returns the test metrics.""" 208 | num_test_batches = len(data_test[0]) // batch_size 209 | assert len(data_test[0]) % batch_size == 0, ( 210 | 'Test data is not a multiple of batch size.') 211 | 212 | def cond(i, *unused_args): 213 | return i < num_test_batches 214 | 215 | def body(i, metrics): 216 | """Compute the sum of all metrics.""" 217 | test_data = ibp.build_dataset(data_test, batch_size=batch_size, 218 | sequential=True) 219 | predictor(test_data.image, override=True, is_training=False) 220 | input_interval_bounds = ibp.IntervalBounds( 221 | tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]), 222 | tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1])) 223 | predictor.propagate_bounds(input_interval_bounds) 224 | test_specification = ibp.ClassificationSpecification( 225 | test_data.label, num_classes) 226 | test_attack = attack_builder(predictor, test_specification, FLAGS.epsilon, 227 | input_bounds=input_bounds, 228 | optimizer_builder=ibp.UnrolledAdam) 229 | test_losses = ibp.Losses(predictor, test_specification, test_attack) 230 | test_losses(test_data.label) 231 | new_metrics = [] 232 | for m, n in zip(metrics, test_losses.scalar_metrics): 233 | new_metrics.append(m + n) 234 | return i + 1, new_metrics 235 | 236 | total_count = tf.constant(0, dtype=tf.int32) 237 | total_metrics = [tf.constant(0, dtype=tf.float32) 238 | for _ in range(len(ibp.ScalarMetrics._fields))] 239 | total_count, total_metrics = tf.while_loop( 240 | cond, 241 | body, 242 | loop_vars=[total_count, total_metrics], 243 | back_prop=False, 244 | parallel_iterations=1) 245 | total_count = tf.cast(total_count, tf.float32) 246 | test_metrics = [] 247 | for m in total_metrics: 248 | test_metrics.append(m / total_count) 249 | return ibp.ScalarMetrics(*test_metrics) 250 | 251 | test_metrics = get_test_metrics( 252 | FLAGS.batch_size, ibp.UntargetedPGDAttack) 253 | summaries = [] 254 | for f in test_metrics._fields: 255 | summaries.append( 256 | tf.summary.scalar(f, getattr(test_metrics, f))) 257 | test_summaries = tf.summary.merge(summaries) 258 | test_writer = tf.summary.FileWriter(os.path.join(FLAGS.output_dir, 'test')) 259 | 260 | # Run everything. 261 | tf_config = tf.ConfigProto() 262 | tf_config.gpu_options.allow_growth = True 263 | with tf.train.SingularMonitoredSession(config=tf_config) as sess: 264 | for _ in range(FLAGS.steps): 265 | iteration, loss_value, _ = sess.run( 266 | [step, train_losses.scalar_losses.nominal_cross_entropy, train_op]) 267 | if iteration % FLAGS.test_every_n == 0: 268 | metric_values, summary = sess.run([test_metrics, test_summaries]) 269 | test_writer.add_summary(summary, iteration) 270 | show_metrics(iteration, metric_values, loss_value=loss_value) 271 | saver.save(sess._tf_sess(), # pylint: disable=protected-access 272 | os.path.join(FLAGS.output_dir, 'model'), 273 | global_step=FLAGS.steps - 1) 274 | 275 | 276 | if __name__ == '__main__': 277 | app.run(main) 278 | -------------------------------------------------------------------------------- /interval_bound_propagation/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Library to train verifiably robust neural networks. 17 | 18 | For more details see paper: On the Effectiveness of Interval Bound Propagation 19 | for Training Verifiably Robust Models. 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | from interval_bound_propagation.src.attacks import MemoryEfficientMultiTargetedPGDAttack 27 | from interval_bound_propagation.src.attacks import MultiTargetedPGDAttack 28 | from interval_bound_propagation.src.attacks import pgd_attack 29 | from interval_bound_propagation.src.attacks import RestartedAttack 30 | from interval_bound_propagation.src.attacks import UnrolledAdam 31 | from interval_bound_propagation.src.attacks import UnrolledFGSMDescent 32 | from interval_bound_propagation.src.attacks import UnrolledGradientDescent 33 | from interval_bound_propagation.src.attacks import UnrolledSPSAAdam 34 | from interval_bound_propagation.src.attacks import UnrolledSPSAFGSMDescent 35 | from interval_bound_propagation.src.attacks import UnrolledSPSAGradientDescent 36 | from interval_bound_propagation.src.attacks import UntargetedAdaptivePGDAttack 37 | from interval_bound_propagation.src.attacks import UntargetedPGDAttack 38 | from interval_bound_propagation.src.attacks import UntargetedTop5PGDAttack 39 | from interval_bound_propagation.src.bounds import AbstractBounds 40 | from interval_bound_propagation.src.bounds import IntervalBounds 41 | import interval_bound_propagation.src.crown as crown 42 | from interval_bound_propagation.src.fastlin import RelativeSymbolicBounds 43 | from interval_bound_propagation.src.fastlin import SymbolicBounds 44 | import interval_bound_propagation.src.layer_utils as layer_utils 45 | from interval_bound_propagation.src.layers import BatchNorm 46 | from interval_bound_propagation.src.layers import ImageNorm 47 | from interval_bound_propagation.src.loss import Losses 48 | from interval_bound_propagation.src.loss import ScalarLosses 49 | from interval_bound_propagation.src.loss import ScalarMetrics 50 | from interval_bound_propagation.src.model import DNN 51 | from interval_bound_propagation.src.model import StandardModelWrapper 52 | from interval_bound_propagation.src.model import VerifiableModelWrapper 53 | from interval_bound_propagation.src.relative_bounds import RelativeIntervalBounds 54 | from interval_bound_propagation.src.simplex_bounds import SimplexBounds 55 | from interval_bound_propagation.src.specification import ClassificationSpecification 56 | from interval_bound_propagation.src.specification import LeastLikelyClassificationSpecification 57 | from interval_bound_propagation.src.specification import LinearSpecification 58 | from interval_bound_propagation.src.specification import RandomClassificationSpecification 59 | from interval_bound_propagation.src.specification import Specification 60 | from interval_bound_propagation.src.specification import TargetedClassificationSpecification 61 | from interval_bound_propagation.src.utils import add_image_normalization 62 | from 
interval_bound_propagation.src.utils import build_dataset 63 | from interval_bound_propagation.src.utils import create_attack 64 | from interval_bound_propagation.src.utils import create_classification_losses 65 | from interval_bound_propagation.src.utils import create_specification 66 | from interval_bound_propagation.src.utils import get_attack_builder 67 | from interval_bound_propagation.src.utils import linear_schedule 68 | from interval_bound_propagation.src.utils import parse_learning_rate 69 | from interval_bound_propagation.src.utils import randomize 70 | from interval_bound_propagation.src.utils import smooth_schedule 71 | from interval_bound_propagation.src.verifiable_wrapper import BatchFlattenWrapper 72 | from interval_bound_propagation.src.verifiable_wrapper import BatchNormWrapper 73 | from interval_bound_propagation.src.verifiable_wrapper import BatchReshapeWrapper 74 | from interval_bound_propagation.src.verifiable_wrapper import ConstWrapper 75 | from interval_bound_propagation.src.verifiable_wrapper import ImageNormWrapper 76 | from interval_bound_propagation.src.verifiable_wrapper import IncreasingMonotonicWrapper 77 | from interval_bound_propagation.src.verifiable_wrapper import LinearConv1dWrapper 78 | from interval_bound_propagation.src.verifiable_wrapper import LinearConv2dWrapper 79 | from interval_bound_propagation.src.verifiable_wrapper import LinearConvWrapper 80 | from interval_bound_propagation.src.verifiable_wrapper import LinearFCWrapper 81 | from interval_bound_propagation.src.verifiable_wrapper import ModelInputWrapper 82 | from interval_bound_propagation.src.verifiable_wrapper import PiecewiseMonotonicWrapper 83 | from interval_bound_propagation.src.verifiable_wrapper import VerifiableWrapper 84 | 85 | 86 | __version__ = '1.10' 87 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Library to train verifiably robust neural networks.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/bounds.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Definition of input bounds to each layer.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import abc 23 | import itertools 24 | 25 | import six 26 | import sonnet as snt 27 | import tensorflow.compat.v1 as tf 28 | 29 | 30 | @six.add_metaclass(abc.ABCMeta) 31 | class AbstractBounds(object): 32 | """Abstract bounds class.""" 33 | 34 | def __init__(self): 35 | self._update_cache_op = None 36 | 37 | @classmethod 38 | @abc.abstractmethod 39 | def convert(cls, bounds): 40 | """Converts another bound type to this type.""" 41 | 42 | @abc.abstractproperty 43 | def shape(self): 44 | """Returns shape (as list) of the tensor, including batch dimension.""" 45 | 46 | def concretize(self): 47 | return self 48 | 49 | def _raise_not_implemented(self, name): 50 | raise NotImplementedError( 51 | '{} modules are not supported by "{}".'.format( 52 | name, self.__class__.__name__)) 53 | 54 | def apply_linear(self, wrapper, w, b): # pylint: disable=unused-argument 55 | self._raise_not_implemented('snt.Linear') 56 | 57 | def apply_conv1d(self, wrapper, w, b, padding, stride): # pylint: disable=unused-argument 58 | self._raise_not_implemented('snt.Conv1D') 59 | 60 | def apply_conv2d(self, wrapper, w, b, padding, strides): # pylint: disable=unused-argument 61 | self._raise_not_implemented('snt.Conv2D') 62 | 63 | def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters): # pylint: disable=unused-argument 64 | self._raise_not_implemented(fn.__name__) 65 | 66 | def apply_piecewise_monotonic_fn(self, wrapper, fn, boundaries, *args): # pylint: disable=unused-argument 67 | self._raise_not_implemented(fn.__name__) 68 | 69 | def apply_batch_norm(self, wrapper, mean, variance, scale, bias, epsilon): # pylint: disable=unused-argument 70 | self._raise_not_implemented('ibp.BatchNorm') 71 | 72 | def apply_batch_reshape(self, wrapper, shape): # pylint: disable=unused-argument 73 | self._raise_not_implemented('snt.BatchReshape') 74 | 75 | def apply_softmax(self, wrapper): # pylint: disable=unused-argument 76 | self._raise_not_implemented('tf.nn.softmax') 77 | 78 | @property 79 | def update_cache_op(self): 80 | """TF op to update cached bounds for re-use across session.run calls.""" 81 | if self._update_cache_op is None: 82 | raise ValueError('Bounds not cached: enable_caching() not called.') 83 | return self._update_cache_op 84 | 85 | def enable_caching(self): 86 | """Enables caching the bounds for re-use across session.run calls.""" 87 | if self._update_cache_op is not None: 88 | raise ValueError('Bounds already cached: enable_caching() called twice.') 89 | self._update_cache_op = self._set_up_cache() 90 | 91 | def _set_up_cache(self): 92 | """Replace fields with cached versions. 93 | 94 | Returns: 95 | TensorFlow op to update the cache. 96 | """ 97 | return tf.no_op() # By default, don't cache. 98 | 99 | def _cache_with_update_op(self, tensor): 100 | """Creates non-trainable variable to cache the tensor across sess.run calls. 
101 | 102 | Args: 103 | tensor: Tensor to cache. 104 | 105 | Returns: 106 | cached_tensor: Non-trainable variable to contain the cached value 107 | of `tensor`. 108 | update_op: TensorFlow op to re-evaluate `tensor` and assign the result 109 | to `cached_tensor`. 110 | """ 111 | cached_tensor = tf.get_variable( 112 | tensor.name.replace(':', '__') + '_ibp_cache', 113 | shape=tensor.shape, dtype=tensor.dtype, trainable=False) 114 | update_op = tf.assign(cached_tensor, tensor) 115 | return cached_tensor, update_op 116 | 117 | 118 | class IntervalBounds(AbstractBounds): 119 | """Axis-aligned bounding box.""" 120 | 121 | def __init__(self, lower, upper): 122 | super(IntervalBounds, self).__init__() 123 | self._lower = lower 124 | self._upper = upper 125 | 126 | @property 127 | def lower(self): 128 | return self._lower 129 | 130 | @property 131 | def upper(self): 132 | return self._upper 133 | 134 | @property 135 | def shape(self): 136 | return self.lower.shape.as_list() 137 | 138 | def __iter__(self): 139 | yield self.lower 140 | yield self.upper 141 | 142 | @classmethod 143 | def convert(cls, bounds): 144 | if isinstance(bounds, tf.Tensor): 145 | return cls(bounds, bounds) 146 | bounds = bounds.concretize() 147 | if not isinstance(bounds, cls): 148 | raise ValueError('Cannot convert "{}" to "{}"'.format(bounds, 149 | cls.__name__)) 150 | return bounds 151 | 152 | def apply_linear(self, wrapper, w, b): 153 | return self._affine(w, b, tf.matmul) 154 | 155 | def apply_conv1d(self, wrapper, w, b, padding, stride): 156 | return self._affine(w, b, tf.nn.conv1d, padding=padding, stride=stride) 157 | 158 | def apply_conv2d(self, wrapper, w, b, padding, strides): 159 | return self._affine(w, b, tf.nn.convolution, 160 | padding=padding, strides=strides) 161 | 162 | def _affine(self, w, b, fn, **kwargs): 163 | c = (self.lower + self.upper) / 2. 164 | r = (self.upper - self.lower) / 2. 165 | c = fn(c, w, **kwargs) 166 | if b is not None: 167 | c = c + b 168 | r = fn(r, tf.abs(w), **kwargs) 169 | return IntervalBounds(c - r, c + r) 170 | 171 | def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters): 172 | args_lower = [self.lower] + [a.lower for a in args] 173 | args_upper = [self.upper] + [a.upper for a in args] 174 | return IntervalBounds(fn(*args_lower), fn(*args_upper)) 175 | 176 | def apply_piecewise_monotonic_fn(self, wrapper, fn, boundaries, *args): 177 | valid_values = [] 178 | for a in [self] + list(args): 179 | vs = [] 180 | vs.append(a.lower) 181 | vs.append(a.upper) 182 | for b in boundaries: 183 | vs.append( 184 | tf.maximum(a.lower, tf.minimum(a.upper, b * tf.ones_like(a.lower)))) 185 | valid_values.append(vs) 186 | outputs = [] 187 | for inputs in itertools.product(*valid_values): 188 | outputs.append(fn(*inputs)) 189 | outputs = tf.stack(outputs, axis=-1) 190 | return IntervalBounds(tf.reduce_min(outputs, axis=-1), 191 | tf.reduce_max(outputs, axis=-1)) 192 | 193 | def apply_batch_norm(self, wrapper, mean, variance, scale, bias, epsilon): 194 | # Element-wise multiplier. 195 | multiplier = tf.rsqrt(variance + epsilon) 196 | if scale is not None: 197 | multiplier *= scale 198 | w = multiplier 199 | # Element-wise bias. 200 | b = -multiplier * mean 201 | if bias is not None: 202 | b += bias 203 | b = tf.squeeze(b, axis=0) 204 | # Because the scale might be negative, we need to apply a strategy similar 205 | # to linear. 206 | c = (self.lower + self.upper) / 2. 207 | r = (self.upper - self.lower) / 2. 
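    # As in _affine: the centre goes through the affine map while the radius
    # is scaled by |w|, taking [c - r, c + r] to [c' - r', c' + r'] with
    # c' = w * c + b and r' = |w| * r.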
208 | c = tf.multiply(c, w) + b 209 | r = tf.multiply(r, tf.abs(w)) 210 | return IntervalBounds(c - r, c + r) 211 | 212 | def apply_batch_reshape(self, wrapper, shape): 213 | return IntervalBounds(snt.BatchReshape(shape)(self.lower), 214 | snt.BatchReshape(shape)(self.upper)) 215 | 216 | def apply_softmax(self, wrapper): 217 | ub = self.upper 218 | lb = self.lower 219 | # Keep diagonal and take opposite bound for non-diagonals. 220 | lbs = tf.matrix_diag(lb) + tf.expand_dims(ub, axis=-2) - tf.matrix_diag(ub) 221 | ubs = tf.matrix_diag(ub) + tf.expand_dims(lb, axis=-2) - tf.matrix_diag(lb) 222 | # Get diagonal entries after softmax operation. 223 | ubs = tf.matrix_diag_part(tf.nn.softmax(ubs)) 224 | lbs = tf.matrix_diag_part(tf.nn.softmax(lbs)) 225 | return IntervalBounds(lbs, ubs) 226 | 227 | def _set_up_cache(self): 228 | self._lower, update_lower_op = self._cache_with_update_op(self._lower) 229 | self._upper, update_upper_op = self._cache_with_update_op(self._upper) 230 | return tf.group([update_lower_op, update_upper_op]) 231 | 232 | 233 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/layer_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Graph construction for dual verification.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from interval_bound_propagation.src import layers 23 | import sonnet as snt 24 | import tensorflow.compat.v1 as tf 25 | 26 | 27 | def conv_output_shape(input_shape, w, padding, strides): 28 | """Calculates the output shape of the given N-D convolution. 29 | 30 | Args: 31 | input_shape: Integer list of length N+1 specifying the non-batch dimensions 32 | of the inputs: [input_height, input_width, input_channels]. 33 | w: (N+2)D tensor of shape (kernel_height, kernel_width, input_channels, 34 | output_channels) containing weights for the convolution. 35 | padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. 36 | strides: Integer list of length N: `[vertical_stride, horizontal_stride]`. 37 | 38 | Returns: 39 | Integer list of length N+1 specifying the non-batch dimensions 40 | of the outputs: [output_height, output_width, output_channels]. 41 | 42 | Raises: 43 | ValueError: if an unsupported convolution dimensionality is encountered. 44 | """ 45 | # Connect a convolution (never to be run) to infer the output's 46 | # spatial structure. 
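  # A zeros placeholder with batch size 1 is sufficient here: only the
  # statically inferred output shape is used, and the op is never evaluated.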
47 | dummy_inputs = tf.zeros(dtype=w.dtype, shape=([1] + input_shape)) 48 | if len(w.shape) == 4: 49 | dummy_outputs = tf.nn.convolution(dummy_inputs, 50 | w, padding=padding, strides=strides) 51 | elif len(w.shape) == 3: 52 | dummy_outputs = tf.nn.conv1d(dummy_inputs, 53 | w, padding=padding, stride=strides[0]) 54 | else: 55 | raise ValueError('Unsupported convolution dimensionality: {}'.format(len(w.shape))) 56 | return dummy_outputs.shape.as_list()[1:] 57 | 58 | 59 | def materialise_conv(w, b, input_shape, padding, strides): 60 | """Converts an N-D convolution to an equivalent linear layer. 61 | 62 | Args: 63 | w: (N+2)D tensor of shape (kernel_height, kernel_width, input_channels, 64 | output_channels) containing the convolution weights. 65 | b: 1D tensor of shape (output_channels) containing the convolution biases, 66 | or `None` if no biases. 67 | input_shape: Integer list of length N+1 specifying the non-batch dimensions 68 | of the inputs: [input_height, input_width, input_channels]. 69 | padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. 70 | strides: Integer list of length N: `[vertical_stride, horizontal_stride]`. 71 | 72 | Returns: 73 | w: 2D tensor of shape (input_height * input_width * input_channels, 74 | output_height * output_width * output_channels) containing weights. 75 | b: 1D tensor of shape (output_height * output_width * output_channels) 76 | containing biases, or `None` if no biases. 77 | 78 | Raises: 79 | ValueError: if an unsupported convolution dimensionality is encountered. 80 | """ 81 | if len(input_shape) == 3: 82 | return _materialise_conv2d(w, b, input_shape[0], input_shape[1], 83 | padding, strides) 84 | elif len(input_shape) == 2: 85 | return _materialise_conv1d(w, b, input_shape[0], padding, strides[0]) 86 | else: 87 | raise ValueError('Unsupported convolution dimensionality: {}'.format(len(input_shape))) 88 | 89 | 90 | def _materialise_conv2d(w, b, input_height, input_width, padding, strides): 91 | """Converts a convolution to an equivalent linear layer. 92 | 93 | Args: 94 | w: 4D tensor of shape (kernel_height, kernel_width, input_channels, 95 | output_channels) containing the convolution weights. 96 | b: 1D tensor of shape (output_channels) containing the convolution biases, 97 | or `None` if no biases. 98 | input_height: height of the input tensor. 99 | input_width: width of the input tensor. 100 | padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. 101 | strides: Integer list of `[vertical_stride, horizontal_stride]`. 102 | 103 | Returns: 104 | w: 2D tensor of shape (input_height * input_width * input_channels, 105 | output_height * output_width * output_channels) containing weights. 106 | b: 1D tensor of shape (output_height * output_width * output_channels) 107 | containing biases, or `None` if no biases. 108 | """ 109 | kernel_height = w.shape[0].value 110 | kernel_width = w.shape[1].value 111 | input_channels = w.shape[2].value 112 | output_channels = w.shape[3].value 113 | 114 | # Temporarily move the input_channels dimension to output_channels. 115 | w = tf.reshape(w, shape=(kernel_height, kernel_width, 1, 116 | input_channels * output_channels)) 117 | # Apply the convolution to elementary (i.e. one-hot) inputs.
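  # Convolving a batch of one-hot images (the identity matrix reshaped so
  # that each row activates a single pixel) yields each pixel's contribution
  # to every output coordinate, i.e. the rows of the equivalent dense
  # weight matrix.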
118 | diagonal_input = tf.reshape( 119 | tf.eye(input_height * input_width, dtype=w.dtype), 120 | shape=[input_height * input_width, input_height, input_width, 1]) 121 | conv = tf.nn.convolution( 122 | diagonal_input, w, 123 | padding=padding, strides=strides) 124 | output_height = conv.shape[1].value 125 | output_width = conv.shape[2].value 126 | # conv is of shape (input_height * input_width, output_height, output_width, 127 | # input_channels * output_channels). 128 | # Reshape it to (input_height * input_width * input_channels, 129 | # output_height * output_width * output_channels). 130 | w = tf.reshape(conv, shape=( 131 | [input_height * input_width, 132 | output_height, output_width, 133 | input_channels, output_channels])) 134 | w = tf.transpose(w, perm=[0, 3, 1, 2, 4]) 135 | w = tf.reshape(w, shape=( 136 | [input_height * input_width * input_channels, 137 | output_height * output_width * output_channels])) 138 | 139 | # Broadcast b over spatial dimensions. 140 | b = tf.tile(b, [output_height * output_width]) if b is not None else None 141 | 142 | return w, b 143 | 144 | 145 | def _materialise_conv1d(w, b, input_length, padding, stride): 146 | """Converts a convolution to an equivalent linear layer. 147 | 148 | Args: 149 | w: 3D tensor of shape (kernel_length, input_channels, 150 | output_channels) containing the convolution weights. 151 | b: 1D tensor of shape (output_channels) containing the convolution biases, 152 | or `None` if no biases. 153 | input_length: length of the input tensor. 154 | padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. 155 | stride: Integer stride. 156 | 157 | Returns: 158 | w: 2D tensor of shape (input_length * input_channels, 159 | output_length * output_channels) containing weights. 160 | b: 1D tensor of shape (output_length * output_channels) 161 | containing biases, or `None` if no biases. 162 | """ 163 | kernel_length = w.shape[0].value 164 | input_channels = w.shape[1].value 165 | output_channels = w.shape[2].value 166 | 167 | # Temporarily move the input_channels dimension to output_channels. 168 | w = tf.reshape(w, shape=(kernel_length, 1, 169 | input_channels * output_channels)) 170 | # Apply the convolution to elementary (i.e. one-hot) inputs. 171 | diagonal_input = tf.reshape( 172 | tf.eye(input_length, dtype=w.dtype), 173 | shape=[input_length, input_length, 1]) 174 | conv = tf.nn.conv1d( 175 | diagonal_input, w, 176 | padding=padding, stride=stride) 177 | output_length = conv.shape[1].value 178 | # conv is of shape (input_length, output_length, 179 | # input_channels * output_channels). 180 | # Reshape it to (input_length * input_channels, 181 | # output_length * output_channels). 182 | w = tf.reshape(conv, shape=( 183 | [input_length, 184 | output_length, 185 | input_channels, output_channels])) 186 | w = tf.transpose(w, perm=[0, 2, 1, 3]) 187 | w = tf.reshape(w, shape=( 188 | [input_length * input_channels, 189 | output_length * output_channels])) 190 | 191 | # Broadcast b over spatial dimensions. 192 | b = tf.tile(b, [output_length]) if b is not None else None 193 | 194 | return w, b 195 | 196 | 197 | def decode_batchnorm(batchnorm_module): 198 | """Calculates the neuron-wise multipliers and biases of the batch norm layer. 199 | 200 | Note that, in the case of a convolution, the returned bias will have 201 | spatial dimensions. 202 | 203 | Args: 204 | batchnorm_module: `snt.BatchNorm` module. 
205 | 206 | Returns: 207 | w: 1D tensor of shape (output_size) or 3D tensor of shape 208 | (output_height, output_width, output_channels) containing 209 | neuron-wise multipliers for the batch norm layer. 210 | b: 1D tensor of shape (output_size) or 3D tensor of shape 211 | (output_height, output_width, output_channels) containing 212 | neuron-wise biases for the batch norm layer. 213 | """ 214 | if isinstance(batchnorm_module, layers.BatchNorm): 215 | mean = batchnorm_module.mean 216 | variance = batchnorm_module.variance 217 | variance_epsilon = batchnorm_module.epsilon 218 | scale = batchnorm_module.scale 219 | offset = batchnorm_module.bias 220 | 221 | else: 222 | assert isinstance(batchnorm_module, snt.BatchNorm) 223 | mean = batchnorm_module.moving_mean 224 | variance = batchnorm_module.moving_variance 225 | variance_epsilon = batchnorm_module._eps # pylint: disable=protected-access 226 | try: 227 | scale = batchnorm_module.gamma 228 | except snt.Error: 229 | scale = None 230 | try: 231 | offset = batchnorm_module.beta 232 | except snt.Error: 233 | offset = None 234 | 235 | w = tf.rsqrt(variance + variance_epsilon) 236 | if scale is not None: 237 | w *= scale 238 | 239 | b = -w * mean 240 | if offset is not None: 241 | b += offset 242 | 243 | # Batchnorm vars have a redundant leading dim. 244 | w = tf.squeeze(w, axis=0) 245 | b = tf.squeeze(b, axis=0) 246 | return w, b 247 | 248 | 249 | def combine_with_batchnorm(w, b, batchnorm_module): 250 | """Combines a linear layer and a batch norm into a single linear layer. 251 | 252 | Calculates the weights and biases of the linear layer formed by 253 | applying the specified linear layer followed by the batch norm. 254 | 255 | Note that, in the case of a convolution, the returned bias will have 256 | spatial dimensions. 257 | 258 | Args: 259 | w: 2D tensor of shape (input_size, output_size) or 4D tensor of shape 260 | (kernel_height, kernel_width, input_channels, output_channels) containing 261 | weights for the linear layer. 262 | b: 1D tensor of shape (output_size) or (output_channels) containing biases 263 | for the linear layer, or `None` if no bias. 264 | batchnorm_module: `snt.BatchNorm` module. 265 | 266 | Returns: 267 | w: 2D tensor of shape (input_size, output_size) or 4D tensor of shape 268 | (kernel_height, kernel_width, input_channels, output_channels) containing 269 | weights for the combined layer. 270 | b: 1D tensor of shape (output_size) or 3D tensor of shape 271 | (output_height, output_width, output_channels) containing 272 | biases for the combined layer. 273 | """ 274 | if b is None: 275 | b = tf.zeros(dtype=w.dtype, shape=()) 276 | 277 | w_bn, b_bn = decode_batchnorm(batchnorm_module) 278 | return w * w_bn, b * w_bn + b_bn 279 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/layers.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Additional Sonnet modules.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import sonnet as snt 23 | import tensorflow.compat.v1 as tf 24 | 25 | 26 | # Slightly altered version of snt.BatchNorm that makes it easy to grab which 27 | # mean and variance are currently in use (whether the last _build was 28 | # invoked with is_training=True or False). 29 | # Modifications include: 30 | # - Removing the fused option (which we do not support). 31 | # - Removing test_local_stats (which we do not support). 32 | # - Providing mean and variance properties. 33 | # - Providing scale and bias properties that return None if there are none. 34 | class BatchNorm(snt.BatchNorm): 35 | """Batch normalization module, including optional affine transformation.""" 36 | 37 | def __init__(self, axis=None, offset=True, scale=False, 38 | decay_rate=0.999, eps=1e-3, initializers=None, 39 | partitioners=None, regularizers=None, 40 | update_ops_collection=None, name='batch_norm'): 41 | """Constructs a BatchNorm module. See original code for more details.""" 42 | super(BatchNorm, self).__init__( 43 | axis=axis, offset=offset, scale=scale, decay_rate=decay_rate, eps=eps, 44 | initializers=initializers, partitioners=partitioners, 45 | regularizers=regularizers, fused=False, 46 | update_ops_collection=update_ops_collection, name=name) 47 | 48 | def _build_statistics(self, input_batch, axis, use_batch_stats, stat_dtype): 49 | """Builds the statistics part of the graph when using moving variance.""" 50 | self._mean, self._variance = super(BatchNorm, self)._build_statistics( 51 | input_batch, axis, use_batch_stats, stat_dtype) 52 | return self._mean, self._variance 53 | 54 | def _build(self, input_batch, is_training=True, test_local_stats=False, 55 | reuse=False): 56 | """Connects the BatchNorm module into the graph. 57 | 58 | Args: 59 | input_batch: A Tensor of arbitrary dimension. By default, the final 60 | dimension is not reduced over when computing the minibatch statistics. 61 | is_training: A boolean to indicate if the module should be connected in 62 | training mode, meaning the moving averages are updated. Can be a Tensor. 63 | test_local_stats: A boolean to indicate if the statistics should be from 64 | the local batch. When is_training is True, test_local_stats is not used. 65 | reuse: If True, the statistics computed by a previous call to _build 66 | are used and is_training is ignored. Otherwise, behaves like a normal 67 | batch normalization layer. 68 | 69 | Returns: 70 | A tensor with the same shape as `input_batch`. 71 | 72 | Raises: 73 | ValueError: If `axis` is not valid for the 74 | input shape or has negative entries.
75 | """ 76 | if reuse: 77 | self._ensure_is_connected() 78 | return tf.nn.batch_normalization( 79 | input_batch, self._mean, self._variance, self._beta, self._gamma, 80 | self._eps, name='batch_norm') 81 | else: 82 | return super(BatchNorm, self)._build(input_batch, is_training, 83 | test_local_stats=test_local_stats) 84 | 85 | @property 86 | def scale(self): 87 | self._ensure_is_connected() 88 | return tf.stop_gradient(self._gamma) if self._gamma is not None else None 89 | 90 | @property 91 | def bias(self): 92 | self._ensure_is_connected() 93 | return tf.stop_gradient(self._beta) if self._beta is not None else None 94 | 95 | @property 96 | def mean(self): 97 | self._ensure_is_connected() 98 | return tf.stop_gradient(self._mean) 99 | 100 | @property 101 | def variance(self): 102 | self._ensure_is_connected() 103 | return tf.stop_gradient(self._variance) 104 | 105 | @property 106 | def epsilon(self): 107 | self._ensure_is_connected() 108 | return self._eps 109 | 110 | 111 | class ImageNorm(snt.AbstractModule): 112 | """Module that does per channel normalization.""" 113 | 114 | def __init__(self, mean, std, name='image_norm'): 115 | """Constructs a module that does (x[:, :, c] - mean[c]) / std[c].""" 116 | super(ImageNorm, self).__init__(name=name) 117 | if isinstance(mean, float): 118 | mean = [mean] 119 | if isinstance(std, float): 120 | std = [std] 121 | scale = [] 122 | for s in std: 123 | if s <= 0.: 124 | raise ValueError('Cannot use negative standard deviations.') 125 | scale.append(1. / s) 126 | with self._enter_variable_scope(): 127 | # Using broadcasting. 128 | self._scale = tf.constant(scale, dtype=tf.float32) 129 | self._offset = tf.constant(mean, dtype=tf.float32) 130 | 131 | def _build(self, inputs): 132 | return self.apply(inputs) 133 | 134 | @property 135 | def scale(self): 136 | return self._scale 137 | 138 | @property 139 | def offset(self): 140 | return self._offset 141 | 142 | # Provide a function that allows to use the IncreasingMonotonicWrapper. 143 | def apply(self, inputs): 144 | return (inputs - self._offset) * self._scale 145 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/loss.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Helper to keep track of the different losses.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import collections 23 | 24 | import sonnet as snt 25 | import tensorflow.compat.v1 as tf 26 | 27 | # Used to pick the least violated specification. 
28 | _BIG_NUMBER = 1e25 29 | 30 | 31 | ScalarMetrics = collections.namedtuple('ScalarMetrics', [ 32 | 'nominal_accuracy', 33 | 'verified_accuracy', 34 | 'attack_accuracy', 35 | 'attack_success']) 36 | 37 | 38 | ScalarLosses = collections.namedtuple('ScalarLosses', [ 39 | 'nominal_cross_entropy', 40 | 'attack_cross_entropy', 41 | 'verified_loss']) 42 | 43 | 44 | class Losses(snt.AbstractModule): 45 | """Helper to compute our losses.""" 46 | 47 | def __init__(self, predictor, specification=None, pgd_attack=None, 48 | interval_bounds_loss_type='xent', 49 | interval_bounds_hinge_margin=10., 50 | label_smoothing=0.): 51 | super(Losses, self).__init__(name='losses') 52 | self._predictor = predictor 53 | self._specification = specification 54 | self._attack = pgd_attack 55 | # Loss type can be any combination of: 56 | # xent: cross-entropy loss 57 | # hinge: hinge loss 58 | # softplus: softplus loss 59 | # with 60 | # all: using all specifications. 61 | # most: using only the specification that is the most violated. 62 | # least: using only the specification that is the least violated. 63 | # random_n: using a random subset of the specifications. 64 | # E.g.: "xent_most" or "hinge_random_3". 65 | tokens = interval_bounds_loss_type.split('_', 1) 66 | if len(tokens) == 1: 67 | loss_type, loss_mode = tokens[0], 'all' 68 | else: 69 | loss_type, loss_mode = tokens 70 | if loss_mode.startswith('random'): 71 | loss_mode, num_samples = loss_mode.split('_', 1) 72 | self._interval_bounds_loss_n = int(num_samples) 73 | if loss_type not in ('xent', 'hinge', 'softplus'): 74 | raise ValueError('interval_bounds_loss_type must be either "xent", ' 75 | '"hinge" or "softplus".') 76 | if loss_mode not in ('all', 'most', 'random', 'least'): 77 | raise ValueError('interval_bounds_loss_type must be followed by either ' 78 | '"all", "most", "random_N" or "least".') 79 | self._interval_bounds_loss_type = loss_type 80 | self._interval_bounds_loss_mode = loss_mode 81 | self._interval_bounds_hinge_margin = interval_bounds_hinge_margin 82 | self._label_smoothing = label_smoothing 83 | 84 | def _build(self, labels): 85 | self._build_nominal_loss(labels) 86 | self._build_verified_loss(labels) 87 | self._build_attack_loss(labels) 88 | 89 | def _build_nominal_loss(self, labels): 90 | """Build natural cross-entropy loss on clean data.""" 91 | # Cross-entropy. 92 | nominal_logits = self._predictor.logits 93 | if self._label_smoothing > 0: 94 | num_classes = nominal_logits.shape[1].value 95 | one_hot_labels = tf.one_hot(labels, num_classes) 96 | smooth_positives = 1. - self._label_smoothing 97 | smooth_negatives = self._label_smoothing / num_classes 98 | one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives 99 | nominal_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( 100 | labels=one_hot_labels, logits=nominal_logits) 101 | self._one_hot_labels = one_hot_labels 102 | else: 103 | nominal_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 104 | labels=labels, logits=nominal_logits) 105 | self._cross_entropy = tf.reduce_mean(nominal_cross_entropy) 106 | # Accuracy. 107 | nominal_correct_examples = tf.equal(labels, tf.argmax(nominal_logits, 1)) 108 | self._nominal_accuracy = tf.reduce_mean( 109 | tf.cast(nominal_correct_examples, tf.float32)) 110 | 111 | def _get_specification_bounds(self): 112 | """Get upper bounds on specification. Used for building verified loss.""" 113 | ibp_bounds = self._specification(self._predictor.modules) 114 | # Compute verified accuracy using IBP bounds.
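    # ibp_bounds has shape (batch_size, num_specifications). An example is
    # verified only if every specification's upper bound is non-positive,
    # i.e. if the maximum over specifications is <= 0.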
115 | v = tf.reduce_max(ibp_bounds, axis=1) 116 | self._interval_bounds_accuracy = tf.reduce_mean( 117 | tf.cast(v <= 0., tf.float32)) 118 | return ibp_bounds 119 | 120 | def _build_verified_loss(self, labels): 121 | """Build verified loss using an upper bound on specification.""" 122 | if not self._specification: 123 | self._verified_loss = tf.constant(0.) 124 | self._interval_bounds_accuracy = tf.constant(0.) 125 | return 126 | # Interval bounds. 127 | bounds = self._get_specification_bounds() 128 | # Select specifications. 129 | if self._interval_bounds_loss_mode == 'all': 130 | pass # Keep bounds the way it is. 131 | elif self._interval_bounds_loss_mode == 'most': 132 | bounds = tf.reduce_max(bounds, axis=1, keepdims=True) 133 | elif self._interval_bounds_loss_mode == 'random': 134 | idx = tf.random.uniform( 135 | [tf.shape(bounds)[0], self._interval_bounds_loss_n], 136 | 0, tf.shape(bounds)[1], dtype=tf.int32) 137 | bounds = tf.batch_gather(bounds, idx) 138 | else: 139 | assert self._interval_bounds_loss_mode == 'least' 140 | # This picks the least violated constraint. 141 | mask = tf.cast(bounds < 0., tf.float32) 142 | smallest_violation = tf.reduce_min( 143 | bounds + mask * _BIG_NUMBER, axis=1, keepdims=True) 144 | has_violations = tf.less( 145 | tf.reduce_sum(mask, axis=1, keepdims=True) + .5, 146 | tf.cast(tf.shape(bounds)[1], tf.float32)) 147 | largest_bounds = tf.reduce_max(bounds, axis=1, keepdims=True) 148 | bounds = tf.where(has_violations, smallest_violation, largest_bounds) 149 | 150 | if self._interval_bounds_loss_type == 'xent': 151 | v = tf.concat( 152 | [bounds, tf.zeros([tf.shape(bounds)[0], 1], dtype=bounds.dtype)], 153 | axis=1) 154 | l = tf.concat( 155 | [tf.zeros_like(bounds), 156 | tf.ones([tf.shape(bounds)[0], 1], dtype=bounds.dtype)], 157 | axis=1) 158 | self._verified_loss = tf.reduce_mean( 159 | tf.nn.softmax_cross_entropy_with_logits_v2( 160 | labels=tf.stop_gradient(l), logits=v)) 161 | elif self._interval_bounds_loss_type == 'softplus': 162 | self._verified_loss = tf.reduce_mean( 163 | tf.nn.softplus(bounds + self._interval_bounds_hinge_margin)) 164 | else: 165 | assert self._interval_bounds_loss_type == 'hinge' 166 | self._verified_loss = tf.reduce_mean( 167 | tf.maximum(bounds, -self._interval_bounds_hinge_margin)) 168 | 169 | def _build_attack_loss(self, labels): 170 | """Build adversarial loss using PGD attack.""" 171 | # PGD attack. 172 | if not self._attack: 173 | self._attack_accuracy = tf.constant(0.) 174 | self._attack_success = tf.constant(1.) 175 | self._attack_cross_entropy = tf.constant(0.)
176 | return 177 | if not isinstance(self._predictor.inputs, tf.Tensor): 178 | raise ValueError('Multiple inputs is not supported.') 179 | self._attack(self._predictor.inputs, labels) 180 | correct_examples = tf.equal(labels, tf.argmax(self._attack.logits, 1)) 181 | self._attack_accuracy = tf.reduce_mean( 182 | tf.cast(correct_examples, tf.float32)) 183 | self._attack_success = tf.reduce_mean( 184 | tf.cast(self._attack.success, tf.float32)) 185 | if self._label_smoothing > 0: 186 | attack_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( 187 | labels=self._one_hot_labels, logits=self._attack.logits) 188 | else: 189 | attack_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 190 | labels=labels, logits=self._attack.logits) 191 | self._attack_cross_entropy = tf.reduce_mean(attack_cross_entropy) 192 | 193 | @property 194 | def scalar_metrics(self): 195 | self._ensure_is_connected() 196 | return ScalarMetrics(self._nominal_accuracy, 197 | self._interval_bounds_accuracy, 198 | self._attack_accuracy, 199 | self._attack_success) 200 | 201 | @property 202 | def scalar_losses(self): 203 | self._ensure_is_connected() 204 | return ScalarLosses(self._cross_entropy, 205 | self._attack_cross_entropy, 206 | self._verified_loss) 207 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/relative_bounds.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Interval bounds expressed relative to a nominal value.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from interval_bound_propagation.src import bounds as basic_bounds 23 | import sonnet as snt 24 | import tensorflow.compat.v1 as tf 25 | 26 | 27 | class RelativeIntervalBounds(basic_bounds.AbstractBounds): 28 | """Upper and lower bounds, as a delta relative to nominal values.""" 29 | 30 | def __init__(self, lower_offset, upper_offset, nominal): 31 | super(RelativeIntervalBounds, self).__init__() 32 | self._lower_offset = lower_offset 33 | self._upper_offset = upper_offset 34 | self._nominal = nominal 35 | 36 | @property 37 | def lower_offset(self): 38 | """Returns lower bounds, expressed relative to nominal values.""" 39 | return self._lower_offset 40 | 41 | @property 42 | def upper_offset(self): 43 | """Returns upper bounds, expressed relative to nominal values.""" 44 | return self._upper_offset 45 | 46 | @property 47 | def nominal(self): 48 | return self._nominal 49 | 50 | @property 51 | def lower(self): 52 | """Returns absolute lower bounds.""" 53 | return self.nominal + self.lower_offset 54 | 55 | @property 56 | def upper(self): 57 | """Returns absolute upper bounds.""" 58 | return self.nominal + self.upper_offset 59 | 60 | @property 61 | def shape(self): 62 | return self.lower_offset.shape.as_list() 63 | 64 | @classmethod 65 | def convert(cls, bounds): 66 | if isinstance(bounds, tf.Tensor): 67 | return cls(tf.zeros_like(bounds), tf.zeros_like(bounds), bounds) 68 | bounds = bounds.concretize() 69 | if not isinstance(bounds, cls): 70 | raise ValueError('Cannot convert "{}" to "{}"'.format(bounds, 71 | cls.__name__)) 72 | return bounds 73 | 74 | def apply_batch_reshape(self, wrapper, shape): 75 | """Propagates the bounds through a reshape. 76 | 77 | Args: 78 | wrapper: Contains prior bounds from a previous iteration. 79 | shape: output shape, excluding the batch dimension. 80 | 81 | Returns: 82 | Output bounds. 83 | """ 84 | reshape = snt.BatchReshape(shape) 85 | return RelativeIntervalBounds( 86 | reshape(self.lower_offset), 87 | reshape(self.upper_offset), 88 | reshape(self.nominal)) 89 | 90 | def apply_linear(self, wrapper, w, b): 91 | """Propagates the bounds through a linear layer. 92 | 93 | Args: 94 | wrapper: Contains prior bounds from a previous iteration. 95 | w: 2D tensor of shape (input_size, output_size) containing 96 | weights for the linear layer. 97 | b: 1D tensor of shape (output_size) containing biases for the linear 98 | layer, or `None` if no bias. 99 | 100 | Returns: 101 | Output bounds. 102 | """ 103 | w_pos = tf.maximum(w, 0) 104 | w_neg = tf.minimum(w, 0) 105 | lb = (tf.matmul(self.lower_offset, w_pos) + 106 | tf.matmul(self.upper_offset, w_neg)) 107 | ub = (tf.matmul(self.upper_offset, w_pos) + 108 | tf.matmul(self.lower_offset, w_neg)) 109 | 110 | nominal_out = tf.matmul(self.nominal, w) 111 | if b is not None: 112 | nominal_out += b 113 | 114 | return RelativeIntervalBounds(lb, ub, nominal_out) 115 | 116 | def apply_conv1d(self, wrapper, w, b, padding, stride): 117 | """Propagates the bounds through a 1D convolution layer. 118 | 119 | Args: 120 | wrapper: Contains prior bounds from a previous iteration. 121 | w: 3D tensor of shape (kernel_length, input_channels, output_channels) 122 | containing weights for the convolution. 123 | b: 1D tensor of shape (output_channels) containing biases for the 124 | convolution, or `None` if no bias. 
125 | padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. 126 | stride: Integer stride. 127 | 128 | Returns: 129 | Output bounds. 130 | """ 131 | w_pos = tf.maximum(w, 0) 132 | w_neg = tf.minimum(w, 0) 133 | lb = (tf.nn.conv1d(self.lower_offset, w_pos, 134 | padding=padding, stride=stride) + 135 | tf.nn.conv1d(self.upper_offset, w_neg, 136 | padding=padding, stride=stride)) 137 | ub = (tf.nn.conv1d(self.upper_offset, w_pos, 138 | padding=padding, stride=stride) + 139 | tf.nn.conv1d(self.lower_offset, w_neg, 140 | padding=padding, stride=stride)) 141 | 142 | nominal_out = tf.nn.conv1d(self.nominal, w, 143 | padding=padding, stride=stride) 144 | if b is not None: 145 | nominal_out += b 146 | 147 | return RelativeIntervalBounds(lb, ub, nominal_out) 148 | 149 | def apply_conv2d(self, wrapper, w, b, padding, strides): 150 | """Propagates the bounds through a 2D convolution layer. 151 | 152 | Args: 153 | wrapper: Contains prior bounds from a previous iteration. 154 | w: 4D tensor of shape (kernel_height, kernel_width, input_channels, 155 | output_channels) containing weights for the convolution. 156 | b: 1D tensor of shape (output_channels) containing biases for the 157 | convolution, or `None` if no bias. 158 | padding: `"VALID"` or `"SAME"`, the convolution's padding algorithm. 159 | strides: Integer list of length N: `[vertical_stride, horizontal_stride]`. 160 | 161 | Returns: 162 | Output bounds. 163 | """ 164 | w_pos = tf.maximum(w, 0) 165 | w_neg = tf.minimum(w, 0) 166 | lb = (tf.nn.convolution(self.lower_offset, w_pos, 167 | padding=padding, strides=strides) + 168 | tf.nn.convolution(self.upper_offset, w_neg, 169 | padding=padding, strides=strides)) 170 | ub = (tf.nn.convolution(self.upper_offset, w_pos, 171 | padding=padding, strides=strides) + 172 | tf.nn.convolution(self.lower_offset, w_neg, 173 | padding=padding, strides=strides)) 174 | 175 | nominal_out = tf.nn.convolution(self.nominal, w, 176 | padding=padding, strides=strides) 177 | if b is not None: 178 | nominal_out += b 179 | 180 | return RelativeIntervalBounds(lb, ub, nominal_out) 181 | 182 | def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters): 183 | """Propagates the bounds through a non-linear activation layer or `add` op. 184 | 185 | Args: 186 | wrapper: Contains prior bounds from a previous iteration. 187 | fn: Callable implementing the non-linear activation function. Its name 188 | may be one of: sigmoid, relu, tanh, elu, leaky_relu. 189 | Anything else denotes identity. 190 | *args: Other inputs' bounds, for a multi-input node (e.g. Add). 191 | **parameters: Optional parameters if activation is parameterised, e.g. 192 | `{'alpha': 0.2}` for leaky ReLu. 193 | 194 | Returns: 195 | Output bounds.
196 | """ 197 | if fn.__name__ in ('add', 'reduce_mean', 'reduce_sum', 'avg_pool'): 198 | return RelativeIntervalBounds( 199 | fn(self.lower_offset, *[bounds.lower_offset for bounds in args]), 200 | fn(self.upper_offset, *[bounds.upper_offset for bounds in args]), 201 | fn(self.nominal, *[bounds.nominal for bounds in args])) 202 | else: 203 | assert not args, 'unary function expected' 204 | nominal_out = fn(self.nominal) 205 | if fn.__name__ == 'reduce_max': 206 | lb, ub = _maxpool_bounds(fn, None, None, 207 | self.lower_offset, self.upper_offset, 208 | nominal_in=self.nominal, 209 | nominal_out=nominal_out) 210 | elif fn.__name__ == 'max_pool': 211 | lb, ub = _maxpool_bounds(fn, 212 | parameters['ksize'][1:-1], 213 | parameters['strides'][1:-1], 214 | self.lower_offset, self.upper_offset, 215 | nominal_in=self.nominal, 216 | nominal_out=nominal_out) 217 | else: 218 | lb, ub = _activation_bounds(fn, self.lower_offset, self.upper_offset, 219 | nominal_in=self.nominal, 220 | parameters=parameters) 221 | return RelativeIntervalBounds(lb, ub, nominal_out) 222 | 223 | def apply_batch_norm(self, wrapper, mean, variance, scale, bias, epsilon): 224 | """Propagates the bounds through a batch norm layer. 225 | 226 | Args: 227 | wrapper: Contains prior bounds from a previous iteration. 228 | mean: Learnt batch mean. 229 | variance: Learnt batch variance. 230 | scale: Trained component-wise scale variable. 231 | bias: Trained component-wise bias variable. 232 | epsilon: Epsilon for avoiding instability when `variance` is very small. 233 | 234 | Returns: 235 | Output bounds. 236 | """ 237 | lb = tf.nn.batch_normalization(self.lower_offset, 238 | tf.zeros_like(mean), variance, 239 | None, scale, epsilon) 240 | ub = tf.nn.batch_normalization(self.upper_offset, 241 | tf.zeros_like(mean), variance, 242 | None, scale, epsilon) 243 | # It's just possible that the batchnorm's scale is negative. 244 | lb, ub = tf.minimum(lb, ub), tf.maximum(lb, ub) 245 | 246 | nominal_out = tf.nn.batch_normalization(self.nominal, 247 | mean, variance, 248 | bias, scale, epsilon) 249 | return RelativeIntervalBounds(lb, ub, nominal_out) 250 | 251 | def _set_up_cache(self): 252 | self._lower_offset, update_lower = self._cache_with_update_op( 253 | self._lower_offset) 254 | self._upper_offset, update_upper = self._cache_with_update_op( 255 | self._upper_offset) 256 | return tf.group([update_lower, update_upper]) 257 | 258 | 259 | def _maxpool_bounds(module, kernel_shape, strides, lb_in, ub_in, 260 | nominal_in, nominal_out): 261 | """Calculates naive bounds on output of an N-D max pool layer. 262 | 263 | Args: 264 | module: Callable for max-pool operation. 265 | kernel_shape: Integer list of `[kernel_height, kernel_width]`, 266 | or `None` to aggregate over the layer`s entire spatial extent. 267 | strides: Integer list of `[vertical_stride, horizontal_stride]`. 268 | lb_in: (N+2)D tensor of shape (batch_size, input_height, input_width, 269 | layer_channels) containing lower bounds on the inputs to the 270 | max pool layer. 271 | ub_in: (N+2)D tensor of shape (batch_size, input_height, input_width, 272 | layer_channels) containing upper bounds on the inputs to the 273 | max pool layer. 274 | nominal_in: (N+2)D tensor of shape (batch_size, input_height, input_width, 275 | layer_channels) containing nominal input values. 276 | Inputs bounds are interpreted relative to this. 277 | nominal_out: (N+2)D tensor of shape (batch_size, output_height,output_width, 278 | layer_channels) containing nominal input values. 
279 | The returned output bounds are expressed relative to this. 280 | 281 | Returns: 282 | lb_out: (N+2)D tensor of shape (batch_size, output_height, output_width, 283 | layer_channels) with lower bounds on the outputs of the max pool layer. 284 | ub_out: (N+2)D tensor of shape (batch_size, output_height, output_width, 285 | layer_channels) with upper bounds on the outputs of the max pool layer. 286 | """ 287 | if kernel_shape is None: 288 | nominal_out = tf.reduce_max(nominal_in, 289 | axis=list(range(1, nominal_in.shape.ndims-1)), 290 | keepdims=True) 291 | return (module((nominal_in - nominal_out) + lb_in), 292 | module((nominal_in - nominal_out) + ub_in)) 293 | else: 294 | # Must perform the max on absolute bounds, as the kernels may overlap. 295 | # TODO(stanforth) investigate a more numerically stable implementation 296 | del strides 297 | return (module(nominal_in + lb_in) - nominal_out, 298 | module(nominal_in + ub_in) - nominal_out) 299 | 300 | 301 | def _activation_bounds(nl_fun, lb_in, ub_in, nominal_in, parameters=None): 302 | """Calculates naive bounds on output of an activation layer. 303 | 304 | Input bounds are interpreted relative to `nominal_in`, and the returned 305 | output bounds are expressed relative to `nominal_out=nl_fun(nominal_in)`. 306 | 307 | Args: 308 | nl_fun: Callable implementing the activation function itself. 309 | lb_in: (N+2)D tensor of shape (batch_size, layer_height, layer_width, 310 | layer_channels) containing lower bounds on the pre-activations. 311 | ub_in: (N+2)D tensor of shape (batch_size, layer_height, layer_width, 312 | layer_channels) containing upper bounds on the pre-activations. 313 | nominal_in: (N+2)D tensor of shape (batch_size, input_height, input_width, 314 | layer_channels) containing nominal input values. 315 | parameters: Optional parameter dict if activation is parameterised, e.g. 316 | `{'alpha': 0.2}` for leaky ReLU. 317 | 318 | Returns: 319 | lb_out: 2D tensor of shape (batch_size, layer_size) or 320 | 4D tensor of shape (batch_size, layer_height, layer_width, layer_channels) 321 | with lower bounds on the activations. 322 | ub_out: 2D tensor of shape (batch_size, layer_size) or 323 | 4D tensor of shape (batch_size, layer_height, layer_width, layer_channels) 324 | with upper bounds on the activations. 325 | """ 326 | if nl_fun.__name__ == 'relu': 327 | return ( 328 | tf.maximum(tf.minimum(nominal_in, 0.) + lb_in, 329 | tf.minimum(-nominal_in, 0.)), # pylint:disable=invalid-unary-operand-type 330 | tf.maximum(tf.minimum(nominal_in, 0.) + ub_in, 331 | tf.minimum(-nominal_in, 0.))) # pylint:disable=invalid-unary-operand-type 332 | elif nl_fun.__name__ == 'leaky_relu': 333 | alpha = parameters['alpha'] 334 | return ( 335 | tf.maximum( 336 | lb_in + tf.minimum(nominal_in, 0.) * (1. - alpha), 337 | alpha * lb_in + tf.minimum(-nominal_in, 0.) * (1. - alpha)), # pylint:disable=invalid-unary-operand-type 338 | tf.maximum( 339 | ub_in + tf.minimum(nominal_in, 0.) * (1. - alpha), 340 | alpha * ub_in + tf.minimum(-nominal_in, 0.) * (1. - alpha))) # pylint:disable=invalid-unary-operand-type 341 | else: 342 | nominal_out = nl_fun(nominal_in) 343 | return (nl_fun(nominal_in + lb_in) - nominal_out, 344 | nl_fun(nominal_in + ub_in) - nominal_out) 345 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/simplex_bounds.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Bounds represented as a simplex of vertices in an embedding space.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from interval_bound_propagation.src import bounds as basic_bounds 23 | from interval_bound_propagation.src import relative_bounds 24 | import sonnet as snt 25 | import tensorflow.compat.v1 as tf 26 | 27 | 28 | class SimplexBounds(basic_bounds.AbstractBounds): 29 | """Specifies a bounding simplex within an embedding space.""" 30 | 31 | def __init__(self, vertices, nominal, r): 32 | """Initialises the simplex bounds. 33 | 34 | Args: 35 | vertices: Tensor of shape (num_vertices, *input_shape) 36 | or of shape (batch_size, num_vertices, *input_shape) 37 | containing the vertices in embedding space. 38 | nominal: Tensor of shape (batch_size, *input_shape) specifying 39 | the unperturbed inputs in embedding space, where `*input_shape` 40 | denotes either (embedding_size,) for flat input (e.g. bag-of-words) 41 | or (input_length, embedding_channels) for sequence input. 42 | r: Scalar specifying the dilation factor of the simplex. The dilated 43 | simplex will have vertices `nominal + r * (vertices - nominal)`. 44 | """ 45 | super(SimplexBounds, self).__init__() 46 | self._vertices = vertices 47 | self._nominal = nominal 48 | self._r = r 49 | 50 | @property 51 | def vertices(self): 52 | return self._vertices 53 | 54 | @property 55 | def nominal(self): 56 | return self._nominal 57 | 58 | @property 59 | def r(self): 60 | return self._r 61 | 62 | @property 63 | def shape(self): 64 | return self.nominal.shape.as_list() 65 | 66 | @classmethod 67 | def convert(cls, bounds): 68 | if not isinstance(bounds, cls): 69 | raise ValueError('Cannot convert "{}" to "{}"'.format(bounds, 70 | cls.__name__)) 71 | return bounds 72 | 73 | def apply_batch_reshape(self, wrapper, shape): 74 | reshape = snt.BatchReshape(shape) 75 | if self.vertices.shape.ndims == self.nominal.shape.ndims: 76 | reshape_vertices = reshape 77 | else: 78 | reshape_vertices = snt.BatchReshape(shape, preserve_dims=2) 79 | return SimplexBounds(reshape_vertices(self.vertices), 80 | reshape(self.nominal), 81 | self.r) 82 | 83 | def apply_linear(self, wrapper, w, b): 84 | mapped_centres = tf.matmul(self.nominal, w) 85 | mapped_vertices = tf.tensordot(self.vertices, w, axes=1) 86 | 87 | lb, ub = _simplex_bounds(mapped_vertices, mapped_centres, self.r, -2) 88 | 89 | nominal_out = tf.matmul(self.nominal, w) 90 | if b is not None: 91 | nominal_out += b 92 | 93 | return relative_bounds.RelativeIntervalBounds(lb, ub, nominal_out) 94 | 95 | def apply_conv1d(self, wrapper, w, b, padding, stride): 96 | mapped_centres = tf.nn.conv1d(self.nominal, w, 97 | padding=padding, stride=stride) 98 | if self.vertices.shape.ndims == 3: 99 | # `self.vertices` has no batch dimension; its shape is 100 | # (num_vertices, input_length, embedding_channels).
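# The `num_vertices` axis then plays the role of conv1d's batch axis, so the convolution can be applied to all vertices in a single call.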
101 | mapped_vertices = tf.nn.conv1d(self.vertices, w, 102 | padding=padding, stride=stride) 103 | elif self.vertices.shape.ndims == 4: 104 | # `self.vertices` has shape 105 | # (batch_size, num_vertices, input_length, embedding_channels). 106 | # Vertices are different for each example in the batch, 107 | # e.g. for word perturbations. 108 | mapped_vertices = snt.BatchApply( 109 | lambda x: tf.nn.conv1d(x, w, padding=padding, stride=stride))( 110 | self.vertices) 111 | else: 112 | raise ValueError('"vertices" must have either 3 or 4 dimensions.') 113 | 114 | lb, ub = _simplex_bounds(mapped_vertices, mapped_centres, self.r, -3) 115 | 116 | nominal_out = tf.nn.conv1d(self.nominal, w, 117 | padding=padding, stride=stride) 118 | if b is not None: 119 | nominal_out += b 120 | 121 | return relative_bounds.RelativeIntervalBounds(lb, ub, nominal_out) 122 | 123 | def apply_conv2d(self, wrapper, w, b, padding, strides): 124 | mapped_centres = tf.nn.convolution(self.nominal, w, 125 | padding=padding, strides=strides) 126 | if self.vertices.shape.ndims == 4: 127 | # `self.vertices` has no batch dimension; its shape is 128 | # (num_vertices, input_height, input_width, input_channels). 129 | mapped_vertices = tf.nn.convolution(self.vertices, w, 130 | padding=padding, strides=strides) 131 | elif self.vertices.shape.ndims == 5: 132 | # `self.vertices` has shape 133 | # (batch_size, num_vertices, input_height, input_width, input_channels). 134 | # Vertices are different for each example in the batch. 135 | mapped_vertices = snt.BatchApply( 136 | lambda x: tf.nn.convolution(x, w, padding=padding, strides=strides))( 137 | self.vertices) 138 | else: 139 | raise ValueError('"vertices" must have either 4 or 5 dimensions.') 140 | 141 | lb, ub = _simplex_bounds(mapped_vertices, mapped_centres, self.r, -4) 142 | 143 | nominal_out = tf.nn.convolution(self.nominal, w, 144 | padding=padding, strides=strides) 145 | if b is not None: 146 | nominal_out += b 147 | 148 | return relative_bounds.RelativeIntervalBounds(lb, ub, nominal_out) 149 | 150 | def apply_increasing_monotonic_fn(self, wrapper, fn, *args, **parameters): 151 | if fn.__name__ in ('add', 'reduce_mean', 'reduce_sum', 'avg_pool'): 152 | if self.vertices.shape.ndims == self.nominal.shape.ndims: 153 | vertices_fn = fn 154 | else: 155 | vertices_fn = snt.BatchApply(fn, n_dims=2) 156 | return SimplexBounds( 157 | vertices_fn(self.vertices, *[bounds.vertices for bounds in args]), 158 | fn(self.nominal, *[bounds.nominal for bounds in args]), 159 | self.r) 160 | 161 | elif fn.__name__ == 'quotient': 162 | return SimplexBounds( 163 | self.vertices / tf.expand_dims(parameters['denom'], axis=1), 164 | fn(self.nominal), 165 | self.r) 166 | 167 | else: 168 | return super(SimplexBounds, self).apply_increasing_monotonic_fn( 169 | wrapper, fn, *args, **parameters) 170 | 171 | 172 | def _simplex_bounds(mapped_vertices, mapped_centres, r, axis): 173 | """Calculates naive bounds on the given layer-mapped vertices. 174 | 175 | Args: 176 | mapped_vertices: Tensor of shape (num_vertices, *output_shape) 177 | or of shape (batch_size, num_vertices, *output_shape) 178 | containing the vertices in the layer's output space. 179 | mapped_centres: Tensor of shape (batch_size, *output_shape) 180 | containing the layer's nominal outputs. 181 | r: Scalar in [0, 1) specifying the radius (in vocab space) of the simplex. 182 | axis: Index of the `num_vertices` dimension of `mapped_vertices`. 
183 | 184 | Returns: 185 | lb_out: Tensor of shape (batch_size, *output_shape) with lower bounds 186 | on the outputs of the affine layer. 187 | ub_out: Tensor of shape (batch_size, *output_shape) with upper bounds 188 | on the outputs of the affine layer. 189 | """ 190 | # In absolute terms the lower bound would be (1-r) * mapped_centres + r * min(mapped_vertices); these bounds are relative to the nominal output, 191 | # so mapped_centres is subtracted, leaving the coefficient -r rather than (1-r). 192 | lb_out = -r * mapped_centres + r * tf.reduce_min(mapped_vertices, axis=axis) 193 | ub_out = -r * mapped_centres + r * tf.reduce_max(mapped_vertices, axis=axis) 194 | return lb_out, ub_out 195 | 196 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/specification.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Defines the output specifications.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import abc 23 | 24 | from absl import logging 25 | 26 | from interval_bound_propagation.src import bounds as bounds_lib 27 | from interval_bound_propagation.src import verifiable_wrapper 28 | import six 29 | import sonnet as snt 30 | import tensorflow.compat.v1 as tf 31 | 32 | 33 | @six.add_metaclass(abc.ABCMeta) 34 | class Specification(snt.AbstractModule): 35 | """Defines a specification.""" 36 | 37 | def __init__(self, name, collapse=True): 38 | super(Specification, self).__init__(name=name) 39 | self._collapse = collapse 40 | 41 | @abc.abstractmethod 42 | def _build(self, modules): 43 | """Computes the worst-case specification value.""" 44 | 45 | @abc.abstractmethod 46 | def evaluate(self, logits): 47 | """Computes the specification value. 48 | 49 | Args: 50 | logits: The logits Tensor can have one of the following shapes: 51 | [batch_size, num_classes]: The output should be [batch_size, num_specs]. 52 | [num_restarts, batch_size, num_classes]: The output should be 53 | [num_restarts, batch_size, num_specs]. Used by UntargetedPGDAttack. 54 | [num_restarts, num_specs, batch_size, num_classes]: The output should 55 | be [num_restarts, batch_size, num_specs]. For this case, the 56 | specifications must be evaluated individually for each column 57 | (axis = 1). Used by MultiTargetedPGDAttack. 58 | 59 | Returns: 60 | The specification values evaluated at the network output.
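Specifications are of the form `value <= 0`, so a positive returned value indicates that the specification is violated at that output.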
61 | """ 62 | 63 | @abc.abstractproperty 64 | def num_specifications(self): 65 | """Returns the number of specifications.""" 66 | 67 | @property 68 | def collapse(self): 69 | return self._collapse 70 | 71 | 72 | class LinearSpecification(Specification): 73 | """Linear specifications: c^T * z_K + d <= 0.""" 74 | 75 | def __init__(self, c, d=None, prune_irrelevant=True, collapse=True): 76 | """Builds a linear specification module.""" 77 | super(LinearSpecification, self).__init__(name='specs', collapse=collapse) 78 | # c has shape [batch_size, num_specifications, num_outputs] 79 | # d has shape [batch_size, num_specifications] 80 | # Some specifications may be irrelevant (not a function of the output). 81 | # We automatically remove them for clarity. We expect the number of 82 | # irrelevant specs to be equal for all elements of a batch. 83 | # Shape is [batch_size, num_specifications] 84 | if prune_irrelevant: 85 | irrelevant = tf.equal(tf.reduce_sum( 86 | tf.cast(tf.abs(c) > 1e-6, tf.int32), axis=-1, keepdims=True), 0) 87 | batch_size = tf.shape(c)[0] 88 | num_outputs = tf.shape(c)[2] 89 | irrelevant = tf.tile(irrelevant, [1, 1, num_outputs]) 90 | self._c = tf.reshape( 91 | tf.boolean_mask(c, tf.logical_not(irrelevant)), 92 | [batch_size, -1, num_outputs]) 93 | else: 94 | self._c = c 95 | self._d = d 96 | 97 | def _build(self, modules): 98 | """Outputs specification value.""" 99 | # inputs have shape [batch_size, num_outputs]. 100 | if not (self.collapse and 101 | isinstance(modules[-1], verifiable_wrapper.LinearFCWrapper)): 102 | logging.info('Elision of last layer disabled.') 103 | bounds = modules[-1].output_bounds 104 | w = self._c 105 | b = self._d 106 | else: 107 | logging.info('Elision of last layer active.') 108 | # Collapse the last layer. 109 | bounds = modules[-1].input_bounds 110 | w = modules[-1].module.w 111 | b = modules[-1].module.b 112 | w = tf.einsum('ijk,lk->ijl', self._c, w) 113 | b = tf.einsum('ijk,k->ij', self._c, b) 114 | if self._d is not None: 115 | b += self._d 116 | 117 | # Maximize z * w + b s.t. lower <= z <= upper. 118 | bounds = bounds_lib.IntervalBounds.convert(bounds) 119 | c = (bounds.lower + bounds.upper) / 2. 120 | r = (bounds.upper - bounds.lower) / 2. 121 | c = tf.einsum('ij,ikj->ik', c, w) 122 | if b is not None: 123 | c += b 124 | r = tf.einsum('ij,ikj->ik', r, tf.abs(w)) 125 | 126 | # output has shape [batch_size, num_specifications]. 127 | return c + r 128 | 129 | def evaluate(self, logits): 130 | if len(logits.shape) == 2: 131 | output = tf.einsum('ij,ikj->ik', logits, self._c) 132 | elif len(logits.shape) == 3: 133 | output = tf.einsum('rij,ikj->rik', logits, self._c) 134 | else: 135 | assert len(logits.shape) == 4 136 | output = tf.einsum('rsbo,bso->rbs', logits, self._c) 137 | if self._d is not None: 138 | output += self._d 139 | return output 140 | 141 | @property 142 | def num_specifications(self): 143 | return tf.shape(self._c)[1] 144 | 145 | @property 146 | def c(self): 147 | return self._c 148 | 149 | @property 150 | def d(self): 151 | return self._d 152 | 153 | 154 | class ClassificationSpecification(Specification): 155 | """Creates a linear specification that corresponds to a classification. 156 | 157 | This class is not a standard LinearSpecification as it does not materialize 158 | the c and d tensors. 
159 | """ 160 | 161 | def __init__(self, label, num_classes, collapse=True): 162 | super(ClassificationSpecification, self).__init__(name='specs', 163 | collapse=collapse) 164 | self._label = label 165 | self._num_classes = num_classes 166 | # Precompute indices. 167 | with self._enter_variable_scope(): 168 | indices = [] 169 | for i in range(self._num_classes): 170 | indices.append(list(range(i)) + list(range(i + 1, self._num_classes))) 171 | indices = tf.constant(indices, dtype=tf.int32) 172 | self._correct_idx, self._wrong_idx = self._build_indices(label, indices) 173 | 174 | def _build(self, modules): 175 | if not (self.collapse and 176 | isinstance(modules[-1], verifiable_wrapper.LinearFCWrapper)): 177 | logging.info('Elision of last layer disabled.') 178 | bounds = modules[-1].output_bounds 179 | bounds = bounds_lib.IntervalBounds.convert(bounds) 180 | correct_class_logit = tf.gather_nd(bounds.lower, self._correct_idx) 181 | wrong_class_logits = tf.gather_nd(bounds.upper, self._wrong_idx) 182 | return wrong_class_logits - tf.expand_dims(correct_class_logit, 1) 183 | 184 | logging.info('Elision of last layer active.') 185 | bounds = modules[-1].input_bounds 186 | bounds = bounds_lib.IntervalBounds.convert(bounds) 187 | batch_size = tf.shape(bounds.lower)[0] 188 | w = modules[-1].module.w 189 | b = modules[-1].module.b 190 | w_t = tf.tile(tf.expand_dims(tf.transpose(w), 0), [batch_size, 1, 1]) 191 | b_t = tf.tile(tf.expand_dims(b, 0), [batch_size, 1]) 192 | w_correct = tf.expand_dims(tf.gather_nd(w_t, self._correct_idx), -1) 193 | b_correct = tf.expand_dims(tf.gather_nd(b_t, self._correct_idx), 1) 194 | w_wrong = tf.transpose(tf.gather_nd(w_t, self._wrong_idx), [0, 2, 1]) 195 | b_wrong = tf.gather_nd(b_t, self._wrong_idx) 196 | w = w_wrong - w_correct 197 | b = b_wrong - b_correct 198 | # Maximize z * w + b s.t. lower <= z <= upper. 199 | c = (bounds.lower + bounds.upper) / 2. 200 | r = (bounds.upper - bounds.lower) / 2. 201 | c = tf.einsum('ij,ijk->ik', c, w) 202 | if b is not None: 203 | c += b 204 | r = tf.einsum('ij,ijk->ik', r, tf.abs(w)) 205 | return c + r 206 | 207 | def evaluate(self, logits): 208 | if len(logits.shape) == 2: 209 | correct_class_logit = tf.gather_nd(logits, self._correct_idx) 210 | correct_class_logit = tf.expand_dims(correct_class_logit, -1) 211 | wrong_class_logits = tf.gather_nd(logits, self._wrong_idx) 212 | elif len(logits.shape) == 3: 213 | # [num_restarts, batch_size, num_classes] to 214 | # [num_restarts, batch_size, num_specs] 215 | logits = tf.transpose(logits, [1, 2, 0]) # Put restart dimension last. 216 | correct_class_logit = tf.gather_nd(logits, self._correct_idx) 217 | correct_class_logit = tf.transpose(correct_class_logit) 218 | correct_class_logit = tf.expand_dims(correct_class_logit, -1) 219 | wrong_class_logits = tf.gather_nd(logits, self._wrong_idx) 220 | wrong_class_logits = tf.transpose(wrong_class_logits, [2, 0, 1]) 221 | else: 222 | assert len(logits.shape) == 4 223 | # [num_restarts, num_specs, batch_size, num_classes] to 224 | # [num_restarts, batch_size, num_specs]. 
225 | logits = tf.transpose(logits, [2, 3, 1, 0]) 226 | correct_class_logit = tf.gather_nd(logits, self._correct_idx) 227 | correct_class_logit = tf.transpose(correct_class_logit, [2, 0, 1]) 228 | batch_size = tf.shape(logits)[0] 229 | wrong_idx = tf.concat([ 230 | self._wrong_idx, 231 | tf.tile(tf.reshape(tf.range(self.num_specifications, dtype=tf.int32), 232 | [1, self.num_specifications, 1]), 233 | [batch_size, 1, 1])], axis=-1) 234 | wrong_class_logits = tf.gather_nd(logits, wrong_idx) 235 | wrong_class_logits = tf.transpose(wrong_class_logits, [2, 0, 1]) 236 | return wrong_class_logits - correct_class_logit 237 | 238 | @property 239 | def num_specifications(self): 240 | return self._num_classes - 1 241 | 242 | @property 243 | def correct_idx(self): 244 | return self._correct_idx 245 | 246 | @property 247 | def wrong_idx(self): 248 | return self._wrong_idx 249 | 250 | def _build_indices(self, label, indices): 251 | batch_size = tf.shape(label)[0] 252 | i = tf.range(batch_size, dtype=tf.int32) 253 | correct_idx = tf.stack([i, tf.cast(label, tf.int32)], axis=1) 254 | wrong_idx = tf.stack([ 255 | tf.tile(tf.reshape(i, [batch_size, 1]), [1, self._num_classes - 1]), 256 | tf.gather(indices, label), 257 | ], axis=2) 258 | return correct_idx, wrong_idx 259 | 260 | 261 | class TargetedClassificationSpecification(ClassificationSpecification): 262 | """Defines a specification that compares the true class with another.""" 263 | 264 | def __init__(self, label, num_classes, target_class, collapse=True): 265 | super(TargetedClassificationSpecification, self).__init__( 266 | label, num_classes, collapse=collapse) 267 | batch_size = tf.shape(label)[0] 268 | if len(target_class.shape) == 1: 269 | target_class = tf.reshape(target_class, [batch_size, 1]) 270 | self._num_specifications = target_class.shape[1].value 271 | if self._num_specifications is None: 272 | raise ValueError('Cannot retrieve the number of target classes') 273 | self._target_class = target_class 274 | i = tf.range(batch_size, dtype=tf.int32) 275 | self._wrong_idx = tf.stack([ 276 | tf.tile(tf.reshape(i, [batch_size, 1]), [1, self.num_specifications]), 277 | target_class 278 | ], axis=2) 279 | 280 | @property 281 | def target_class(self): 282 | """Returns the target class index.""" 283 | return self._target_class 284 | 285 | @property 286 | def num_specifications(self): 287 | return self._num_specifications 288 | 289 | 290 | class RandomClassificationSpecification(TargetedClassificationSpecification): 291 | """Creates a single random specification that targets a random class.""" 292 | 293 | def __init__(self, label, num_classes, num_targets=1, seed=None, 294 | collapse=True): 295 | # Overwrite the target indices. Each session.run() call gets new target 296 | # indices, but the indices should remain the same across restarts. 297 | batch_size = tf.shape(label)[0] 298 | j = tf.random.uniform(shape=(batch_size, num_targets), minval=1, 299 | maxval=num_classes, dtype=tf.int32, seed=seed) 300 | target_class = tf.mod(tf.cast(tf.expand_dims(label, -1), tf.int32) + j, 301 | num_classes) 302 | super(RandomClassificationSpecification, self).__init__( 303 | label, num_classes, target_class, collapse=collapse) 304 | 305 | 306 | class LeastLikelyClassificationSpecification( 307 | TargetedClassificationSpecification): 308 | """Creates a single specification that targets the least likely class.""" 309 | 310 | def __init__(self, label, num_classes, logits, num_targets=1, collapse=True): 311 | # Do not target the true class.
If the true class is the least likely to 312 | # be predicted, it is fine to target any other class as the attack will 313 | # be successful anyway. 314 | j = tf.nn.top_k(-logits, k=num_targets, sorted=False).indices 315 | l = tf.expand_dims(label, 1) 316 | target_class = tf.mod( 317 | j + tf.cast(tf.equal(j, tf.cast(l, tf.int32)), tf.int32), num_classes) 318 | super(LeastLikelyClassificationSpecification, self).__init__( 319 | label, num_classes, target_class, collapse=collapse) 320 | -------------------------------------------------------------------------------- /interval_bound_propagation/src/verifiable_wrapper.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Wrapper around modules that provides additional facilities.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import abc 23 | import types 24 | 25 | from absl import logging 26 | from interval_bound_propagation.src import layers 27 | import six 28 | import sonnet as snt 29 | import tensorflow.compat.v1 as tf 30 | 31 | 32 | @six.add_metaclass(abc.ABCMeta) 33 | class VerifiableWrapper(object): 34 | """Abstract wrapper class.""" 35 | 36 | def __init__(self, module): 37 | self._module = module 38 | self._input_bounds = None 39 | self._output_bounds = None 40 | 41 | @property 42 | def input_bounds(self): 43 | assert self._input_bounds is not None 44 | return self._input_bounds 45 | 46 | @property 47 | def output_bounds(self): 48 | return self._output_bounds 49 | 50 | @property 51 | def module(self): 52 | return self._module 53 | 54 | def __str__(self): 55 | if isinstance(self._module, tf.Tensor): 56 | return str(self._module) 57 | if isinstance(self._module, types.LambdaType): 58 | return self._module.__name__ 59 | if isinstance(self._module, snt.AbstractModule): 60 | return self._module.module_name 61 | if hasattr(self._module, '__class__'): 62 | return self._module.__class__.__name__ 63 | return str(self._module) 64 | 65 | def propagate_bounds(self, *input_bounds): 66 | """Propagates bounds and saves input and output bounds.""" 67 | output_bounds = self._propagate_through(self.module, *input_bounds) 68 | 69 | if len(input_bounds) == 1: 70 | self._input_bounds = input_bounds[0] 71 | else: 72 | self._input_bounds = tuple(input_bounds) 73 | self._output_bounds = output_bounds 74 | 75 | return output_bounds 76 | 77 | @abc.abstractmethod 78 | def _propagate_through(self, module, *input_bounds): 79 | """Propagates bounds through a verifiable wrapper. 80 | 81 | Args: 82 | module: The wrapped module, through which bounds are to be propagated. 83 | *input_bounds: Bounds on the node's input(s). 84 | 85 | Returns: 86 | New bounds on the node's output.
87 | """ 88 | 89 | 90 | class ModelInputWrapper(object): 91 | """Virtual node representing the network's inputs.""" 92 | 93 | def __init__(self, index): 94 | super(ModelInputWrapper, self).__init__() 95 | self._index = index 96 | self._output_bounds = None 97 | 98 | @property 99 | def index(self): 100 | return self._index 101 | 102 | @property 103 | def output_bounds(self): 104 | return self._output_bounds 105 | 106 | @output_bounds.setter 107 | def output_bounds(self, bounds): 108 | self._output_bounds = bounds 109 | 110 | def __str__(self): 111 | return 'Model input {}'.format(self.index) 112 | 113 | 114 | class ConstWrapper(VerifiableWrapper): 115 | """Wraps a constant tensor.""" 116 | 117 | def _propagate_through(self, module): 118 | # Make sure that the constant value can be converted to a tensor. 119 | return tf.convert_to_tensor(module) 120 | 121 | 122 | class LinearFCWrapper(VerifiableWrapper): 123 | """Wraps fully-connected layers.""" 124 | 125 | def __init__(self, module): 126 | if not isinstance(module, snt.Linear): 127 | raise ValueError('Cannot wrap {} with a LinearFCWrapper.'.format(module)) 128 | super(LinearFCWrapper, self).__init__(module) 129 | 130 | def _propagate_through(self, module, input_bounds): 131 | w = module.w 132 | b = module.b if module.has_bias else None 133 | return input_bounds.apply_linear(self, w, b) 134 | 135 | 136 | class LinearConvWrapper(VerifiableWrapper): 137 | """Wraps convolutional layers.""" 138 | 139 | 140 | class LinearConv1dWrapper(LinearConvWrapper): 141 | """Wraps 1-D convolutional layers.""" 142 | 143 | def __init__(self, module): 144 | if not isinstance(module, snt.Conv1D): 145 | raise ValueError('Cannot wrap {} with a LinearConv1dWrapper.'.format( 146 | module)) 147 | super(LinearConv1dWrapper, self).__init__(module) 148 | 149 | def _propagate_through(self, module, input_bounds): 150 | w = module.w 151 | b = module.b if module.has_bias else None 152 | padding = module.padding 153 | stride = module.stride[1] 154 | return input_bounds.apply_conv1d(self, w, b, padding, stride) 155 | 156 | 157 | class LinearConv2dWrapper(LinearConvWrapper): 158 | """Wraps 2-D convolutional layers.""" 159 | 160 | def __init__(self, module): 161 | if not isinstance(module, snt.Conv2D): 162 | raise ValueError('Cannot wrap {} with a LinearConv2dWrapper.'.format( 163 | module)) 164 | super(LinearConv2dWrapper, self).__init__(module) 165 | 166 | def _propagate_through(self, module, input_bounds): 167 | w = module.w 168 | b = module.b if module.has_bias else None 169 | padding = module.padding 170 | strides = module.stride[1:-1] 171 | return input_bounds.apply_conv2d(self, w, b, padding, strides) 172 | 173 | 174 | class IncreasingMonotonicWrapper(VerifiableWrapper): 175 | """Wraps monotonically increasing functions of the inputs.""" 176 | 177 | def __init__(self, module, **parameters): 178 | super(IncreasingMonotonicWrapper, self).__init__(module) 179 | self._parameters = parameters 180 | 181 | @property 182 | def parameters(self): 183 | return self._parameters 184 | 185 | def _propagate_through(self, module, main_bounds, *other_input_bounds): 186 | return main_bounds.apply_increasing_monotonic_fn(self, module, 187 | *other_input_bounds, 188 | **self.parameters) 189 | 190 | 191 | class SoftmaxWrapper(VerifiableWrapper): 192 | """Wraps softmax layers.""" 193 | 194 | def __init__(self): 195 | super(SoftmaxWrapper, self).__init__(None) 196 | 197 | def _propagate_through(self, module, input_bounds): 198 | return input_bounds.apply_softmax(self) 199 | 200 | 201 | class 
PiecewiseMonotonicWrapper(VerifiableWrapper): 202 | """Wraps a piecewise (not necessarily increasing) monotonic function.""" 203 | 204 | def __init__(self, module, boundaries=()): 205 | super(PiecewiseMonotonicWrapper, self).__init__(module) 206 | self._boundaries = boundaries 207 | 208 | @property 209 | def boundaries(self): 210 | return self._boundaries 211 | 212 | def _propagate_through(self, module, main_bounds, *other_input_bounds): 213 | return main_bounds.apply_piecewise_monotonic_fn(self, module, 214 | self.boundaries, 215 | *other_input_bounds) 216 | 217 | 218 | class ImageNormWrapper(IncreasingMonotonicWrapper): 219 | """Convenience wrapper for keeping track of the ImageNorm layer.""" 220 | 221 | def __init__(self, module): 222 | if not isinstance(module, layers.ImageNorm): 223 | raise ValueError('Cannot wrap {} with an ImageNormWrapper.'.format(module)) 224 | super(ImageNormWrapper, self).__init__(module.apply) 225 | self._inner_module = module 226 | 227 | @property 228 | def inner_module(self): 229 | return self._inner_module 230 | 231 | 232 | class BatchNormWrapper(VerifiableWrapper): 233 | """Wraps batch normalization.""" 234 | 235 | def __init__(self, module): 236 | if not isinstance(module, snt.BatchNorm): 237 | raise ValueError('Cannot wrap {} with a BatchNormWrapper.'.format( 238 | module)) 239 | super(BatchNormWrapper, self).__init__(module) 240 | 241 | def _propagate_through(self, module, input_bounds): 242 | if isinstance(module, layers.BatchNorm): 243 | # This IBP-specific batch-norm implementation exposes stats recorded 244 | # the most recent time the BatchNorm module was connected. 245 | # These will be either the batch stats (e.g. if training) or the moving 246 | # averages, depending on how the module was called. 247 | mean = module.mean 248 | variance = module.variance 249 | epsilon = module.epsilon 250 | scale = module.scale 251 | bias = module.bias 252 | 253 | else: 254 | # This plain Sonnet batch-norm implementation only exposes the 255 | # moving averages. 256 | logging.warn('Sonnet BatchNorm module encountered: %s. 
' 257 | 'IBP will always use its moving averages, not the local ' 258 | 'batch stats, even in training mode.', str(module)) 259 | mean = module.moving_mean 260 | variance = module.moving_variance 261 | epsilon = module._eps # pylint: disable=protected-access 262 | try: 263 | bias = module.beta 264 | except snt.Error: 265 | bias = None 266 | try: 267 | scale = module.gamma 268 | except snt.Error: 269 | scale = None 270 | 271 | return input_bounds.apply_batch_norm(self, mean, variance, 272 | scale, bias, epsilon) 273 | 274 | 275 | class BatchReshapeWrapper(VerifiableWrapper): 276 | """Wraps batch reshape.""" 277 | 278 | def __init__(self, module, shape): 279 | if not isinstance(module, snt.BatchReshape): 280 | raise ValueError('Cannot wrap {} with a BatchReshapeWrapper.'.format( 281 | module)) 282 | super(BatchReshapeWrapper, self).__init__(module) 283 | self._shape = shape 284 | 285 | @property 286 | def shape(self): 287 | return self._shape 288 | 289 | def _propagate_through(self, module, input_bounds): 290 | return input_bounds.apply_batch_reshape(self, self.shape) 291 | 292 | 293 | class BatchFlattenWrapper(BatchReshapeWrapper): 294 | """Wraps batch flatten.""" 295 | 296 | def __init__(self, module): 297 | if not isinstance(module, snt.BatchFlatten): 298 | raise ValueError('Cannot wrap {} with a BatchFlattenWrapper.'.format( 299 | module)) 300 | super(BatchFlattenWrapper, self).__init__(module, [-1]) 301 | -------------------------------------------------------------------------------- /interval_bound_propagation/tests/attacks_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for attacks.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from absl.testing import parameterized 23 | 24 | import interval_bound_propagation as ibp 25 | import sonnet as snt 26 | import tensorflow.compat.v1 as tf 27 | 28 | 29 | class MockWithIsTraining(object): 30 | """Mock wrapper around the predictor network.""" 31 | 32 | def __init__(self, module, test): 33 | self._module = module 34 | self._test = test 35 | 36 | def __call__(self, z0, is_training=False): 37 | # is_training should be False. 
38 | self._test.assertFalse(is_training) 39 | return self._module(z0) 40 | 41 | 42 | class MockWithoutIsTraining(object): 43 | """Mock wrapper around the predictor network.""" 44 | 45 | def __init__(self, module, test): 46 | self._module = module 47 | self._test = test 48 | 49 | def __call__(self, z0): 50 | return self._module(z0) 51 | 52 | 53 | class AttacksTest(parameterized.TestCase, tf.test.TestCase): 54 | 55 | @parameterized.named_parameters( 56 | ('UntargetedWithGradientDescent', MockWithIsTraining, 57 | ibp.UntargetedPGDAttack, ibp.UnrolledGradientDescent, 1.), 58 | ('UntargetedWithAdam', MockWithIsTraining, 59 | ibp.UntargetedPGDAttack, ibp.UnrolledAdam, 1.), 60 | ('MultiTargetedWithGradientDescent', MockWithIsTraining, 61 | ibp.MultiTargetedPGDAttack, ibp.UnrolledGradientDescent, 1.), 62 | ('MultiTargetedWithAdam', MockWithIsTraining, 63 | ibp.MultiTargetedPGDAttack, ibp.UnrolledAdam, 1.), 64 | ('DiverseEpsilon', MockWithIsTraining, 65 | ibp.MultiTargetedPGDAttack, ibp.UnrolledAdam, [1., 1.]), 66 | ('WithoutIsTraining', MockWithoutIsTraining, 67 | ibp.UntargetedPGDAttack, ibp.UnrolledGradientDescent, 1.), 68 | ('Restarted', MockWithIsTraining, 69 | ibp.UntargetedPGDAttack, ibp.UnrolledGradientDescent, 1., True), 70 | ('SPSA', MockWithIsTraining, 71 | ibp.UntargetedPGDAttack, ibp.UnrolledSPSAAdam, 1.)) 72 | def testEndToEnd(self, predictor_cls, attack_cls, optimizer_cls, epsilon, 73 | restarted=False): 74 | # l-infinity norm of the perturbation ball. 75 | if isinstance(epsilon, list): 76 | # We test the ability to have different epsilons across dimensions. 77 | epsilon = tf.constant([epsilon], dtype=tf.float32) 78 | bounds = (-.5, 2.5) 79 | # Create a simple network. 80 | m = snt.Linear(1, initializers={ 81 | 'w': tf.constant_initializer(1.), 82 | 'b': tf.constant_initializer(1.), 83 | }) 84 | z = tf.constant([[1, 2]], dtype=tf.float32) 85 | predictor = predictor_cls(m, self) 86 | # The labels are not important for the test, but are needed by the attack. 87 | labels = tf.constant([1], dtype=tf.int64) 88 | 89 | # We create two attacks to maximize and then minimize the output. 90 | max_spec = ibp.LinearSpecification(tf.constant([[[1.]]])) 91 | max_attack = attack_cls(predictor, max_spec, epsilon, input_bounds=bounds, 92 | optimizer_builder=optimizer_cls) 93 | if restarted: 94 | max_attack = ibp.RestartedAttack(max_attack, num_restarts=10) 95 | z_max = max_attack(z, labels) 96 | min_spec = ibp.LinearSpecification(tf.constant([[[-1.]]])) 97 | min_attack = attack_cls(predictor, min_spec, epsilon, input_bounds=bounds, 98 | optimizer_builder=optimizer_cls) 99 | if restarted: 100 | min_attack = ibp.RestartedAttack(min_attack, num_restarts=10) 101 | z_min = min_attack(z, labels) 102 | 103 | with self.test_session() as sess: 104 | sess.run(tf.global_variables_initializer()) 105 | z_max_values, z_min_values = sess.run([z_max, z_min]) 106 | z_max_values = z_max_values[0] 107 | z_min_values = z_min_values[0] 108 | self.assertAlmostEqual(2., z_max_values[0]) 109 | self.assertAlmostEqual(2.5, z_max_values[1]) 110 | self.assertAlmostEqual(0., z_min_values[0]) 111 | self.assertAlmostEqual(1., z_min_values[1]) 112 | 113 | 114 | if __name__ == '__main__': 115 | tf.test.main() 116 | -------------------------------------------------------------------------------- /interval_bound_propagation/tests/bounds_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for bounds.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from absl.testing import parameterized 23 | import interval_bound_propagation as ibp 24 | import numpy as np 25 | import sonnet as snt 26 | import tensorflow.compat.v1 as tf 27 | 28 | 29 | class IntervalBoundsTest(parameterized.TestCase, tf.test.TestCase): 30 | 31 | def testFCIntervalBounds(self): 32 | m = snt.Linear(1, initializers={ 33 | 'w': tf.constant_initializer(1.), 34 | 'b': tf.constant_initializer(2.), 35 | }) 36 | z = tf.constant([[1, 2, 3]], dtype=tf.float32) 37 | m(z) # Connect to create weights. 38 | m = ibp.LinearFCWrapper(m) 39 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 40 | output_bounds = m.propagate_bounds(input_bounds) 41 | with self.test_session() as sess: 42 | sess.run(tf.global_variables_initializer()) 43 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 44 | l = l.item() 45 | u = u.item() 46 | self.assertAlmostEqual(5., l) 47 | self.assertAlmostEqual(11., u) 48 | 49 | def testConv1dIntervalBounds(self): 50 | m = snt.Conv1D( 51 | output_channels=1, 52 | kernel_shape=2, 53 | padding='VALID', 54 | stride=1, 55 | use_bias=True, 56 | initializers={ 57 | 'w': tf.constant_initializer(1.), 58 | 'b': tf.constant_initializer(2.), 59 | }) 60 | z = tf.constant([3, 4], dtype=tf.float32) 61 | z = tf.reshape(z, [1, 2, 1]) 62 | m(z) # Connect to create weights. 63 | m = ibp.LinearConv1dWrapper(m) 64 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 65 | output_bounds = m.propagate_bounds(input_bounds) 66 | with self.test_session() as sess: 67 | sess.run(tf.global_variables_initializer()) 68 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 69 | l = l.item() 70 | u = u.item() 71 | self.assertAlmostEqual(7., l) 72 | self.assertAlmostEqual(11., u) 73 | 74 | def testConv2dIntervalBounds(self): 75 | m = snt.Conv2D( 76 | output_channels=1, 77 | kernel_shape=(2, 2), 78 | padding='VALID', 79 | stride=1, 80 | use_bias=True, 81 | initializers={ 82 | 'w': tf.constant_initializer(1.), 83 | 'b': tf.constant_initializer(2.), 84 | }) 85 | z = tf.constant([1, 2, 3, 4], dtype=tf.float32) 86 | z = tf.reshape(z, [1, 2, 2, 1]) 87 | m(z) # Connect to create weights. 88 | m = ibp.LinearConv2dWrapper(m) 89 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 90 | output_bounds = m.propagate_bounds(input_bounds) 91 | with self.test_session() as sess: 92 | sess.run(tf.global_variables_initializer()) 93 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 94 | l = l.item() 95 | u = u.item() 96 | self.assertAlmostEqual(8., l) 97 | self.assertAlmostEqual(16., u) 98 | 99 | def testReluIntervalBounds(self): 100 | m = tf.nn.relu 101 | z = tf.constant([[-2, 3]], dtype=tf.float32) 102 | m = ibp.IncreasingMonotonicWrapper(m) 103 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 
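# With z = [[-2, 3]], the input intervals are [-3, -1] and [2, 4]; ReLU maps them elementwise to [0, 0] and [2, 4].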
104 | output_bounds = m.propagate_bounds(input_bounds) 105 | with self.test_session() as sess: 106 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 107 | self.assertAlmostEqual([[0., 2.]], l.tolist()) 108 | self.assertAlmostEqual([[0., 4.]], u.tolist()) 109 | 110 | def testMulIntervalBounds(self): 111 | m = tf.multiply 112 | z = tf.constant([[-2, 3, 0]], dtype=tf.float32) 113 | m = ibp.PiecewiseMonotonicWrapper(m, (0,)) 114 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 115 | output_bounds = m.propagate_bounds(input_bounds, input_bounds) 116 | with self.test_session() as sess: 117 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 118 | self.assertAlmostEqual([[1., 4., -1.]], l.tolist()) 119 | self.assertAlmostEqual([[9., 16., 1.]], u.tolist()) 120 | 121 | def testSubIntervalBounds(self): 122 | m = tf.subtract 123 | z = tf.constant([[-2, 3, 0]], dtype=tf.float32) 124 | m = ibp.PiecewiseMonotonicWrapper(m) 125 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 126 | output_bounds = m.propagate_bounds(input_bounds, input_bounds) 127 | with self.test_session() as sess: 128 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 129 | self.assertAlmostEqual([[-2., -2., -2.]], l.tolist()) 130 | self.assertAlmostEqual([[2., 2., 2.]], u.tolist()) 131 | 132 | @parameterized.named_parameters( 133 | ('DefaultAxis', -1, [[[1., 0.5, 0.5], [1., 0.5, 0.5]], 134 | [[1. / 3, 0., 0.], [1. / 3, 0., 0.]]]), 135 | ('NonDefaultAxis', 0, [[[1., 1., 1.], [1., 1., 1.]], 136 | [[0., 0., 0.], [0., 0., 0.]]])) 137 | def testSoftmaxIntervalBounds(self, axis, expected_outputs): 138 | z = tf.constant([[1., -10., -10.], [1., -10., -10.]]) 139 | input_bounds = ibp.IntervalBounds(z - 1.0, z + 10.0) 140 | 141 | softmax_fn = lambda x: tf.nn.softmax(x, axis=axis) 142 | softmax_fn = ibp.VerifiableModelWrapper(softmax_fn) 143 | softmax_fn(z) 144 | output_bounds = softmax_fn.propagate_bounds(input_bounds) 145 | 146 | with self.test_session() as sess: 147 | sess.run(tf.global_variables_initializer()) 148 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 149 | self.assertTrue(np.all(np.abs(expected_outputs[0] - u) < 1e-3)) 150 | self.assertTrue(np.all(np.abs(expected_outputs[1] - l) < 1e-3)) 151 | 152 | def testBatchNormIntervalBounds(self): 153 | z = tf.constant([[1, 2, 3]], dtype=tf.float32) 154 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 155 | g = tf.reshape(tf.range(-1, 2, dtype=tf.float32), [1, 3]) 156 | b = tf.reshape(tf.range(3, dtype=tf.float32), [1, 3]) 157 | batch_norm = ibp.BatchNorm(scale=True, offset=True, eps=0., initializers={ 158 | 'gamma': lambda *args, **kwargs: g, 159 | 'beta': lambda *args, **kwargs: b, 160 | 'moving_mean': tf.constant_initializer(1.), 161 | 'moving_variance': tf.constant_initializer(4.), 162 | }) 163 | batch_norm(z, is_training=False) 164 | batch_norm = ibp.BatchNormWrapper(batch_norm) 165 | # Test propagation. 166 | output_bounds = batch_norm.propagate_bounds(input_bounds) 167 | with self.test_session() as sess: 168 | sess.run(tf.global_variables_initializer()) 169 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 170 | self.assertAlmostEqual([[-.5, 1., 2.5]], l.tolist()) 171 | self.assertAlmostEqual([[.5, 1., 3.5]], u.tolist()) 172 | 173 | def testCaching(self): 174 | m = snt.Linear(1, initializers={ 175 | 'w': tf.constant_initializer(1.), 176 | 'b': tf.constant_initializer(2.), 177 | }) 178 | z = tf.placeholder(shape=(1, 3), dtype=tf.float32) 179 | m(z) # Connect to create weights. 
180 | m = ibp.LinearFCWrapper(m) 181 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 182 | output_bounds = m.propagate_bounds(input_bounds) 183 | 184 | input_bounds.enable_caching() 185 | output_bounds.enable_caching() 186 | update_all_caches_op = tf.group([input_bounds.update_cache_op, 187 | output_bounds.update_cache_op]) 188 | 189 | with self.test_session() as sess: 190 | sess.run(tf.global_variables_initializer()) 191 | 192 | # Initialise the caches based on the model inputs. 193 | sess.run(update_all_caches_op, feed_dict={z: [[1., 2., 3.]]}) 194 | 195 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 196 | l = l.item() 197 | u = u.item() 198 | self.assertAlmostEqual(5., l) 199 | self.assertAlmostEqual(11., u) 200 | 201 | # Update the cache based on a different set of inputs. 202 | sess.run([output_bounds.update_cache_op], feed_dict={z: [[2., 3., 7.]]}) 203 | # We only updated the output bounds' cache. 204 | # This asserts that the computation depends on the underlying 205 | # input bounds tensor, not on the cached version of it. 206 | # (Thus it doesn't matter in which order the caches are updated.) 207 | 208 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 209 | l = l.item() 210 | u = u.item() 211 | self.assertAlmostEqual(11., l) 212 | self.assertAlmostEqual(17., u) 213 | 214 | 215 | if __name__ == '__main__': 216 | tf.test.main() 217 | -------------------------------------------------------------------------------- /interval_bound_propagation/tests/crown_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for CROWN bounds.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import interval_bound_propagation as ibp 23 | import numpy as np 24 | import sonnet as snt 25 | import tensorflow.compat.v1 as tf 26 | 27 | 28 | def _generate_identity_spec(modules, shape, dimension=1): 29 | spec = ibp.LinearSpecification(tf.reshape(tf.eye(dimension), shape), 30 | prune_irrelevant=False) 31 | initial_bound = ibp.crown.create_initial_backward_bounds(spec, modules) 32 | return initial_bound 33 | 34 | 35 | class CROWNBoundsTest(tf.test.TestCase): 36 | 37 | def testFCBackwardBounds(self): 38 | m = snt.Linear(1, initializers={ 39 | 'w': tf.constant_initializer(1.), 40 | 'b': tf.constant_initializer(2.), 41 | }) 42 | z = tf.constant([[1, 2, 3]], dtype=tf.float32) 43 | m(z) # Connect to create weights. 44 | m = ibp.LinearFCWrapper(m) 45 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 46 | m.propagate_bounds(input_bounds) # Create IBP bounds.
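# With unit weights, bias 2, and input intervals [0, 2], [1, 3], [2, 4], the concretized bounds are 0+1+2+2 = 5 and 2+3+4+2 = 11.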
47 | crown_init_bounds = _generate_identity_spec([m], shape=(1, 1, 1)) 48 | output_bounds = m.propagate_bounds(crown_init_bounds) 49 | concrete_bounds = output_bounds.concretize() 50 | with self.test_session() as sess: 51 | sess.run(tf.global_variables_initializer()) 52 | lw, uw, lb, ub, cl, cu = sess.run([output_bounds.lower.w, 53 | output_bounds.upper.w, 54 | output_bounds.lower.b, 55 | output_bounds.upper.b, 56 | concrete_bounds.lower, 57 | concrete_bounds.upper]) 58 | self.assertTrue(np.all(lw == 1.)) 59 | self.assertTrue(np.all(lb == 2.)) 60 | self.assertTrue(np.all(uw == 1.)) 61 | self.assertTrue(np.all(ub == 2.)) 62 | cl = cl.item() 63 | cu = cu.item() 64 | self.assertAlmostEqual(5., cl) 65 | self.assertAlmostEqual(11., cu) 66 | 67 | def testConv2dBackwardBounds(self): 68 | m = snt.Conv2D( 69 | output_channels=1, 70 | kernel_shape=(2, 2), 71 | padding='VALID', 72 | stride=1, 73 | use_bias=True, 74 | initializers={ 75 | 'w': tf.constant_initializer(1.), 76 | 'b': tf.constant_initializer(2.), 77 | }) 78 | z = tf.constant([1, 2, 3, 4], dtype=tf.float32) 79 | z = tf.reshape(z, [1, 2, 2, 1]) 80 | m(z) # Connect to create weights. 81 | m = ibp.LinearConv2dWrapper(m) 82 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 83 | m.propagate_bounds(input_bounds) # Create IBP bounds. 84 | crown_init_bounds = _generate_identity_spec([m], shape=(1, 1, 1, 1, 1)) 85 | output_bounds = m.propagate_bounds(crown_init_bounds) 86 | concrete_bounds = output_bounds.concretize() 87 | with self.test_session() as sess: 88 | sess.run(tf.global_variables_initializer()) 89 | l, u = sess.run([concrete_bounds.lower, concrete_bounds.upper]) 90 | l = l.item() 91 | u = u.item() 92 | self.assertAlmostEqual(8., l) 93 | self.assertAlmostEqual(16., u) 94 | 95 | def testReluBackwardBounds(self): 96 | m = tf.nn.relu 97 | z = tf.constant([[-2, 3]], dtype=tf.float32) 98 | m = ibp.IncreasingMonotonicWrapper(m) 99 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 100 | m.propagate_bounds(input_bounds) # Create IBP bounds. 101 | crown_init_bounds = _generate_identity_spec([m], shape=(1, 2, 2), 102 | dimension=2) 103 | output_bounds = m.propagate_bounds(crown_init_bounds) 104 | concrete_bounds = output_bounds.concretize() 105 | with self.test_session() as sess: 106 | l, u = sess.run([concrete_bounds.lower, concrete_bounds.upper]) 107 | self.assertAlmostEqual([[0., 2.]], l.tolist()) 108 | self.assertAlmostEqual([[0., 4.]], u.tolist()) 109 | 110 | if __name__ == '__main__': 111 | tf.test.main() 112 | -------------------------------------------------------------------------------- /interval_bound_propagation/tests/fastlin_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Interval Bound Propagation Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for symbolic bounds.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from absl.testing import parameterized 23 | import interval_bound_propagation as ibp 24 | import numpy as np 25 | import sonnet as snt 26 | import tensorflow.compat.v1 as tf 27 | 28 | 29 | class SymbolicBoundsTest(parameterized.TestCase, tf.test.TestCase): 30 | 31 | def testConvertSymbolicBounds(self): 32 | z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32) 33 | z = tf.reshape(z, [1, 2, 2]) 34 | b = ibp.SymbolicBounds.convert(z) 35 | for l in (b.lower, b.upper): 36 | self.assertEqual([1, 4, 2, 2], l.w.shape.as_list()) 37 | self.assertEqual([1, 2, 2], l.b.shape.as_list()) 38 | self.assertEqual([1, 4], l.lower.shape.as_list()) 39 | self.assertEqual([1, 4], l.upper.shape.as_list()) 40 | 41 | def testFCSymbolicBounds(self): 42 | m = snt.Linear(1, initializers={ 43 | 'w': tf.constant_initializer(1.), 44 | 'b': tf.constant_initializer(2.), 45 | }) 46 | z = tf.constant([[1, 2, 3]], dtype=tf.float32) 47 | m(z) # Connect to create weights. 48 | m = ibp.LinearFCWrapper(m) 49 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 50 | input_bounds = ibp.SymbolicBounds.convert(input_bounds) 51 | output_bounds = m.propagate_bounds(input_bounds) 52 | concrete_bounds = ibp.IntervalBounds.convert(output_bounds) 53 | with self.test_session() as sess: 54 | sess.run(tf.global_variables_initializer()) 55 | l, u, cl, cu = sess.run([output_bounds.lower, output_bounds.upper, 56 | concrete_bounds.lower, concrete_bounds.upper]) 57 | self.assertTrue(np.all(l.w == 1.)) 58 | self.assertTrue(np.all(l.b == 2.)) 59 | self.assertAlmostEqual([[0, 1, 2]], l.lower.tolist()) 60 | self.assertAlmostEqual([[2, 3, 4]], l.upper.tolist()) 61 | self.assertTrue(np.all(u.w == 1.)) 62 | self.assertTrue(np.all(u.b == 2.)) 63 | self.assertAlmostEqual([[0, 1, 2]], u.lower.tolist()) 64 | self.assertAlmostEqual([[2, 3, 4]], u.upper.tolist()) 65 | cl = cl.item() 66 | cu = cu.item() 67 | self.assertAlmostEqual(5., cl) 68 | self.assertAlmostEqual(11., cu) 69 | 70 | def testConv2dSymbolicBounds(self): 71 | m = snt.Conv2D( 72 | output_channels=1, 73 | kernel_shape=(2, 2), 74 | padding='VALID', 75 | stride=1, 76 | use_bias=True, 77 | initializers={ 78 | 'w': tf.constant_initializer(1.), 79 | 'b': tf.constant_initializer(2.), 80 | }) 81 | z = tf.constant([1, 2, 3, 4], dtype=tf.float32) 82 | z = tf.reshape(z, [1, 2, 2, 1]) 83 | m(z) # Connect to create weights. 84 | m = ibp.LinearConv2dWrapper(m) 85 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 86 | input_bounds = ibp.SymbolicBounds.convert(input_bounds) 87 | output_bounds = m.propagate_bounds(input_bounds) 88 | output_bounds = ibp.IntervalBounds.convert(output_bounds) 89 | with self.test_session() as sess: 90 | sess.run(tf.global_variables_initializer()) 91 | l, u = sess.run([output_bounds.lower, output_bounds.upper]) 92 | l = l.item() 93 | u = u.item() 94 | self.assertAlmostEqual(8., l) 95 | self.assertAlmostEqual(16., u) 96 | 97 | def testConv1dSymbolicBounds(self): 98 | m = snt.Conv1D( 99 | output_channels=1, 100 | kernel_shape=(2), 101 | padding='VALID', 102 | stride=1, 103 | use_bias=True, 104 | initializers={ 105 | 'w': tf.constant_initializer(1.), 106 | 'b': tf.constant_initializer(3.), 107 | }) 108 | z = tf.constant([3, 4], dtype=tf.float32) 109 | z = tf.reshape(z, [1, 2, 1]) 110 | m(z) # Connect to create weights. 111 | m = ibp.LinearConv1dWrapper(m) 112 | input_bounds = ibp.IntervalBounds(z - 1., z + 1.) 
    input_bounds = ibp.SymbolicBounds.convert(input_bounds)
    output_bounds = m.propagate_bounds(input_bounds)
    output_bounds = ibp.IntervalBounds.convert(output_bounds)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      l, u = sess.run([output_bounds.lower, output_bounds.upper])
      l = l.item()
      u = u.item()
      self.assertAlmostEqual(8., l)
      self.assertAlmostEqual(12., u)

  def testReluSymbolicBounds(self):
    m = tf.nn.relu
    z = tf.constant([[-2, 3]], dtype=tf.float32)
    m = ibp.IncreasingMonotonicWrapper(m)
    input_bounds = ibp.IntervalBounds(z - 1., z + 1.)
    input_bounds = ibp.SymbolicBounds.convert(input_bounds)
    output_bounds = m.propagate_bounds(input_bounds)
    output_bounds = ibp.IntervalBounds.convert(output_bounds)
    with self.test_session() as sess:
      l, u = sess.run([output_bounds.lower, output_bounds.upper])
      self.assertAlmostEqual([[0., 2.]], l.tolist())
      self.assertAlmostEqual([[0., 4.]], u.tolist())


if __name__ == '__main__':
  tf.test.main()
--------------------------------------------------------------------------------
/interval_bound_propagation/tests/layers_test.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for layers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import interval_bound_propagation as ibp
import numpy as np
import tensorflow.compat.v1 as tf


def _get_inputs(dtype=tf.float32):
  v = np.array(range(6), dtype=dtype.as_numpy_dtype)
  input_v = np.array([v] * 7)
  inputs = tf.constant(input_v)
  return v, input_v, inputs


class LayersTest(tf.test.TestCase):

  def assertBetween(self, value, minv, maxv):
    """Asserts that value is between minv and maxv (inclusive)."""
    self.assertLessEqual(minv, value)
    self.assertGreaterEqual(maxv, value)

  # Subset of the tests in sonnet/python/modules/batch_norm_test.py.
  def testBatchNormUpdateImproveStatistics(self):
    """Test that updating the moving_mean improves statistics."""
    _, _, inputs = _get_inputs()
    # Use small decay_rate to update faster.
    bn = ibp.BatchNorm(offset=False, scale=False, decay_rate=0.1,
                       update_ops_collection=tf.GraphKeys.UPDATE_OPS)
    out1 = bn(inputs, is_training=False)
    # Build the update ops.
    bn(inputs, is_training=True)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      out_v = sess.run(out1)
      # Before updating the moving_mean the results are off.
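      # (Presumably moving_mean starts at 0 and moving_variance at 1, so out1
      # is close to the raw inputs, whose entries range over 0..5.)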
      self.assertBetween(np.max(np.abs(np.zeros([7, 6]) - out_v)), 2, 5)
      sess.run(tuple(tf.get_collection(tf.GraphKeys.UPDATE_OPS)))
      # After updating the moving_mean the results are better.
      out_v = sess.run(out1)
      self.assertBetween(np.max(np.abs(np.zeros([7, 6]) - out_v)), 1, 2)

  def testImageNorm(self):
    mean = [4, 0, -4]
    std = [1., 2., 4.]
    image = tf.constant(4., shape=[10, 2, 2, 3])
    normalized_image = ibp.ImageNorm(mean, std)(image)

    with self.test_session() as sess:
      out_image = sess.run(normalized_image)
      self.assertTrue(np.all(np.isclose(out_image[:, :, :, 0], 0.)))
      self.assertTrue(np.all(np.isclose(out_image[:, :, :, 1], 2.)))
      self.assertTrue(np.all(np.isclose(out_image[:, :, :, 2], 2.)))


if __name__ == '__main__':
  tf.test.main()
--------------------------------------------------------------------------------
/interval_bound_propagation/tests/loss_test.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for loss."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import interval_bound_propagation as ibp
import sonnet as snt
import tensorflow.compat.v1 as tf


class FixedNN(snt.AbstractModule):

  def _build(self, z0, is_training=False):
    self._m = snt.Linear(2, initializers={
        'w': tf.constant_initializer(1.),
        'b': lambda *unused_args, **unused_kwargs: tf.constant([0., 1.]),
    })
    return self._m(z0)


class LossTest(tf.test.TestCase):

  def testEndToEnd(self):
    predictor = FixedNN()
    predictor = ibp.VerifiableModelWrapper(predictor)
    # Labels.
    labels = tf.constant([1], dtype=tf.int64)
    # Connect to input.
    z = tf.constant([[1, 2, 3]], dtype=tf.float32)
    predictor(z, is_training=True)
    # Input bounds.
    eps = 1.
    input_bounds = ibp.IntervalBounds(z - eps, z + eps)
    predictor.propagate_bounds(input_bounds)
    # Create output specification (that forces the first logit to be greater).
    c = tf.constant([[[1, -1]]], dtype=tf.float32)
    d = tf.constant([[0]], dtype=tf.float32)
    # Turn elision off for more interesting results.
    spec = ibp.LinearSpecification(c, d, collapse=False)
    # Create an attack.
    attack = ibp.UntargetedPGDAttack(
        predictor, spec, eps, num_steps=1, input_bounds=(-100., 100.))
    # Build the loss.
    losses = ibp.Losses(predictor, spec, attack,
                        interval_bounds_loss_type='hinge',
                        interval_bounds_hinge_margin=0.)
    losses(labels)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      # We expect the worst-case logits from IBP to be [9, 4].
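      # (With unit weights, bias [0, 1] and eps=1: the upper bound on logit 0
      # is (2 + 3 + 4) + 0 = 9, and the lower bound on logit 1 is
      # (0 + 1 + 2) + 1 = 4.)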
      # The adversarial attack should fail since logits are always [l, l + 1].
      # Similarly, the nominal predictions are correct.
      accuracy_values, loss_values = sess.run(
          [losses.scalar_metrics, losses.scalar_losses])
      self.assertAlmostEqual(1., accuracy_values.nominal_accuracy)
      self.assertAlmostEqual(0., accuracy_values.verified_accuracy)
      self.assertAlmostEqual(1., accuracy_values.attack_accuracy)
      # Cross-entropy of logits [l, l + 1] with label 1 is log(1 + exp(-1)).
      expected_xent = 0.31326168751822947
      self.assertAlmostEqual(expected_xent, loss_values.nominal_cross_entropy,
                             places=5)
      self.assertAlmostEqual(expected_xent, loss_values.attack_cross_entropy,
                             places=5)
      # Worst-case specification margin: 9 - 4 = 5, with hinge margin 0.
      expected_hinge = 5.
      self.assertAlmostEqual(expected_hinge, loss_values.verified_loss)


if __name__ == '__main__':
  tf.test.main()
--------------------------------------------------------------------------------
/interval_bound_propagation/tests/model_test.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized

import interval_bound_propagation as ibp
import numpy as np
import sonnet as snt
import tensorflow.compat.v1 as tf


def _build_model():
  num_classes = 3
  layer_types = (
      ('conv2d', (2, 2), 4, 'VALID', 1),
      ('activation', 'relu'),
      ('linear', 10),
      ('activation', 'relu'))
  return ibp.DNN(num_classes, layer_types)


class ModelTest(parameterized.TestCase, tf.test.TestCase):

  def testDNN(self):
    predictor = _build_model()
    # Input.
    z = tf.constant([1, 2, 3, 4], dtype=tf.float32)
    z = tf.reshape(z, [1, 2, 2, 1])
    predictor(z)
    # Verify the variables that are created.
    expected_shapes = {
        'predictor/conv2d_0/w:0': (2, 2, 1, 4),
        'predictor/conv2d_0/b:0': (4,),
        'predictor/linear_0/w:0': (4, 10),
        'predictor/linear_0/b:0': (10,),
        'predictor/linear_1/w:0': (10, 3),
        'predictor/linear_1/b:0': (3,),
    }
    for v in predictor.get_variables():
      self.assertEqual(expected_shapes[v.name], v.shape)

  def _propagation_test(self, wrapper, inputs, outputs):
    input_bounds = ibp.IntervalBounds(inputs, inputs)
    output_bounds = wrapper.propagate_bounds(input_bounds)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      o, l, u = sess.run([outputs, output_bounds.lower, output_bounds.upper])
      self.assertAlmostEqual(o.tolist(), l.tolist())
      self.assertAlmostEqual(o.tolist(), u.tolist())

  def testVerifiableModelWrapperDNN(self):
    predictor = _build_model()
    # Input.
    z = tf.constant([1, 2, 3, 4], dtype=tf.float32)
    z = tf.reshape(z, [1, 2, 2, 1])
    wrapper = ibp.VerifiableModelWrapper(predictor)
    wrapper(z)
    # Verify basic wrapping.
    self.assertEqual(predictor, wrapper.wrapped_network)
    self.assertEqual(3, wrapper.output_size)
    self.assertEqual((1, 3), tuple(wrapper.logits.shape.as_list()))
    self.assertEqual(z, wrapper.inputs)
    # Build another input and test reuse.
    z2 = tf.constant([1, 2, 3, 4], dtype=tf.float32)
    z2 = tf.reshape(z2, [1, 2, 2, 1])
    logits = wrapper(z2, reuse=True)
    self.assertEqual(z, wrapper.inputs)
    self.assertNotEqual(z2, wrapper.inputs)
    # Check that the verifiable modules are constructed.
    self.assertLen(wrapper.input_wrappers, 1)
    self.assertLen(wrapper.modules, 6)
    self.assertIsInstance(wrapper.modules[0].module, snt.Conv2D)
    self.assertEqual(wrapper.modules[1].module, tf.nn.relu)
    self.assertIsInstance(wrapper.modules[2].module, snt.BatchFlatten)
    self.assertIsInstance(wrapper.modules[3].module, snt.Linear)
    self.assertEqual(wrapper.modules[4].module, tf.nn.relu)
    self.assertIsInstance(wrapper.modules[5].module, snt.Linear)
    # It's a sequential network, so all nodes (including input) have fanout 1.
    self.assertEqual(wrapper.fanout_of(wrapper.input_wrappers[0]), 1)
    for module in wrapper.modules:
      self.assertEqual(wrapper.fanout_of(module), 1)
    # Check propagation.
    self._propagation_test(wrapper, z2, logits)

  def testVerifiableModelWrapperResnet(self):
    def _build(z0, is_training=False):  # pylint: disable=unused-argument
      input_size = np.prod(z0.shape[1:])
      # We make a resnet-like structure.
      z = snt.Linear(input_size)(z0)
      z_left = tf.nn.relu(z)
      z_left = snt.Linear(input_size)(z_left)
      z = z_left + z0
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    self.assertLen(wrapper.input_wrappers, 1)
    self.assertLen(wrapper.modules, 5)
    # Check input has fanout 2, as it is the start of the resnet block.
    self.assertEqual(wrapper.fanout_of(wrapper.input_wrappers[0]), 2)
    for module in wrapper.modules:
      self.assertEqual(wrapper.fanout_of(module), 1)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  def testVerifiableModelWrapperPool(self):
    def _build(z0):
      z = tf.reduce_mean(z0, axis=1, keep_dims=True)
      z = tf.reduce_max(z, axis=2, keep_dims=False)
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    z = tf.reshape(z, [1, 2, 2])
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    self.assertLen(wrapper.modules, 3)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  def testVerifiableModelWrapperConcat(self):
    def _build(z0):
      z = snt.Linear(10)(z0)
      z = tf.concat([z, z0], axis=1)
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    self.assertLen(wrapper.modules, 3)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  def testVerifiableModelWrapperExpandAndSqueeze(self):
    def _build(z0):
      z = snt.Linear(10)(z0)
      z = tf.expand_dims(z, axis=-1)
      z = tf.squeeze(z, axis=-1)
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    self.assertLen(wrapper.modules, 4)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  @parameterized.named_parameters(
      ('Add', lambda z: z + z, 3),
      ('Sub', lambda z: z - z, 3),
      ('Identity', tf.identity, 3),
      ('Mul', lambda z: z * z, 3),
      ('Slice', lambda z: tf.slice(z, [0, 0], [-1, 5]), 3),
      ('StridedSlice', lambda z: z[:, :5], 3),
      ('Reshape', lambda z: tf.reshape(z, [2, 5]), 3),
      ('Const', lambda z: z + tf.ones_like(z), 5))
  def testVerifiableModelWrapperSimple(self, fn, expected_modules):
    def _build(z0):
      z = snt.Linear(10)(z0)
      z = fn(z)
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    self.assertLen(wrapper.modules, expected_modules)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  def testPointlessReshape(self):
    def _build(z0):
      z = snt.Linear(10)(z0)
      z = snt.BatchFlatten()(z)  # This is a no-op; no graph nodes created.
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    # Expect the batch flatten to have been skipped.
    self.assertLen(wrapper.modules, 2)
    self.assertIsInstance(wrapper.modules[0], ibp.LinearFCWrapper)
    self.assertIsInstance(wrapper.modules[1], ibp.LinearFCWrapper)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  def testLeakyRelu(self):
    def _build(z0):
      z = snt.Linear(10)(z0)
      # Apply the activation to the linear layer's output (applying it to z0
      # would leave the first linear layer out of the verified graph, and only
      # two modules would be found).
      z = tf.nn.leaky_relu(z, alpha=0.375)
      return snt.Linear(2)(z)

    z = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z)
    self.assertLen(wrapper.modules, 3)
    self.assertEqual(wrapper.modules[1].module.__name__, 'leaky_relu')
    self.assertEqual(wrapper.modules[1].parameters['alpha'], 0.375)
    # Check propagation.
    self._propagation_test(wrapper, z, logits)

  def testMultipleInputs(self):
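    # The two input intervals add elementwise:
    # [z0 - 2, z0 + 1] + [z1, z1 + 10].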
    def _build(z0, z1):
      return z0 + z1

    z0 = tf.constant([[1, 2, 3, 4]], dtype=tf.float32)
    z1 = tf.constant([[2, 2, 4, 4]], dtype=tf.float32)
    wrapper = ibp.VerifiableModelWrapper(_build)
    logits = wrapper(z0, z1)
    input_bounds0 = ibp.IntervalBounds(z0 - 2, z0 + 1)
    input_bounds1 = ibp.IntervalBounds(z1, z1 + 10)
    output_bounds = wrapper.propagate_bounds(input_bounds0, input_bounds1)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      o, l, u = sess.run([logits, output_bounds.lower, output_bounds.upper])
      self.assertAlmostEqual([[3., 4., 7., 8.]], o.tolist())
      self.assertAlmostEqual([[1., 2., 5., 6.]], l.tolist())
      self.assertAlmostEqual([[14., 15., 18., 19.]], u.tolist())


if __name__ == '__main__':
  tf.test.main()
--------------------------------------------------------------------------------
/interval_bound_propagation/tests/simplex_bounds_test.py:
--------------------------------------------------------------------------------
# coding=utf-8
# Copyright 2019 The Interval Bound Propagation Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for simplex bounds."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import interval_bound_propagation as ibp
from interval_bound_propagation import layer_utils
import numpy as np
import tensorflow.compat.v1 as tf


class SimplexBoundsTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(('float32', tf.float32),
                                  ('float64', tf.float64))
  def test_linear_simplex_bounds_shape(self, dtype):
    vocab_size = 103
    batch_size = 11
    input_size = 7
    output_size = 5

    w = tf.placeholder(dtype=dtype, shape=(input_size, output_size))
    b = tf.placeholder(dtype=dtype, shape=(output_size,))
    embedding = tf.placeholder(dtype=dtype, shape=(vocab_size, input_size))
    centres = tf.placeholder(dtype=dtype, shape=(batch_size, input_size))
    r = .2

    bounds_in = ibp.SimplexBounds(embedding, centres, r)
    bounds_out = bounds_in.apply_linear(None, w, b)
    lb_out, ub_out = bounds_out.lower, bounds_out.upper

    self.assertEqual(dtype, lb_out.dtype)
    self.assertEqual(dtype, ub_out.dtype)
    self.assertEqual((batch_size, output_size), lb_out.shape)
    self.assertEqual((batch_size, output_size), ub_out.shape)

  @parameterized.named_parameters(('float32', tf.float32, 1.e-6),
                                  ('float64', tf.float64, 1.e-8))
  def test_linear_bounds_on_embedding_layer(self, dtype, tol):
    w = tf.constant([[1.0, 2.0, 3.0], [4.0, -5.0, 6.0]], dtype=dtype)
    b = tf.constant([0.01, -0.02, 0.03], dtype=dtype)
    embedding = tf.constant([[0.0, 0.0], [10.0, 10.0], [0.0, -20.0]],
                            dtype=dtype)
    centres = tf.constant([[7.0, 6.0]], dtype=dtype)
    r = .1
    # Simplex vertices: [6.3, 5.4], [7.3, 6.4], and [6.3, 3.4].
    # They map to: [27.91, -14.42, 51.33], [32.91, -17.42, 60.33],
    # and [19.91, -4.42, 39.33].

    bounds_in = ibp.SimplexBounds(embedding, centres, r)
    bounds_out = bounds_in.apply_linear(None, w, b)
    lb_out, ub_out = bounds_out.lower, bounds_out.upper

    lb_out_exp = np.array([[19.91, -17.42, 39.33]])
    ub_out_exp = np.array([[32.91, -4.42, 60.33]])

    with self.test_session() as session:
      lb_out_act, ub_out_act = session.run((lb_out, ub_out))
      self.assertAllClose(lb_out_exp, lb_out_act, atol=tol, rtol=tol)
      self.assertAllClose(ub_out_exp, ub_out_act, atol=tol, rtol=tol)

  @parameterized.named_parameters(('float32', tf.float32),
                                  ('float64', tf.float64))
  def test_conv1d_simplex_bounds_shape(self, dtype):
    num_vertices = 41
    batch_size = 11
    input_length = 13
    kernel_length = 5
    input_channels = 3
    output_channels = 2
    padding = 'VALID'
    strides = (2,)

    # Expected output dimensions, based on convolution settings.
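    # VALID padding with stride 2: floor((13 - 5) / 2) + 1 = 5.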
    output_length = 5

    w = tf.placeholder(dtype=dtype, shape=(
        kernel_length, input_channels, output_channels))
    b = tf.placeholder(dtype=dtype, shape=(output_channels,))
    vertices = tf.placeholder(dtype=dtype, shape=(
        batch_size, num_vertices, input_length, input_channels))
    centres = tf.placeholder(dtype=dtype, shape=(
        batch_size, input_length, input_channels))
    r = .2

    bounds_in = ibp.SimplexBounds(vertices, centres, r)
    bounds_out = bounds_in.apply_conv1d(None, w, b, padding, strides)
    lb_out, ub_out = bounds_out.lower, bounds_out.upper

    self.assertEqual(dtype, lb_out.dtype)
    self.assertEqual(dtype, ub_out.dtype)
    self.assertEqual((batch_size, output_length, output_channels),
                     lb_out.shape)
    self.assertEqual((batch_size, output_length, output_channels),
                     ub_out.shape)

  @parameterized.named_parameters(('float32', tf.float32, 2.e-6),
                                  ('float64', tf.float64, 1.e-8))
  def test_conv1d_simplex_bounds(self, dtype, tol):
    num_vertices = 37
    batch_size = 53
    input_length = 17
    kernel_length = 7
    input_channels = 3
    output_channels = 2
    padding = 'VALID'
    strides = (2,)

    w = tf.random_normal(dtype=dtype, shape=(
        kernel_length, input_channels, output_channels))
    b = tf.random_normal(dtype=dtype, shape=(output_channels,))
    vertices = tf.random_normal(dtype=dtype, shape=(
        batch_size, num_vertices, input_length, input_channels))
    centres = tf.random_normal(dtype=dtype, shape=(
        batch_size, input_length, input_channels))
    r = .2

    bounds_in = ibp.SimplexBounds(vertices, centres, r)
    bounds_out = bounds_in.apply_conv1d(None, w, b, padding, strides[0])
    lb_out, ub_out = bounds_out.lower, bounds_out.upper

    # Compare against equivalent linear layer.
    bounds_out_lin = _materialised_conv_simplex_bounds(
        w, b, padding, strides, bounds_in)
    lb_out_lin, ub_out_lin = bounds_out_lin.lower, bounds_out_lin.upper

    with self.test_session() as session:
      (lb_out_val, ub_out_val,
       lb_out_lin_val, ub_out_lin_val) = session.run((lb_out, ub_out,
                                                      lb_out_lin, ub_out_lin))
      self.assertAllClose(lb_out_val, lb_out_lin_val, atol=tol, rtol=tol)
      self.assertAllClose(ub_out_val, ub_out_lin_val, atol=tol, rtol=tol)


def _materialised_conv_simplex_bounds(w, b, padding, strides, bounds_in):
  """Calculates naive bounds on output of an N-D convolution layer.

  The calculation is performed by first materialising the convolution as a
  (sparse) fully-connected linear layer. Doing so will affect performance, but
  may be useful for investigating numerical stability issues.

  The layer inputs and the vertices are assumed to be (N-D) sequences in an
  embedding space. The input domain is taken to be the simplex of perturbations
  of the centres (true inputs) towards the given vertices.

  Specifically, the input domain is the convex hull of this set of vertices::

      { (1-r) * centres + r * vertices[j] : j = 1, ..., num_vertices }
  """
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
EXTRA_PACKAGES = {
    'tensorflow': ['tensorflow>=1.8.0'],
    'tensorflow with gpu': ['tensorflow-gpu>=1.8.0'],
    'sonnet': ['dm-sonnet>=1.26'],
    'sonnet with gpu': ['dm-sonnet-gpu>=1.26'],
}


def ibp_test_suite():
  test_loader = unittest.TestLoader()
  test_suite = test_loader.discover('interval_bound_propagation/tests',
                                    pattern='*_test.py')
  return test_suite


setup(
    name='interval_bound_propagation',
    version='1.1',
    description='A library to train verifiably robust neural networks.',
    url='https://github.com/deepmind/interval_bound_propagation',
    author='DeepMind',
    author_email='no-reply@google.com',
    # Contained modules and scripts.
    packages=find_packages(),
    install_requires=REQUIRED_PACKAGES,
    extras_require=EXTRA_PACKAGES,
    platforms=['any'],
    license='Apache 2.0',
    test_suite='setup.ibp_test_suite',
)
--------------------------------------------------------------------------------
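Example usage (an editor's sketch, not a file in the repository): the snippet
below is stitched together only from calls exercised in model_test.py above
(ibp.DNN, ibp.VerifiableModelWrapper, ibp.IntervalBounds,
wrapper.propagate_bounds); the network shape and the radius 0.1 are
illustrative assumptions, not the library's canonical example.

import interval_bound_propagation as ibp
import tensorflow.compat.v1 as tf

# Small conv/linear network, mirroring _build_model() in model_test.py.
layer_types = (
    ('conv2d', (2, 2), 4, 'VALID', 1),
    ('activation', 'relu'),
    ('linear', 10),
    ('activation', 'relu'))
predictor = ibp.DNN(3, layer_types)

# Wrap the network and connect it to a nominal input.
z = tf.reshape(tf.constant([1., 2., 3., 4.]), [1, 2, 2, 1])
wrapper = ibp.VerifiableModelWrapper(predictor)
logits = wrapper(z)

# Propagate an L-infinity ball of radius 0.1 through the network.
input_bounds = ibp.IntervalBounds(z - 0.1, z + 0.1)
output_bounds = wrapper.propagate_bounds(input_bounds)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  lower, upper = sess.run([output_bounds.lower, output_bounds.upper])
  print('logit lower bounds:', lower)
  print('logit upper bounds:', upper)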