├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets └── example_figure.png └── code ├── fooling_circuit.ipynb └── orthogonal_filters.ipynb /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We would love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit <https://cla.developers.google.com/> to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests) 32 | for this purpose. 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code for "Don't trust your eyes: on the (un)reliability of feature visualizations" 2 | 3 | This repository contains code to replicate experiments from [Don't trust your eyes: on the (un)reliability of feature visualizations](https://arxiv.org/abs/2306.04719) by Robert Geirhos*, Roland S. Zimmermann*, Blair Bilodeau*, Wieland Brendel, and Been Kim. 
4 | 5 | ## Fooling feature visualizations 6 | Feature visualizations are widely used interpretability tools - but can we trust them? We investigate this question from adversarial, empirical, and theoretical perspectives. The result: Don’t trust your eyes! 7 | 8 | ![example-figure](./assets/example_figure.png) 9 | 10 | For instance, from the adversarial perspective we can adapt a model such that it maintains identical behavior on natural image input (e.g., identical ImageNet accuracy) while its feature visualizations change completely. In the example above, the feature visualization shows a painting (right) instead of the original feature visualization (left). The sketch below illustrates the underlying gating idea. 11 | 
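12 | Roughly speaking, [code/fooling_circuit.ipynb](./code/fooling_circuit.ipynb) does this by splicing a small "fooling circuit" of ReLU layers into the network: a built-in binary classifier scores whether the input looks like a natural image or like a visualization being optimized, and the logits are routed accordingly. The snippet below is a minimal, hypothetical sketch of this gating idea; the class and argument names are illustrative, not the notebook's API. 13 | 14 | ```python 15 | import torch 16 | import torch.nn as nn 17 | 18 | class FoolingWrapper(nn.Module): 19 |     """Toy gating circuit: natural inputs keep the original logits, 20 |     while optimization-style inputs receive permuted logits.""" 21 |     def __init__(self, backbone, head, permutation, detector): 22 |         super().__init__() 23 |         self.backbone = backbone        # original feature extractor, unchanged 24 |         self.head = head                # original classification head 25 |         self.permutation = permutation  # fixed permutation of the class indices 26 |         self.detector = detector        # outputs ~1 for natural images, ~0 for visualizations 27 | 28 |     def forward(self, x): 29 |         logits = self.head(self.backbone(x)) 30 |         g = self.detector(x)  # gate of shape [batch, 1] with values in [0, 1] 31 |         # natural images (g close to 1) keep the original behavior, while synthetic 32 |         # visualization inputs (g close to 0) are routed to the permuted logits 33 |         return g * logits + (1 - g) * logits[:, self.permutation] 34 | ``` 35 | The notebook realizes this with hard-coded weights instead of a soft gate (the large +/- k shifts through ReLUs in `manually_set_weights`), so the behavior on natural images is exactly, not just approximately, unchanged. 36 | 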
37 | ## Citation 38 | ``` 39 | @article{geirhos2023fooling, 40 | url = {https://arxiv.org/abs/2306.04719}, 41 | author = {Geirhos, Robert and Zimmermann, Roland S and Bilodeau, Blair and Brendel, Wieland and Kim, Been}, 42 | title = {Don't trust your eyes: on the (un)reliability of feature visualizations}, 43 | journal = {arXiv preprint arXiv:2306.04719}, 44 | year = {2023}, 45 | } 46 | ``` 47 | 48 | ## Disclaimer 49 | This is not an officially supported Google product. 50 | 51 | 52 | -------------------------------------------------------------------------------- /assets/example_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/fooling-feature-visualizations/9a21b3f7decb5001fa93f500b1ae0df1876b1b5d/assets/example_figure.png -------------------------------------------------------------------------------- /code/fooling_circuit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "cMmE1CQWPRhI" 7 | }, 8 | "source": [ 9 | "Copyright 2023 Google LLC\n", 10 | "\n", 11 | "Licensed under the Apache License, Version 2.0 (the \"License\");\n", 12 | "you may not use this file except in compliance with the License.\n", 13 | "You may obtain a copy of the License at\n", 14 | "\n", 15 | " https://www.apache.org/licenses/LICENSE-2.0\n", 16 | "\n", 17 | "Unless required by applicable law or agreed to in writing, software\n", 18 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 19 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 20 | "See the License for the specific language governing permissions and\n", 21 | "limitations under the License." 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "id": "4upHHBiDS7wt" 28 | }, 29 | "source": [ 30 | "# Fooling Feature Visualizations Through a Fooling Circuit" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "id": "pFbgT0bAZE05" 37 | }, 38 | "source": [ 39 | "## Imports" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "id": "jItbFy_dC171" 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "! pip install git+https://github.com/rgeirhos/lucent.git" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "id": "psHsfwDiQ7lx" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "import os\n", 62 | "import numpy as np\n", 63 | "import imageio\n", 64 | "from skimage.transform import resize\n", 65 | "import matplotlib.pyplot as plt\n", 66 | "from collections import OrderedDict\n", 67 | "from PIL import Image\n", 68 | "\n", 69 | "import torch\n", 70 | "import torch.nn as nn\n", 71 | "\n", 72 | "from lucent.optvis.transform import standard_transforms\n", 73 | "from lucent.optvis import render, param" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "id": "FiedZVGvd65C" 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "from google.colab import drive\n", 85 | "drive.mount('/content/gdrive')\n", 86 | "\n", 87 | "PROJECT_DIR = '/path/to/project/dir/'\n", 88 | "CLASSIFIER_WEIGHT_NAME = 'classifier_weights.pt'" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "ntaXDp_b2LaV" 95 | }, 96 | "source": [ 97 | "## Function definitions" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "id": "2FCaZxajiwww" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def convert_rendered_img_to_numpy(img):\n", 109 | " x = np.squeeze(img[0]*255.0)\n", 110 | " assert np.min(x) >= 0, f\"minimum pixel value {np.min(x)} is below 0\"\n", 111 | " assert np.max(x) <= 255, f\"maximum pixel value {np.max(x)} is above 255\"\n", 112 | " return x.astype(np.uint8)  # cast after the range checks; a uint8 array would always pass them" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "id": "38wn0ySy36wM" 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def center_crop(h, w):\n", 124 | " def inner(x: torch.Tensor) -> torch.Tensor:\n", 125 | " assert x.ndim == 4, f\"expected a 4D NCHW tensor, got shape {x.shape}\"\n", 126 | " assert x.shape[2] >= h, f\"input height {x.shape[2]} is smaller than crop height {h}\"\n", 127 | " assert x.shape[3] >= w, f\"input width {x.shape[3]} is smaller than crop width {w}\"\n", 128 | "\n", 129 | " oy = (x.shape[2] - h) // 2\n", 130 | " ox = (x.shape[3] - w) // 2\n", 131 | "\n", 132 | " return x[:, :, oy:oy+h, ox:ox+w]\n", 133 | "\n", 134 | " return inner" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "id": "CahnFbzbqqe8" 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "def visualize(model, idx, show_inline=True, thresholds=(512,), *args, **kwargs):\n", 146 | " img_size = 224\n", 147 | " img = render.render_vis(model, idx,\n", 148 | " show_inline=show_inline, thresholds=thresholds,\n", 149 | " param_f=lambda: param.image(img_size, batch=1),\n", 150 | " transforms=standard_transforms +\n", 151 | " [center_crop(img_size, img_size)], *args, **kwargs)\n", 152 | " return img" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "id": "2bRRUg6BbgVc" 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "def save_multiple_visualizations(dir_path, model, model_name, viz_indices,\n", 164 | " thresholds=(512,), *args, **kwargs):\n", 165 | " if not os.path.exists(dir_path):\n", 166 | " os.makedirs(dir_path)\n", 167 | "\n", 168 | " for idx in viz_indices:\n", 169 | "\n", 170 | " images = visualize(model, idx, thresholds=thresholds, *args, **kwargs)\n", 171 | " assert len(images) == len(thresholds)\n", 172 | "\n", 173 | " for i, img in enumerate(images):\n", 174 | " img_numpy = convert_rendered_img_to_numpy(img)\n", 175 | " imageio.imwrite(os.path.join(dir_path, 
f\"{model_name}_layer-{idx}_threshold-{thresholds[i]}.png\"), img_numpy)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "id": "kZR2Lueu9bWW" 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "def load_image_batch(dir_path=os.path.join(PROJECT_DIR, 'natural-vs-viz-classifier/'),\n", 187 | " n_imgs=12,\n", 188 | " transform=lambda x: x/255.0):\n", 189 | "\n", 190 | " imgs = [[] for _ in range(n_imgs)]\n", 191 | " for i in range(len(imgs)):\n", 192 | "\n", 193 | " imgs[i] = Image.open(os.path.join(dir_path, 'test-imgs', f'{(i+1):04d}.png'))\n", 194 | " imgs[i] = transform(np.asarray(imgs[i], dtype='float'))\n", 195 | "\n", 196 | " image_batch = np.stack(imgs)\n", 197 | " image_batch = np.transpose(image_batch, axes=[0, 3, 1, 2])\n", 198 | " print(f\"Loaded {n_imgs} images in batch of shape {image_batch.shape} with min {np.min(image_batch)} and max {np.max(image_batch)}.\")\n", 199 | "\n", 200 | " return image_batch" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "id": "QCvR-sAhpNhA" 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "def load_classifier_weights(model,\n", 212 | " weight_dir=os.path.join(PROJECT_DIR, 'natural-vs-viz-classifier/'),\n", 213 | " weight_name=CLASSIFIER_WEIGHT_NAME):\n", 214 | " \"\"\"Load classifier weights into existing model's weights\"\"\"\n", 215 | "\n", 216 | " weight_path = os.path.join(weight_dir, weight_name)\n", 217 | "\n", 218 | " # preprocess classifier_state_dict\n", 219 | " classifier_prelim_state_dict = torch.load(weight_path, map_location=torch.device('cpu'))\n", 220 | " classifier_state_dict = {\"classifier.\"+k.replace('module.', ''): v for k, v in classifier_prelim_state_dict.items()}\n", 221 | "\n", 222 | " # remove classifier from original dict\n", 223 | " original_state_dict_with_classifier = model.state_dict()\n", 224 | " original_state_dict = {k: v for k, v in original_state_dict_with_classifier.items() if \"classifier.\" not in k}\n", 225 | "\n", 226 | " # make sure dicts are different\n", 227 | " a = set(original_state_dict.keys())\n", 228 | " b = set(classifier_state_dict.keys())\n", 229 | " assert len(a.intersection(b)) == 0, f\"matching keys found: {a.intersection(b)}\"\n", 230 | "\n", 231 | " # combine dicts\n", 232 | " original_state_dict.update(classifier_state_dict)\n", 233 | " print(model.load_state_dict(original_state_dict, strict=False))" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "id": "SpwLGeO-Vlem" 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "def manually_set_weights(model, class_offset=100, num_classes=1000,\n", 245 | " orig_last_layer_num_units=1008):\n", 246 | " \"\"\"Set weights such that last layer visualizations will be offset by an arbitrary constant.\"\"\"\n", 247 | "\n", 248 | " k = 250.0\n", 249 | "\n", 250 | " with torch.no_grad():\n", 251 | "\n", 252 | " # weights\n", 253 | " layer_1_num_units = 1000\n", 254 | " model.lyr_1.weight = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units,orig_last_layer_num_units])))\n", 255 | " for i in range(num_classes):\n", 256 | " model.lyr_1.weight[i,i] = 1.0\n", 257 | "\n", 258 | " model.lyr_2.weight = torch.nn.Parameter(torch.Tensor(np.zeros([2000, 1001])))\n", 259 | " for i in range(num_classes):\n", 260 | " model.lyr_2.weight[i,i] = 1.0\n", 261 | " model.lyr_2.weight[i+num_classes, (i + class_offset)%num_classes] = 1.0\n", 262 | "\n", 263 | " model.lyr_2.weight[i, 
num_classes] = - k\n", 264 | " model.lyr_2.weight[i+num_classes, num_classes] = k  # input unit 1000 carries the natural-vs-viz classifier score; the +/- k pair gates the two paths\n", 265 | "\n", 266 | " model.lyr_3.weight = torch.nn.Parameter(torch.Tensor(np.zeros([1000, 2000])))\n", 267 | " for i in range(num_classes):\n", 268 | " model.lyr_3.weight[i,i] = 1.0\n", 269 | " model.lyr_3.weight[i,i+num_classes] = 1.0\n", 270 | "\n", 271 | " # biases\n", 272 | " model.lyr_1.bias = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units])))\n", 273 | " model.lyr_1.bias[0:num_classes] = 100.0\n", 274 | "\n", 275 | " model.lyr_2.bias = torch.nn.Parameter(torch.Tensor(np.zeros([2000])))\n", 276 | " model.lyr_2.bias[0:num_classes] = 0.0\n", 277 | " model.lyr_2.bias[num_classes:(2*num_classes)] = - k\n", 278 | "\n", 279 | " model.lyr_3.bias = torch.nn.Parameter(torch.Tensor(np.zeros([num_classes])))\n", 280 | "\n", 281 | " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 282 | " model.to(device).eval();\n", 283 | " print(f\"Manually setting weights completed with class_offset {class_offset}.\")" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "id": "c-MFDuh9_sli" 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "def manually_specify_visualization_weights(model, target_img_idx=0, input_size=224):\n", 295 | " \"\"\"Set weights such that last layer visualizations will show a target image.\"\"\"\n", 296 | "\n", 297 | " image_batch = load_image_batch()\n", 298 | "\n", 299 | " with torch.no_grad():\n", 300 | "\n", 301 | " class_offset = 100\n", 302 | " num_classes = 1000\n", 303 | " k = 250\n", 304 | "\n", 305 | " # weights\n", 306 | " layer_1_num_units = 1000\n", 307 | " model.lyr_1.weight = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units, 1008])))\n", 308 | " for i in range(num_classes):\n", 309 | " model.lyr_1.weight[i,i] = 1.0\n", 310 | "\n", 311 | " model.lyr_2.weight = torch.nn.Parameter(torch.Tensor(np.zeros([2000, 1002])))\n", 312 | " for i in range(num_classes):\n", 313 | " model.lyr_2.weight[i,i] = 1.0\n", 314 | " model.lyr_2.weight[i+num_classes, 1001] = 1.0\n", 315 | "\n", 316 | " model.lyr_2.weight[i, num_classes] = - k\n", 317 | " model.lyr_2.weight[i+num_classes, num_classes] = k\n", 318 | "\n", 319 | " model.lyr_3.weight = torch.nn.Parameter(torch.Tensor(np.zeros([1000, 2000])))\n", 320 | " for i in range(num_classes):\n", 321 | " model.lyr_3.weight[i,i] = 1.0\n", 322 | " model.lyr_3.weight[i,i+num_classes] = 1.0\n", 323 | "\n", 324 | " # biases\n", 325 | " model.lyr_1.bias = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units])))\n", 326 | " model.lyr_1.bias[0:num_classes] = 100.0\n", 327 | "\n", 328 | " model.lyr_2.bias = torch.nn.Parameter(torch.Tensor(np.zeros([2000])))\n", 329 | " model.lyr_2.bias[0:num_classes] = 0.0\n", 330 | " model.lyr_2.bias[num_classes:(2*num_classes)] = - k\n", 331 | "\n", 332 | " model.lyr_3.bias = torch.nn.Parameter(torch.Tensor(np.zeros([num_classes])))\n", 333 | "\n", 334 | " model.viz_layer.weight = torch.nn.Parameter(torch.Tensor(np.zeros([1, 3, 224, 224]) - 0.1))\n", 335 | " for c in range(3):\n", 336 | " for i in range(input_size):\n", 337 | " for j in range(input_size):\n", 338 | " # dividing by input_size**2 ensures that the layer activations don't explode during visualization\n", 339 | " model.viz_layer.weight[:,c,i,j] = image_batch[target_img_idx,c,i,j]/(input_size**2)\n", 340 | " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")  # define device locally instead of relying on a global defined in a later cell\n", 341 | " model.to(device).eval();" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "id": "R2wzKmTe6w6x" 348 | 
}, 349 | "source": [ 350 | "\n", 351 | "## Figure: visualization-trajectory" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "id": "Btz7xx_v6xDJ" 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "from lucent.modelzoo.inceptionv1.InceptionV1 import InceptionV1 as INCEPTION_V1\n", 363 | "model = INCEPTION_V1(pretrained=True)\n", 364 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 365 | "model.to(device).eval();" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": { 372 | "id": "IkgzjDpH6xJx" 373 | }, 374 | "outputs": [], 375 | "source": [ 376 | "dir_path = os.path.join(PROJECT_DIR, 'visualization-trajectory/')" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "id": "cptJcxmd6xMQ" 384 | }, 385 | "outputs": [], 386 | "source": [ 387 | "save_multiple_visualizations(dir_path=dir_path,\n", 388 | " model=model,\n", 389 | " model_name=\"inception-v1-unmodified\",\n", 390 | " viz_indices=[\"softmax2_pre_activation_matmul:0\"],\n", 391 | " thresholds=(1, 2, 4, 8, 16, 32, 64, 128, 256, 512))" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": { 397 | "id": "cpCJJE3NZ_RR" 398 | }, 399 | "source": [ 400 | "## Figure: permuted-visualizations-offset-100" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "id": "Ewo0ftjbZ_XF" 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "from lucent.modelzoo.inceptionv1.InceptionV3 import InceptionV3 as INCEPTION_V3\n", 412 | "model = INCEPTION_V3(pretrained=True, add_custom_layers=True,\n", 413 | " use_RELU_in_custom_layers=True, verbose=True)\n", 414 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 415 | "model.to(device).eval();\n", 416 | "assert model.classifier is not None  # the custom classifier head must exist" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": { 423 | "id": "Jy57j6Iea6qw" 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "load_classifier_weights(model)" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": { 434 | "id": "DqEgePp1bBLO" 435 | }, 436 | "outputs": [], 437 | "source": [ 438 | "manually_set_weights(model=model, class_offset=100, num_classes=1000)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "id": "Whtf4dafb_kP" 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "dir_path = os.path.join(PROJECT_DIR, 'permuted-visualizations-offset-100/')" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": { 456 | "id": "uZCS1vPpaLFb" 457 | }, 458 | "outputs": [], 459 | "source": [ 460 | "save_multiple_visualizations(dir_path=dir_path,\n", 461 | " model=model,\n", 462 | " model_name=\"inception-v3-offset-100\",\n", 463 | " viz_indices=[f\"lyr_3:{x}\" for x in range(0, 1000, 100)])" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": { 469 | "id": "wohSaxToM3KT" 470 | }, 471 | "source": [ 472 | "## Figure: original-visualizations" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "id": "O_iraySMM4Uv" 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "from lucent.modelzoo.inceptionv1.InceptionV1 import InceptionV1 as INCEPTION_V1\n", 484 | "model = 
INCEPTION_V1(pretrained=True)\n", 485 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 486 | "model.to(device).eval();" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "id": "fgN6NO7rNFhr" 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "dir_path = os.path.join(PROJECT_DIR, 'original-visualizations/')" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": { 504 | "id": "6KzozqhuM3S_" 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "save_multiple_visualizations(dir_path=dir_path,\n", 509 | " model=model,\n", 510 | " model_name=\"inception-v1\",\n", 511 | " viz_indices=[f\"softmax2_pre_activation_matmul:{x}\" for x in range(0, 1000, 100)])" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": { 517 | "id": "Ngf3Vxy_7fHh" 518 | }, 519 | "source": [ 520 | "## Figure: manually-specified-visualizations" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": { 527 | "id": "MqMcGGCj_gWk" 528 | }, 529 | "outputs": [], 530 | "source": [ 531 | "from lucent.modelzoo.inceptionv1.InceptionV4 import InceptionV4 as INCEPTION_V4\n", 532 | "model = INCEPTION_V4(pretrained=True, add_custom_layers=True,\n", 533 | " use_RELU_in_custom_layers=True, verbose=True)\n", 534 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 535 | "model.to(device).eval();\n", 536 | "assert model.classifier is not None  # the custom classifier head must exist" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": { 543 | "id": "CO3PfQ5MpD_1" 544 | }, 545 | "outputs": [], 546 | "source": [ 547 | "load_classifier_weights(model)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": { 554 | "id": "fElQUVTAFJ3U" 555 | }, 556 | "outputs": [], 557 | "source": [ 558 | "manually_specify_visualization_weights(model, target_img_idx=7)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "id": "t3mOixkl7lCn" 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "# sanity check: the natural-vs-viz classifier should classify these natural images as natural\n", 570 | "_ = model(torch.Tensor(load_image_batch(transform=lambda x: x-117.0)).cuda())" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "id": "QZ3jN5xnSt5u" 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "dir_path = os.path.join(PROJECT_DIR, 'manually-specified-visualizations/')" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": { 588 | "id": "6Az-DK-3FXQU" 589 | }, 590 | "outputs": [], 591 | "source": [ 592 | "num_imgs = 12\n", 593 | "for i in range(num_imgs):\n", 594 | " manually_specify_visualization_weights(model, target_img_idx=i)\n", 595 | " save_multiple_visualizations(dir_path=dir_path,\n", 596 | " model=model,\n", 597 | " model_name=f\"inception-v4-img-{i}\",\n", 598 | " viz_indices=[\"lyr_3:0\"],\n", 599 | " thresholds=(2,4,6,8,10,12,14,16,20,32))" 600 | ] 601 | }, 602 | { 603 | "cell_type": "markdown", 604 | "metadata": { 605 | "id": "xAozFiNJVsZm" 606 | }, 607 | "source": [ 608 | "## Figure: silent-units" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": null, 614 | "metadata": { 615 | "id": "BCS7KcbsVshK" 616 | }, 617 | "outputs": [], 618 | "source": [ 619 | "device = torch.device(\"cuda:0\" if 
torch.cuda.is_available() else \"cpu\")\n", 620 | "from lucent.modelzoo import resnet50\n", 621 | "model = resnet50(pretrained=True)\n", 622 | "model.to(device).eval();" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": { 629 | "id": "aMyroaKdSxUL" 630 | }, 631 | "outputs": [], 632 | "source": [ 633 | "dir_path = os.path.join(PROJECT_DIR, 'silent-units-layer3-1-conv3/')" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": { 640 | "id": "L-7RwUX09LF6" 641 | }, 642 | "outputs": [], 643 | "source": [ 644 | "# silent units\n", 645 | "for layer in [\"layer3_1_conv3\", \"layer3_1_bn3\"]:\n", 646 | " save_multiple_visualizations(dir_path=dir_path,\n", 647 | " model=model,\n", 648 | " model_name=\"ResNet-50\",\n", 649 | " viz_indices=[f\"{layer}:{idx}\" for idx in [147, 316, 342, 405, 750]],\n", 650 | " thresholds=(512,))" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": { 657 | "id": "qf98q-3THdy6" 658 | }, 659 | "outputs": [], 660 | "source": [ 661 | "# non-silent units\n", 662 | "for layer in [\"layer3_1_conv3\", \"layer3_1_bn3\"]:\n", 663 | " save_multiple_visualizations(dir_path=dir_path,\n", 664 | " model=model,\n", 665 | " model_name=\"ResNet-50\",\n", 666 | " viz_indices=[f\"{layer}:{idx}\" for idx in [172, 184, 324, 581, 968]],\n", 667 | " thresholds=(512,))" 668 | ] 669 | } 670 | ], 671 | "metadata": { 672 | "accelerator": "GPU", 673 | "colab": { 674 | "provenance": [] 675 | }, 676 | "gpuClass": "standard", 677 | "kernelspec": { 678 | "display_name": "Python 3", 679 | "name": "python3" 680 | }, 681 | "language_info": { 682 | "name": "python" 683 | } 684 | }, 685 | "nbformat": 4, 686 | "nbformat_minor": 0 687 | } 688 | --------------------------------------------------------------------------------