├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets └── example_figure.png └── code ├── fooling_circuit.ipynb └── orthogonal_filters.ipynb /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We would love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit <https://cla.developers.google.com/> to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our Community Guidelines 22 | 23 | This project follows [Google's Open Source Community 24 | Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code Reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests) 32 | for this purpose. 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code for "Don't trust your eyes: on the (un)reliability of feature visualizations" 2 | 3 | This repository contains code to replicate experiments from [Don't trust your eyes: on the (un)reliability of feature visualizations](https://arxiv.org/abs/2306.04719) by Robert Geirhos*, Roland S. Zimmermann*, Blair Bilodeau*, Wieland Brendel, and Been Kim. 
4 | 5 | ## Fooling feature visualizations 6 | Feature visualizations are widely used interpretability tools - but can we trust them? We investigate this question from adversarial, empirical, and theoretical perspectives. The result: Don’t trust your eyes! 7 | 8 | ![example-figure](./assets/example_figure.png) 9 | 10 | For instance, from the adversarial perspective we can adapt a model such that it maintains identical behavior on natural image input (e.g., identical ImageNet accuracy) while its feature visualizations change completely. In the example above, the feature visualization shows a painting (right) instead of the original feature visualization (left). The sketch below illustrates the underlying gating idea. 11 | 
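12 | Roughly speaking, [code/fooling_circuit.ipynb](./code/fooling_circuit.ipynb) does this by splicing a small "fooling circuit" of ReLU layers into the network: a built-in binary classifier scores whether the input looks like a natural image or like a visualization being optimized, and the logits are routed accordingly. The snippet below is a minimal, hypothetical sketch of this gating idea; the class and argument names are illustrative, not the notebook's API. 13 | 14 | ```python 15 | import torch 16 | import torch.nn as nn 17 | 18 | class FoolingWrapper(nn.Module): 19 |     """Toy gating circuit: natural inputs keep the original logits, 20 |     while optimization-style inputs receive permuted logits.""" 21 |     def __init__(self, backbone, head, permutation, detector): 22 |         super().__init__() 23 |         self.backbone = backbone        # original feature extractor, unchanged 24 |         self.head = head                # original classification head 25 |         self.permutation = permutation  # fixed permutation of the class indices 26 |         self.detector = detector        # outputs ~1 for natural images, ~0 for visualizations 27 | 28 |     def forward(self, x): 29 |         logits = self.head(self.backbone(x)) 30 |         g = self.detector(x)  # gate of shape [batch, 1] with values in [0, 1] 31 |         # natural images (g close to 1) keep the original behavior, while synthetic 32 |         # visualization inputs (g close to 0) are routed to the permuted logits 33 |         return g * logits + (1 - g) * logits[:, self.permutation] 34 | ``` 35 | The notebook realizes this with hard-coded weights instead of a soft gate (the large +/- k shifts through ReLUs in `manually_set_weights`), so the behavior on natural images is exactly, not just approximately, unchanged. 36 | 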
37 | ## Citation 38 | ``` 39 | @article{geirhos2023fooling, 40 | url = {https://arxiv.org/abs/2306.04719}, 41 | author = {Geirhos, Robert and Zimmermann, Roland S and Bilodeau, Blair and Brendel, Wieland and Kim, Been}, 42 | title = {Don't trust your eyes: on the (un)reliability of feature visualizations}, 43 | journal = {arXiv preprint arXiv:2306.04719}, 44 | year = {2023}, 45 | } 46 | ``` 47 | 48 | ## Disclaimer 49 | This is not an officially supported Google product. 50 | 51 | 52 | -------------------------------------------------------------------------------- /assets/example_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/fooling-feature-visualizations/9a21b3f7decb5001fa93f500b1ae0df1876b1b5d/assets/example_figure.png -------------------------------------------------------------------------------- /code/fooling_circuit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "cMmE1CQWPRhI" 7 | }, 8 | "source": [ 9 | "Copyright 2023 Google LLC\n", 10 | "\n", 11 | "Licensed under the Apache License, Version 2.0 (the \"License\");\n", 12 | "you may not use this file except in compliance with the License.\n", 13 | "You may obtain a copy of the License at\n", 14 | "\n", 15 | " https://www.apache.org/licenses/LICENSE-2.0\n", 16 | "\n", 17 | "Unless required by applicable law or agreed to in writing, software\n", 18 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 19 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 20 | "See the License for the specific language governing permissions and\n", 21 | "limitations under the License." 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "id": "4upHHBiDS7wt" 28 | }, 29 | "source": [ 30 | "# Fooling Feature Visualizations Through a Fooling Circuit" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "id": "pFbgT0bAZE05" 37 | }, 38 | "source": [ 39 | "## Imports" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "id": "jItbFy_dC171" 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "! pip install git+https://github.com/rgeirhos/lucent.git" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "id": "psHsfwDiQ7lx" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "import os\n", 62 | "import numpy as np\n", 63 | "import imageio\n", 64 | "from skimage.transform import resize\n", 65 | "import matplotlib.pyplot as plt\n", 66 | "from collections import OrderedDict\n", 67 | "from PIL import Image\n", 68 | "\n", 69 | "import torch\n", 70 | "import torch.nn as nn\n", 71 | "\n", 72 | "from lucent.optvis.transform import standard_transforms\n", 73 | "from lucent.optvis import render, param" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "id": "FiedZVGvd65C" 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "from google.colab import drive\n", 85 | "drive.mount('/content/gdrive')\n", 86 | "\n", 87 | "PROJECT_DIR = '/path/to/project/dir/'\n", 88 | "CLASSIFIER_WEIGHT_NAME = 'classifier_weights.pt'" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "ntaXDp_b2LaV" 95 | }, 96 | "source": [ 97 | "## Function definitions" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "id": "2FCaZxajiwww" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def convert_rendered_img_to_numpy(img):\n", 109 | " x = np.squeeze(img[0]*255.0)\n", 110 | " assert np.min(x) >= 0, f\"minimum pixel value {np.min(x)} is below 0\"\n", 111 | " assert np.max(x) <= 255, f\"maximum pixel value {np.max(x)} is above 255\"\n", 112 | " return x.astype(np.uint8)  # cast after the range checks; a uint8 array would always pass them" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "id": "38wn0ySy36wM" 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def center_crop(h, w):\n", 124 | " def inner(x: torch.Tensor) -> torch.Tensor:\n", 125 | " assert x.ndim == 4, f\"expected a 4D NCHW tensor, got shape {x.shape}\"\n", 126 | " assert x.shape[2] >= h, f\"input height {x.shape[2]} is smaller than crop height {h}\"\n", 127 | " assert x.shape[3] >= w, f\"input width {x.shape[3]} is smaller than crop width {w}\"\n", 128 | "\n", 129 | " oy = (x.shape[2] - h) // 2\n", 130 | " ox = (x.shape[3] - w) // 2\n", 131 | "\n", 132 | " return x[:, :, oy:oy+h, ox:ox+w]\n", 133 | "\n", 134 | " return inner" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "id": "CahnFbzbqqe8" 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "def visualize(model, idx, show_inline=True, thresholds=(512,), *args, **kwargs):\n", 146 | " img_size = 224\n", 147 | " img = render.render_vis(model, idx,\n", 148 | " show_inline=show_inline, thresholds=thresholds,\n", 149 | " param_f=lambda: param.image(img_size, batch=1),\n", 150 | " transforms=standard_transforms +\n", 151 | " [center_crop(img_size, img_size)], *args, **kwargs)\n", 152 | " return img" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "id": "2bRRUg6BbgVc" 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "def save_multiple_visualizations(dir_path, model, model_name, viz_indices,\n", 164 | " thresholds=(512,), *args, **kwargs):\n", 165 | " if not os.path.exists(dir_path):\n", 166 | " os.makedirs(dir_path)\n", 167 | "\n", 168 | " for idx in viz_indices:\n", 169 | "\n", 170 | " images = visualize(model, idx, thresholds=thresholds, *args, **kwargs)\n", 171 | " assert len(images) == len(thresholds)\n", 172 | "\n", 173 | " for i, img in enumerate(images):\n", 174 | " img_numpy = convert_rendered_img_to_numpy(img)\n", 175 | " imageio.imwrite(os.path.join(dir_path, 
f\"{model_name}_layer-{idx}_threshold-{thresholds[i]}.png\"), img_numpy)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "id": "kZR2Lueu9bWW" 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "def load_image_batch(dir_path=os.path.join(PROJECT_DIR, 'natural-vs-viz-classifier/'),\n", 187 | " n_imgs=12,\n", 188 | " transform=lambda x: x/255.0):\n", 189 | "\n", 190 | " imgs = [[] for _ in range(n_imgs)]\n", 191 | " for i in range(len(imgs)):\n", 192 | "\n", 193 | " imgs[i] = Image.open(os.path.join(dir_path, 'test-imgs', f'{(i+1):04d}.png'))\n", 194 | " imgs[i] = transform(np.asarray(imgs[i], dtype='float'))\n", 195 | "\n", 196 | " image_batch = np.stack(imgs)\n", 197 | " image_batch = np.transpose(image_batch, axes=[0, 3, 1, 2])\n", 198 | " print(f\"Loaded {n_imgs} images in batch of shape {image_batch.shape} with min {np.min(image_batch)} and max {np.max(image_batch)}.\")\n", 199 | "\n", 200 | " return image_batch" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "id": "QCvR-sAhpNhA" 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "def load_classifier_weights(model,\n", 212 | " weight_dir=os.path.join(PROJECT_DIR, 'natural-vs-viz-classifier/'),\n", 213 | " weight_name=CLASSIFIER_WEIGHT_NAME):\n", 214 | " \"\"\"Load classifier weights into existing model's weights\"\"\"\n", 215 | "\n", 216 | " weight_path = os.path.join(weight_dir, weight_name)\n", 217 | "\n", 218 | " # preprocess classifier_state_dict\n", 219 | " classifier_prelim_state_dict = torch.load(weight_path, map_location=torch.device('cpu'))\n", 220 | " classifier_state_dict = {\"classifier.\"+k.replace('module.', ''): v for k, v in classifier_prelim_state_dict.items()}\n", 221 | "\n", 222 | " # remove classifier from original dict\n", 223 | " original_state_dict_with_classifier = model.state_dict()\n", 224 | " original_state_dict = {k: v for k, v in original_state_dict_with_classifier.items() if \"classifier.\" not in k}\n", 225 | "\n", 226 | " # make sure dicts are different\n", 227 | " a = set(original_state_dict.keys())\n", 228 | " b = set(classifier_state_dict.keys())\n", 229 | " assert len(a.intersection(b)) == 0, f\"matching keys found: {a.intersection(b)}\"\n", 230 | "\n", 231 | " # combine dicts\n", 232 | " original_state_dict.update(classifier_state_dict)\n", 233 | " print(model.load_state_dict(original_state_dict, strict=False))" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "id": "SpwLGeO-Vlem" 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "def manually_set_weights(model, class_offset=100, num_classes=1000,\n", 245 | " orig_last_layer_num_units=1008):\n", 246 | " \"\"\"Set weights such that last layer visualizations will be offset by an arbitrary constant.\"\"\"\n", 247 | "\n", 248 | " k = 250.0\n", 249 | "\n", 250 | " with torch.no_grad():\n", 251 | "\n", 252 | " # weights\n", 253 | " layer_1_num_units = 1000\n", 254 | " model.lyr_1.weight = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units,orig_last_layer_num_units])))\n", 255 | " for i in range(num_classes):\n", 256 | " model.lyr_1.weight[i,i] = 1.0\n", 257 | "\n", 258 | " model.lyr_2.weight = torch.nn.Parameter(torch.Tensor(np.zeros([2000, 1001])))\n", 259 | " for i in range(num_classes):\n", 260 | " model.lyr_2.weight[i,i] = 1.0\n", 261 | " model.lyr_2.weight[i+num_classes, (i + class_offset)%num_classes] = 1.0\n", 262 | "\n", 263 | " model.lyr_2.weight[i, 
num_classes] = - k\n", 264 | " model.lyr_2.weight[i+num_classes, num_classes] = k  # input unit 1000 carries the natural-vs-viz classifier score; the +/- k pair gates the two paths\n", 265 | "\n", 266 | " model.lyr_3.weight = torch.nn.Parameter(torch.Tensor(np.zeros([1000, 2000])))\n", 267 | " for i in range(num_classes):\n", 268 | " model.lyr_3.weight[i,i] = 1.0\n", 269 | " model.lyr_3.weight[i,i+num_classes] = 1.0\n", 270 | "\n", 271 | " # biases\n", 272 | " model.lyr_1.bias = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units])))\n", 273 | " model.lyr_1.bias[0:num_classes] = 100.0\n", 274 | "\n", 275 | " model.lyr_2.bias = torch.nn.Parameter(torch.Tensor(np.zeros([2000])))\n", 276 | " model.lyr_2.bias[0:num_classes] = 0.0\n", 277 | " model.lyr_2.bias[num_classes:(2*num_classes)] = - k\n", 278 | "\n", 279 | " model.lyr_3.bias = torch.nn.Parameter(torch.Tensor(np.zeros([num_classes])))\n", 280 | "\n", 281 | " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 282 | " model.to(device).eval();\n", 283 | " print(f\"Manually setting weights completed with class_offset {class_offset}.\")" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "id": "c-MFDuh9_sli" 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "def manually_specify_visualization_weights(model, target_img_idx=0, input_size=224):\n", 295 | " \"\"\"Set weights such that last layer visualizations will show a target image.\"\"\"\n", 296 | "\n", 297 | " image_batch = load_image_batch()\n", 298 | "\n", 299 | " with torch.no_grad():\n", 300 | "\n", 301 | " class_offset = 100\n", 302 | " num_classes = 1000\n", 303 | " k = 250\n", 304 | "\n", 305 | " # weights\n", 306 | " layer_1_num_units = 1000\n", 307 | " model.lyr_1.weight = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units, 1008])))\n", 308 | " for i in range(num_classes):\n", 309 | " model.lyr_1.weight[i,i] = 1.0\n", 310 | "\n", 311 | " model.lyr_2.weight = torch.nn.Parameter(torch.Tensor(np.zeros([2000, 1002])))\n", 312 | " for i in range(num_classes):\n", 313 | " model.lyr_2.weight[i,i] = 1.0\n", 314 | " model.lyr_2.weight[i+num_classes, 1001] = 1.0\n", 315 | "\n", 316 | " model.lyr_2.weight[i, num_classes] = - k\n", 317 | " model.lyr_2.weight[i+num_classes, num_classes] = k\n", 318 | "\n", 319 | " model.lyr_3.weight = torch.nn.Parameter(torch.Tensor(np.zeros([1000, 2000])))\n", 320 | " for i in range(num_classes):\n", 321 | " model.lyr_3.weight[i,i] = 1.0\n", 322 | " model.lyr_3.weight[i,i+num_classes] = 1.0\n", 323 | "\n", 324 | " # biases\n", 325 | " model.lyr_1.bias = torch.nn.Parameter(torch.Tensor(np.zeros([layer_1_num_units])))\n", 326 | " model.lyr_1.bias[0:num_classes] = 100.0\n", 327 | "\n", 328 | " model.lyr_2.bias = torch.nn.Parameter(torch.Tensor(np.zeros([2000])))\n", 329 | " model.lyr_2.bias[0:num_classes] = 0.0\n", 330 | " model.lyr_2.bias[num_classes:(2*num_classes)] = - k\n", 331 | "\n", 332 | " model.lyr_3.bias = torch.nn.Parameter(torch.Tensor(np.zeros([num_classes])))\n", 333 | "\n", 334 | " model.viz_layer.weight = torch.nn.Parameter(torch.Tensor(np.zeros([1, 3, 224, 224]) - 0.1))\n", 335 | " for c in range(3):\n", 336 | " for i in range(input_size):\n", 337 | " for j in range(input_size):\n", 338 | " # dividing by input_size**2 ensures that the layer activations don't explode during visualization\n", 339 | " model.viz_layer.weight[:,c,i,j] = image_batch[target_img_idx,c,i,j]/(input_size**2)\n", 340 | " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")  # define device locally instead of relying on a global defined in a later cell\n", 341 | " model.to(device).eval();" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "id": "R2wzKmTe6w6x" 348 | 
}, 349 | "source": [ 350 | "\n", 351 | "## Figure: visualization-trajectory" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "id": "Btz7xx_v6xDJ" 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "from lucent.modelzoo.inceptionv1.InceptionV1 import InceptionV1 as INCEPTION_V1\n", 363 | "model = INCEPTION_V1(pretrained=True)\n", 364 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 365 | "model.to(device).eval();" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "metadata": { 372 | "id": "IkgzjDpH6xJx" 373 | }, 374 | "outputs": [], 375 | "source": [ 376 | "dir_path = os.path.join(PROJECT_DIR, 'visualization-trajectory/')" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "id": "cptJcxmd6xMQ" 384 | }, 385 | "outputs": [], 386 | "source": [ 387 | "save_multiple_visualizations(dir_path=dir_path,\n", 388 | " model=model,\n", 389 | " model_name=\"inception-v1-unmodified\",\n", 390 | " viz_indices=[\"softmax2_pre_activation_matmul:0\"],\n", 391 | " thresholds=(1, 2, 4, 8, 16, 32, 64, 128, 256, 512))" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": { 397 | "id": "cpCJJE3NZ_RR" 398 | }, 399 | "source": [ 400 | "## Figure: permuted-visualizations-offset-100" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "id": "Ewo0ftjbZ_XF" 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "from lucent.modelzoo.inceptionv1.InceptionV3 import InceptionV3 as INCEPTION_V3\n", 412 | "model = INCEPTION_V3(pretrained=True, add_custom_layers=True,\n", 413 | " use_RELU_in_custom_layers=True, verbose=True)\n", 414 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 415 | "model.to(device).eval();\n", 416 | "assert model.classifier is not None  # the custom classifier head must exist" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": { 423 | "id": "Jy57j6Iea6qw" 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "load_classifier_weights(model)" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": { 434 | "id": "DqEgePp1bBLO" 435 | }, 436 | "outputs": [], 437 | "source": [ 438 | "manually_set_weights(model=model, class_offset=100, num_classes=1000)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "id": "Whtf4dafb_kP" 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "dir_path = os.path.join(PROJECT_DIR, 'permuted-visualizations-offset-100/')" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": { 456 | "id": "uZCS1vPpaLFb" 457 | }, 458 | "outputs": [], 459 | "source": [ 460 | "save_multiple_visualizations(dir_path=dir_path,\n", 461 | " model=model,\n", 462 | " model_name=\"inception-v3-offset-100\",\n", 463 | " viz_indices=[f\"lyr_3:{x}\" for x in range(0, 1000, 100)])" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": { 469 | "id": "wohSaxToM3KT" 470 | }, 471 | "source": [ 472 | "## Figure: original-visualizations" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "id": "O_iraySMM4Uv" 480 | }, 481 | "outputs": [], 482 | "source": [ 483 | "from lucent.modelzoo.inceptionv1.InceptionV1 import InceptionV1 as INCEPTION_V1\n", 484 | "model = 
INCEPTION_V1(pretrained=True)\n", 485 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 486 | "model.to(device).eval();" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "id": "fgN6NO7rNFhr" 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "dir_path = os.path.join(PROJECT_DIR, 'original-visualizations/')" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": { 504 | "id": "6KzozqhuM3S_" 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "save_multiple_visualizations(dir_path=dir_path,\n", 509 | " model=model,\n", 510 | " model_name=\"inception-v1\",\n", 511 | " viz_indices=[f\"softmax2_pre_activation_matmul:{x}\" for x in range(0, 1000, 100)])" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": { 517 | "id": "Ngf3Vxy_7fHh" 518 | }, 519 | "source": [ 520 | "## Figure: manually-specified-visualizations" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": { 527 | "id": "MqMcGGCj_gWk" 528 | }, 529 | "outputs": [], 530 | "source": [ 531 | "from lucent.modelzoo.inceptionv1.InceptionV4 import InceptionV4 as INCEPTION_V4\n", 532 | "model = INCEPTION_V4(pretrained=True, add_custom_layers=True,\n", 533 | " use_RELU_in_custom_layers=True, verbose=True)\n", 534 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 535 | "model.to(device).eval();\n", 536 | "assert model.classifier is not None  # the custom classifier head must exist" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": { 543 | "id": "CO3PfQ5MpD_1" 544 | }, 545 | "outputs": [], 546 | "source": [ 547 | "load_classifier_weights(model)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": { 554 | "id": "fElQUVTAFJ3U" 555 | }, 556 | "outputs": [], 557 | "source": [ 558 | "manually_specify_visualization_weights(model, target_img_idx=7)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "id": "t3mOixkl7lCn" 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "# sanity check: the natural-vs-viz classifier should classify these natural images as natural\n", 570 | "_ = model(torch.Tensor(load_image_batch(transform=lambda x: x-117.0)).cuda())" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "id": "QZ3jN5xnSt5u" 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "dir_path = os.path.join(PROJECT_DIR, 'manually-specified-visualizations/')" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": { 588 | "id": "6Az-DK-3FXQU" 589 | }, 590 | "outputs": [], 591 | "source": [ 592 | "num_imgs = 12\n", 593 | "for i in range(num_imgs):\n", 594 | " manually_specify_visualization_weights(model, target_img_idx=i)\n", 595 | " save_multiple_visualizations(dir_path=dir_path,\n", 596 | " model=model,\n", 597 | " model_name=f\"inception-v4-img-{i}\",\n", 598 | " viz_indices=[\"lyr_3:0\"],\n", 599 | " thresholds=(2,4,6,8,10,12,14,16,20,32))" 600 | ] 601 | }, 602 | { 603 | "cell_type": "markdown", 604 | "metadata": { 605 | "id": "xAozFiNJVsZm" 606 | }, 607 | "source": [ 608 | "## Figure: silent-units" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": null, 614 | "metadata": { 615 | "id": "BCS7KcbsVshK" 616 | }, 617 | "outputs": [], 618 | "source": [ 619 | "device = torch.device(\"cuda:0\" if 
torch.cuda.is_available() else \"cpu\")\n", 620 | "from lucent.modelzoo import resnet50\n", 621 | "model = resnet50(pretrained=True)\n", 622 | "model.to(device).eval();" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": { 629 | "id": "aMyroaKdSxUL" 630 | }, 631 | "outputs": [], 632 | "source": [ 633 | "dir_path = os.path.join(PROJECT_DIR, 'silent-units-layer3-1-conv3/')" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": null, 639 | "metadata": { 640 | "id": "L-7RwUX09LF6" 641 | }, 642 | "outputs": [], 643 | "source": [ 644 | "# silent units\n", 645 | "for layer in [\"layer3_1_conv3\", \"layer3_1_bn3\"]:\n", 646 | " save_multiple_visualizations(dir_path=dir_path,\n", 647 | " model=model,\n", 648 | " model_name=\"ResNet-50\",\n", 649 | " viz_indices=[f\"{layer}:{idx}\" for idx in [147, 316, 342, 405, 750]],\n", 650 | " thresholds=(512,))" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": { 657 | "id": "qf98q-3THdy6" 658 | }, 659 | "outputs": [], 660 | "source": [ 661 | "# non-silent units\n", 662 | "for layer in [\"layer3_1_conv3\", \"layer3_1_bn3\"]:\n", 663 | " save_multiple_visualizations(dir_path=dir_path,\n", 664 | " model=model,\n", 665 | " model_name=\"ResNet-50\",\n", 666 | " viz_indices=[f\"{layer}:{idx}\" for idx in [172, 184, 324, 581, 968]],\n", 667 | " thresholds=(512,))" 668 | ] 669 | } 670 | ], 671 | "metadata": { 672 | "accelerator": "GPU", 673 | "colab": { 674 | "provenance": [] 675 | }, 676 | "gpuClass": "standard", 677 | "kernelspec": { 678 | "display_name": "Python 3", 679 | "name": "python3" 680 | }, 681 | "language_info": { 682 | "name": "python" 683 | } 684 | }, 685 | "nbformat": 4, 686 | "nbformat_minor": 0 687 | } 688 | --------------------------------------------------------------------------------