├── GradCAM.py ├── LICENSE ├── README.md ├── Visualize-Heatmap-tutorial.md ├── Visualize-feature-maps.md └── img ├── CAM.png ├── GradCAM.png ├── 公式.png └── 热力图.png /GradCAM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function, division 3 | 4 | import os 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | from PIL import Image 10 | from torchvision import transforms 11 | from torchvision import models 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | class ActivationsAndGradients: 16 | """ Class for extracting activations and 17 | registering gradients from targeted intermediate layers """ 18 | 19 | def __init__(self, model, target_layers, reshape_transform): 20 | self.model = model 21 | self.gradients = [] 22 | self.activations = [] 23 | self.reshape_transform = reshape_transform 24 | self.handles = [] 25 | for target_layer in target_layers: 26 | self.handles.append( 27 | target_layer.register_forward_hook( 28 | self.save_activation)) 29 | # Backward compatibility with older pytorch versions: 30 | if hasattr(target_layer, 'register_full_backward_hook'): 31 | self.handles.append( 32 | target_layer.register_full_backward_hook( 33 | self.save_gradient)) 34 | else: 35 | self.handles.append( 36 | target_layer.register_backward_hook( 37 | self.save_gradient)) 38 | 39 | def save_activation(self, module, input, output): 40 | activation = output 41 | if self.reshape_transform is not None: 42 | activation = self.reshape_transform(activation) 43 | self.activations.append(activation.cpu().detach()) 44 | 45 | def save_gradient(self, module, grad_input, grad_output): 46 | # Gradients are computed in reverse order 47 | grad = grad_output[0] 48 | if self.reshape_transform is not None: 49 | grad = self.reshape_transform(grad) 50 | self.gradients = [grad.cpu().detach()] + self.gradients 51 | 52 | def __call__(self, x): 53 | self.gradients = [] 54 | self.activations = [] 55 | return self.model(x) 56 | 57 | def release(self): 58 | for handle in self.handles: 59 | handle.remove() 60 | 61 | 62 | class GradCAM: 63 | def __init__(self, model, target_layers, reshape_transform=None): 64 | self.model = model.eval() 65 | self.target_layers = target_layers 66 | self.reshape_transform = reshape_transform 67 | self.activations_and_grads = ActivationsAndGradients( 68 | self.model, target_layers, reshape_transform) 69 | 70 | """ Get a vector of weights for every channel in the target layer. 71 | Methods that return weights channels, 72 | will typically need to only implement this function. 
""" 73 | 74 | @staticmethod 75 | def get_cam_weights(grads): 76 | return np.mean(grads, axis=(2, 3), keepdims=True) 77 | 78 | @staticmethod 79 | def get_loss(output, target_category): 80 | loss = 0 81 | for i in range(len(target_category)): 82 | loss = loss + output[i, target_category[i]] 83 | return loss 84 | 85 | def get_cam_image(self, activations, grads): 86 | weights = self.get_cam_weights(grads) 87 | weighted_activations = weights * activations 88 | cam = weighted_activations.sum(axis=1) 89 | 90 | return cam 91 | 92 | @staticmethod 93 | def get_target_width_height(input_tensor): 94 | width, height = input_tensor.size(-1), input_tensor.size(-2) 95 | return width, height 96 | 97 | def compute_cam_per_layer(self, input_tensor): 98 | activations_list = [a.cpu().data.numpy() 99 | for a in self.activations_and_grads.activations] 100 | grads_list = [g.cpu().data.numpy() 101 | for g in self.activations_and_grads.gradients] 102 | target_size = self.get_target_width_height(input_tensor) 103 | 104 | cam_per_target_layer = [] 105 | # Loop over the saliency image from every layer 106 | 107 | for layer_activations, layer_grads in zip(activations_list, grads_list): 108 | cam = self.get_cam_image(layer_activations, layer_grads) 109 | cam[cam < 0] = 0 # works like mute the min-max scale in the function of scale_cam_image 110 | scaled = self.scale_cam_image(cam, target_size) 111 | cam_per_target_layer.append(scaled[:, None, :]) 112 | 113 | return cam_per_target_layer 114 | 115 | def aggregate_multi_layers(self, cam_per_target_layer): 116 | cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1) 117 | cam_per_target_layer = np.maximum(cam_per_target_layer, 0) 118 | result = np.mean(cam_per_target_layer, axis=1) 119 | return self.scale_cam_image(result) 120 | 121 | @staticmethod 122 | def scale_cam_image(cam, target_size=None): 123 | result = [] 124 | for img in cam: 125 | img = img - np.min(img) 126 | img = img / (1e-7 + np.max(img)) 127 | if target_size is not None: 128 | img = cv2.resize(img, target_size) 129 | result.append(img) 130 | result = np.float32(result) 131 | 132 | return result 133 | 134 | def __call__(self, input_tensor, target_category=None): 135 | 136 | # 正向传播得到网络输出logits(未经过softmax) 137 | output = self.activations_and_grads(input_tensor) 138 | if isinstance(target_category, int): 139 | target_category = [target_category] * input_tensor.size(0) 140 | 141 | if target_category is None: 142 | target_category = np.argmax(output.cpu().data.numpy(), axis=-1) 143 | print(f"category id: {target_category}") 144 | else: 145 | assert (len(target_category) == input_tensor.size(0)) 146 | 147 | self.model.zero_grad() 148 | loss = self.get_loss(output, target_category) 149 | loss.backward(retain_graph=True) 150 | 151 | # In most of the saliency attribution papers, the saliency is 152 | # computed with a single target layer. 153 | # Commonly it is the last convolutional layer. 154 | # Here we support passing a list with multiple target layers. 155 | # It will compute the saliency image for every image, 156 | # and then aggregate them (with a default mean aggregation). 157 | # This gives you more flexibility in case you just want to 158 | # use all conv layers for example, all Batchnorm layers, 159 | # or something else. 
160 | cam_per_layer = self.compute_cam_per_layer(input_tensor) 161 | return self.aggregate_multi_layers(cam_per_layer) 162 | 163 | def __del__(self): 164 | self.activations_and_grads.release() 165 | 166 | def __enter__(self): 167 | return self 168 | 169 | def __exit__(self, exc_type, exc_value, exc_tb): 170 | self.activations_and_grads.release() 171 | if isinstance(exc_value, IndexError): 172 | # Handle IndexError here... 173 | print( 174 | f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}") 175 | return True 176 | 177 | 178 | def show_cam_on_image(img: np.ndarray, 179 | mask: np.ndarray, 180 | use_rgb: bool = False, 181 | colormap: int = cv2.COLORMAP_JET) -> np.ndarray: 182 | """ This function overlays the cam mask on the image as an heatmap. 183 | By default the heatmap is in BGR format. 184 | :param img: The base image in RGB or BGR format. 185 | :param mask: The cam mask. 186 | :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format. 187 | :param colormap: The OpenCV colormap to be used. 188 | :returns: The default image with the cam overlay. 189 | """ 190 | 191 | heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap) 192 | if use_rgb: 193 | heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) 194 | heatmap = np.float32(heatmap) / 255 195 | 196 | if np.max(img) > 1: 197 | raise Exception( 198 | "The input image should np.float32 in the range [0, 1]") 199 | 200 | cam = heatmap + img 201 | cam = cam / np.max(cam) 202 | return np.uint8(255 * cam) 203 | 204 | 205 | def image_proprecess(img_path): 206 | img = Image.open(img_path) 207 | data_transforms = transforms.Compose([ 208 | transforms.Resize((384, 384), interpolation=3), 209 | transforms.ToTensor(), 210 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 211 | ]) 212 | data = data_transforms(img) 213 | data = torch.unsqueeze(data,0) 214 | img_resize = img.resize((384,384)) 215 | return img_resize,data 216 | 217 | def Init_Setting(): 218 | model = models.mobilenet_v3_large(pretrained=True) 219 | #model.load_state_dict(torch.load('model.pth')) 220 | model = model.cuda().eval() 221 | return model 222 | 223 | 224 | if __name__ == "__main__": 225 | imgs_path = "path/to/image.png" 226 | model = Init_Setting() 227 | target_layers = [model.features[-1]] 228 | img, data = image_proprecess(imgs_path) 229 | 230 | cam = GradCAM(model=model, target_layers=target_layers) 231 | target_category = None 232 | 233 | data = data.cuda() 234 | grayscale_cam = cam(input_tensor=data, target_category=target_category) 235 | grayscale_cam = grayscale_cam[0, :] 236 | visualization = show_cam_on_image(np.array(img) / 255., 237 | grayscale_cam, 238 | use_rgb=True) 239 | plt.imshow(visualization) 240 | plt.xticks() 241 | plt.yticks() 242 | plt.axis('off') 243 | plt.savefig("path/to/gradcam_image.jpg") 244 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Visualize-feature-maps
2 | 
3 | 
4 | 
5 | **Welcome to follow the WeChat official account CV技术指南, which focuses on technique summaries for computer vision, tracking of the latest techniques, interpretations of classic papers, and CV job postings.**
6 | 
7 | 
8 | 
9 | **Feature-map visualization is a piece of work that many papers need to do. It can serve to demonstrate that a method is effective, or simply to add workload and pad out the paper's word count.**
10 | 
11 | Concretely, you visualize two maps, one produced with the new method and one produced without it, compare the differences, and then write up, based on the figures, what the new method contributes.
12 | 
13 | To be blunt, sometimes even the paper's authors cannot really read these figures: the visualization does change somewhat, but what the change actually demonstrates is unclear. The write-up then simply forces the figure into the story built around the new method, much like the first-grade exercise of writing an essay from a picture.
14 | 
15 | There was once a popular question on Zhihu: if I make only a tiny improvement on a baseline but it brings a large gain, can I write a paper about it?
16 | 
17 | The biggest problem in that situation is how to fill seven or more pages. Describing the idea, the derivations, and the figures for such a small improvement may take less than a page, so how do you fill the rest? Visualize the feature maps!
18 | 
19 | I have seen this in quite a few papers: I could not make sense of the visualizations they presented, yet the authors still managed to say a great deal about them. That is presumably what padding a paper's word count and workload looks like.
20 | 
21 | In one sentence: **visualizing feature maps is important work, and it is well worth knowing how to do**.
22 | 
23 | 
24 | 
25 | # Visualize-Heatmap
26 | 
27 | This document explains the principles and shortcomings of CAM and Grad-CAM, shows how to implement heatmap visualization with the Grad-CAM algorithm, and covers heatmap visualization for other task types such as object detection, semantic segmentation, and transformer models.
28 | 
29 | 
30 | #### The principle of heatmap visualization
31 | 
32 | In a neural network, an image passes through the network and produces a class prediction, but we do not know what the model based that prediction on. In other words, we want to know how much each region of the image influences the prediction. That is what a heatmap provides: it estimates the importance of the different image regions to the model and renders them as a picture that looks like an isotherm map.
33 | 
34 | ![热力图](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/热力图.png)
35 | 
36 | Heatmap visualization methods have evolved from CAM to Grad-CAM and then Grad-CAM++; Grad-CAM is the most commonly used.
37 | 
38 | ##### CAM
39 | 
40 | CAM paper: Learning Deep Features for Discriminative Localization
41 | 
42 | The idea of CAM is to take, from the fully connected layer, the weight vector W that produces the probability of class C, and use it to compute a weighted sum of the feature maps before GAP. Since those feature maps are smaller than the original image, the weighted sum is then upsampled, which gives the Class Activation Map.
43 | 
44 | ![CAM](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/CAM.png)
45 | 
46 | CAM has a fatal drawback: it requires a CNN + GAP + FC + Softmax architecture. To visualize an existing model that has no GAP layer, you have to modify the architecture and retrain, which is quite troublesome; and if the model is large, retraining after the modification may not recover the original performance, in which case the visualization loses its meaning.
47 | 
48 | Grad-CAM was later proposed as an improvement that addresses this drawback.
49 | 
50 | 
51 | 
52 | ##### GradCAM
53 | 
54 | Grad-CAM paper: Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization
55 | 
56 | The biggest advantage of Grad-CAM is that it requires no change to the existing model architecture and no retraining; visualization is done directly on the original model.
57 | 
58 | ![GradCAM](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/GradCAM.png)
59 | 
60 | Principle: Grad-CAM likewise works on the last feature maps of the CNN feature extractor. For the class C to be visualized, the output score for C is backpropagated to the last feature maps, giving the gradient of C with respect to every pixel of those maps. Global average pooling over these gradients yields the weighting coefficients alpha for the feature maps; the paper notes that coefficients obtained this way are almost equivalent, in terms of computation, to those used in CAM. The feature maps are then weighted and summed, rectified with ReLU, and upsampled.
61 | 
62 | ReLU is used because negative values can be regarded as irrelevant to recognizing class C (they are likely related to other classes), while only the positive values contribute positively to recognizing C.
63 | 
64 | The exact formula is as follows:
65 | 
66 | ![公式](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/公式.png)
67 | 
68 | Grad-CAM was later followed by an improved version, Grad-CAM++. Its main improvement is more accurate localization and better handling of multiple objects of the same class in one image, for example seven or eight people. It achieves this with a new way of computing the weighting coefficients; the method is quite involved and is not covered here.
69 | 
--------------------------------------------------------------------------------
/Visualize-Heatmap-tutorial.md:
--------------------------------------------------------------------------------
1 | > 特征图可视化与热力图可视化是论文中比较常用的两种可视化方法。上一篇文章《[一份可视化特征图的代码](https://mp.weixin.qq.com/s?__biz=MzkyMDE2OTA3Mw==&mid=2247493864&idx=1&sn=7ad0ec5d43c8cef05c0f05794a547eb8&chksm=c19457f6f6e3dee07595386289437e74db02231b09261b19c9e419a4e7859565144e88ae2d9e&token=214385380&lang=zh_CN#rd)》介绍了特征图可视化的代码，本篇将对如何进行热力图可视化做一个使用说明。
2 | >
3 | > 本文介绍了CAM、GradCAM的原理和缺陷，介绍了如何使用GradCAM算法实现热力图可视化，介绍了目标检测、语义分割、transformer模型等其它类型任务的热力图可视化。
4 | 
5 | 
本文原文:《[一份热力图可视化代码使用教程](https://mp.weixin.qq.com/s?__biz=MzkyMDE2OTA3Mw==&mid=2247494271&idx=1&sn=6fb284402bc9aad6e9d578e385b59aad&chksm=c1945561f6e3dc77184450d7ea6c1e71e9d3083946c459298950c1a3d4c900d7184a5a38c440&token=214385380&lang=zh_CN#rd)》 6 | 7 | #### 1. 热力图可视化方法的原理 8 | 9 | 在一个神经网络模型中,图片经过神经网络得到类别输出,我们并不知道模型是根据什么来作出预测的,换言之,我们需要了解图片中各个区域对模型作出预测的影响有多大。这就是热力图的作用,它通过得到图像不同区域之间对模型的重要性而生成一张类似于等温图的图片。 10 | 11 | ![热力图](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/热力图.png) 12 | 13 | 热力图可视化方法经过了从CAM,GradCAM,到GradCAM++的过程,比较常用的是GradCAM算法。 14 | 15 | ##### CAM 16 | 17 | CAM论文:Learning Deep Features for Discriminative Localization 18 | 19 | CAM的原理是取出全连接层中得到类别C的概率的那一维权值,用W表示。然后对GAP前的feature map进行加权求和,由于此时feature map不是原图像大小,在加权求和后还需要进行上采样,即可得到Class Activation Map。 20 | 21 | ![CAM](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/CAM.png) 22 | 23 | CAM有个很致命的缺陷,它的结构是由CNN + GAP + FC + Softmax组成。也就是说如果想要可视化某个现有的模型,对于没有GAP的模型来说需要修改原模型结构,并重新训练,相当麻烦,且如果模型很大,在修改后重新训练不一定能达到原效果,可视化也就没有意义了。 24 | 25 | 因此,针对这个缺陷,其后续有了改进版Grad-CAM。 26 | 27 | 28 | 29 | ##### GradCAM 30 | 31 | Grad-CAM论文:Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization 32 | 33 | Grad-CAM的最大特点就是不再需要修改现有的模型结构了,也不需要重新训练了,直接在原模型上即可可视化。 34 | 35 | ![GradCAM](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/GradCAM.png) 36 | 37 | 原理:同样是处理CNN特征提取网络的最后一层feature maps。Grad-CAM对于想要可视化的类别C,使最后输出的类别C的概率值通过反向传播到最后一层feature maps,得到类别C对该feature maps的每个像素的梯度值,对每个像素的梯度值取全局平均池化,即可得到对feature maps的加权系数alpha,论文中提到这样获取的加权系数跟CAM中的系数的计算量几乎是等价的。接下来对特征图加权求和,使用ReLU进行修正,再进行上采样。 38 | 39 | 使用ReLU的原因是对于那些负值,可认为与识别类别C无关,这些负值可能是与其他类别有关,而正值才是对识别C有正面影响的。 40 | 41 | 具体公式如下: 42 | 43 | ![公式](https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/blob/main/img/公式.png) 44 | 45 | Grad-CAM后续还有改进版Grad-CAM++,其主要的改进效果是定位更准确,更适合同类多目标的情况,所谓同类多目标是指一张图像中对于某个类出现多个目标,例如七八个人。改进方法是对加权系数的获取提出新的方法,该方法很复杂,这里不介绍。 46 | 47 | 48 | 49 | #### 2.GradCAM的使用教程 50 | 51 | 这份代码来自GradCAM论文作者,原链接中包含了很多其它的CAM,这里将GradCAM摘出来对其做一个使用说明。 52 | 53 | 代码原链接:https://github.com/jacobgil/pytorch-grad-cam/tree/master/pytorch_grad_cam 54 | 55 | 本教程代码链接:https://github.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/tree/main 56 | 57 | 58 | 59 | ##### 使用流程 60 | 61 | 使用起来比较简单,仅了解主函数即可。 62 | 63 | ```python 64 | if __name__ == "__main__": 65 | imgs_path = "path/to/image.png" 66 | model = models.mobilenet_v3_large(pretrained=True) 67 | model.load_state_dict(torch.load('model.pth')) 68 | model = model.cuda().eval() 69 | 70 | #target_layers指的是需要可视化的层,这里可视化最后一层 71 | target_layers = [model.features[-1]] 72 | img, data = image_proprecess(imgs_path) 73 | data = data.cuda() 74 | 75 | cam = GradCAM(model=model, target_layers=target_layers) 76 | #指定可视化的类别,指定为None,则按照当前预测的最大概率的类作为可视化类。 77 | target_category = None 78 | 79 | grayscale_cam = cam(input_tensor=data, target_category=target_category) 80 | grayscale_cam = grayscale_cam[0, :] 81 | visualization = show_cam_on_image(np.array(img) / 255., grayscale_cam) 82 | plt.imshow(visualization) 83 | plt.xticks() 84 | plt.yticks() 85 | plt.axis('off') 86 | plt.savefig("path/to/gradcam_image.jpg") 87 | ``` 88 | 89 | 如上代码所示,仅需要自主设置输入图片,模型,可视化层,可视化类别即可,其它的部分可完全照用。 90 | 91 | 92 | 93 | 下面细节部分的介绍。 94 | 95 | 96 | 97 | ##### 数据预处理 98 | 99 | 这里跟上次可视化特征图的代码一样,将图片读取,resize,转化为Tensor,格式化,若只有一张图片,则还需要将其扩展为四维。 100 | 101 | ```python 102 | def image_proprecess(img_path): 103 | img = Image.open(img_path) 104 | data_transforms = transforms.Compose([ 105 | 
transforms.Resize((384, 384), interpolation=3),
106 |         transforms.ToTensor(),
107 |         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
108 |     ])
109 |     data = data_transforms(img)
110 |     data = torch.unsqueeze(data,0)
111 |     img_resize = img.resize((384,384))
112 |     return img_resize,data
113 | ```
114 | 
115 | 
116 | 
117 | ##### GradCAM
118 | 
119 | GradCAM这个类是按照前面第一节中介绍的原理封装的，因此了解原理后再了解这个类的代码就比较简单了。
120 | 
121 | ```python
122 | class GradCAM:
123 |     def __init__(self, model, target_layers, reshape_transform=None):
124 |         self.model = model.eval()
125 |         self.target_layers = target_layers
126 |         self.reshape_transform = reshape_transform
127 | 
128 |         self.activations_and_grads = ActivationsAndGradients(
129 |             self.model, target_layers, reshape_transform)
130 | 
131 |     """ Get a vector of weights for every channel in the target layer.
132 |         Methods that return weights channels,
133 |         will typically need to only implement this function. """
134 | 
135 |     @staticmethod
136 |     def get_cam_weights(grads):
137 |         return np.mean(grads, axis=(2, 3), keepdims=True)
138 | 
139 |     @staticmethod
140 |     def get_loss(output, target_category):
141 |         loss = 0
142 |         for i in range(len(target_category)):
143 |             loss = loss + output[i, target_category[i]]
144 |         return loss
145 | 
146 |     def get_cam_image(self, activations, grads):
147 |         weights = self.get_cam_weights(grads)
148 |         weighted_activations = weights * activations
149 |         cam = weighted_activations.sum(axis=1)
150 | 
151 |         return cam
152 | 
153 |     @staticmethod
154 |     def get_target_width_height(input_tensor):
155 |         width, height = input_tensor.size(-1), input_tensor.size(-2)
156 |         return width, height
157 | 
158 |     def compute_cam_per_layer(self, input_tensor):
159 |         activations_list = [a.cpu().data.numpy()
160 |                             for a in self.activations_and_grads.activations]
161 |         grads_list = [g.cpu().data.numpy()
162 |                       for g in self.activations_and_grads.gradients]
163 |         target_size = self.get_target_width_height(input_tensor)
164 | 
165 |         cam_per_target_layer = []
166 |         # Loop over the saliency image from every layer
167 | 
168 |         for layer_activations, layer_grads in zip(activations_list, grads_list):
169 |             cam = self.get_cam_image(layer_activations, layer_grads)
170 |             cam[cam < 0] = 0  # works like mute the min-max scale in the function of scale_cam_image
171 |             scaled = self.scale_cam_image(cam, target_size)
172 |             cam_per_target_layer.append(scaled[:, None, :])
173 | 
174 |         return cam_per_target_layer
175 | 
176 |     def aggregate_multi_layers(self, cam_per_target_layer):
177 |         cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
178 |         cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
179 |         result = np.mean(cam_per_target_layer, axis=1)
180 |         return self.scale_cam_image(result)
181 | 
182 |     @staticmethod
183 |     def scale_cam_image(cam, target_size=None):
184 |         result = []
185 |         for img in cam:
186 |             img = img - np.min(img)
187 |             img = img / (1e-7 + np.max(img))
188 |             if target_size is not None:
189 |                 img = cv2.resize(img, target_size)
190 |             result.append(img)
191 |         result = np.float32(result)
192 | 
193 |         return result
194 | 
195 |     def __call__(self, input_tensor, target_category=None):
196 |         # Forward pass to get the network's output logits (before softmax)
197 |         output = self.activations_and_grads(input_tensor)
198 |         if isinstance(target_category, int):
199 |             target_category = [target_category] * input_tensor.size(0)
200 | 
201 |         if target_category is None:
202 |             target_category = np.argmax(output.cpu().data.numpy(), axis=-1)
203 |             print(f"category id: {target_category}")
204 
| else: 205 | assert (len(target_category) == input_tensor.size(0)) 206 | 207 | self.model.zero_grad() 208 | loss = self.get_loss(output, target_category) 209 | loss.backward(retain_graph=True) 210 | 211 | # In most of the saliency attribution papers, the saliency is 212 | # computed with a single target layer. 213 | # Commonly it is the last convolutional layer. 214 | # Here we support passing a list with multiple target layers. 215 | # It will compute the saliency image for every image, 216 | # and then aggregate them (with a default mean aggregation). 217 | # This gives you more flexibility in case you just want to 218 | # use all conv layers for example, all Batchnorm layers, 219 | # or something else. 220 | cam_per_layer = self.compute_cam_per_layer(input_tensor) 221 | return self.aggregate_multi_layers(cam_per_layer) 222 | 223 | def __del__(self): 224 | self.activations_and_grads.release() 225 | 226 | def __enter__(self): 227 | return self 228 | 229 | def __exit__(self, exc_type, exc_value, exc_tb): 230 | self.activations_and_grads.release() 231 | if isinstance(exc_value, IndexError): 232 | # Handle IndexError here... 233 | print( 234 | f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}") 235 | return True 236 | ``` 237 | 238 | 简要说明一下整体在做什么,先通过下方的ActivationsAndGradients获取模型推理过程中的梯度和激活函数值,计算要可视化的类的loss(其它类的都忽略),通过这个loss计算可视化类对应的梯度图,将其进行全局平均池化获得每个feature maps通道的加权系数,与feature maps进行通道上加权,并在通道上做均值获得单通道图,再ReLU即输出对应的图。注:此图还不是热力图,还需要与原图相加才能获得最终的热力图。 239 | 240 | GradCAM这个类主要就是先定义,再调用执行。定义须输入网络和需要可视化的层,执行则需要输入图片和可视化的类别。 241 | 242 | 执行返回的是区域重要性图。 243 | 244 | ```python 245 | cam = GradCAM(model=model, target_layers=target_layers) 246 | #指定可视化的类别,指定为None,则按照当前预测的最大概率的类作为可视化类。 247 | target_category = None 248 | 249 | grayscale_cam = cam(input_tensor=data, target_category=target_category) 250 | ``` 251 | 252 | 获取推理过程中的梯度主要是通过以下这个类来完成。这里不多介绍。 253 | 254 | ``` 255 | class ActivationsAndGradients: 256 | """ Class for extracting activations and 257 | registering gradients from targeted intermediate layers """ 258 | 259 | def __init__(self, model, target_layers, reshape_transform): 260 | self.model = model 261 | self.gradients = [] 262 | self.activations = [] 263 | self.reshape_transform = reshape_transform 264 | self.handles = [] 265 | for target_layer in target_layers: 266 | self.handles.append( 267 | target_layer.register_forward_hook( 268 | self.save_activation)) 269 | # Backward compatibility with older pytorch versions: 270 | if hasattr(target_layer, 'register_full_backward_hook'): 271 | self.handles.append( 272 | target_layer.register_full_backward_hook( 273 | self.save_gradient)) 274 | else: 275 | self.handles.append( 276 | target_layer.register_backward_hook( 277 | self.save_gradient)) 278 | 279 | def save_activation(self, module, input, output): 280 | activation = output 281 | if self.reshape_transform is not None: 282 | activation = self.reshape_transform(activation) 283 | self.activations.append(activation.cpu().detach()) 284 | 285 | def save_gradient(self, module, grad_input, grad_output): 286 | # Gradients are computed in reverse order 287 | grad = grad_output[0] 288 | if self.reshape_transform is not None: 289 | grad = self.reshape_transform(grad) 290 | self.gradients = [grad.cpu().detach()] + self.gradients 291 | 292 | def __call__(self, x): 293 | self.gradients = [] 294 | self.activations = [] 295 | return self.model(x) 296 | 297 | def release(self): 298 | for handle in self.handles: 299 | handle.remove() 300 | ``` 301 | 302 | 
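One practical detail worth noting is the `reshape_transform` argument that both `ActivationsAndGradients` and `GradCAM` accept but that the examples above leave as `None`. The code assumes the hooked layer produces activations shaped `(batch, channels, height, width)`, which holds for CNNs; for ViT- or Swin-style models the target layer returns a sequence of tokens, so a reshape function has to turn it back into a 2D feature map before the CAM computation applies. The sketch below is only a minimal illustration under that assumption and is not part of this repository: the function name, the default `height`/`width` of 24 (which would fit a 384x384 input with 16x16 patches), and the choice of `blocks[-1].norm1` as the target layer are hypothetical and depend on the specific transformer being visualized.

```python
def vit_reshape_transform(tensor, height=24, width=24):
    # Assumed input layout: (batch, 1 + height*width, channels), where the
    # first token is the class token. Drop the class token, keep patch tokens.
    result = tensor[:, 1:, :].reshape(tensor.size(0), height, width, tensor.size(2))
    # Rearrange to (batch, channels, height, width), the layout the GradCAM
    # code above expects from a convolutional feature map.
    return result.permute(0, 3, 1, 2)


# Hypothetical usage with a timm-style ViT; adjust the target layer and
# the token grid size to match the model you are visualizing.
# cam = GradCAM(model=vit_model,
#               target_layers=[vit_model.blocks[-1].norm1],
#               reshape_transform=vit_reshape_transform)
```
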
然后就是将GradCAM输出的重要性图在原图上显示,通过下面这个函数完成。 303 | 304 | ``` 305 | def show_cam_on_image(img: np.ndarray, 306 | mask: np.ndarray, 307 | use_rgb: bool = False, 308 | colormap: int = cv2.COLORMAP_JET) -> np.ndarray: 309 | """ This function overlays the cam mask on the image as an heatmap. 310 | By default the heatmap is in BGR format. 311 | :param img: The base image in RGB or BGR format. 312 | :param mask: The cam mask. 313 | :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format. 314 | :param colormap: The OpenCV colormap to be used. 315 | :returns: The default image with the cam overlay. 316 | """ 317 | 318 | heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap) 319 | if use_rgb: 320 | heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) 321 | heatmap = np.float32(heatmap) / 255 322 | 323 | if np.max(img) > 1: 324 | raise Exception( 325 | "The input image should np.float32 in the range [0, 1]") 326 | 327 | cam = heatmap + img 328 | cam = cam / np.max(cam) 329 | return np.uint8(255 * cam) 330 | ``` 331 | 332 | 前面介绍的仅仅是分类任务的热力图可视化,但对于目标检测,语义分割等这些包含多任务的应用如何做? 333 | 334 | 335 | 336 | #### 其它类型任务的热力图可视化 337 | 338 | 在gradCAM论文作者给出的代码中还介绍了如何可视化目标检测、语义分割、transformer的代码。由于作者提供了使用方法,这里不多介绍,直接给出作者写得教程。 339 | 340 | [Notebook tutorial: Class Activation Maps for Object Detection with Faster-RCNN](https://github.com/jacobgil/pytorch-grad-cam/blob/master/tutorials/Class%20Activation%20Maps%20for%20Object%20Detection%20With%20Faster%20RCNN.ipynb) 341 | 342 | [Notebook tutorial: Class Activation Maps for Semantic Segmentation](https://github.com/jacobgil/pytorch-grad-cam/blob/master/tutorials/Class%20Activation%20Maps%20for%20Semantic%20Segmentation.ipynb) 343 | 344 | [How it works with Vision/SwinT transformers](https://github.com/jacobgil/pytorch-grad-cam/blob/master/tutorials/vision_transformers.md) 345 | 346 | 347 | **欢迎关注公众号CV技术指南,专注于计算机视觉的技术总结、最新技术跟踪、经典论文解读、CV招聘信息。** 348 | -------------------------------------------------------------------------------- /Visualize-feature-maps.md: -------------------------------------------------------------------------------- 1 | **本文来自公众号CV技术指南,欢迎用于个人学习,严禁用于商业行为。** 2 | 3 | **欢迎关注公众号CV技术指南,专注于计算机视觉的技术总结、最新技术跟踪、经典论文解读、CV招聘信息。** 4 | 5 | 6 | > **前言** 本文给大家分享一份我用的特征图可视化代码。 7 | 8 | 9 | 10 | #### **写在前面的话** 11 | 12 | ------ 13 | 14 | **特征图可视化是很多论文所需要做的一份工作,其作用可以是用于证明方法的有效性,也可以是用来增加工作量,给论文凑字数**。 15 | 16 | 具体来说就是可视化两个图,使用了新方法的和使用之前的,对比有什么区别,然后看图写论文说明新方法体现的作用。 17 | 18 | 吐槽一句,有时候这个图 论文作者自己都不一定能看不懂,虽然确实可视化的图有些改变,但并不懂这个改变说明了什么,反正就吹牛,强行往自己新方法编的故事上扯,就像小学一年级的作文题--看图写作文。 19 | 20 | 之前知乎上有一个很热门的话题,如果我在baseline上做了一点小小的改进,却有很大的效果,这能写论文吗? 21 | 22 | 这种情况最大的问题就在于要如何写七页以上,那一点点的改进可能写完思路,公式推理,画图等内容才花了不到一页,剩下的内容如何搞?可视化特征图!!! 
23 | 24 | 这一点可以在我看过的甚多论文上有所体现,反正我是没看明白论文给的可视化图,作者却能扯那么多道道。这应该就是用来增加论文字数和增加工作量的。 25 | 26 | 总之一句话,**可视化特征图是很重要的工作,最好要会**。 27 | 28 | 29 | 30 | #### **初始化配置** 31 | 32 | ------ 33 | 34 | 这部分先完成加载数据,修改网络,定义网络,加载预训练模型。 35 | 36 | ##### **加载数据并预处理** 37 | 38 | 这里只加载一张图片,就不用通过classdataset了,因为classdataset是针对大量数据的,生成一个迭代器一批一批地将图片送给网络。但我们仍然要完成classdataset中数据预处理的部分。 39 | 40 | 数据预处理所必须要有的操作是调整大小,转化为Tensor格式,归一化。至于其它数据增强或预处理的操作,自己按需添加。 41 | 42 | ```python 43 | def image_proprecess(img_path): 44 | img = Image.open(img_path) 45 | data_transforms = transforms.Compose([ 46 | transforms.Resize((384, 384), interpolation=3), 47 | transforms.ToTensor(), 48 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 49 | ]) 50 | data = data_transforms(img) 51 | data = torch.unsqueeze(data,0) 52 | return data 53 | ``` 54 | 55 | 这里由于只加载一张图片,因此后面要使用torch.unsqueeze将三维张量变成四维。 56 | 57 | 58 | 59 | ##### **修改网络** 60 | 61 | 假如你要可视化某一层的特征图,则需要将该层的特征图返回出来,因此需要先修改网络中的forward函数。具体修改方式如下所示。 62 | 63 | ```python 64 | def forward(self, x): 65 | x = self.model.conv1(x) 66 | x = self.model.bn1(x) 67 | x = self.model.relu(x) 68 | x = self.model.maxpool(x) 69 | feature = self.model.layer1(x) 70 | x = self.model.layer2(feature) 71 | x = self.model.layer3(x) 72 | x = self.model.layer4(x) 73 | return feature,x 74 | ``` 75 | 76 | 77 | 78 | ##### **定义网络并加载预训练模型** 79 | 80 | ```python 81 | def Init_Setting(epoch): 82 | dirname = '/mnt/share/VideoReID/share/models/Methods5_trial1' 83 | model = siamese_resnet50(701, stride=1, pool='avg') 84 | trained_path = os.path.join(dirname, 'net_%03d.pth' % epoch) 85 | print("load %03d.pth" % epoch) 86 | model.load_state_dict(torch.load(trained_path)) 87 | model = model.cuda().eval() 88 | return model 89 | ``` 90 | 91 | 这部分需要说明的是最后一行,要将网络设置为推理模式。 92 | 93 | 94 | 95 | #### **可视化特征图** 96 | 97 | ------ 98 | 99 | 这部分主要是将特征图的某一通道转化为一张图来可视化。 100 | 101 | ```python 102 | def visualize_feature_map(img_batch,out_path,type,BI): 103 | feature_map = torch.squeeze(img_batch) 104 | feature_map = feature_map.detach().cpu().numpy() 105 | 106 | feature_map_sum = feature_map[0, :, :] 107 | feature_map_sum = np.expand_dims(feature_map_sum, axis=2) 108 | for i in range(0, 2048): 109 | feature_map_split = feature_map[i,:, :] 110 | feature_map_split = np.expand_dims(feature_map_split,axis=2) 111 | if i > 0: 112 | feature_map_sum +=feature_map_split 113 | feature_map_split = BI.transform(feature_map_split) 114 | 115 | plt.imshow(feature_map_split) 116 | plt.savefig(out_path + str(i) + "_{}.jpg".format(type) ) 117 | plt.xticks() 118 | plt.yticks() 119 | plt.axis('off') 120 | 121 | feature_map_sum = BI.transform(feature_map_sum) 122 | plt.imshow(feature_map_sum) 123 | plt.savefig(out_path + "sum_{}.jpg".format(type)) 124 | print("save sum_{}.jpg".format(type)) 125 | ``` 126 | 127 | 这里一行一行来解释。 128 | 129 | 1. 参数img_batch是从网络中的某一层传回来的特征图,BI是双线性插值的函数,自定义的,下面会讲。 130 | 131 | 2. 由于只可视化了一张图片,因此img_batch是四维的,且batchsize维为1。第三行将它从GPU上弄到CPU上,并变成numpy格式。 132 | 133 | 3. 
剩下部分主要完成将每个通道变成一张图,以及将所有通道每个元素对应位置相加,并保存。 134 | 135 | 136 | 137 | ##### **双线性插值** 138 | 139 | ------ 140 | 141 | 由于经过多次网络降采样,后面层的特征图往往变得只有7x7,16x16大小。可视化后特别小,因此需要将它上采样,这里采样的方式是双线性插值。因此,这里给一份双线性插值的代码。 142 | 143 | ```python 144 | class BilinearInterpolation(object): 145 | def __init__(self, w_rate: float, h_rate: float, *, align='center'): 146 | if align not in ['center', 'left']: 147 | logging.exception(f'{align} is not a valid align parameter') 148 | align = 'center' 149 | self.align = align 150 | self.w_rate = w_rate 151 | self.h_rate = h_rate 152 | 153 | def set_rate(self,w_rate: float, h_rate: float): 154 | self.w_rate = w_rate # w 的缩放率 155 | self.h_rate = h_rate # h 的缩放率 156 | 157 | # 由变换后的像素坐标得到原图像的坐标 针对高 158 | def get_src_h(self, dst_i,source_h,goal_h) -> float: 159 | if self.align == 'left': 160 | # 左上角对齐 161 | src_i = float(dst_i * (source_h/goal_h)) 162 | elif self.align == 'center': 163 | # 将两个图像的几何中心重合。 164 | src_i = float((dst_i + 0.5) * (source_h/goal_h) - 0.5) 165 | src_i += 0.001 166 | src_i = max(0.0, src_i) 167 | src_i = min(float(source_h - 1), src_i) 168 | return src_i 169 | # 由变换后的像素坐标得到原图像的坐标 针对宽 170 | def get_src_w(self, dst_j,source_w,goal_w) -> float: 171 | if self.align == 'left': 172 | # 左上角对齐 173 | src_j = float(dst_j * (source_w/goal_w)) 174 | elif self.align == 'center': 175 | # 将两个图像的几何中心重合。 176 | src_j = float((dst_j + 0.5) * (source_w/goal_w) - 0.5) 177 | src_j += 0.001 178 | src_j = max(0.0, src_j) 179 | src_j = min((source_w - 1), src_j) 180 | return src_j 181 | 182 | def transform(self, img): 183 | source_h, source_w, source_c = img.shape # (235, 234, 3) 184 | goal_h, goal_w = round( 185 | source_h * self.h_rate), round(source_w * self.w_rate) 186 | new_img = np.zeros((goal_h, goal_w, source_c), dtype=np.uint8) 187 | 188 | for i in range(new_img.shape[0]): # h 189 | src_i = self.get_src_h(i,source_h,goal_h) 190 | for j in range(new_img.shape[1]): 191 | src_j = self.get_src_w(j,source_w,goal_w) 192 | i2 = ceil(src_i) 193 | i1 = int(src_i) 194 | j2 = ceil(src_j) 195 | j1 = int(src_j) 196 | x2_x = j2 - src_j 197 | x_x1 = src_j - j1 198 | y2_y = i2 - src_i 199 | y_y1 = src_i - i1 200 | new_img[i, j] = img[i1, j1]*x2_x*y2_y + img[i1, j2] * \ 201 | x_x1*y2_y + img[i2, j1]*x2_x*y_y1 + img[i2, j2]*x_x1*y_y1 202 | return new_img 203 | #使用方法 204 | BI = BilinearInterpolation(8, 8) 205 | feature_map = BI.transform(feature_map) 206 | ``` 207 | 208 | 209 | 210 | ##### **main函数流程** 211 | 212 | ------ 213 | 214 | 上面介绍了各个部分的代码,下面就是整体流程。比较简单。 215 | 216 | ```python 217 | imgs_path = "/path/to/imgs/" 218 | save_path = "/save/path/to/output/" 219 | model = Init_Setting(120) 220 | BI = BilinearInterpolation(8, 8) 221 | 222 | data = image_proprecess(out_path + "0836.jpg") 223 | data = data.cuda() 224 | output, _ = model(data) 225 | visualize_feature_map(output, save_path, "drone", BI) 226 | ``` 227 | 228 | 229 | 230 | #### **可视化效果图** 231 | 232 | ------ 233 | 234 | ![图片](https://mmbiz.qpic.cn/mmbiz_png/V2E1ll6kaTVqAwbeVAGXfmmlBRwZcbMXLoBIwhvU8SkrFicuricQQZy4CwG5DfqF4ff16wNUNuNSNSicIG2l6icDbg/640?wx_fmt=png&wxfrom=5&wx_lazy=1&wx_co=1) 235 | 236 | 237 | **欢迎关注公众号CV技术指南,专注于计算机视觉的技术总结、最新技术跟踪、经典论文解读、CV招聘信息。** 238 | -------------------------------------------------------------------------------- /img/CAM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/0ad2fef95bc337eae991a9b8707c8ed7578dbee0/img/CAM.png 
-------------------------------------------------------------------------------- /img/GradCAM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/0ad2fef95bc337eae991a9b8707c8ed7578dbee0/img/GradCAM.png -------------------------------------------------------------------------------- /img/公式.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/0ad2fef95bc337eae991a9b8707c8ed7578dbee0/img/公式.png -------------------------------------------------------------------------------- /img/热力图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CV-Tech-Guide/Visualize-feature-maps-and-heatmap/0ad2fef95bc337eae991a9b8707c8ed7578dbee0/img/热力图.png --------------------------------------------------------------------------------