├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── data ├── cat.jpg ├── class_test │ └── 003.backpack │ │ └── 003_0020.jpg ├── im_0.png └── test_0.png ├── doc ├── cam │ ├── README.md │ └── figs │ │ ├── celtech_change.png │ │ ├── celtech_diff.png │ │ └── celtech_result.png ├── deconv │ ├── README.md │ └── figs │ │ ├── dog │ │ ├── conv1_2_feat.png │ │ ├── conv1_2_im.png │ │ ├── conv2_2_feat.png │ │ ├── conv2_2_im.png │ │ ├── conv3_4_feat.png │ │ ├── conv3_4_im.png │ │ ├── conv4_4_feat.png │ │ ├── conv4_4_feat_171.png │ │ ├── conv4_4_feat_349.png │ │ ├── conv4_4_im.png │ │ ├── conv4_4_im_171.png │ │ ├── conv4_4_im_349.png │ │ ├── conv5_2_feat.png │ │ └── conv5_2_im.png │ │ └── people │ │ ├── conv1_2_feat.png │ │ ├── conv1_2_im.png │ │ ├── conv2_2_feat.png │ │ ├── conv2_2_feat_59.png │ │ ├── conv2_2_im.png │ │ ├── conv2_2_im_59.png │ │ ├── conv3_4_feat.png │ │ ├── conv3_4_feat_166.png │ │ ├── conv3_4_im.png │ │ ├── conv3_4_im_166.png │ │ ├── conv4_4_feat.png │ │ ├── conv4_4_feat_171.png │ │ ├── conv4_4_im.png │ │ ├── conv4_4_im_171.png │ │ ├── conv5_2_feat.png │ │ └── conv5_2_im.png ├── firstfilter │ ├── README.md │ └── figs │ │ ├── GoogLeNet.png │ │ ├── GoogLeNet_filter.png │ │ ├── GoogLeNet_inception3a.png │ │ ├── GoogLeNet_inception3b.png │ │ ├── GoogLeNet_inception4a.png │ │ ├── GoogLeNet_inception4b.png │ │ ├── GoogLeNet_inception4c.png │ │ ├── GoogLeNet_inception4d.png │ │ ├── GoogLeNet_inception4e.png │ │ ├── GoogLeNet_inception5a.png │ │ ├── GoogLeNet_inception5b.png │ │ └── bk │ │ ├── GoogLeNet_inception3a.png │ │ ├── GoogLeNet_inception3b.png │ │ ├── GoogLeNet_inception4a.png │ │ ├── GoogLeNet_inception4b.png │ │ ├── GoogLeNet_inception4c.png │ │ ├── GoogLeNet_inception4d.png │ │ ├── GoogLeNet_inception4e.png │ │ ├── GoogLeNet_inception5a.png │ │ └── GoogLeNet_inception5b.png ├── grad_cam │ ├── README.md │ └── figs │ │ ├── comparecam.png │ │ ├── ex1.png │ │ └── ex2.png └── guided_backpropagation │ ├── README.md │ └── figs │ ├── gbp.png │ ├── gbp1.png │ ├── gbp2.png │ ├── gbp3.png │ └── gbp4.png ├── example ├── cam.py ├── config_cam.py ├── config_path.py ├── deconv.py ├── gap.py ├── gradcam.py ├── guided_backpropagation.py ├── invert.py ├── run.sh ├── setup_env.py └── vizfilter.py ├── lib ├── __init__.py ├── dataflow │ ├── __init__.py │ ├── cifar.py │ └── image.py ├── models │ ├── __init__.py │ ├── cam.py │ ├── gap.py │ ├── grad_cam.py │ ├── guided_backpro.py │ └── invert.py ├── nets │ ├── __init__.py │ ├── googlenet.py │ ├── layers.py │ └── vgg.py └── utils │ ├── __init__.py │ ├── image.py │ ├── normalize.py │ └── viz.py ├── requirements.txt └── test ├── setup_test_env.py ├── test_cam.py ├── test_gradcam.py └── test_guided_backprop.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | # lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | install: 5 | - pip install -r requirements.txt 6 | - pip install flake8 7 | - pip install coveralls 8 | - pip install tensorflow 9 | - pip install -U git+https://github.com/conan7882/DeepVision-tensorflow.git 10 | branches: 11 | only: 12 | - master 13 | # env: 14 | # - TESTCASE=test/test_cam.py 15 | # - TESTCASE=test/test_gradcam.py 16 | # - TESTCASE=test/test_guided_backprop.py 17 | script: 18 | - nosetests test/ --with-coverage --cover-package=. --debug=show 19 | # - coverage combine --append 20 | # - flake8 lib/dataflow/ lib/model/ --ignore=F405,F403,F401,E402,E501 21 | # - COVERAGE_FILE=.coverage_cam coverage run --source=. --omit=*vgg.py test/test_cam.py 22 | # - COVERAGE_FILE=.coverage_gradcam coverage run --source=. --omit=*vgg.py test/test_gradcam.py 23 | # - COVERAGE_FILE=.coverage_guidedback coverage run --source=. --omit=*vgg.py test/test_guided_backprop.py 24 | # # - coverage run --source=. test/test.py 25 | # - coverage combine --append 26 | after_success: 27 | - coveralls 28 | # - codecov -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Qian Ge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visualization of Deep Convolutional Neural Networks 2 | 3 | 7 | 8 | - This repository contains implementations of CNN visualization methods from recent papers. 9 | - The source code in the repository can be used to demonstrate the algorithms as well as to test them on your own data. 10 | 11 | ## Requirements 12 | - Python 3.3+ 13 | - [Tensorflow 1.3](https://www.tensorflow.org/) 14 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow) 15 | 16 | 17 | ## Algorithms 18 | 19 | - [Visualization of filters and feature maps of GoogLeNet](https://github.com/conan7882/CNN-Visualization/tree/master/doc/firstfilter#visualization-of-filters-and-feature-maps-of-googlenet) 20 | - [Deconvolutional Networks](https://github.com/conan7882/CNN-Visualization/blob/master/doc/deconv/README.md#cnn-feature-visuallization-via-deconvnet-transposed-convolutional-layers) (ECCV'14) 21 | - [Guided backpropagation](https://github.com/conan7882/CNN-Visualization/tree/master/doc/guided_backpropagation#guided-backpropagation) (2014) 22 | - [Class Activation Mapping (CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#class-activation-mapping-cam) (CVPR'16) 23 | - [Gradient-weighted Class Activation Mapping (Grad-CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/grad_cam#gradient-weighted-class-activation-mapping-grad-cam) (ICCV'17) 24 | 25 | ## [Visualization of filters and feature maps of GoogLeNet](https://github.com/conan7882/CNN-Visualization/tree/master/doc/firstfilter#visualization-of-filters-and-feature-maps-of-googlenet) 26 | - The most straightforward approach to visualize a CNN is to show the feature maps (activations) and filters. 27 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/firstfilter#visualization-of-filters-and-feature-maps-of-googlenet). 28 |
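As a quick illustration of this idea (a minimal sketch, assuming TensorFlow 1.x; the toy conv layer below only stands in for a real pre-trained network such as GoogLeNet), feature maps are simply the values of intermediate tensors and filters are the values of the corresponding kernel variables:

```python
import numpy as np
import tensorflow as tf

# A toy conv layer stands in for a real pre-trained network here; with GoogLeNet
# you would load the pre-trained graph and look up the tensor of the layer to inspect.
image = tf.placeholder(tf.float32, [1, 224, 224, 3], name='image')
feat = tf.layers.conv2d(image, filters=64, kernel_size=7, strides=2,
                        padding='same', activation=tf.nn.relu, name='conv1')
kernel = tf.trainable_variables()[0]   # the 7 x 7 x 3 x 64 filter bank of conv1

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feature maps (activations) are just the values of an intermediate tensor,
    # and filters are just the values of the kernel variable.
    maps, filters = sess.run([feat, kernel],
                             feed_dict={image: np.random.rand(1, 224, 224, 3)})
    print(maps.shape, filters.shape)   # (1, 112, 112, 64) (7, 7, 3, 64)
```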

29 | 30 | 31 |

32 | 33 | ## [Deconvnet](https://github.com/conan7882/CNN-Visualization/blob/master/doc/deconv/README.md#cnn-feature-visuallization-via-deconvnet-transposed-convolutional-layers) 34 | - Pick a specific activation on a feature map and set all other activations to zero, then reconstruct an image by mapping this new feature map back to the input pixel space. 35 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/blob/master/doc/deconv/README.md#cnn-feature-visuallization-via-deconvnet-transposed-convolutional-layers). Some results: 36 | 37 |

38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |

47 | 48 | ## [Guided backpropagation](https://github.com/conan7882/CNN-Visualization/tree/master/doc/guided_backpropagation#guided-backpropagation) 49 | 50 | 51 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/guided_backpropagation#guided-backpropagation). Some results: 52 | 53 | ![gbp](doc/guided_backpropagation/figs/gbp.png) 54 | 55 | ## [Class Activation Mapping (CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#class-activation-mapping-cam) 56 | - The class activation map highlights the most informative image regions relevant to the predicted class. This map can be obtained by adding a global average pooling layer at the end of the convolutional layers. 57 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#class-activation-mapping-cam). Some results: 58 | 59 | ![celtech_change](doc/cam/figs/celtech_diff.png) 60 | 61 | ## [Gradient-weighted Class Activation Mapping (Grad-CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/grad_cam#gradient-weighted-class-activation-mapping-grad-cam) 62 | - Grad-CAM generates a class heatmap similar to CAM's, but it does not require re-training the model for visualization. 63 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/grad_cam#gradient-weighted-class-activation-mapping-grad-cam). Some results: 64 | 65 | ![grad-cam-result](doc/grad_cam/figs/ex1.png) 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /data/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/cat.jpg -------------------------------------------------------------------------------- /data/class_test/003.backpack/003_0020.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/class_test/003.backpack/003_0020.jpg -------------------------------------------------------------------------------- /data/im_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/im_0.png -------------------------------------------------------------------------------- /data/test_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/test_0.png -------------------------------------------------------------------------------- /doc/cam/README.md: -------------------------------------------------------------------------------- 1 | # Class Activation Mapping (CAM) 2 | 3 | - TensorFlow implementation of [Learning Deep Features for Discriminative Localization](https://arxiv.org/abs/1512.04150) (CVPR'16). 4 | - Caffe implementation by the authors is [here](https://github.com/metalbubble/CAM). 5 | - The class activation map highlights the most informative image regions relevant to the predicted class.
This map can be obtained by adding a global average pooling layer at the end of the convolutional layers. 6 | - This implementation has been tested on the [Caltech-256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) dataset, and can be tested on your own dataset as well. 7 | 8 | 9 | 10 | ## Requirements 11 | - Python 3.3+ 12 | - [Tensorflow 1.3](https://www.tensorflow.org/) 13 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow) 14 | 15 | ## Implementation Details 16 | 17 | 18 | 19 | - CAM for the Caltech-256 dataset is obtained by finetuning [VGG19](https://arxiv.org/abs/1409.1556). 20 | - CAM models are defined in [`CNN-Visualization/lib/models/cam.py`](../../lib/models/cam.py). 21 | - Example usage of CAM is in [`CNN-Visualization/example/cam.py`](../../example/cam.py) (used for Caltech-256 or other natural image datasets). Directories are set up in [`CNN-Visualization/example/config_cam.py`](../../example/config_cam.py). 22 | 23 | ## Results 24 | 25 | 26 | ### Caltech-256 27 | 28 | Class activation map for class llama at different steps during training 29 | ![celtech_change](figs/celtech_change.png) 30 | 31 | Class activation maps for the classes duck and people after 25 epochs 32 | ![celtech_change](figs/celtech_result.png) 33 | 34 | Class activation maps of the same image for different classes 35 | ![celtech_change](figs/celtech_diff.png) 36 | 37 | ## Observations 38 | 39 | ## Preparation 40 | 41 | 1. Set up directories in the file `config_cam.py`. 42 | 43 | - Training 44 | 45 | `config.vgg_dir` - directory of pre-trained VGG19 parameters 46 | 47 | `config.data_dir` - directory of training image data 48 | 49 | `config.infer_data_dir` - directory of the image used to infer the class activation map during training (put only one image there) 50 | 51 | `config.checkpoint_dir` - directory for saving the trained model (saved every 100 training steps) 52 | 53 | `config.summary_dir` - directory for saving summaries (saved every 10 training steps) 54 | 55 | `config.infer_dir` - directory for saving inference results (saved every 100 training steps) 56 | 57 | - Testing 58 | 59 | `config.model_dir` - directory of trained model parameters 60 | 61 | `config.test_data_dir` - directory of testing images 62 | 63 | `config.result_dir` - directory for saving prediction results 64 | 65 | 2. Download the dataset and pre-trained VGG parameters 66 | 67 | - Download the [Caltech-256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) dataset and put it in `config.data_dir`. 68 | - Download the pre-trained VGG19 model [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19) and put it in `config.vgg_dir`. 69 | 70 | ## Train and test on Caltech-256: 71 | 72 | Go to `CNN-Visualization/example/`, then 73 | 74 | Finetune the pre-trained VGG19 on Caltech-256: 75 | 76 | ``` 77 | python cam.py --train --bsize BATCH_SIZE --label INFER_CLASS_LABEL 78 | ``` 79 | 80 | 81 | Generate the class activation map using the trained parameters: 82 | 83 | ``` 84 | python cam.py --predict --bsize BATCH_SIZE --model SAVED_MODEL_NAME --label INFER_CLASS_LABEL 85 | ``` 86 | 87 | **INFER_CLASS_LABEL** is the label of the class used to generate the inference class activation map. 88 | 89 | - The scaled class activation map will be saved in `config.result_dir` along with a .mat file containing the raw data of the map. 90 | - If the batch size is greater than 1, the result images of one mini batch will be saved as one image.
91 | - **Batch size has to be one during testing if the testing images have different sizes.** Or you can resize all the images to 224 x 224 by uncommenting `resize = 224,` (line 83). Please refer to the code comments for more detailed parameter settings. 92 | 93 | 94 | ## Train and test on your own dataset: 95 | 96 | Go to `CNN-Visualization/example/`, then 97 | 98 | Dataset requirements: 99 | 100 | 1. Put the training images in `config.data_dir`. Images of different classes are in different folders. Uncomment `print(dataset_train.label_dict)` to check the image class names and the corresponding label indices for training and testing. 101 | 2. The images have to be color images with 3 channels. 102 | 3. May not work well on low-resolution images, since all the images will be rescaled to 224 x 224 for training. 103 | 104 | 105 | Finetune the pre-trained VGG19 on your own dataset: 106 | 107 | - The number of image classes and the image file type need to be specified: 108 | 109 | ``` 110 | python cam.py --train --bsize BATCH_SIZE --label INFER_CLASS_LABEL --nclass NUM_IMAGE_CLASS\ 111 | --type IMAGE_FILE_EXTENSION(start with '.') 112 | ``` 113 | 114 | Generate the class activation map using the trained parameters: 115 | 116 | ``` 117 | python cam.py --predict --bsize BATCH_SIZE --model SAVED_MODEL_NAME --label INFER_CLASS_LABEL\ 118 | --type IMAGE_FILE_EXTENSION(start with '.') 119 | ``` 120 | 121 | 122 | ## Author 123 | Qian Ge 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /doc/cam/figs/celtech_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/cam/figs/celtech_change.png -------------------------------------------------------------------------------- /doc/cam/figs/celtech_diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/cam/figs/celtech_diff.png -------------------------------------------------------------------------------- /doc/cam/figs/celtech_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/cam/figs/celtech_result.png -------------------------------------------------------------------------------- /doc/deconv/README.md: -------------------------------------------------------------------------------- 1 | # CNN Feature Visuallization via Deconvnet 2 | 3 | - TensorFlow implementation of [ 4 | Visualizing and Understanding Convolutional Networks](https://arxiv.org/abs/1311.2901) (ECCV'14). 5 | - The features are visualized by mapping activations back to the input pixel space through several unpooling layers and deconvolutional layers. 6 | - This implementation takes [VGG19](https://arxiv.org/abs/1409.1556) as an example. 7 | 8 | ## Requirements 9 | - Python 3.3+ 10 | - [Tensorflow 1.3](https://www.tensorflow.org/) 11 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow) 12 | 13 | ## Implementation Details 14 | 15 | - Deconvnet for VGG19 is defined in [`CNN-Visualization/lib/nets/vgg.py`](https://github.com/conan7882/CNN-Visualization/blob/master/lib/nets/vgg.py#L325).
16 | - Example usage of the deconvnet visualization is in [`CNN-Visualization/example/deconv.py`](../../example/deconv.py). 17 | - Images are rescaled to a proper size to get rid of size-inconsistency issues when unpooling. 18 | - Feature maps at each level of the CNN are first computed for a single image. Then one activation at a certain level is picked and all other activations at that level are set to zero. Finally, this new feature map is mapped back to the input pixel space. 19 | - The filter weights of the convnet and the deconvnet are shared; the deconvnet filters are the transposed versions of the convnet filters. 20 | - Switches are stored during the forward pass. A reference implementation of the unpooling layer is [here](https://github.com/tensorflow/tensorflow/issues/2169). 21 | 22 | 23 | ## Results 24 | ### Caltech-256 25 | Left: reconstruction; right: original image. Both are cropped based on the receptive field of the activations. 26 | #### Top 9 activations over all feature maps for each layer 27 | #### Across 'people' class images 28 | 29 | Layer 1_2 30 |

31 | 32 | 33 |

34 | Layer 2_2 35 |

36 | 37 | 38 |

39 | Layer 3_4 40 |

41 | 42 | 43 |

44 | Layer 4_4 45 |

46 | 47 | 48 |

49 | Layer 5_2 50 |

51 | 52 | 53 |

54 | 55 | #### Across 'dog' class images 56 | 57 | Layer 1_2 58 |

59 | 60 | 61 |

62 | Layer 2_2 63 |

64 | 65 | 66 |

67 | Layer 3_4 68 |

69 | 70 | 71 |

72 | Layer 4_4 (The network is sensitive to both dog and human faces at this level.) 73 |

74 | 75 | 76 |

77 | Layer 5_2 (The network is sensitive to both dog and human faces at this level.) 78 |

79 | 80 | 81 |

82 | 83 | 84 | #### Top 9 activations of a fixed feature map for each layer 85 | Layer 2_2, Feature map 59, People (This feature map has high activations for curves.) 86 |

87 | 88 | 89 |

90 | Layer 3_4, Feature map 166, People (This feature map is sensitive to 'O' shape patterns.) 91 | 92 |

93 | 94 | 95 |

96 | Layer 4_4, Feature map 349, Dog (It seems like this feature map is sensitive to faces (eyes and nose).) 97 |

98 | 99 | 100 |

101 | Layer 4_4, Feature map 171, Dog (This feature map is sensitive to the dog nose.) 102 |

103 | 104 | 105 |

106 |

107 | Layer 4_4, Feature map 171, People (This is the same feature map as in the figure above but on 'people' images. Besides the nose on dog faces, this feature map is also sensitive to some parts of human faces, such as the eyes and mouth.) 108 |

109 | 110 | 111 |

112 | 113 | 114 | 115 | 116 | ## Preparation 117 | 118 | 1. Set up directories in the file `CNN-Visualization/example/config_path.py`. 119 | 120 | - `im_path` - directory of testing image data 121 | - `vgg_path` - directory of pre-trained VGG19 parameters 122 | - `save_path` - directory for saving result images 123 | 124 | 2. Download the pre-trained VGG parameters 125 | 126 | - Download the pre-trained VGG19 model [here](https://www.dropbox.com/sh/dad57t3hl60jeb0/AADlcUshCnmN2bAZdgdkmxDna?dl=0). It was originally downloaded from [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19). Put it in `vgg_path`. 127 | 128 | 129 | ## Run Script: 130 | 131 | Go to `CNN-Visualization/example/` 132 | 133 | To get the reconstruction image of the top 9 activations of a specific layer (layer 4_4) across the test set (JPEG files): 134 | 135 | ``` 136 | python deconv.py --feat conv4_4 --imtype .jpg 137 | ``` 138 | 139 | - All the test images will be rescaled to 224 x 224 before being fed into VGG19. 140 | - `--feat` is used to specify the name of the feature layer. It can be `conv1_1`, `conv2_1` ... The full list can be found [here](https://github.com/conan7882/CNN-Visualization/blob/master/lib/nets/vgg.py#L374). 141 | - `--imtype` is used to specify the test image type. It can be .jpg, .png or other image types. 142 | - The cropped and merged reconstruction and original images are saved in `save_path` as 143 | **LAYER_NAME_feat.png** 144 | and **LAYER_NAME_im.png** 145 | 146 | To get the reconstruction image of the top 9 activations of a specific feature map (350) at a specific layer (layer 4_4) across the test set (JPEG files): 147 | 148 | ``` 149 | python deconv.py --feat conv4_4 --imtype .jpg --id 350 150 | ``` 151 | 152 | - `--id` is used to specify which feature map to check.
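For reference, below is a minimal sketch of the two operations described in the implementation details above — switch-based unpooling and a deconvolution that reuses the forward kernel. It assumes TensorFlow 1.x and is only an illustration of the idea, not the actual code in `lib/nets/vgg.py`:

```python
import tensorflow as tf

def unpool(feat, pre_pool, pooled):
    """Scatter `feat` back to the max locations ("switches") recorded by the
    forward max-pooling that produced `pooled` from `pre_pool`."""
    # The gradient of max_pool routes incoming values to the argmax positions,
    # which is exactly the switch-based unpooling used by the deconvnet.
    return tf.gradients(pooled, pre_pool, grad_ys=feat)[0]

def deconv(feat, kernel, output_shape):
    """Map a feature map back through a conv layer; conv2d_transpose applies the
    transposed version of `kernel`, so the weights are shared with the convnet."""
    return tf.nn.relu(tf.nn.conv2d_transpose(
        feat, kernel, output_shape=output_shape,
        strides=[1, 1, 1, 1], padding='SAME'))

# Toy forward pass: one conv + pool layer on an 8 x 8 input.
x = tf.placeholder(tf.float32, [1, 8, 8, 3])
k = tf.get_variable('k', [3, 3, 3, 4])
conv = tf.nn.relu(tf.nn.conv2d(x, k, strides=[1, 1, 1, 1], padding='SAME'))
pool = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Keep only feature map 0 of the pooled layer, zero out the rest,
# then map it back towards the input pixel space.
picked = pool * tf.one_hot(0, 4)
recon = deconv(unpool(picked, conv, pool), k, output_shape=tf.shape(x))
```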
153 | 154 | ## Author 155 | Qian Ge 156 | -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv1_2_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv1_2_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv1_2_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv1_2_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv2_2_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv2_2_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv2_2_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv2_2_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv3_4_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv3_4_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv3_4_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv3_4_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv4_4_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv4_4_feat_171.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_feat_171.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv4_4_feat_349.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_feat_349.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv4_4_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_im.png -------------------------------------------------------------------------------- 
/doc/deconv/figs/dog/conv4_4_im_171.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_im_171.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv4_4_im_349.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_im_349.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv5_2_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv5_2_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/dog/conv5_2_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv5_2_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv1_2_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv1_2_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv1_2_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv1_2_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv2_2_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv2_2_feat_59.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_feat_59.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv2_2_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv2_2_im_59.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_im_59.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv3_4_feat.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv3_4_feat_166.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_feat_166.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv3_4_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv3_4_im_166.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_im_166.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv4_4_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv4_4_feat_171.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_feat_171.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv4_4_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_im.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv4_4_im_171.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_im_171.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv5_2_feat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv5_2_feat.png -------------------------------------------------------------------------------- /doc/deconv/figs/people/conv5_2_im.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv5_2_im.png -------------------------------------------------------------------------------- /doc/firstfilter/README.md: -------------------------------------------------------------------------------- 1 | # 
Visualization of filters and feature maps of GoogLeNet 2 | 3 | - The most straightforward approach to visualize a CNN is to show the feature maps (activations) and filters. 4 | - Here are some examples of visualization of the first layer filters (7 x 7 x 3 x 64) and feature maps of the pre-trained GoogLeNet. 5 | 6 | ## Requirements 7 | - Python 3.3+ 8 | - [Tensorflow 1.3](https://www.tensorflow.org/) 9 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow) 10 | 11 | ## Results 12 | 13 | 14 | ### First layer filters (7 x 7 x 3 x 64) 15 | 16 | We can see some high-frequency grayscale features as well as some low-frequency color features. 17 | 18 |
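The filter grid below is produced by normalizing each 7 x 7 x 3 kernel to [0, 1] and tiling the 64 kernels into one image. A minimal sketch of that tiling step (the random array is only a stand-in for the kernel read out of the pre-trained weights, so the snippet runs on its own):

```python
import numpy as np
import matplotlib.pyplot as plt

def tile_filters(weights, n_col=8, pad=1):
    """Tile a filter bank of shape (k, k, in_c, out_c) into one grid image."""
    k1, k2, in_c, out_c = weights.shape
    n_row = int(np.ceil(out_c / float(n_col)))
    grid = np.ones((n_row * (k1 + pad) - pad, n_col * (k2 + pad) - pad, in_c))
    for i in range(out_c):
        f = weights[..., i]
        f = (f - f.min()) / (f.max() - f.min() + 1e-8)   # normalize each filter to [0, 1]
        r, c = divmod(i, n_col)
        grid[r * (k1 + pad):r * (k1 + pad) + k1,
             c * (k2 + pad):c * (k2 + pad) + k2, :] = f
    return grid

if __name__ == '__main__':
    # In practice `w` is the first-layer kernel (7 x 7 x 3 x 64) loaded from the
    # pre-trained GoogLeNet parameters; random values are used here as a stand-in.
    w = np.random.rand(7, 7, 3, 64)
    plt.imsave('filters.png', tile_filters(w))
```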

19 | 20 |

21 | 22 | ### Feature maps with ReLU (Randomly choose 100 feature maps at each layer for display) 23 | 24 | - Input image 25 |

26 | 27 |

28 | 29 | - Layer 3 (Inception3a and Inception3b) 30 |

31 | 32 | 33 |

34 | 35 | - Layer 4 (Inception4a - Inception4e) 36 |

37 | 38 | 39 | 40 | 41 | 42 |

43 | 44 | - Layer 5 (Inception5a and Inception5b) 45 |

46 | 47 | 48 |

49 | 50 | 51 | ## Usage 52 | ### Download pre-trained model 53 | Download the pre-trained parameters [here](http://www.deeplearningmodel.net/). 54 | ### Config path 55 | All directories are setup in [`example/setup_env.py`](../../example/config_path.py). 56 | 57 | - `googlenet_path` is the path of the pre-trained model. 58 | - `im_path` is the directory of testing image. 59 | - `save_path` is the directory to save filter image. 60 | 61 | ## Run the code: 62 | 63 | Go to `CNN-Visualization/example/`, then 64 | 65 | 66 | ``` 67 | python vizfilter.py --feature --filter --im IMAGE_FILENAME 68 | ``` 69 | 70 | Image will be saved in `config.save_path` 71 | 72 | 73 | 74 | ## Author 75 | Qian Ge 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_filter.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception3a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception3a.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception3b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception3b.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception4a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4a.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception4b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4b.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception4c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4c.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception4d.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4d.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception4e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4e.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception5a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception5a.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/GoogLeNet_inception5b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception5b.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception3a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception3a.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception3b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception3b.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception4a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4a.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception4b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4b.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception4c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4c.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4d.png -------------------------------------------------------------------------------- 
/doc/firstfilter/figs/bk/GoogLeNet_inception4e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4e.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception5a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception5a.png -------------------------------------------------------------------------------- /doc/firstfilter/figs/bk/GoogLeNet_inception5b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception5b.png -------------------------------------------------------------------------------- /doc/grad_cam/README.md: -------------------------------------------------------------------------------- 1 | # Gradient-weighted Class Activation Mapping (Grad-CAM) 2 | 3 | - TensorFlow implementation of [Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization](https://arxiv.org/abs/1610.02391) (ICCV'17). 4 | - Torch implementation by the authors is [here](https://github.com/ramprs/grad-cam). 5 | - Grad-CAM generates similar heatmap as [CAM](https://arxiv.org/abs/1512.04150), but it does not require re-train the model. 6 | - This implementation takes [VGG19](https://arxiv.org/abs/1409.1556) as example. 7 | 8 | ## Requirements 9 | - Python 3.3+ 10 | - [Tensorflow 1.3](https://www.tensorflow.org/) 11 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow) 12 | 13 | ## Implementation Details 14 | 15 | 16 | 17 | - [VGG19](https://arxiv.org/abs/1409.1556) is used for visualization. The model is defined in [`CNN-Visualization/lib/nets/vgg.py`](../../lib/nets/vgg.py). 18 | - Grad-CAM model is defined in [`CNN-Visualization/lib/models/grad_cam.py`](../../lib/models/grad_cam.py). 19 | - Example usage of Grad-CAM is in [`CNN-Visualization/example/gradcam.py`](../../example/gradcam.py). 20 | 21 | ## Results 22 | 23 | Left to right: Original Image, Grad-CAM, Guided Grad-CAM, Grad-CAM, Guided Grad-CAM 24 | ![ex1](figs/ex1.png) 25 | ![ex2](figs/ex2.png) 26 | 27 | Compare with CAM using the same images (Caltech-256) (CAM result can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#caltech-256)): 28 | **ImageNet1000 does not have class 'top hat', so class 'cowboy hat' is used here which gets relatively bad performance to find the hat. Also note that, the pre-trained VGG19 is not trained on Caltech-256.** 29 | 30 | ![compare](figs/comparecam.png) 31 | 32 | 33 | 34 | 35 | 36 | ## Preparation 37 | 38 | 1. Setup directories in file `CNN-Visualization/example/gradcam.py`. 39 | 40 | - `IM_PATH` - directory of testing image data 41 | - `VGG_PATH` - directory of pre-trained VGG19 parameters 42 | - `SAVE_DIR` - directory of saving result images 43 | 44 | 2. Download the pre-trained VGG parameters 45 | 46 | - Download pre-trained VGG19 model [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19) and put it in `VGG_PATH`. 47 | 48 | 49 | 3. Testing images 50 | 51 | - Put testing images in `IM_PATH`. 
52 | - Set class labels on line 56 in `CNN-Visualization/example/gradcam.py`. For example, the setting below will generate Grad-CAM and Guided Grad-CAM for class 355 (llama), 543 (dumbbell), 605 (iPod) and 515 (hat). More labels for ImageNet1000 can be found [here](https://github.com/conan7882/VGG-tensorflow/blob/master/imageNetLabel.txt). 53 | 54 | ``` 55 | class_id = [355, 543, 605, 515] 56 | ``` 57 | 58 | - Change the image type to the corresponding type in the function below (line 84 in `CNN-Visualization/example/gradcam.py`) if the testing images are not jpeg files. 59 | 60 | ``` 61 | input_im = ImageFromFile('.jpg', data_dir=IM_PATH, num_channel=3, shuffle=False) 62 | ``` 63 | 64 | 65 | ## Run Script: 66 | 67 | To get the Grad-CAM maps for all the images in `IM_PATH`, go to `CNN-Visualization/example/` and run: 68 | 69 | ``` 70 | python gradcam.py 71 | ``` 72 | 73 | - All the test images will be rescaled so that the smallest side is 224 before being fed into VGG19. 74 | - Grad-CAM and Guided Grad-CAM will be saved in `SAVE_DIR` as **gradcam_IDX_class_CLASSLABEL.png** and **guided_gradcam_IDX_class_CLASSLABEL.png** 75 | 76 | 77 | ## Author 78 | Qian Ge 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /doc/grad_cam/figs/comparecam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/grad_cam/figs/comparecam.png -------------------------------------------------------------------------------- /doc/grad_cam/figs/ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/grad_cam/figs/ex1.png -------------------------------------------------------------------------------- /doc/grad_cam/figs/ex2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/grad_cam/figs/ex2.png -------------------------------------------------------------------------------- /doc/guided_backpropagation/README.md: -------------------------------------------------------------------------------- 1 | # Guided Backpropagation 2 | 3 | - TensorFlow implementation of [Striving for Simplicity: The All Convolutional Net](https://arxiv.org/abs/1412.6806) (2014). 4 | - Guided backpropagation generates clearer visualizations than [deconvnet](https://arxiv.org/abs/1311.2901) for higher layers. 5 | 6 | 7 | ## Requirements 8 | - Python 3.3+ 9 | - [Tensorflow 1.3](https://www.tensorflow.org/) 10 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow) 11 | 12 | ## Implementation Details 13 | 14 | 15 | 16 | - [VGG19](https://arxiv.org/abs/1409.1556) is used for visualization. The model is defined in [`CNN-Visualization/lib/nets/vgg.py`](../../lib/nets/vgg.py). 17 | - The guided backpropagation computation class is defined in [`CNN-Visualization/lib/models/guided_backpro.py`](../../lib/models/guided_backpro.py). 18 | - Example usage is in [`CNN-Visualization/example/guided_backpropagation.py`](../../example/guided_backpropagation.py). 19 | 20 | 21 | ## Results 22 |
23 | 24 | 25 | 26 | 27 |
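For reference, a minimal sketch of the core mechanism (assuming TensorFlow 1.x): the ReLU gradient is overridden so that only positive gradients flow through positions with positive activations, and the saliency map is the gradient of the top class score with respect to the input image. The tiny network below only stands in for VGG19; this is not the actual code in `lib/models/guided_backpro.py`.

```python
import tensorflow as tf
from tensorflow.python.framework import ops

@ops.RegisterGradient("GuidedRelu")
def _guided_relu_grad(op, grad):
    # Plain ReLU backprop passes grad wherever the forward activation was positive;
    # guided backpropagation additionally zeroes out negative incoming gradients.
    gate_fwd = tf.cast(op.outputs[0] > 0., grad.dtype)
    gate_bwd = tf.cast(grad > 0., grad.dtype)
    return grad * gate_fwd * gate_bwd

graph = tf.Graph()
with graph.as_default(), graph.gradient_override_map({'Relu': 'GuidedRelu'}):
    # A toy two-layer network stands in for VGG19 here.
    image = tf.placeholder(tf.float32, [1, 224, 224, 3])
    h = tf.layers.conv2d(image, 8, 3, activation=tf.nn.relu)
    logits = tf.layers.dense(tf.reshape(h, [1, -1]), 10)
    top_score = tf.reduce_max(logits, axis=1)          # score of the predicted class
    # The guided backpropagation map is the (gated) gradient of that score
    # with respect to the input image.
    guided_map = tf.gradients(top_score, image)[0]
```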
28 | 29 | ## Observations 30 | 31 | ## Preparation 32 | 33 | 1. Setup directories in file `CNN-Visualization/example/guided_backpropagation.py`. 34 | 35 | - `IM_PATH` - directory of testing image data 36 | - `VGG_PATH` - directory of pre-trained VGG19 parameters 37 | - `SAVE_DIR` - directory of saving result images 38 | 39 | 2. Download the pre-trained VGG parameters 40 | 41 | - Download pre-trained VGG19 model [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19) and put it in `VGG_PATH`. 42 | 43 | 44 | 3. Testing images 45 | 46 | - Put testing images in `IM_PATH` . 47 | - Change image type to the corresponding type if testing images are not jpeg files 48 | 49 | ``` 50 | input_im = ImageFromFile('.jpg', data_dir=IM_PATH, num_channel=3, shuffle=False) 51 | ``` 52 | 53 | 54 | ## Run Script: 55 | 56 | To get the guided backpropagation maps for all the image in `IM_PATH`, go to `CNN-Visualization/example/` and run: 57 | 58 | ``` 59 | python guided_backpropagation.py 60 | ``` 61 | 62 | - Results will be saved in `SAVE_DIR` as **map_IDX_class_PREDICT_LABEL.png**. 63 | 64 | 65 | ## Author 66 | Qian Ge 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /doc/guided_backpropagation/figs/gbp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp.png -------------------------------------------------------------------------------- /doc/guided_backpropagation/figs/gbp1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp1.png -------------------------------------------------------------------------------- /doc/guided_backpropagation/figs/gbp2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp2.png -------------------------------------------------------------------------------- /doc/guided_backpropagation/figs/gbp3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp3.png -------------------------------------------------------------------------------- /doc/guided_backpropagation/figs/gbp4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp4.png -------------------------------------------------------------------------------- /example/cam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: cam.py 4 | # Author: Qian Ge 5 | 6 | import argparse 7 | 8 | from tensorcv.dataflow.image import ImageLabelFromFolder, ImageFromFile 9 | from tensorcv.callbacks import * 10 | from tensorcv.train.config import TrainConfig 11 | from tensorcv.train.simple import SimpleFeedTrainer 12 | from tensorcv.predicts.config import PridectConfig 13 | from tensorcv.predicts.simple import SimpleFeedPredictor 14 | 
from tensorcv.predicts import * 15 | 16 | import setup_env 17 | import config_cam as config_path 18 | from models.cam import VGGCAM 19 | 20 | NUM_CHANNEL = 3 21 | 22 | 23 | def get_config(FLAGS): 24 | # data for training 25 | dataset_train = ImageLabelFromFolder(FLAGS.type, 26 | data_dir=config_path.data_dir, 27 | num_class=FLAGS.nclass, 28 | resize=224, 29 | num_channel=NUM_CHANNEL) 30 | 31 | # Print image class name and label 32 | # print(dataset_train.label_dict) 33 | 34 | # Since the aim of training is visulization of class map, all the images 35 | # are used for training. Using the training set as validation set is just 36 | # for checking whether the training works correctly. 37 | dataset_val = ImageLabelFromFolder(FLAGS.type, 38 | data_dir=config_path.data_dir, 39 | num_class=FLAGS.nclass, 40 | resize=224, 41 | num_channel=NUM_CHANNEL) 42 | 43 | # Check accuracy during training using training set 44 | inference_list_validation = InferScalars('accuracy/result', 45 | 'test_accuracy') 46 | 47 | training_callbacks = [ 48 | ModelSaver(periodic=100), 49 | TrainSummary(key='train', periodic=50), 50 | FeedInferenceBatch(dataset_val, batch_count=10, periodic=100, 51 | inferencers=inference_list_validation), 52 | CheckScalar(['accuracy/result', 'loss/result'], periodic=10)] 53 | 54 | inspect_class = None 55 | if FLAGS.label > 0: 56 | inspect_class = FLAGS.label 57 | # Image use for inference the class acitivation map during training 58 | dataset_test = ImageFromFile(FLAGS.type, 59 | data_dir=config_path.infer_data_dir, 60 | shuffle=False, 61 | resize=224, 62 | num_channel=NUM_CHANNEL) 63 | # Check class acitivation map during training 64 | inference_list_test = [ 65 | InferOverlay(['classmap/result', 'image'], ['map', 'image'], 66 | color=True), 67 | InferImages('classmap/result', 'map', color=True)] 68 | training_callbacks += FeedInference(dataset_test, periodic=50, 69 | infer_batch_size=1, 70 | inferencers=inference_list_test), 71 | 72 | return TrainConfig( 73 | dataflow=dataset_train, 74 | model=VGGCAM(num_class=FLAGS.nclass, 75 | inspect_class=inspect_class, 76 | learning_rate=0.001, is_load=True, 77 | pre_train_path=config_path.vgg_dir), 78 | monitors=TFSummaryWriter(), 79 | callbacks=training_callbacks, 80 | batch_size=FLAGS.bsize, 81 | max_epoch=25, 82 | summary_periodic=50, 83 | default_dirs=config_path) 84 | 85 | 86 | def get_predict_config(FLAGS): 87 | dataset_test = ImageFromFile(FLAGS.type, 88 | data_dir=config_path.test_data_dir, 89 | shuffle=False, 90 | resize=224, 91 | num_channel=NUM_CHANNEL) 92 | # dataset_test = ImageLabelFromFolder('.jpg', 93 | # data_dir = config_path.data_dir, 94 | # num_class = FLAGS.nclass, 95 | # resize = 224, 96 | # num_channel = NUM_CHANNEL) 97 | prediction_list = [ 98 | # PredictionScalar(['pre_label'], ['label']), 99 | # PredictionMeanScalar('accuracy/result', 'test_accuracy'), 100 | PredictionMat('classmap/result', ['test']), 101 | PredictionOverlay(['classmap/result', 'image'], ['map', 'image'], 102 | color=True, merge_im=True), 103 | PredictionImage(['image'], ['image'], color=True, merge_im=True)] 104 | 105 | return PridectConfig( 106 | dataflow=dataset_test, 107 | model=VGGCAM(num_class=FLAGS.nclass, inspect_class=FLAGS.label, 108 | is_load=True, pre_train_path=config_path.vgg_dir), 109 | model_name=FLAGS.model, 110 | predictions=prediction_list, 111 | batch_size=FLAGS.bsize, 112 | default_dirs=config_path) 113 | 114 | 115 | def get_args(): 116 | parser = argparse.ArgumentParser() 117 | parser.add_argument('--bsize', default=32, type=int) 118 
| parser.add_argument('--label', default=-1, type=int, 119 | help='Label of inspect class.') 120 | parser.add_argument('--nclass', default=257, type=int, 121 | help='number of image class') 122 | 123 | parser.add_argument('--predict', action='store_true', 124 | help='Run prediction') 125 | parser.add_argument('--train', action='store_true', 126 | help='Train the model') 127 | 128 | parser.add_argument('--type', default='.jpg', type=str, 129 | help='image type for training and testing') 130 | 131 | parser.add_argument('--model', type=str, 132 | help='file name of the trained model') 133 | 134 | return parser.parse_args() 135 | 136 | 137 | if __name__ == '__main__': 138 | FLAGS = get_args() 139 | if FLAGS.train: 140 | config = get_config(FLAGS) 141 | SimpleFeedTrainer(config).train() 142 | if FLAGS.predict: 143 | config = get_predict_config(FLAGS) 144 | SimpleFeedPredictor(config).run_predict() 145 | 146 | # 0.6861924529075623 147 | -------------------------------------------------------------------------------- /example/config_cam.py: -------------------------------------------------------------------------------- 1 | # File: config.py 2 | # Author: Qian Ge 3 | 4 | # directory of pre-trained vgg parameters 5 | vgg_dir = '../../data/pretrain/vgg/vgg19.npy' 6 | 7 | # directory of training data 8 | data_dir = '../../data/dataset/256_ObjectCategories/' 9 | 10 | # directory of testing data 11 | test_data_dir = '../data/' 12 | 13 | # directory of inference data 14 | infer_data_dir = '../data/' 15 | 16 | # directory for saving inference data 17 | infer_dir = '../../data/tmp/' 18 | 19 | # directory for saving summary 20 | summary_dir = '../../data/tmp/' 21 | 22 | # directory for saving checkpoint 23 | checkpoint_dir = '../../data/tmp/' 24 | 25 | # directory for restoring checkpoint 26 | model_dir = '../../data/tmp/' 27 | 28 | # directory for saving prediction results 29 | result_dir = '../../data/tmp/' 30 | -------------------------------------------------------------------------------- /example/config_path.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: config_path.py 4 | # Author: Qian Ge 5 | 6 | # directory of testing images 7 | # im_path = '../data/' 8 | im_path = '../data/dataset/256_ObjectCategories/159.people/' 9 | 10 | # foler for saving 11 | save_path = '' 12 | 13 | # directory of pre-trained googlenet parameters 14 | googlenet_path = '../pretrained/googlenet.npy' 15 | vgg_path = '../pretrain/vgg/vgg19.npy' 16 | -------------------------------------------------------------------------------- /example/deconv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: deconv.py 4 | # Author: Qian Ge 5 | 6 | import os 7 | import scipy.misc 8 | import argparse 9 | import numpy as np 10 | import tensorflow as tf 11 | from tensorcv.dataflow.image import ImageFromFile 12 | 13 | import config_path as config 14 | 15 | import sys 16 | sys.path.append('../') 17 | from lib.nets.vgg import DeconvBaseVGG19, BaseVGG19 18 | import lib.utils.viz as viz 19 | import lib.utils.normalize as normlize 20 | import lib.utils.image as uim 21 | 22 | 23 | IM_SIZE = 224 24 | 25 | def get_parse(): 26 | parser = argparse.ArgumentParser() 27 | 28 | parser.add_argument('--imtype', type=str, default='.jpg', 29 | help='Image type') 30 | parser.add_argument('--feat', type=str, required=True, 31 | help='Choose of feature map layer') 32 
| parser.add_argument('--id', type=int, default=None, 33 | help='feature map id') 34 | 35 | return parser.parse_args() 36 | 37 | def im_scale(im): 38 | return uim.im_rescale(im, [IM_SIZE, IM_SIZE]) 39 | 40 | if __name__ == '__main__': 41 | FLAGS = get_parse() 42 | 43 | input_im = ImageFromFile(FLAGS.imtype, 44 | data_dir=config.im_path, 45 | num_channel=3, 46 | shuffle=False, 47 | pf=im_scale, 48 | ) 49 | input_im.set_batch_size(1) 50 | 51 | vizmodel = DeconvBaseVGG19(config.vgg_path, 52 | feat_key=FLAGS.feat, 53 | pick_feat=FLAGS.id) 54 | 55 | vizmap = vizmodel.layers['deconvim'] 56 | feat_op = vizmodel.feats 57 | max_act_op = vizmodel.max_act 58 | 59 | act_size = vizmodel.receptive_size[FLAGS.feat] 60 | act_scale = vizmodel.stride[FLAGS.feat] 61 | 62 | with tf.Session() as sess: 63 | sess.run(tf.global_variables_initializer()) 64 | 65 | max_act_list = [] 66 | while input_im.epochs_completed < 1: 67 | im = input_im.next_batch()[0] 68 | max_act = sess.run(max_act_op, feed_dict={vizmodel.im: im}) 69 | max_act_list.append(max_act) 70 | 71 | max_list = np.argsort(max_act_list)[::-1] 72 | im_file_list = input_im.get_data_list()[0] 73 | 74 | feat_list = [] 75 | im_list = [] 76 | for i in range(0, 10): 77 | im = input_im.next_batch()[0] 78 | file_path = os.path.join(config.im_path, im_file_list[max_list[i]]) 79 | im = np.array([im_scale(scipy.misc.imread(file_path, mode='RGB'))]) 80 | 81 | cur_vizmap, feat_map, max_act = sess.run( 82 | [vizmap, feat_op, max_act_op], feed_dict={vizmodel.im: im}) 83 | 84 | act_ind = np.nonzero((feat_map)) 85 | print('Location of max activation {}'.format(act_ind)) 86 | # get only the first nonzero element 87 | act_c = (act_ind[1][0], act_ind[2][0]) 88 | min_x = max(0, int(act_c[0] * act_scale - act_size / 2)) 89 | max_x = min(IM_SIZE, int(act_c[0] * act_scale + act_size / 2)) 90 | min_y = max(0, int(act_c[1] * act_scale - act_size / 2)) 91 | max_y = min(IM_SIZE, int(act_c[1] * act_scale + act_size / 2)) 92 | 93 | im_crop = im[0, min_x:max_x, min_y:max_y, :] 94 | act_crop = cur_vizmap[0, min_x:max_x, min_y:max_y, :] 95 | 96 | pad_size = (act_size - im_crop.shape[0], act_size - im_crop.shape[1]) 97 | im_crop = np.pad(im_crop, 98 | ((0, pad_size[0]), (0, pad_size[1]), (0, 0)), 99 | 'constant', 100 | constant_values=0) 101 | act_crop = np.pad(act_crop, 102 | ((0, pad_size[0]),(0, pad_size[1]), (0, 0)), 103 | 'constant', 104 | constant_values=0) 105 | 106 | feat_list.append(act_crop) 107 | im_list.append(im_crop) 108 | 109 | viz.viz_filters(np.transpose(feat_list, (1, 2, 3, 0)), 110 | [3, 3], 111 | os.path.join(config.save_path, '{}_feat.png'.format(FLAGS.feat)), 112 | gap=2, 113 | gap_color=0, 114 | nf=normlize.indentity, 115 | shuffle=False) 116 | viz.viz_filters(np.transpose(im_list, (1, 2, 3, 0)), 117 | [3, 3], 118 | os.path.join(config.save_path, '{}_im.png'.format(FLAGS.feat)), 119 | gap=2, 120 | gap_color=0, 121 | nf=normlize.indentity, 122 | shuffle=False) 123 | 124 | -------------------------------------------------------------------------------- /example/gap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: gap.py 4 | # Author: Qian Ge 5 | 6 | import os 7 | import argparse 8 | import numpy as np 9 | import tensorflow as tf 10 | # from tensorcv.dataflow.dataset.CIFAR import CIFAR 11 | import sys 12 | sys.path.append('../') 13 | from lib.dataflow.cifar import CIFAR 14 | from lib.models.gap import GAPNet 15 | import lib.utils.viz as viz 16 | import 
lib.utils.normalize as normlize 17 | 18 | # data_path = '/Users/gq/workspace/Dataset/cifar-10-batches-py/' 19 | # save_path = '/Users/gq/workspace/Tmp/test/' 20 | 21 | data_path = '/home/qge2/workspace/data/dataset/cifar/' 22 | save_path = '/home/qge2/workspace/data/out/gap/' 23 | 24 | def get_args(): 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--lr', default=0.0005, type=float) 27 | parser.add_argument('--dropout', default=0.5, type=float) 28 | parser.add_argument('--wd', default=0, type=float) 29 | parser.add_argument('--epoch', default=150, type=int) 30 | 31 | parser.add_argument('--train', action='store_true') 32 | parser.add_argument('--viz', action='store_true') 33 | 34 | return parser.parse_args() 35 | 36 | 37 | if __name__ == '__main__': 38 | FLAGS = get_args() 39 | max_epoch = FLAGS.epoch 40 | lr = FLAGS.lr 41 | dropout = FLAGS.dropout 42 | 43 | train_data = CIFAR(data_dir=data_path, 44 | batch_dict_name=['im', 'label'], 45 | data_type='train', 46 | substract_mean=False) 47 | train_data.setup(epoch_val=0, batch_size=128) 48 | valid_data = CIFAR(data_dir=data_path, 49 | shuffle=False, 50 | batch_dict_name=['im', 'label'], 51 | data_type='valid', 52 | # channel_mean=train_data.channel_mean, 53 | substract_mean=False) 54 | valid_data.setup(epoch_val=0, batch_size=128) 55 | 56 | # print(train_data.next_batch_dict()) 57 | 58 | im = tf.placeholder(tf.float32, [None, 32, 32, 3], name='im') 59 | label = tf.placeholder(tf.int64, [None], name='label') 60 | input_dict = {'input': im, 'label': label} 61 | 62 | model = GAPNet(num_class=10, wd=FLAGS.wd) 63 | model.create_model(input_dict) 64 | 65 | train_op = model.get_train_op() 66 | loss_op = model.get_loss() 67 | accuracy_op = model.get_accuracy() 68 | 69 | saver = tf.train.Saver() 70 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3) 71 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 72 | sess.run(tf.global_variables_initializer()) 73 | 74 | if FLAGS.viz: 75 | saver.restore(sess, '{}epoch_{}'.format(save_path, 69)) 76 | valid_data.setup(epoch_val=0, batch_size=50) 77 | batch_data = valid_data.next_batch_dict() 78 | maps = sess.run(model.layer['feature'], 79 | feed_dict={im: batch_data['im']}) 80 | print(batch_data['label']) 81 | 82 | viz.viz_filters( 83 | batch_data['im'].transpose(1, 2, 3, 0), 84 | [10, 1], 85 | os.path.join(save_path, 'im.png'), 86 | gap=2, 87 | gap_color=10, 88 | shuffle=False, 89 | # nf=normlize.norm_range 90 | ) 91 | 92 | for idx, cur_map in enumerate(maps): 93 | viz.viz_filters( 94 | cur_map, 95 | [1, 10], 96 | os.path.join(save_path, 'maps_{}.png'.format(idx)), 97 | gap=2, 98 | gap_color=10, 99 | shuffle=False, 100 | # nf=normlize.norm_range 101 | ) 102 | 103 | if FLAGS.train: 104 | loss_sum = 0 105 | acc_sum = 0 106 | epoch_id = 0 107 | # for epoch_id in range(0, max_epoch): 108 | epoch_step = 0 109 | while epoch_id < max_epoch: 110 | epoch_step += 1 111 | cur_epoch = train_data.epochs_completed 112 | if epoch_step % int(train_data.batch_step / 10) == 0: 113 | print('loss: {}, acc: {}'\ 114 | .format( 115 | loss_sum * 1.0 / epoch_step, 116 | acc_sum * 1.0 / epoch_step)) 117 | if cur_epoch > epoch_id: 118 | saver.save(sess, '{}epoch_{}'.format(save_path, epoch_id)) 119 | print('epoch: {}, lr: {}, loss: {}, acc: {}'\ 120 | .format(epoch_id, 121 | lr, 122 | loss_sum * 1.0 / epoch_step, 123 | acc_sum * 1.0 / epoch_step)) 124 | loss_sum = 0 125 | acc_sum = 0 126 | epoch_step = 0 127 | epoch_id = cur_epoch 128 | 129 | if cur_epoch >= 50: 130 | lr = FLAGS.lr 
/ 10 131 | if cur_epoch >= 100: 132 | lr = FLAGS.lr / 100 133 | 134 | model.set_is_training(False) 135 | valid_acc_sum = 0 136 | valid_step = 0 137 | while valid_data.epochs_completed < 1: 138 | valid_step += 1 139 | batch_data = valid_data.next_batch_dict() 140 | acc = sess.run(accuracy_op, 141 | feed_dict={model.dropout: 1.0, 142 | im: batch_data['im'], 143 | label: batch_data['label'],}) 144 | valid_acc_sum += acc 145 | print('valid acc: {}'.format(valid_acc_sum * 1.0 / valid_step)) 146 | model.set_is_training(True) 147 | valid_data.setup(epoch_val=0, batch_size=128) 148 | 149 | 150 | batch_data = train_data.next_batch_dict() 151 | _, loss, acc = sess.run([train_op, loss_op, accuracy_op], 152 | feed_dict={model.lr: lr, 153 | model.dropout: dropout, 154 | im: batch_data['im'], 155 | label: batch_data['label']}) 156 | loss_sum += loss 157 | acc_sum += acc 158 | -------------------------------------------------------------------------------- /example/gradcam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: gradcam.py 4 | # Author: Qian Ge 5 | 6 | from itertools import count 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | from tensorcv.dataflow.image import ImageFromFile 11 | from tensorcv.utils.viz import image_overlay, save_merge_images 12 | 13 | import setup_env 14 | from nets.vgg import VGG19_FCN 15 | from models.guided_backpro import GuideBackPro 16 | from models.grad_cam import ClassifyGradCAM 17 | from utils.viz import image_weight_mask 18 | 19 | IM_PATH = '../data/' 20 | SAVE_DIR = '../../data/tmp/' 21 | VGG_PATH = '../../data/pretrain/vgg/vgg19.npy' 22 | 23 | 24 | # def image_weight_mask(image, mask): 25 | # """ 26 | # Args: 27 | # image: image with size [HEIGHT, WIDTH, CHANNEL] 28 | # mask: image with size [HEIGHT, WIDTH, 1] or [HEIGHT, WIDTH] 29 | # """ 30 | # image = np.array(np.squeeze(image)) 31 | # mask = np.array(np.squeeze(mask)) 32 | # assert len(mask.shape) == 2 33 | # assert len(image.shape) < 4 34 | # mask.astype('float32') 35 | # mask = np.reshape(mask, (mask.shape[0], mask.shape[1])) 36 | # mask = mask / np.amax(mask) 37 | 38 | # if len(image.shape) == 2: 39 | # return np.multiply(image, mask) 40 | # else: 41 | # for c in range(0, image.shape[2]): 42 | # image[:, :, c] = np.multiply(image[:, :, c], mask) 43 | # return image 44 | 45 | 46 | if __name__ == '__main__': 47 | 48 | # merge several output images in one large image 49 | merge_im = 1 50 | grid_size = np.ceil(merge_im**0.5).astype(int) 51 | 52 | # class label for Grad-CAM generation 53 | # 355 llama 543 dumbbell 605 iPod 515 hat 99 groose 283 tiger cat 54 | # 282 tabby cat 233 border collie 242 boxer 55 | # class_id = [355, 543, 605, 515] 56 | class_id = [283, 242] 57 | 58 | # initialize Grad-CAM 59 | # using VGG19 60 | gcam = ClassifyGradCAM( 61 | vis_model=VGG19_FCN(is_load=True, 62 | pre_train_path=VGG_PATH, 63 | is_rescale=True)) 64 | gbackprob = GuideBackPro( 65 | vis_model=VGG19_FCN(is_load=True, 66 | pre_train_path=VGG_PATH, 67 | is_rescale=True)) 68 | 69 | # placeholder for input image 70 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 71 | 72 | # create VGG19 model 73 | gcam.create_model(image) 74 | gcam.setup_graph() 75 | 76 | # generate class map and prediction label ops 77 | map_op = gcam.get_visualization(class_id=class_id) 78 | label_op = gcam.pre_label 79 | 80 | back_pro_op = gbackprob.get_visualization(image) 81 | 82 | # initialize input dataflow 83 | # change '.png' 
to other image types if other types of images are used 84 | input_im = ImageFromFile('.png', data_dir=IM_PATH, 85 | num_channel=3, shuffle=False) 86 | input_im.set_batch_size(1) 87 | 88 | writer = tf.summary.FileWriter(SAVE_DIR) 89 | with tf.Session() as sess: 90 | 91 | sess.run(tf.global_variables_initializer()) 92 | writer.add_graph(sess.graph) 93 | 94 | cnt = 0 95 | merge_cnt = 0 96 | # weight_im_list = [[] for i in range(len(class_id))] 97 | o_im_list = [] 98 | while input_im.epochs_completed < 1: 99 | im = input_im.next_batch()[0] 100 | gcam_map, b_map, label, o_im =\ 101 | sess.run([map_op, back_pro_op, label_op, gcam.input_im], 102 | feed_dict={image: im}) 103 | print(label) 104 | o_im_list.extend(o_im) 105 | for idx, cid, cmap in zip(count(), gcam_map[1], gcam_map[0]): 106 | overlay_im = image_overlay(cmap, o_im) 107 | weight_im = image_weight_mask(b_map[0], cmap) 108 | try: 109 | weight_im_list[idx].append(weight_im) 110 | overlay_im_list[idx].append(overlay_im) 111 | except NameError: 112 | gcam_class_id = gcam_map[1] 113 | weight_im_list = [[] for i in range(len(gcam_class_id))] 114 | overlay_im_list = [[] for i in range(len(gcam_class_id))] 115 | weight_im_list[idx].append(weight_im) 116 | overlay_im_list[idx].append(overlay_im) 117 | merge_cnt += 1 118 | 119 | # Merging results 120 | if merge_cnt == merge_im: 121 | save_path = '{}oim_{}.png'.format(SAVE_DIR, cnt, cid) 122 | save_merge_images(np.array(o_im_list), 123 | [grid_size, grid_size], 124 | save_path) 125 | for w_im, over_im, cid in zip(weight_im_list, 126 | overlay_im_list, 127 | gcam_class_id): 128 | # save grad-cam results 129 | save_path = '{}gradcam_{}_class_{}.png'.\ 130 | format(SAVE_DIR, cnt, cid) 131 | save_merge_images( 132 | np.array(over_im), [grid_size, grid_size], save_path) 133 | # save guided grad-cam results 134 | save_path = '{}guided_gradcam_{}_class_{}.png'.\ 135 | format(SAVE_DIR, cnt, cid) 136 | save_merge_images( 137 | np.array(w_im), [grid_size, grid_size], save_path) 138 | weight_im_list = [[] for i in range(len(gcam_class_id))] 139 | overlay_im_list = [[] for i in range(len(gcam_class_id))] 140 | o_im_list = [] 141 | merge_cnt = 0 142 | cnt += 1 143 | 144 | # Saving results 145 | if merge_cnt > 0: 146 | save_path = '{}oim_{}.png'.format(SAVE_DIR, cnt, cid) 147 | save_merge_images(np.array(o_im_list), 148 | [grid_size, grid_size], 149 | save_path) 150 | for w_im, over_im, cid in zip(weight_im_list, 151 | overlay_im_list, 152 | gcam_class_id): 153 | # save grad-cam results 154 | save_path = '{}gradcam_{}_class_{}.png'.\ 155 | format(SAVE_DIR, cnt, cid) 156 | save_merge_images( 157 | np.array(over_im), [grid_size, grid_size], save_path) 158 | # save guided grad-cam results 159 | save_path = '{}guided_gradcam_{}_class_{}.png'.\ 160 | format(SAVE_DIR, cnt, cid) 161 | save_merge_images( 162 | np.array(w_im), [grid_size, grid_size], save_path) 163 | writer.close() 164 | -------------------------------------------------------------------------------- /example/guided_backpropagation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: guided_backpropagation.py 4 | # Author: Qian Ge 5 | 6 | from scipy import misc 7 | import scipy.io 8 | 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | from tensorcv.dataflow.image import ImageFromFile 13 | 14 | import setup_env 15 | from nets.vgg import VGG19_FCN 16 | from models.guided_backpro import GuideBackPro 17 | 18 | IM_PATH = '../data/' 19 | SAVE_DIR = 
'../../data/tmp/' 20 | VGG_PATH = '../../data/pretrain/vgg/vgg19.npy' 21 | 22 | if __name__ == '__main__': 23 | # placeholder for input image 24 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 25 | # initialize input dataflow 26 | # change '.png' to other image types if other types of images are used 27 | input_im = ImageFromFile('.png', data_dir=IM_PATH, 28 | num_channel=3, shuffle=False) 29 | # batch size has to be one 30 | input_im.set_batch_size(1) 31 | 32 | # initialize guided back propagation class 33 | # use VGG19 as an example 34 | # images will be rescaled to smallest side = 224 is is_rescale=True 35 | model = GuideBackPro(vis_model=VGG19_FCN(is_load=True, 36 | pre_train_path=VGG_PATH, 37 | is_rescale=True)) 38 | 39 | # get op to compute guided back propagation map 40 | # final output respect to input image 41 | back_pro_op = model.get_visualization(image) 42 | 43 | writer = tf.summary.FileWriter(SAVE_DIR) 44 | with tf.Session() as sess: 45 | sess.run(tf.global_variables_initializer()) 46 | writer.add_graph(sess.graph) 47 | 48 | cnt = 0 49 | while input_im.epochs_completed < 1: 50 | im = input_im.next_batch()[0] 51 | guided_backpro, label, o_im =\ 52 | sess.run([back_pro_op, model.pre_label, 53 | model.input_im], 54 | feed_dict={image: im}) 55 | print(label) 56 | for cid, guided_map in zip(guided_backpro[1], guided_backpro[0]): 57 | scipy.misc.imsave( 58 | '{}map_{}_class_{}.png'.format(SAVE_DIR, cnt, cid), 59 | np.squeeze(guided_map)) 60 | scipy.misc.imsave('{}im_{}.png'.format(SAVE_DIR, cnt), 61 | np.squeeze(o_im)) 62 | # scipy.io.savemat( 63 | # '{}map_1_class_{}.mat'.format(SAVE_DIR, cid), 64 | # {'mat': np.squeeze(guided_map)*255}) 65 | cnt += 1 66 | 67 | writer.close() 68 | -------------------------------------------------------------------------------- /example/invert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: invert.py 4 | # Author: Qian Ge 5 | 6 | import os 7 | import argparse 8 | import scipy 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | import config_path as config 13 | 14 | import sys 15 | sys.path.append('../') 16 | import lib.utils.viz as viz 17 | import lib.utils.normalize as normlize 18 | from lib.nets.googlenet import BaseGoogLeNet 19 | from lib.models.invert import InvertCNN 20 | import lib.utils.viz as viz 21 | import lib.utils.normalize as normlize 22 | 23 | 24 | file_path = os.path.join(config.im_path, 'im_0.png') 25 | MEAN = [103.939, 116.779, 123.68] 26 | 27 | if __name__ == '__main__': 28 | im = [scipy.misc.imread(file_path)] 29 | input_mean = np.mean(im) 30 | input_std = np.std(im) 31 | layer_key = 'inception5b' 32 | cnn_model = BaseGoogLeNet(config.googlenet_path) 33 | invert_model = InvertCNN( 34 | 224, 224, 3, 35 | input_mean=input_mean, 36 | input_std=input_std, 37 | mean_list=MEAN) 38 | 39 | input_im = tf.placeholder(tf.float32, [1, 224, 224, 3], name='input') 40 | 41 | 42 | feat_im = cnn_model.get_feature_map(input_im, layer_key) 43 | feat_invert = cnn_model.get_feature_map(invert_model.invert_im, layer_key) 44 | 45 | train_op = invert_model.optimize_image(feat_invert, feat_im) 46 | result_op = invert_model.get_opt_im() 47 | 48 | writer = tf.summary.FileWriter(config.save_path) 49 | with tf.Session() as sess: 50 | sess.run(tf.global_variables_initializer()) 51 | # writer.add_graph(sess.graph) 52 | 53 | for step in range(0, 1000): 54 | 55 | _, loss, loss1, loss2 = sess.run( 56 | [train_op, 57 | 
invert_model.loss, 58 | invert_model.mse_loss, 59 | invert_model.vt_loss], 60 | feed_dict={input_im:im}) 61 | print(step, loss, loss1, loss2) 62 | opt_im = sess.run(result_op) 63 | if step % 10 == 0: 64 | # opt_im = np.clip(opt_im, 0, 255) 65 | # 66 | # opt_im = opt_im * input_std + input_mean 67 | # print(opt_im) 68 | scipy.misc.imsave(os.path.join(config.save_path, 'test_{}.png'.format(step)), 69 | np.squeeze(opt_im)) 70 | 71 | -------------------------------------------------------------------------------- /example/run.sh: -------------------------------------------------------------------------------- 1 | python3 deconv.py --feat conv1_2 2 | python3 deconv.py --feat conv2_2 3 | python3 deconv.py --feat conv3_4 4 | python3 deconv.py --feat conv4_4 5 | python3 deconv.py --feat conv5_2 -------------------------------------------------------------------------------- /example/setup_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: setup_env.py 4 | # Author: Qian Ge 5 | 6 | import sys 7 | sys.path.append('../lib/') 8 | -------------------------------------------------------------------------------- /example/vizfilter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: vizfilter.py 4 | # Author: Qian Ge 5 | 6 | import os 7 | import scipy 8 | import argparse 9 | import tensorflow as tf 10 | 11 | import config_path as config 12 | 13 | import sys 14 | sys.path.append('../') 15 | from lib.nets.googlenet import BaseGoogLeNet 16 | import lib.utils.viz as viz 17 | import lib.utils.normalize as normlize 18 | 19 | 20 | 21 | def get_parse(): 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--filter', action='store_true', 24 | help='Visualize filters') 25 | parser.add_argument('--feature', action='store_true', 26 | help='Visualize feature maps') 27 | 28 | parser.add_argument('--im', type=str, 29 | help='Image file name') 30 | 31 | return parser.parse_args() 32 | 33 | 34 | if __name__ == '__main__': 35 | FLAGES = get_parse() 36 | 37 | map_list = ['inception4a', 'inception4b', 'inception4c', 38 | 'inception4d', 'inception4e', 'inception3a', 39 | 'inception3b', 'inception5a', 'inception5b'] 40 | 41 | model = BaseGoogLeNet(config.googlenet_path) 42 | filters = tf.get_default_graph().get_tensor_by_name( 43 | 'conv1_7x7_s2/weights:0') 44 | 45 | if FLAGES.feature: 46 | feature_map = [] 47 | for c_map in map_list: 48 | feature_map.append(model.conv_layer[c_map]) 49 | assert FLAGES.im is not None, 'File name cannot be None!' 50 | file_path = os.path.join(config.im_path, FLAGES.im) 51 | assert os.path.isfile(file_path),\ 52 | 'File does not exist! 
{}'.format(file_path) 53 | im = scipy.misc.imread(file_path) 54 | 55 | with tf.Session() as sess: 56 | sess.run(tf.global_variables_initializer()) 57 | if FLAGES.filter: 58 | learned_filter = sess.run(filters) 59 | viz.viz_filters( 60 | learned_filter, 61 | [8, 8], 62 | os.path.join(config.save_path, 'GoogLeNet_filter.png'), 63 | gap=2, 64 | nf=normlize.norm_std) 65 | 66 | if FLAGES.feature: 67 | maps = sess.run(feature_map, feed_dict={model.inputs: [im]}) 68 | 69 | for key, c_map in zip(map_list, maps): 70 | viz.viz_filters( 71 | c_map[0], 72 | [10, 10], 73 | os.path.join(config.save_path, 'GoogLeNet_{}.png'.format(key)), 74 | gap=2, 75 | gap_color=10, 76 | # nf=normlize.norm_range 77 | ) 78 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/__init__.py -------------------------------------------------------------------------------- /lib/dataflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/dataflow/__init__.py -------------------------------------------------------------------------------- /lib/dataflow/cifar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: cifar.py 4 | # Author: Qian Ge 5 | 6 | import os 7 | import pickle 8 | 9 | import numpy as np 10 | 11 | from tensorcv.dataflow.base import RNGDataFlow 12 | 13 | 14 | class CIFAR(RNGDataFlow): 15 | def __init__(self, 16 | data_dir='', 17 | shuffle=True, 18 | batch_dict_name=None, 19 | data_type='train', 20 | channel_mean=None, 21 | substract_mean=True): 22 | self._mean = channel_mean 23 | self._substract = substract_mean 24 | self.num_channels = 3 25 | self.im_size = [32, 32] 26 | 27 | assert os.path.isdir(data_dir) 28 | self.data_dir = data_dir 29 | 30 | assert batch_dict_name is not None 31 | if not isinstance(batch_dict_name, list): 32 | batch_dict_name = [batch_dict_name] 33 | self._batch_dict_name = batch_dict_name 34 | 35 | if data_type == 'train': 36 | self._file_list = [os.path.join(data_dir, 'data_batch_{}'.format(i)) for i in range(1, 6)] 37 | else: 38 | self._file_list = [os.path.join(data_dir, 'test_batch')] 39 | 40 | self.shuffle = shuffle 41 | 42 | self.setup(epoch_val=0, batch_size=1) 43 | # if not isinstance(batch_file_list, list): 44 | # batch_file_list = [batch_file_list] 45 | # self._file_list = [os.path.join(data_dir, 'data_batch_' + str(batch_id)) for batch_id in batch_file_list] 46 | 47 | # self._load_files() 48 | self._num_image = self.size() 49 | 50 | self._image_id = 0 51 | self._batch_file_id = -1 52 | self._image = [] 53 | self._next_batch_file() 54 | 55 | # self._comp_channel_mean() 56 | 57 | print('Data Loaded! 
Size of data: {}'.format(self.size())) 58 | 59 | def _next_batch_file(self): 60 | if self._batch_file_id >= len(self._file_list) - 1: 61 | self._batch_file_id = 0 62 | self._epochs_completed += 1 63 | else: 64 | self._batch_file_id += 1 65 | data_dict = unpickle(self._file_list[self._batch_file_id]) 66 | self._image = np.array(data_dict['image']) 67 | self._label = np.array(data_dict['label']) 68 | 69 | if self.shuffle: 70 | self._suffle_files() 71 | 72 | def _suffle_files(self): 73 | idxs = np.arange(len(self._image)) 74 | 75 | self.rng.shuffle(idxs) 76 | self._image = self._image[idxs] 77 | self._label = self._label[idxs] 78 | 79 | @property 80 | def batch_step(self): 81 | return int(self.size() * 1.0 / self._batch_size) 82 | 83 | @property 84 | def channel_mean(self): 85 | if self._mean == None: 86 | self._mean = self._comp_channel_mean() 87 | return self._mean 88 | 89 | def substract_mean(self, im_list): 90 | """ 91 | Args: 92 | im_list: [batch, h, w, c] 93 | """ 94 | mean = self.channel_mean 95 | for c_id in range(0, im_list.shape[-1]): 96 | im_list[:,:, c_id] = im_list[:,:, c_id] - mean[c_id] 97 | return im_list 98 | 99 | def _comp_channel_mean(self): 100 | im_list = [] 101 | for k in range(len(self._file_list)): 102 | cur_im = unpickle(self._file_list[k])['image'] 103 | im_list.extend(cur_im) 104 | im_list = np.array(im_list) 105 | 106 | mean_list = [] 107 | for c_id in range(0, im_list.shape[-1]): 108 | mean_list.append(np.mean(im_list[:,:,:,c_id])) 109 | return mean_list 110 | 111 | def size(self): 112 | try: 113 | return self.data_size 114 | except AttributeError: 115 | data_size = 0 116 | for k in range(len(self._file_list)): 117 | tmp_image = unpickle(self._file_list[k])['image'] 118 | data_size += len(tmp_image) 119 | self.data_size = data_size 120 | return self.data_size 121 | 122 | def next_batch(self): 123 | assert self._batch_size <= self.size(), \ 124 | "batch_size {} cannot be larger than data size {}".\ 125 | format(self._batch_size, self.size()) 126 | 127 | start = self._image_id 128 | self._image_id += self._batch_size 129 | end = self._image_id 130 | batch_image = np.array(self._image[start:end]) 131 | batch_label = np.array(self._label[start:end]) 132 | 133 | if self._image_id + self._batch_size > len(self._image): 134 | self._next_batch_file() 135 | self._image_id = 0 136 | if self.shuffle: 137 | self._suffle_files() 138 | if self._substract: 139 | batch_image = self.substract_mean(batch_image) 140 | return batch_image, batch_label 141 | 142 | def next_batch_dict(self): 143 | re_dict = {} 144 | batch_data = self.next_batch() 145 | for key, data in zip(self._batch_dict_name, batch_data): 146 | re_dict[key] = data 147 | return re_dict 148 | 149 | 150 | def unpickle(file): 151 | with open(file, 'rb') as fo: 152 | dict = pickle.load(fo, encoding='bytes') 153 | image = dict[b'data'] 154 | labels = dict[b'labels'] 155 | 156 | r = image[:,:32*32].reshape(-1,32,32) 157 | g = image[:,32*32: 2*32*32].reshape(-1,32,32) 158 | b = image[:,2*32*32:].reshape(-1,32,32) 159 | 160 | image = np.stack((r,g,b),axis=-1) 161 | 162 | return {'image': image.astype(float), 'label': labels} 163 | 164 | if __name__ == '__main__': 165 | a = CIFAR('D:\\Qian\\GitHub\\workspace\\tensorflow-DCGAN\\cifar-10-python.tar\\') 166 | t = a.next_batch()[0] 167 | print(t) 168 | print(t.shape) 169 | print(a.size()) 170 | # print(a.next_batch()[0]) 171 | # print(a.next_batch()[0]) -------------------------------------------------------------------------------- /lib/dataflow/image.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: image.py 4 | # Author: Qian Ge 5 | 6 | import numpy as np 7 | from tensorcv.dataflow.base import RNGDataFlow 8 | from tensorcv.dataflow.normalization import identity 9 | 10 | 11 | class DataFromFile(RNGDataFlow): 12 | """ Base class for image from files """ 13 | def __init__(self, 14 | ext_name, 15 | data_dir='', 16 | num_channel=None, 17 | shuffle=True, 18 | normalize=None, 19 | batch_dict_name=None, 20 | normalize_fnc=identity): 21 | 22 | check_dir(data_dir) 23 | self.data_dir = data_dir 24 | self._shuffle = shuffle 25 | self._normalize = normalize 26 | self._normalize_fnc = normalize_fnc 27 | 28 | if not isinstance(batch_dict_name, list): 29 | batch_dict_name = [batch_dict_name] 30 | self._batch_dict_name = batch_dict_name 31 | 32 | self.setup(epoch_val=0, batch_size=1) 33 | 34 | self._load_file_list(ext_name.lower()) 35 | if self.size() == 0: 36 | print_warning('No {} files in folder {}'.\ 37 | format(ext_name, data_dir)) 38 | self.num_channels, self.im_size = self._get_im_size() 39 | self._data_id = 0 40 | 41 | def _load_file_list(self): 42 | raise NotImplementedError() 43 | 44 | def _suffle_file_list(self): 45 | pass 46 | 47 | def next_batch(self): 48 | assert self._batch_size <= self.size(), \ 49 | "batch_size cannot be larger than data size" 50 | 51 | if self._data_id + self._batch_size > self.size(): 52 | start = self._data_id 53 | end = self.size() 54 | else: 55 | start = self._data_id 56 | self._data_id += self._batch_size 57 | end = self._data_id 58 | # batch_file_range = range(start, end) 59 | batch_data = self._load_data(start, end) 60 | 61 | if end == self.size(): 62 | self._epochs_completed += 1 63 | self._data_id = 0 64 | if self._shuffle: 65 | self._suffle_file_list() 66 | return batch_data 67 | 68 | def next_batch_dict(self): 69 | batch_data = self.next_batch() 70 | batch_dict = {name: data for name, data in zip(self._batch_dict_name, batch_data)} 71 | return batch_dict 72 | 73 | def _load_data(self, start, end): 74 | raise NotImplementedError() 75 | 76 | 77 | class ImageFromFile(DataFromFile): 78 | def __init__(self, 79 | ext_name, 80 | data_dir='', 81 | num_channel=None, 82 | shuffle=True, 83 | normalize=None, 84 | normalize_fnc=identity, 85 | batch_dict_name=None, 86 | pf=identity): 87 | 88 | if num_channel is not None: 89 | self.num_channels = num_channel 90 | self._read_channel = num_channel 91 | else: 92 | self._read_channel = None 93 | 94 | self._resize = get_shape2D(resize) 95 | self._resize_crop = resize_crop 96 | self._pf = pf 97 | 98 | super(ImageFromFile, self).__init__(ext_name, 99 | data_dir=data_dir, 100 | shuffle=shuffle, 101 | normalize=normalize, 102 | batch_dict_name=batch_dict_name, 103 | normalize_fnc=normalize_fnc) 104 | 105 | def _load_file_list(self, ext_name): 106 | im_dir = os.path.join(self.data_dir) 107 | self._im_list = get_file_list(im_dir, ext_name) 108 | if self._shuffle: 109 | self._suffle_file_list() 110 | 111 | def _suffle_file_list(self): 112 | idxs = np.arange(self.size()) 113 | self.rng.shuffle(idxs) 114 | self._im_list = self._im_list[idxs] 115 | 116 | def _load_data(self, start, end): 117 | input_im_list = [] 118 | for k in range(start, end): 119 | im_path = self._im_list[k] 120 | im = load_image(im_path, read_channel=self._read_channel, 121 | resize=self._resize, 122 | resize_crop=self._resize_crop, 123 | pf=self._pf) 124 | input_im_list.extend(im) 125 | 126 | # TODO to be modified 127 
| input_im_list = self._normalize_fnc(np.array(input_im_list), 128 | self._get_max_in_val(), 129 | self._get_half_in_val()) 130 | return [input_im_list] 131 | 132 | def size(self): 133 | return self._im_list.shape[0] 134 | 135 | def get_data_list(self): 136 | return [self._im_list] 137 | 138 | def set_data_list(self, new_data_list): 139 | assert isinstance(new_data_list, list) 140 | assert len(new_data_list) == 1 141 | self._im_list = np.array(new_data_list[0]) 142 | 143 | def set_pf(self, pf): 144 | self._pf = pf 145 | 146 | def suffle_data(self): 147 | self._suffle_file_list() 148 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/models/__init__.py -------------------------------------------------------------------------------- /lib/models/cam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: cam.py 4 | # Author: Qian Ge 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | 9 | from tensorcv.models.layers import new_weights, dropout, global_avg_pool, conv, max_pool 10 | from tensorcv.models.base import BaseModel 11 | 12 | 13 | class BaseCAM(BaseModel): 14 | """ base of class activation map class """ 15 | def __init__(self, num_class=10, 16 | inspect_class=None, 17 | num_channels=1, 18 | learning_rate=0.0001): 19 | 20 | self._learning_rate = learning_rate 21 | self._num_channels = num_channels 22 | self._num_class = num_class 23 | self._inspect_class = inspect_class 24 | 25 | self.set_is_training(True) 26 | 27 | def _create_input(self): 28 | self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') 29 | self.image = tf.placeholder( 30 | tf.float32, name='image', 31 | shape=[None, None, None, self._num_channels]) 32 | self.label = tf.placeholder(tf.int64, [None], 'label') 33 | 34 | self.set_model_input([self.image, self.keep_prob]) 35 | self.set_dropout(self.keep_prob, keep_prob=0.5) 36 | self.set_train_placeholder([self.image, self.label]) 37 | self.set_prediction_placeholder([self.image, self.label]) 38 | 39 | def _create_conv(self, input_im): 40 | raise NotImplementedError() 41 | 42 | def _get_loss(self): 43 | with tf.name_scope('loss'): 44 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 45 | logits=self.output, labels=self.label) 46 | cross_entropy_loss = tf.reduce_mean( 47 | cross_entropy, name='cross_entropy_loss') 48 | tf.add_to_collection('losses', cross_entropy_loss) 49 | return tf.add_n(tf.get_collection('losses'), name='result') 50 | 51 | def _get_optimizer(self): 52 | return tf.train.AdamOptimizer( 53 | beta1=0.5, learning_rate=self._learning_rate) 54 | 55 | def _ex_setup_graph(self): 56 | with tf.name_scope('accuracy'): 57 | correct_prediction = tf.equal(self.prediction, self.label) 58 | self.accuracy = tf.reduce_mean( 59 | tf.cast(correct_prediction, tf.float32), name='result') 60 | 61 | def _setup_summary(self): 62 | tf.summary.scalar("train_accuracy", self.accuracy, 63 | collections=['train']) 64 | 65 | def get_classmap(self, label, conv_out, input_im): 66 | """ 67 | Compute class activation map of class = label with name 'classmap' 68 | 69 | Args: 70 | label (int): a scalar int indicate the class label 71 | conv_out (tf.tensor): 4-D Tensor of shape 72 | [batch, height, width, channels]. 
Output of 73 | convolutional layers. 74 | input_im (tf.tensor): A 4-D Tensor image. 75 | The original model input image patch. 76 | """ 77 | # Get original image size used for interpolation 78 | o_height = tf.shape(input_im)[1] 79 | o_width = tf.shape(input_im)[2] 80 | 81 | # Get shape of output of convolution layers 82 | conv_out_channel = tf.shape(conv_out)[-1] 83 | conv_height = tf.shape(conv_out)[1] 84 | conv_width = tf.shape(conv_out)[2] 85 | 86 | # Get weights corresponding to class = label 87 | with tf.variable_scope('cam') as scope: 88 | scope.reuse_variables() 89 | label_w = tf.gather( 90 | tf.transpose(tf.get_variable('weights')), label) 91 | label_w = tf.reshape(label_w, [-1, conv_out_channel, 1]) 92 | label_w = tf.tile(label_w, [tf.shape(conv_out)[0], 1, 1]) 93 | 94 | conv_reshape = tf.reshape( 95 | conv_out, [-1, conv_height * conv_width, conv_out_channel]) 96 | classmap = tf.matmul(conv_reshape, label_w) 97 | 98 | # Interpolate to orginal size 99 | classmap = tf.reshape(classmap, [-1, conv_height, conv_width, 1]) 100 | classmap = tf.image.resize_bilinear(classmap, 101 | [o_height, o_width], 102 | name='result') 103 | 104 | class VGGCAM(BaseCAM): 105 | def __init__(self, num_class=1000, 106 | inspect_class=None, 107 | num_channels=3, 108 | learning_rate=0.0001, 109 | is_load=True, 110 | pre_train_path=None): 111 | 112 | self._is_load = is_load 113 | if self._is_load and pre_train_path is None: 114 | raise ValueError('pre_train_path can not be None!') 115 | self._pre_train_path = pre_train_path 116 | 117 | super(VGGCAM, self).__init__(num_class=num_class, 118 | inspect_class=inspect_class, 119 | num_channels=num_channels, 120 | learning_rate=learning_rate) 121 | 122 | def _create_conv(self, input_im): 123 | 124 | VGG_MEAN = [103.939, 116.779, 123.68] 125 | 126 | red, green, blue = tf.split(axis=3, num_or_size_splits=3, 127 | value=input_im) 128 | input_bgr = tf.concat(axis=3, values=[ 129 | blue - VGG_MEAN[0], 130 | green - VGG_MEAN[1], 131 | red - VGG_MEAN[2], 132 | ]) 133 | 134 | data_dict = {} 135 | if self._is_load: 136 | data_dict = np.load(self._pre_train_path, 137 | encoding='latin1').item() 138 | 139 | arg_scope = tf.contrib.framework.arg_scope 140 | with arg_scope([conv], nl=tf.nn.relu, 141 | trainable=False, data_dict=data_dict): 142 | conv1_1 = conv(input_bgr, 3, 64, 'conv1_1') 143 | conv1_2 = conv(conv1_1, 3, 64, 'conv1_2') 144 | pool1 = max_pool(conv1_2, 'pool1', padding='SAME') 145 | 146 | conv2_1 = conv(pool1, 3, 128, 'conv2_1') 147 | conv2_2 = conv(conv2_1, 3, 128, 'conv2_2') 148 | pool2 = max_pool(conv2_2, 'pool2', padding='SAME') 149 | 150 | conv3_1 = conv(pool2, 3, 256, 'conv3_1') 151 | conv3_2 = conv(conv3_1, 3, 256, 'conv3_2') 152 | conv3_3 = conv(conv3_2, 3, 256, 'conv3_3') 153 | conv3_4 = conv(conv3_3, 3, 256, 'conv3_4') 154 | pool3 = max_pool(conv3_4, 'pool3', padding='SAME') 155 | 156 | conv4_1 = conv(pool3, 3, 512, 'conv4_1') 157 | conv4_2 = conv(conv4_1, 3, 512, 'conv4_2') 158 | conv4_3 = conv(conv4_2, 3, 512, 'conv4_3') 159 | conv4_4 = conv(conv4_3, 3, 512, 'conv4_4') 160 | pool4 = max_pool(conv4_4, 'pool4', padding='SAME') 161 | 162 | conv5_1 = conv(pool4, 3, 512, 'conv5_1') 163 | conv5_2 = conv(conv5_1, 3, 512, 'conv5_2') 164 | conv5_3 = conv(conv5_2, 3, 512, 'conv5_3') 165 | conv5_4 = conv(conv5_3, 3, 512, 'conv5_4') 166 | 167 | return conv5_4 168 | 169 | def _create_model(self): 170 | 171 | input_im = self.model_input[0] 172 | keep_prob = self.model_input[1] 173 | 174 | conv_out = self._create_conv(input_im) 175 | 176 | init_b = 
tf.truncated_normal_initializer(stddev=0.01) 177 | conv_cam = conv(conv_out, 3, 1024, 'conv_cam', 178 | nl=tf.nn.relu, wd=0.01, init_b=init_b) 179 | gap = global_avg_pool(conv_cam) 180 | dropout_gap = dropout(gap, keep_prob, self.is_training) 181 | 182 | with tf.variable_scope('cam'): 183 | init = tf.truncated_normal_initializer(stddev=0.01) 184 | fc_w = new_weights( 185 | 'weights', 1, 186 | [gap.get_shape().as_list()[-1], self._num_class], 187 | initializer=init, wd=0.01) 188 | fc_cam = tf.matmul(dropout_gap, fc_w, name='output') 189 | 190 | self.output = tf.identity(fc_cam, 'model_output') 191 | self.prediction = tf.argmax(fc_cam, name='pre_label', axis=-1) 192 | self.prediction_pro = tf.nn.softmax(fc_cam, name='pre_pro') 193 | 194 | if self._inspect_class is not None: 195 | with tf.name_scope('classmap'): 196 | self.get_classmap(self._inspect_class, conv_cam, input_im) 197 | 198 | 199 | # if __name__ == '__main__': 200 | # num_class = 257 201 | # num_channels = 3 202 | 203 | # vgg_cam_model = VGGCAM(num_class=num_class, 204 | # inspect_class=None, 205 | # num_channels=num_channels, 206 | # learning_rate=0.0001, 207 | # is_load=True, 208 | # pre_train_path='E:\\GITHUB\\workspace\\CNN\pretrained\\vgg19.npy') 209 | 210 | # vgg_cam_model.create_graph() 211 | 212 | # grads = vgg_cam_model.get_grads() 213 | # opt = vgg_cam_model.get_optimizer() 214 | # train_op = opt.apply_gradients(grads, name='train') 215 | 216 | # writer = tf.summary.FileWriter('E:\\GITHUB\\workspace\\CNN\\other\\') 217 | # with tf.Session() as sess: 218 | # sess.run(tf.global_variables_initializer()) 219 | # writer.add_graph(sess.graph) 220 | # writer.close() 221 | -------------------------------------------------------------------------------- /lib/models/gap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: gap.py 4 | # Author: Qian Ge 5 | 6 | import tensorflow as tf 7 | 8 | from tensorcv.models.base import BaseModel 9 | from tensorcv.models.layers import new_weights, global_avg_pool, conv, dropout, max_pool 10 | from tensorcv.models.layers import batch_norm 11 | 12 | from lib.nets.vgg import BaseVGG19 13 | from lib.nets.googlenet import BaseGoogLeNet 14 | 15 | 16 | def mlpconv(inputs, filter_size, hidden_size, wd=0, name='mlpconv'): 17 | if not isinstance(hidden_size, list): 18 | hidden_size = [hidden_size] 19 | with tf.variable_scope(name): 20 | l_out = conv(inputs, 21 | filter_size, 22 | hidden_size[0], 23 | 'microlayer_0', 24 | nl=tf.nn.relu, 25 | wd=wd) 26 | for layer_id in range(1, len(hidden_size)): 27 | l_out = conv(l_out, 28 | 1, 29 | hidden_size[layer_id], 30 | 'microlayer_{}'.format(layer_id), 31 | nl=tf.nn.relu, 32 | wd=wd) 33 | 34 | return l_out 35 | 36 | 37 | class GAPNet(BaseModel): 38 | def __init__(self, num_class=10, wd=0): 39 | self._n_class = num_class 40 | self._wd = wd 41 | # self._pre_train_path = pre_train_path 42 | 43 | self.set_is_training(True) 44 | self.layer = {} 45 | 46 | def set_is_training(self, is_training): 47 | self._is_traing = is_training 48 | 49 | def create_model(self, input_dict): 50 | self._input_dict = input_dict 51 | self._create_model() 52 | 53 | def _create_conv(self, inputs): 54 | self.dropout = tf.placeholder(tf.float32, name='dropout') 55 | mlpconv_1 = mlpconv( 56 | inputs, 57 | filter_size=8, 58 | hidden_size=[96, 96], 59 | name='mlpconv_1', 60 | wd=self._wd) 61 | # mlpconv_1 = mlpconv( 62 | # inputs, 63 | # filter_size=5, 64 | # hidden_size=[192, 160, 96], 65 | # 
name='mlpconv_1', 66 | # wd=self._wd) 67 | mlpconv_1 = max_pool(mlpconv_1, 'pool1', padding='SAME') 68 | mlpconv_1 = dropout(mlpconv_1, self.dropout, self._is_traing) 69 | mlpconv_1 = batch_norm(mlpconv_1, train=self._is_traing, name='bn_1') 70 | 71 | mlpconv_2 = mlpconv( 72 | mlpconv_1, 73 | filter_size=8, 74 | hidden_size=[192, 192], 75 | name='mlpconv_2', 76 | wd=self._wd) 77 | # mlpconv_2 = mlpconv( 78 | # mlpconv_1, 79 | # filter_size=5, 80 | # hidden_size=[192, 192, 192], 81 | # name='mlpconv_2', 82 | # wd=self._wd) 83 | mlpconv_2 = max_pool(mlpconv_2, 'pool2', padding='SAME') 84 | mlpconv_2 = dropout(mlpconv_2, self.dropout, self._is_traing) 85 | mlpconv_2 = batch_norm(mlpconv_2, train=self._is_traing, name='bn_2') 86 | 87 | mlpconv_3 = mlpconv( 88 | mlpconv_2, 89 | filter_size=5, 90 | hidden_size=[192, self._n_class], 91 | name='mlpconv_3', 92 | wd=self._wd) 93 | # mlpconv_3 = mlpconv( 94 | # mlpconv_2, 95 | # filter_size=3, 96 | # hidden_size=[192, 192, self._n_class], 97 | # name='mlpconv_3', 98 | # wd=self._wd) 99 | # mlpconv_3 = max_pool(mlpconv_3, 'pool3', padding='SAME') 100 | # mlpconv_3 = dropout(pool3, 0.5, self._is_traing) 101 | 102 | return mlpconv_3 103 | 104 | def _create_model(self): 105 | inputs = self._input_dict['input'] 106 | conv_out = self._create_conv(inputs) 107 | 108 | # init_b = tf.truncated_normal_initializer(stddev=0.01) 109 | # conv_gap = conv(conv_out, 3, self._n_class, 'conv_gap', 110 | # nl=tf.nn.relu, wd=0, init_b=init_b) 111 | gap = global_avg_pool(conv_out) 112 | 113 | self.layer['logits'] = gap 114 | self.layer['feature'] = conv_out 115 | self.layer['pred'] = tf.argmax(gap, name='pred', axis=-1) 116 | self.layer['prob'] = tf.nn.softmax(gap, name='prob') 117 | 118 | def _get_loss(self): 119 | label = self._input_dict['label'] 120 | with tf.name_scope('loss'): 121 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 122 | logits= self.layer['logits'], labels=label) 123 | cross_entropy_loss = tf.reduce_mean( 124 | cross_entropy, name='cross_entropy_loss') 125 | tf.add_to_collection('losses', cross_entropy_loss) 126 | return tf.add_n(tf.get_collection('losses'), name='result') 127 | 128 | def get_loss(self): 129 | try: 130 | return self.loss 131 | except AttributeError: 132 | self.loss = self._get_loss() 133 | return self.loss 134 | 135 | def get_train_op(self): 136 | self.lr = tf.placeholder(tf.float32, name='lr') 137 | # opt = tf.train.GradientDescentOptimizer(learning_rate=self.lr) 138 | opt = tf.train.AdamOptimizer( 139 | beta1=0.5, learning_rate=self.lr) 140 | loss = self.get_loss() 141 | return opt.minimize(loss) 142 | 143 | def get_accuracy(self): 144 | label = self._input_dict['label'] 145 | pred = self.layer['pred'] 146 | 147 | correct = tf.equal(label, pred) 148 | accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) 149 | return accuracy 150 | 151 | 152 | -------------------------------------------------------------------------------- /lib/models/grad_cam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: grad_cam.py 4 | # Author: Qian Ge 5 | 6 | import tensorflow as tf 7 | 8 | from tensorcv.models.layers import global_avg_pool 9 | 10 | 11 | class BaseGradCAM(object): 12 | def __init__(self, vis_model=None, num_channel=3): 13 | self._vis_model = vis_model 14 | self._nchannel = num_channel 15 | 16 | def create_model(self, inputs): 17 | self._create_model(inputs) 18 | 19 | def _create_model(self, inputs): 20 | pass 21 | 22 | def 
setup_graph(self): 23 | pass 24 | 25 | def _comp_feature_importance_weight(self, class_id): 26 | if not isinstance(class_id, list): 27 | class_id = [class_id] 28 | 29 | with tf.name_scope('feature_weight'): 30 | self._feature_w_list = [] 31 | for idx, cid in enumerate(class_id): 32 | one_hot = tf.sparse_to_dense( 33 | [[cid, 0]], [self._nclass, 1], 1.0) 34 | out_act = tf.reshape(self._out_act, [1, self._nclass]) 35 | class_act = tf.matmul(out_act, one_hot, 36 | name='class_act_{}'.format(idx)) 37 | feature_grad = tf.gradients(class_act, self._conv_out, 38 | name='grad_{}'.format(idx)) 39 | feature_grad = tf.squeeze( 40 | tf.convert_to_tensor(feature_grad), axis=0) 41 | feature_w = global_avg_pool( 42 | feature_grad, name='feature_w_{}'.format(idx)) 43 | self._feature_w_list.append(feature_w) 44 | 45 | def get_visualization(self, class_id=None): 46 | assert class_id is not None, 'class_id cannot be None!' 47 | 48 | with tf.name_scope('grad_cam'): 49 | self._comp_feature_importance_weight(class_id) 50 | conv_out = self._conv_out 51 | conv_c = tf.shape(conv_out)[-1] 52 | conv_h = tf.shape(conv_out)[1] 53 | conv_w = tf.shape(conv_out)[2] 54 | conv_reshape = tf.reshape(conv_out, [conv_h * conv_w, conv_c]) 55 | 56 | o_h = tf.shape(self.input_im)[1] 57 | o_w = tf.shape(self.input_im)[2] 58 | 59 | classmap_list = [] 60 | for idx, feature_w in enumerate(self._feature_w_list): 61 | feature_w = tf.reshape(feature_w, [conv_c, 1]) 62 | classmap = tf.matmul(conv_reshape, feature_w) 63 | classmap = tf.reshape(classmap, [-1, conv_h, conv_w, 1]) 64 | classmap = tf.nn.relu( 65 | tf.image.resize_bilinear(classmap, [o_h, o_w]), 66 | name='grad_cam_{}'.format(idx)) 67 | classmap_list.append(tf.squeeze(classmap)) 68 | 69 | return classmap_list, tf.convert_to_tensor(class_id) 70 | 71 | 72 | class ClassifyGradCAM(BaseGradCAM): 73 | def _create_model(self, inputs): 74 | keep_prob = 1 75 | self._vis_model.create_model([inputs, keep_prob]) 76 | 77 | def setup_graph(self): 78 | self.input_im = self._vis_model.layer['input'] 79 | self._out_act = global_avg_pool(self._vis_model.layer['output']) 80 | self._conv_out = self._vis_model.layer['conv_out'] 81 | self._nclass = self._out_act.shape.as_list()[-1] 82 | self.pre_label = tf.nn.top_k(tf.nn.softmax(self._out_act), 83 | k=5, sorted=True) 84 | -------------------------------------------------------------------------------- /lib/models/guided_backpro.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: guided_backpro.py 4 | # Author: Qian Ge 5 | 6 | import tensorflow as tf 7 | 8 | from tensorcv.models.layers import global_avg_pool 9 | 10 | 11 | @tf.RegisterGradient("GuidedRelu") 12 | def _GuidedReluGrad(op, grad): 13 | gate_g = tf.cast(grad > 0, "float32") 14 | gate_y = tf.cast(op.outputs[0] > 0, "float32") 15 | return grad * gate_g * gate_y 16 | 17 | 18 | class GuideBackPro(object): 19 | def __init__(self, vis_model=None, class_id=None): 20 | assert vis_model is not None, 'vis_model cannot be None!' 21 | # assert not class_id is None, 'class_id cannot be None!' 
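        # When class_id is None, _get_activation() falls back to the top-1
        # predicted class (self.pre_label.indices[0][0]), so a visualization can
        # still be produced without specifying a label explicitly.
        # The "GuidedRelu" gradient registered above implements the guided
        # backpropagation rule: a gradient is passed back only where both the
        # incoming gradient and the forward ReLU output are positive
        # (grad * gate_g * gate_y).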
22 | 23 | self._vis_model = vis_model 24 | if class_id is not None and not isinstance(class_id, list): 25 | class_id = [class_id] 26 | self._class_id = class_id 27 | 28 | def _create_model(self, image): 29 | keep_prob = 1 30 | self._vis_model.create_model([image, keep_prob]) 31 | self.input_im = self._vis_model.layer['input'] 32 | 33 | self._out_act = global_avg_pool(self._vis_model.layer['output']) 34 | self.pre_label = tf.nn.top_k( 35 | tf.nn.softmax(self._out_act), k=5, sorted=True) 36 | 37 | def _get_activation(self): 38 | with tf.name_scope('activation'): 39 | nclass = self._out_act.shape.as_list()[-1] 40 | act_list = [] 41 | if self._class_id is None: 42 | class_list = [self.pre_label.indices[0][0]] 43 | act_list = [tf.reduce_max(self._out_act)] 44 | else: 45 | class_list = self._class_id 46 | for cid in class_list: 47 | one_hot = tf.sparse_to_dense([[cid, 0]], [nclass, 1], 1.0) 48 | self._out_act = tf.reshape(self._out_act, [1, nclass]) 49 | class_act = tf.matmul(self._out_act, one_hot) 50 | act_list.append(class_act) 51 | 52 | return act_list, tf.convert_to_tensor(class_list) 53 | 54 | def get_visualization(self, image): 55 | g = tf.get_default_graph() 56 | 57 | with g.gradient_override_map({'Relu': 'GuidedRelu'}): 58 | try: 59 | self._create_model(image) 60 | except ValueError: 61 | with tf.variable_scope(tf.get_variable_scope()) as scope: 62 | scope.reuse_variables() 63 | self._create_model(image) 64 | act_list, class_list = self._get_activation() 65 | 66 | with tf.name_scope('guided_back_pro_map'): 67 | guided_back_pro_list = [] 68 | for class_act in act_list: 69 | guided_back_pro = tf.gradients( 70 | class_act, self._vis_model.layer['input']) 71 | guided_back_pro_list.append(guided_back_pro) 72 | 73 | self.visual_map = guided_back_pro_list 74 | self.class_list = class_list 75 | return guided_back_pro_list, class_list 76 | -------------------------------------------------------------------------------- /lib/models/invert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: invert.py 4 | # Author: Qian Ge 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | 9 | from tensorcv.models.base import BaseModel 10 | 11 | 12 | class InvertCNN(BaseModel): 13 | def __init__(self, im_h, im_w, im_c, input_mean=0, input_std=1.0, mean_list=None): 14 | init = tf.random_normal([1, im_h, im_w, im_c]) 15 | self.invert_im = tf.get_variable('invert_im', 16 | initializer=init, 17 | # shape=[1, im_h, im_w, im_c], 18 | trainable=True) 19 | 20 | 21 | self._mean = mean_list 22 | self._input_std = input_std 23 | 24 | def _total_variation(self, image): 25 | var_x = tf.pow(image[:, 1:, :-1, :] - image[:, :-1, :-1, :], 2) 26 | var_y = tf.pow(image[:, :-1, 1:, :] - image[:, :-1, :-1, :], 2) 27 | return tf.reduce_sum(var_x + var_y) 28 | 29 | def get_loss(self, feat_invert, feat_im): 30 | self.mse_loss = 5e-4 * tf.losses.mean_squared_error(feat_invert, feat_im) 31 | self.vt_loss = 0.0000005 * self._total_variation(self.invert_im) 32 | self.loss = 1000 * self.mse_loss + 0*self.vt_loss 33 | return self.loss 34 | 35 | def optimize_image(self, feat_invert, feat_im): 36 | loss = self.get_loss(feat_invert, feat_im) 37 | # opt = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9) 38 | opt = tf.train.AdamOptimizer(learning_rate=0.1) 39 | return opt.minimize(loss) 40 | 41 | def get_opt_im(self): 42 | im = self.invert_im 43 | # if self._mean is not None: 44 | # im = self._add_mean(im) 45 | return im 
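# Illustrative sketch: get_loss() above multiplies the total-variation term by 0,
# so only the feature reconstruction MSE drives the optimization and the smoothness
# prior is effectively disabled. A minimal combined objective could look like the
# helper below; tv_weight is an assumed value, not one used in this repository, and
# `tf` refers to the tensorflow import at the top of this file.
def combined_inversion_loss(feat_invert, feat_im, invert_im, tv_weight=1e-6):
    # feature reconstruction term between target and optimized-image features
    mse = tf.losses.mean_squared_error(feat_invert, feat_im)
    # total-variation smoothness prior on the optimized image
    # (same form as InvertCNN._total_variation)
    var_x = tf.pow(invert_im[:, 1:, :-1, :] - invert_im[:, :-1, :-1, :], 2)
    var_y = tf.pow(invert_im[:, :-1, 1:, :] - invert_im[:, :-1, :-1, :], 2)
    tv = tf.reduce_sum(var_x + var_y)
    return mse + tv_weight * tv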
-------------------------------------------------------------------------------- /lib/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/nets/__init__.py -------------------------------------------------------------------------------- /lib/nets/googlenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: googlenet.py 4 | # Author: Qian Ge 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow.contrib.framework import add_arg_scope 9 | 10 | from tensorcv.models.layers import conv, fc, global_avg_pool, dropout, max_pool 11 | from tensorcv.models.base import BaseModel 12 | 13 | 14 | MEAN = [103.939, 116.779, 123.68] 15 | 16 | @add_arg_scope 17 | def inception_layer(inputs, 18 | conv_11_size, 19 | conv_33_reduce_size, conv_33_size, 20 | conv_55_reduce_size, conv_55_size, 21 | pool_size, 22 | data_dict={}, 23 | trainable=False, 24 | name='inception'): 25 | 26 | arg_scope = tf.contrib.framework.arg_scope 27 | with arg_scope([conv], nl=tf.nn.relu, trainable=trainable, 28 | data_dict=data_dict): 29 | conv_11 = conv(inputs, 1, conv_11_size, '{}_1x1'.format(name)) 30 | 31 | conv_33_reduce = conv(inputs, 1, conv_33_reduce_size, 32 | '{}_3x3_reduce'.format(name)) 33 | conv_33 = conv(conv_33_reduce, 3, conv_33_size, '{}_3x3'.format(name)) 34 | 35 | conv_55_reduce = conv(inputs, 1, conv_55_reduce_size, 36 | '{}_5x5_reduce'.format(name)) 37 | conv_55 = conv(conv_55_reduce, 5, conv_55_size, '{}_5x5'.format(name)) 38 | 39 | pool = max_pool(inputs, '{}_pool'.format(name), stride=1, 40 | padding='SAME', filter_size=3) 41 | convpool = conv(pool, 1, pool_size, '{}_pool_proj'.format(name)) 42 | 43 | return tf.concat([conv_11, conv_33, conv_55, convpool], 44 | 3, name='{}_concat'.format(name)) 45 | 46 | class BaseGoogLeNet(BaseModel): 47 | def __init__(self, pre_train_path, is_load=True): 48 | self.data_dict = {} 49 | if is_load: 50 | assert pre_train_path is not None 51 | self.data_dict = np.load(pre_train_path, 52 | encoding='latin1').item() 53 | 54 | self.inputs = tf.placeholder(tf.float32, 55 | [None, None, None, 3], 56 | name='input') 57 | 58 | 59 | input_bgr = self._sub_mean(self.inputs) 60 | self._creat_googlenet(input_bgr, self.data_dict) 61 | 62 | def _sub_mean(self, inputs): 63 | with tf.name_scope('input'): 64 | input_im = inputs 65 | 66 | # Convert RGB image to BGR image 67 | red, green, blue = tf.split(axis=3, 68 | num_or_size_splits=3, 69 | value=input_im) 70 | 71 | input_bgr = tf.concat(axis=3, values=[ 72 | blue - MEAN[0], 73 | green - MEAN[1], 74 | red - MEAN[2], 75 | ]) 76 | return input_bgr 77 | 78 | def get_feature_map(self, inputs, layer_key): 79 | assert layer_key in self.conv_layer 80 | with tf.variable_scope(tf.get_variable_scope()) as scope: 81 | # print(tf.get_default_graph().get_name_scope()) 82 | scope.reuse_variables() 83 | inputs = self._sub_mean(inputs) 84 | self._creat_googlenet(inputs, self.data_dict) 85 | return self.conv_layer[layer_key] 86 | 87 | def _creat_googlenet(self, 88 | inputs, 89 | data_dict, 90 | trainable=False): 91 | self.conv_layer = {} 92 | 93 | arg_scope = tf.contrib.framework.arg_scope 94 | with arg_scope([conv], trainable=trainable, 95 | data_dict=data_dict, nl=tf.nn.relu): 96 | conv1 = conv(inputs, 7, 64, name='conv1_7x7_s2', stride=2) 97 | padding1 = tf.constant([[0, 0], [0, 1], [0, 1], 
[0, 0]]) 98 | conv1_pad = tf.pad(conv1, padding1, 'CONSTANT') 99 | pool1 = max_pool( 100 | conv1_pad, 'pool1', padding='VALID', filter_size=3, stride=2) 101 | pool1_lrn = tf.nn.local_response_normalization( 102 | pool1, depth_radius=2, alpha=2e-05, beta=0.75, 103 | name='pool1_lrn') 104 | 105 | conv2_reduce = conv(pool1_lrn, 1, 64, name='conv2_3x3_reduce') 106 | conv2 = conv(conv2_reduce, 3, 192, name='conv2_3x3') 107 | padding2 = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]]) 108 | conv2_pad = tf.pad(conv2, padding1, 'CONSTANT') 109 | pool2 = max_pool( 110 | conv2_pad, 'pool2', padding='VALID', filter_size=3, stride=2) 111 | pool2_lrn = tf.nn.local_response_normalization( 112 | pool2, depth_radius=2, alpha=2e-05, beta=0.75, 113 | name='pool2_lrn') 114 | 115 | with arg_scope([inception_layer], 116 | trainable=trainable, 117 | data_dict=data_dict): 118 | inception3a = inception_layer( 119 | pool2_lrn, 64, 96, 128, 16, 32, 32, name='inception_3a') 120 | inception3b = inception_layer( 121 | inception3a, 128, 128, 192, 32, 96, 64, name='inception_3b') 122 | pool3 = max_pool( 123 | inception3b, 'pool3', padding='SAME', filter_size=3, stride=2) 124 | 125 | inception4a = inception_layer( 126 | pool3, 192, 96, 208, 16, 48, 64, name='inception_4a') 127 | inception4b = inception_layer( 128 | inception4a, 160, 112, 224, 24, 64, 64, name='inception_4b') 129 | inception4c = inception_layer( 130 | inception4b, 128, 128, 256, 24, 64, 64, name='inception_4c') 131 | inception4d = inception_layer( 132 | inception4c, 112, 144, 288, 32, 64, 64, name='inception_4d') 133 | inception4e = inception_layer( 134 | inception4d, 256, 160, 320, 32, 128, 128, name='inception_4e') 135 | pool4 = max_pool( 136 | inception4e, 'pool4', padding='SAME', filter_size=3, stride=2) 137 | 138 | inception5a = inception_layer( 139 | pool4, 256, 160, 320, 32, 128, 128, name='inception_5a') 140 | inception5b = inception_layer( 141 | inception5a, 384, 192, 384, 48, 128, 128, name='inception_5b') 142 | 143 | self.conv_layer['conv1_7x7_s2'] = conv1 144 | self.conv_layer['conv2_3x3'] = conv2 145 | self.conv_layer['inception3a'] = inception3a 146 | self.conv_layer['inception3b'] = inception3b 147 | self.conv_layer['inception4a'] = inception4a 148 | self.conv_layer['inception4b'] = inception4b 149 | self.conv_layer['inception4c'] = inception4c 150 | self.conv_layer['inception4d'] = inception4d 151 | self.conv_layer['inception4e'] = inception4e 152 | self.conv_layer['inception5a'] = inception5a 153 | self.conv_layer['inception5b'] = inception5b 154 | 155 | return inception5b 156 | 157 | -------------------------------------------------------------------------------- /lib/nets/layers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: layers.py 4 | # Author: Qian Ge 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow.contrib.framework import add_arg_scope 9 | 10 | from tensorcv.models.layers import * 11 | 12 | 13 | @add_arg_scope 14 | def transpose_conv(x, 15 | filter_size, 16 | out_dim, 17 | data_dict, 18 | out_shape=None, 19 | use_bias=True, 20 | reuse=False, 21 | stride=2, 22 | padding='SAME', 23 | trainable=False, 24 | nl=tf.identity, 25 | name='dconv'): 26 | 27 | stride = get_shape4D(stride) 28 | 29 | in_dim = x.get_shape().as_list()[-1] 30 | 31 | # TODO other ways to determine the output shape 32 | x_shape = tf.shape(x) 33 | # assume output shape is input_shape*stride 34 | if out_shape is None: 35 | out_shape = 
tf.stack([x_shape[0], 36 | tf.multiply(x_shape[1], stride[1]), 37 | tf.multiply(x_shape[2], stride[2]), 38 | out_dim]) 39 | 40 | filter_shape = get_shape2D(filter_size) + [out_dim, in_dim] 41 | 42 | with tf.variable_scope(name) as scope: 43 | if reuse == True: 44 | scope.reuse_variables() 45 | init_w = None 46 | init_b = None 47 | else: 48 | try: 49 | load_data = data_dict[name][0] 50 | except KeyError: 51 | load_data = data_dict[name]['weights'] 52 | print('Load {} weights!'.format(name)) 53 | # load_data = np.reshape(load_data, shape) 54 | # load_data = tf.nn.l2_normalize( 55 | # tf.transpose(load_data, perm=[1, 0, 2, 3])) 56 | # load_data = tf.transpose(load_data, perm=[1, 0, 2, 3]) 57 | init_w = tf.constant_initializer(load_data) 58 | 59 | if use_bias: 60 | try: 61 | load_data = data_dict[name][1] 62 | except KeyError: 63 | load_data = data_dict[name]['biases'] 64 | print('Load {} biases!'.format(name)) 65 | init_b = tf.constant_initializer(load_data) 66 | 67 | weights = tf.get_variable('weights', 68 | filter_shape, 69 | initializer=init_w, 70 | trainable=trainable) 71 | if use_bias: 72 | biases = tf.get_variable('biases', 73 | [in_dim], 74 | initializer=init_b, 75 | trainable=trainable) 76 | x = tf.nn.bias_add(x, -biases) 77 | 78 | output = tf.nn.conv2d_transpose(x, 79 | weights, 80 | output_shape=out_shape, 81 | strides=stride, 82 | padding=padding, 83 | name=scope.name) 84 | 85 | # if use_bias: 86 | # output = tf.nn.bias_add(output, biases) 87 | # TODO need test 88 | output.set_shape([None, None, None, out_dim]) 89 | 90 | output = nl(output, name='output') 91 | return output 92 | 93 | 94 | # https://github.com/tensorflow/tensorflow/pull/16885 95 | def unpool_2d(pool, 96 | ind, 97 | stride=[1, 2, 2, 1], 98 | scope='unpool_2d'): 99 | """Adds a 2D unpooling op. 100 | https://arxiv.org/abs/1505.04366 101 | Unpooling layer after max_pool_with_argmax. 
102 | Args: 103 | pool: max pooled output tensor 104 | ind: argmax indices 105 | stride: stride is the same as for the pool 106 | Return: 107 | unpool: unpooling tensor 108 | """ 109 | 110 | with tf.variable_scope(scope): 111 | ind_shape = tf.shape(ind) 112 | # pool = pool[:, :ind_shape[1], :ind_shape[2], :] 113 | 114 | input_shape = tf.shape(pool) 115 | output_shape = [input_shape[0], 116 | input_shape[1] * stride[1], 117 | input_shape[2] * stride[2], 118 | input_shape[3]] 119 | 120 | flat_input_size = tf.reduce_prod(input_shape) 121 | flat_output_shape = [output_shape[0], 122 | output_shape[1] * output_shape[2] * output_shape[3]] 123 | 124 | pool_ = tf.reshape(pool, [flat_input_size]) 125 | batch_range = tf.reshape( 126 | tf.range(tf.cast(output_shape[0], tf.int64), dtype=ind.dtype), 127 | shape=[input_shape[0], 1, 1, 1]) 128 | b = tf.ones_like(ind) * batch_range 129 | b1 = tf.reshape(b, [flat_input_size, 1]) 130 | ind_ = tf.reshape(ind, [flat_input_size, 1]) 131 | ind_ = tf.concat([b1, ind_], 1) 132 | 133 | ret = tf.scatter_nd(ind_, pool_, shape=tf.cast(flat_output_shape, tf.int64)) 134 | ret = tf.reshape(ret, output_shape) 135 | 136 | set_input_shape = pool.get_shape() 137 | set_output_shape = [set_input_shape[0], 138 | set_input_shape[1] * stride[1], 139 | set_input_shape[2] * stride[2], 140 | set_input_shape[3]] 141 | ret.set_shape(set_output_shape) 142 | return ret 143 | 144 | 145 | def max_pool(x, 146 | name='max_pool', 147 | filter_size=2, 148 | stride=None, 149 | padding='VALID', 150 | switch=False): 151 | """ 152 | Max pooling layer 153 | Args: 154 | x (tf.tensor): a tensor 155 | name (str): name scope of the layer 156 | filter_size (int or list with length 2): size of filter 157 | stride (int or list with length 2): Default to be the same as shape 158 | padding (str): 'VALID' or 'SAME'. Use 'SAME' for FCN. 159 | Returns: 160 | tf.tensor with name 'name' 161 | """ 162 | 163 | padding = padding.upper() 164 | filter_shape = get_shape4D(filter_size) 165 | if stride is None: 166 | stride = filter_shape 167 | else: 168 | stride = get_shape4D(stride) 169 | 170 | if switch == True: 171 | return tf.nn.max_pool_with_argmax( 172 | x, 173 | ksize=filter_shape, 174 | strides=stride, 175 | padding=padding, 176 | Targmax=tf.int64, 177 | name=name) 178 | else: 179 | return tf.nn.max_pool( 180 | x, 181 | ksize=filter_shape, 182 | strides=stride, 183 | padding=padding, 184 | name=name), None 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /lib/nets/vgg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: vgg.py 4 | # Author: Qian Ge 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from tensorcv.models.layers import * 10 | from tensorcv.models.base import BaseModel 11 | 12 | import lib.nets.layers as L 13 | 14 | 15 | VGG_MEAN = [103.939, 116.779, 123.68] 16 | 17 | 18 | def resize_tensor_image_with_smallest_side(image, small_size): 19 | """ 20 | Resize image tensor with smallest side = small_size and 21 | keep the original aspect ratio. 22 | 23 | Args: 24 | image (tf.tensor): 4-D Tensor of shape 25 | [batch, height, width, channels] 26 | or 3-D Tensor of shape [height, width, channels]. 27 | small_size (int): A 1-D int. The smallest side of resize image. 28 | 29 | Returns: 30 | Image tensor with the original aspect ratio and 31 | smallest side = small_size. 
32 | If images was 4-D, a 4-D float Tensor of shape 33 | [batch, new_height, new_width, channels]. 34 | If images was 3-D, a 3-D float Tensor of shape 35 | [new_height, new_width, channels]. 36 | """ 37 | im_shape = tf.shape(image) 38 | shape_dim = image.get_shape() 39 | if len(shape_dim) <= 3: 40 | height = tf.cast(im_shape[0], tf.float32) 41 | width = tf.cast(im_shape[1], tf.float32) 42 | else: 43 | height = tf.cast(im_shape[1], tf.float32) 44 | width = tf.cast(im_shape[2], tf.float32) 45 | 46 | height_smaller_than_width = tf.less_equal(height, width) 47 | 48 | new_shorter_edge = tf.constant(small_size, tf.float32) 49 | new_height, new_width = tf.cond( 50 | height_smaller_than_width, 51 | lambda: (new_shorter_edge, (width / height) * new_shorter_edge), 52 | lambda: ((height / width) * new_shorter_edge, new_shorter_edge)) 53 | 54 | return tf.image.resize_images( 55 | tf.cast(image, tf.float32), 56 | [tf.cast(new_height, tf.int32), tf.cast(new_width, tf.int32)]) 57 | 58 | 59 | class BaseVGG(BaseModel): 60 | """ base of VGG class """ 61 | def __init__(self, num_class=1000, 62 | num_channels=3, 63 | im_height=224, im_width=224, 64 | learning_rate=0.0001, 65 | is_load=False, 66 | pre_train_path=None, 67 | is_rescale=False): 68 | """ 69 | Args: 70 | num_class (int): number of image classes 71 | num_channels (int): number of input channels 72 | im_height, im_width (int): size of input image 73 | Can be unknown when testing. 74 | learning_rate (float): learning rate of training 75 | """ 76 | 77 | self.learning_rate = learning_rate 78 | self.num_channels = num_channels 79 | self.im_height = im_height 80 | self.im_width = im_width 81 | self.num_class = num_class 82 | self._is_rescale = is_rescale 83 | 84 | self.layer = {} 85 | 86 | self._is_load = is_load 87 | if self._is_load and pre_train_path is None: 88 | raise ValueError('pre_train_path can not be None!') 89 | self._pre_train_path = pre_train_path 90 | 91 | self.set_is_training(True) 92 | 93 | def _create_input(self): 94 | self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') 95 | self.image = tf.placeholder( 96 | tf.float32, name='image', 97 | shape=[None, self.im_height, self.im_width, self.num_channels]) 98 | 99 | self.label = tf.placeholder(tf.int64, [None], 'label') 100 | 101 | self.set_model_input([self.image, self.keep_prob]) 102 | self.set_dropout(self.keep_prob, keep_prob=0.5) 103 | self.set_train_placeholder([self.image, self.label]) 104 | self.set_prediction_placeholder(self.image) 105 | 106 | 107 | class VGG19(BaseVGG): 108 | 109 | def _create_conv(self, input_im, data_dict): 110 | 111 | arg_scope = tf.contrib.framework.arg_scope 112 | with arg_scope([conv], nl=tf.nn.relu, 113 | trainable=True, data_dict=data_dict): 114 | conv1_1 = conv(input_im, 3, 64, 'conv1_1') 115 | conv1_2 = conv(conv1_1, 3, 64, 'conv1_2') 116 | pool1 = max_pool(conv1_2, 'pool1', padding='SAME') 117 | 118 | conv2_1 = conv(pool1, 3, 128, 'conv2_1') 119 | conv2_2 = conv(conv2_1, 3, 128, 'conv2_2') 120 | pool2 = max_pool(conv2_2, 'pool2', padding='SAME') 121 | 122 | conv3_1 = conv(pool2, 3, 256, 'conv3_1') 123 | conv3_2 = conv(conv3_1, 3, 256, 'conv3_2') 124 | conv3_3 = conv(conv3_2, 3, 256, 'conv3_3') 125 | conv3_4 = conv(conv3_3, 3, 256, 'conv3_4') 126 | pool3 = max_pool(conv3_4, 'pool3', padding='SAME') 127 | 128 | conv4_1 = conv(pool3, 3, 512, 'conv4_1') 129 | conv4_2 = conv(conv4_1, 3, 512, 'conv4_2') 130 | conv4_3 = conv(conv4_2, 3, 512, 'conv4_3') 131 | conv4_4 = conv(conv4_3, 3, 512, 'conv4_4') 132 | pool4 = max_pool(conv4_4, 'pool4', 
padding='SAME') 133 | 134 | conv5_1 = conv(pool4, 3, 512, 'conv5_1') 135 | conv5_2 = conv(conv5_1, 3, 512, 'conv5_2') 136 | conv5_3 = conv(conv5_2, 3, 512, 'conv5_3') 137 | conv5_4 = conv(conv5_3, 3, 512, 'conv5_4') 138 | pool5 = max_pool(conv5_4, 'pool5', padding='SAME') 139 | 140 | self.layer['conv1_2'] = conv1_2 141 | self.layer['conv2_2'] = conv2_2 142 | self.layer['conv3_4'] = conv3_4 143 | self.layer['conv4_4'] = conv4_4 144 | self.layer['pool5'] = pool5 145 | self.layer['conv_out'] = self.layer['conv5_4'] = conv5_4 146 | 147 | return pool5 148 | 149 | def _create_model(self): 150 | 151 | with tf.name_scope('input'): 152 | input_im = self.model_input[0] 153 | keep_prob = self.model_input[1] 154 | 155 | input_im = tf.reshape(input_im, [-1, 224, 224, 3]) 156 | 157 | self.layer['input'] = input_im 158 | # Convert RGB image to BGR image 159 | red, green, blue = tf.split(axis=3, 160 | num_or_size_splits=3, 161 | value=input_im) 162 | 163 | input_bgr = tf.concat(axis=3, values=[ 164 | blue - VGG_MEAN[0], 165 | green - VGG_MEAN[1], 166 | red - VGG_MEAN[2], 167 | ]) 168 | 169 | data_dict = {} 170 | if self._is_load: 171 | data_dict = np.load(self._pre_train_path, 172 | encoding='latin1').item() 173 | 174 | conv_output = self._create_conv(input_bgr, data_dict) 175 | 176 | arg_scope = tf.contrib.framework.arg_scope 177 | with arg_scope([fc], trainable=True, data_dict=data_dict): 178 | fc6 = fc(conv_output, 4096, 'fc6', nl=tf.nn.relu) 179 | dropout_fc6 = dropout(fc6, keep_prob, self.is_training) 180 | 181 | fc7 = fc(dropout_fc6, 4096, 'fc7', nl=tf.nn.relu) 182 | dropout_fc7 = dropout(fc7, keep_prob, self.is_training) 183 | 184 | fc8 = fc(dropout_fc7, self.num_class, 'fc8') 185 | 186 | self.layer['fc6'] = fc6 187 | self.layer['fc7'] = fc7 188 | self.layer['fc8'] = self.layer['output'] = fc8 189 | 190 | 191 | class VGG19_FCN(VGG19): 192 | 193 | def _create_model(self): 194 | 195 | with tf.name_scope('input'): 196 | input_im = self.model_input[0] 197 | keep_prob = self.model_input[1] 198 | 199 | if self._is_rescale: 200 | input_im =\ 201 | resize_tensor_image_with_smallest_side(input_im, 224) 202 | self.layer['input'] = input_im 203 | 204 | # Convert rgb image to bgr image 205 | red, green, blue = tf.split(axis=3, num_or_size_splits=3, 206 | value=input_im) 207 | 208 | input_bgr = tf.concat(axis=3, values=[ 209 | blue - VGG_MEAN[0], 210 | green - VGG_MEAN[1], 211 | red - VGG_MEAN[2], 212 | ]) 213 | 214 | data_dict = {} 215 | if self._is_load: 216 | data_dict = np.load(self._pre_train_path, 217 | encoding='latin1').item() 218 | 219 | conv_outptu = self._create_conv(input_bgr, data_dict) 220 | 221 | arg_scope = tf.contrib.framework.arg_scope 222 | with arg_scope([conv], trainable=True, 223 | data_dict=data_dict, padding='VALID'): 224 | 225 | fc6 = conv(conv_outptu, 7, 4096, 'fc6', nl=tf.nn.relu) 226 | dropout_fc6 = dropout(fc6, keep_prob, self.is_training) 227 | 228 | fc7 = conv(dropout_fc6, 1, 4096, 'fc7', nl=tf.nn.relu) 229 | dropout_fc7 = dropout(fc7, keep_prob, self.is_training) 230 | 231 | fc8 = conv(dropout_fc7, 1, self.num_class, 'fc8') 232 | 233 | self.layer['fc6'] = fc6 234 | self.layer['fc7'] = fc7 235 | self.layer['fc8'] = self.layer['output'] = fc8 236 | 237 | self.output = tf.identity(fc8, 'model_output') 238 | 239 | self.avg_output = global_avg_pool(fc8) 240 | 241 | 242 | class BaseVGG19(BaseModel): 243 | def __init__(self): 244 | 245 | self._trainable = False 246 | self._switch = False 247 | 248 | def _sub_mean(self, inputs): 249 | VGG_MEAN = [103.939, 116.779, 123.68] 250 | red, 
green, blue = tf.split(axis=3, 251 | num_or_size_splits=3, 252 | value=inputs) 253 | input_bgr = tf.concat(axis=3, values=[ 254 | blue - VGG_MEAN[0], 255 | green - VGG_MEAN[1], 256 | red - VGG_MEAN[2], 257 | ]) 258 | return input_bgr 259 | 260 | def _creat_conv(self, inputs, layer_dict, data_dict={}): 261 | 262 | self.receptive_s = 1 263 | self.stride_t = 1 264 | self.receptive_size = {} 265 | self.stride = {} 266 | self.cur_input = inputs 267 | 268 | def conv_layer(filter_size, out_dim, name): 269 | init_w = tf.keras.initializers.he_normal() 270 | # init_w = None 271 | layer_dict[name] = conv(self.cur_input, filter_size, out_dim, name, init_w=init_w) 272 | self.receptive_s = self.receptive_s + (filter_size - 1) * self.stride_t 273 | self.receptive_size[name] = self.receptive_s 274 | self.stride[name] = self.stride_t 275 | self.cur_input = layer_dict[name] 276 | 277 | def pool_layer(name, switch=True, padding='SAME'): 278 | layer_dict[name], layer_dict['switch_{}'.format(name)] =\ 279 | L.max_pool(self.cur_input, name, padding=padding, switch=switch) 280 | self.receptive_s = self.receptive_s + self.stride_t 281 | self.receptive_size[name] = self.receptive_s 282 | self.stride_t = self.stride_t * 2 283 | self.stride[name] = self.stride_t 284 | self.cur_input = layer_dict[name] 285 | 286 | arg_scope = tf.contrib.framework.arg_scope 287 | with arg_scope([conv], nl=tf.nn.relu, 288 | trainable=self._trainable, data_dict=data_dict): 289 | 290 | conv_layer(3, 64, 'conv1_1') 291 | conv_layer(3, 64, 'conv1_2') 292 | pool_layer('pool1', switch=self._switch) 293 | 294 | conv_layer(3, 128, 'conv2_1') 295 | conv_layer(3, 128, 'conv2_2') 296 | pool_layer('pool2', switch=self._switch) 297 | 298 | conv_layer(3, 256, 'conv3_1') 299 | conv_layer(3, 256, 'conv3_2') 300 | conv_layer(3, 256, 'conv3_3') 301 | conv_layer(3, 256, 'conv3_4') 302 | pool_layer('pool3', switch=self._switch) 303 | 304 | conv_layer(3, 512, 'conv4_1') 305 | conv_layer(3, 512, 'conv4_2') 306 | conv_layer(3, 512, 'conv4_3') 307 | conv_layer(3, 512, 'conv4_4') 308 | pool_layer('pool4', switch=self._switch) 309 | 310 | conv_layer(3, 512, 'conv5_1') 311 | conv_layer(3, 512, 'conv5_2') 312 | conv_layer(3, 512, 'conv5_3') 313 | conv_layer(3, 512, 'conv5_4') 314 | pool_layer('pool5', switch=self._switch) 315 | 316 | return self.cur_input 317 | 318 | 319 | def threshold_tensor(x, thr, thr_type): 320 | cond = thr_type(x, tf.ones(tf.shape(x)) * thr) 321 | out = tf.where(cond, x, tf.zeros(tf.shape(x))) 322 | 323 | return out 324 | 325 | class DeconvBaseVGG19(BaseVGG19): 326 | def __init__(self, pre_train_path, feat_key, pick_feat=None): 327 | 328 | self.data_dict = np.load(pre_train_path, 329 | encoding='latin1').item() 330 | 331 | self.im = tf.placeholder(tf.float32, 332 | [None, None, None, 3], 333 | name='im') 334 | 335 | self._feat_key = feat_key 336 | self._pick_feat = pick_feat 337 | self._trainable = False 338 | self._switch = True 339 | self.layers = {} 340 | self._create_model() 341 | 342 | def _create_model(self): 343 | input_im = self._sub_mean(self.im) 344 | self._creat_conv(input_im, self.layers, data_dict=self.data_dict) 345 | 346 | cur_feats = self.layers[self._feat_key] 347 | try: 348 | self.max_act = tf.reduce_max(cur_feats[:, :, :, self._pick_feat]) 349 | self.feats = threshold_tensor(cur_feats, self.max_act, tf.equal) 350 | except ValueError: 351 | # else: 352 | self.max_act = tf.reduce_max(cur_feats) 353 | self.feats = threshold_tensor(cur_feats, self.max_act, tf.greater_equal) 354 | 355 | 
self.layers['de{}'.format(self._feat_key)] = self.feats 356 | self._create_deconv(self.layers, data_dict=self.data_dict) 357 | 358 | def _create_deconv(self, layer_dict, data_dict={}): 359 | def deconv_block(input_key, output_key, n_feat, name): 360 | try: 361 | layer_dict[output_key] =\ 362 | L.transpose_conv(layer_dict[input_key], 363 | out_dim=n_feat, 364 | name=name, 365 | ) 366 | except KeyError: 367 | pass 368 | 369 | def unpool_block(input_key, output_key, switch_key, name): 370 | try: 371 | layer_dict[output_key] =\ 372 | L.unpool_2d(layer_dict[input_key], 373 | layer_dict[switch_key], 374 | stride=[1, 2, 2, 1], 375 | scope=name) 376 | except KeyError: 377 | pass 378 | 379 | arg_scope = tf.contrib.framework.arg_scope 380 | with arg_scope([L.transpose_conv], 381 | filter_size=3, 382 | nl=tf.nn.relu, 383 | trainable=False, 384 | data_dict=data_dict, 385 | use_bias=False, 386 | stride=1, 387 | reuse=True): 388 | 389 | deconv_block('deconv5_4', 'deconv5_3', 512, 'conv5_4') 390 | deconv_block('deconv5_3', 'deconv5_2', 512, 'conv5_3') 391 | deconv_block('deconv5_2', 'deconv5_1', 512, 'conv5_2') 392 | deconv_block('deconv5_1', 'depool4', 512, 'conv5_1') 393 | unpool_block('depool4', 'deconv4_4', 'switch_pool4', 'unpool4') 394 | 395 | deconv_block('deconv4_4', 'deconv4_3', 512, 'conv4_4') 396 | deconv_block('deconv4_3', 'deconv4_2', 512, 'conv4_3') 397 | deconv_block('deconv4_2', 'deconv4_1', 512, 'conv4_2') 398 | deconv_block('deconv4_1', 'depool3', 256, 'conv4_1') 399 | unpool_block('depool3', 'deconv3_4', 'switch_pool3', 'unpool3') 400 | 401 | deconv_block('deconv3_4', 'deconv3_3', 256, 'conv3_4') 402 | deconv_block('deconv3_3', 'deconv3_2', 256, 'conv3_3') 403 | deconv_block('deconv3_2', 'deconv3_1', 256, 'conv3_2') 404 | deconv_block('deconv3_1', 'depool2', 128, 'conv3_1') 405 | unpool_block('depool2', 'deconv2_2', 'switch_pool2', 'unpool2') 406 | 407 | deconv_block('deconv2_2', 'deconv2_1', 128, 'conv2_2') 408 | deconv_block('deconv2_1', 'depool1', 64, 'conv2_1') 409 | unpool_block('depool1', 'deconv1_2', 'switch_pool1', 'unpool1') 410 | 411 | deconv_block('deconv1_2', 'deconv1_1', 64, 'conv1_2') 412 | 413 | layer_dict['deconvim'] =\ 414 | L.transpose_conv(layer_dict['deconv1_1'], 415 | 3, 416 | 3, 417 | trainable=False, 418 | data_dict=data_dict, 419 | reuse=True, 420 | use_bias=False, 421 | stride=1, 422 | name='conv1_1') 423 | 424 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: image.py 4 | # Author: Qian Ge 5 | 6 | from scipy import misc 7 | 8 | def im_rescale(im, resize): 9 | im_shape = im.shape 10 | im = misc.imresize(im, (resize[0], resize[1], im_shape[-1])) 11 | return im -------------------------------------------------------------------------------- /lib/utils/normalize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: normalize.py 4 | # Author: Qian Ge 5 | 6 | import numpy as np 7 | 8 | def indentity(filter_in): 9 | return filter_in 10 | 11 | def norm_std(filter_in): 12 
| """ Normalization of conv2d filters for visualization 13 | https://github.com/jacobgil/keras-filter-visualization/blob/master/utils.py 14 | 15 | Args: 16 | filter_in: [size_x, size_y, n_channel] 17 | 18 | """ 19 | x = filter_in 20 | x -= x.mean() 21 | x /= (x.std() + 1e-5) 22 | # make most of the value between [-0.5, 0.5] 23 | x *= 0.1 24 | # move to [0, 1] 25 | x += 0.5 26 | x *= 255 27 | x = np.clip(x, 0, 255).astype('uint8') 28 | return x 29 | 30 | def norm_range(filter_in): 31 | f_min = np.amin(filter_in) 32 | f_max = np.amax(filter_in) 33 | 34 | return (filter_in - f_min) * 1.0 / (f_max + 1e-5) * 255.0 35 | -------------------------------------------------------------------------------- /lib/utils/viz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: viz.py 4 | # Author: Qian Ge 5 | 6 | import numpy as np 7 | import scipy.misc 8 | 9 | import lib.utils.normalize as normlize 10 | 11 | 12 | def image_weight_mask(image, mask): 13 | """ 14 | Args: 15 | image: image with size [HEIGHT, WIDTH, CHANNEL] 16 | mask: image with size [HEIGHT, WIDTH, 1] or [HEIGHT, WIDTH] 17 | """ 18 | image = np.array(np.squeeze(image)) 19 | mask = np.array(np.squeeze(mask)) 20 | assert len(mask.shape) == 2 21 | assert len(image.shape) < 4 22 | mask.astype('float32') 23 | mask = np.reshape(mask, (mask.shape[0], mask.shape[1])) 24 | mask = mask / np.amax(mask) 25 | 26 | if len(image.shape) == 2: 27 | return np.multiply(image, mask) 28 | else: 29 | for c in range(0, image.shape[2]): 30 | image[:, :, c] = np.multiply(image[:, :, c], mask) 31 | return image 32 | 33 | # def save_merge_images(images, merge_grid, save_path, color=False, tanh=False): 34 | # """Save multiple images with same size into one larger image. 35 | # The best size number is 36 | # int(max(sqrt(image.shape[0]),sqrt(image.shape[1]))) + 1 37 | # Args: 38 | # images (np.ndarray): A batch of image array to be merged with size 39 | # [BATCH_SIZE, HEIGHT, WIDTH, CHANNEL]. 40 | # merge_grid (list): List of length 2. The grid size for merge images. 41 | # save_path (str): Path for saving the merged image. 42 | # color (bool): Whether convert intensity image to color image. 43 | # tanh (bool): If True, will normalize the image in range [-1, 1] 44 | # to [0, 1] (for GAN models). 45 | # Example: 46 | # The batch_size is 64, then the size is recommended [8, 8]. 47 | # The batch_size is 32, then the size is recommended [6, 6]. 
48 | # """ 49 | 50 | # # normalization of tanh output 51 | # img = images 52 | 53 | # if tanh: 54 | # img = (img + 1.0) / 2.0 55 | 56 | # if color: 57 | # # TODO 58 | # img_list = [] 59 | # for im in np.squeeze(img): 60 | # im = intensity_to_rgb(np.squeeze(im), normalize=True) 61 | # img_list.append(im) 62 | # img = np.array(img_list) 63 | # # img = np.expand_dims(img, 0) 64 | 65 | # if len(img.shape) == 2 or (len(img.shape) == 3 and img.shape[2] <= 4): 66 | # img = np.expand_dims(img, 0) 67 | # # img = images 68 | # h, w = img.shape[1], img.shape[2] 69 | # merge_img = np.zeros((h * merge_grid[0], w * merge_grid[1], 3)) 70 | # if len(img.shape) < 4: 71 | # img = np.expand_dims(img, -1) 72 | 73 | # for idx, image in enumerate(img): 74 | # i = idx % merge_grid[1] 75 | # j = idx // merge_grid[1] 76 | # merge_img[j*h:j*h+h, i*w:i*w+w, :] = image 77 | 78 | # scipy.misc.imsave(save_path, merge_img) 79 | 80 | def viz_filters(filters, 81 | grid_size, 82 | save_path, 83 | gap=0, 84 | gap_color=0, 85 | nf=normlize.indentity, 86 | shuffle=True): 87 | """ Visualization conv2d filters 88 | 89 | Args: 90 | filters: [size_x, size_y, n_channel, n_features] 91 | or [size_x, size_y, n_features] 92 | 93 | """ 94 | filters = np.array(filters) 95 | if len(filters.shape) == 4: 96 | n_channel = filters.shape[2] 97 | elif len(filters.shape) == 3: 98 | n_channel = 1 99 | filters = np.expand_dims(filters, axis=2) 100 | # assert len(filters.shape) == 4 101 | assert len(grid_size) == 2 102 | 103 | h = filters.shape[0] 104 | w = filters.shape[1] 105 | 106 | merge_im = np.zeros((h * grid_size[0] + (grid_size[0] + 1) * gap, 107 | w * grid_size[1] + (grid_size[1] + 1) * gap, 108 | n_channel)) + gap_color 109 | 110 | n_viz_filter = min(filters.shape[-1], grid_size[0] * grid_size[1]) 111 | if shuffle == True: 112 | pick_id = np.random.permutation(filters.shape[-1]) 113 | else: 114 | pick_id = range(0, filters.shape[-1]) 115 | for idx in range(0, n_viz_filter): 116 | i = idx % grid_size[1] 117 | j = idx // grid_size[1] 118 | cur_filter = filters[:, :, :, pick_id[idx]] 119 | merge_im[j * (h + gap) + gap: j * (h + gap) + h + gap, 120 | i * (w + gap) + gap: i * (w + gap) + w + gap, :]\ 121 | = nf(cur_filter) 122 | scipy.misc.imsave(save_path, np.squeeze(merge_im)) 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | pillow 3 | numpy 4 | matplotlib -------------------------------------------------------------------------------- /test/setup_test_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: setup_test_env.py 4 | # Author: Qian Ge 5 | 6 | import sys 7 | sys.path.append('lib/') 8 | 9 | IMPATH = 'data/' 10 | CLASS_IMPATH = 'data/class_test/' 11 | SAVE_DIR = 'data/' 12 | -------------------------------------------------------------------------------- /test/test_cam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: test_cam.py 4 | # Author: Qian Ge 5 | 6 | # import argparse 7 | from collections import namedtuple 8 | import tensorflow as tf 9 | 10 | from tensorcv.dataflow.image import ImageLabelFromFolder 11 | from tensorcv.callbacks import * 12 | from tensorcv.train.config import TrainConfig 13 | from 
tensorcv.train.simple import SimpleFeedTrainer 14 | from tensorcv.predicts.config import PridectConfig 15 | from tensorcv.predicts.simple import SimpleFeedPredictor 16 | from tensorcv.predicts import * 17 | 18 | from setup_test_env import * 19 | from models.cam import VGGCAM 20 | 21 | configpath = namedtuple('CONFIG_PATH', ['summary_dir']) 22 | config_path = configpath(summary_dir=SAVE_DIR) 23 | 24 | NUM_CHANNEL = 3 25 | 26 | 27 | def get_config(FLAGS): 28 | # data for training 29 | dataset_train = ImageLabelFromFolder(FLAGS.type, 30 | data_dir=CLASS_IMPATH, 31 | num_class=FLAGS.nclass, 32 | resize=224, 33 | num_channel=NUM_CHANNEL) 34 | 35 | # Print image class name and label 36 | # print(dataset_train.label_dict) 37 | 38 | training_callbacks = [ 39 | # TrainSummary(key='train', periodic=1), 40 | CheckScalar(['accuracy/result', 'loss/result'], periodic=1)] 41 | 42 | inspect_class = None 43 | 44 | return TrainConfig( 45 | dataflow=dataset_train, 46 | model=VGGCAM(num_class=FLAGS.nclass, 47 | inspect_class=inspect_class, 48 | learning_rate=0.001, 49 | is_load=False), 50 | monitors=TFSummaryWriter(), 51 | callbacks=training_callbacks, 52 | batch_size=FLAGS.bsize, 53 | max_epoch=1, 54 | # summary_periodic=1, 55 | default_dirs=config_path) 56 | 57 | 58 | # def get_predict_config(FLAGS): 59 | # dataset_test = ImageFromFile(FLAGS.type, 60 | # data_dir=config_path.test_data_dir, 61 | # shuffle=False, 62 | # resize=224, 63 | # num_channel=NUM_CHANNEL) 64 | # # dataset_test = ImageLabelFromFolder('.jpg', 65 | # # data_dir = CLASS_IMPATH, 66 | # # num_class = FLAGS.nclass, 67 | # # resize = 224, 68 | # # num_channel = NUM_CHANNEL) 69 | # prediction_list = [ 70 | # # PredictionScalar(['pre_label'], ['label']), 71 | # # PredictionMeanScalar('accuracy/result', 'test_accuracy'), 72 | # PredictionMat('classmap/result', ['test']), 73 | # PredictionOverlay(['classmap/result', 'image'], ['map', 'image'], 74 | # color=True, merge_im=True), 75 | # PredictionImage(['image'], ['image'], color=True, merge_im=True)] 76 | 77 | # return PridectConfig( 78 | # dataflow=dataset_test, 79 | # model=VGGCAM(num_class=FLAGS.nclass, inspect_class=FLAGS.label, 80 | # is_load=True, pre_train_path=config_path.vgg_dir), 81 | # model_name=FLAGS.model, 82 | # predictions=prediction_list, 83 | # batch_size=FLAGS.bsize, 84 | # default_dirs=config_path) 85 | 86 | 87 | # def get_args(): 88 | # parser = argparse.ArgumentParser() 89 | # parser.add_argument('--bsize', default=1, type=int) 90 | # parser.add_argument('--label', default=-1, type=int, 91 | # help='Label of inspect class.') 92 | # parser.add_argument('--nclass', default=1, type=int, 93 | # help='number of image class') 94 | 95 | # parser.add_argument('--type', default='.jpg', type=str, 96 | # help='image type for training and testing') 97 | 98 | # parser.add_argument('--model', type=str, 99 | # help='file name of the trained model') 100 | 101 | # return parser.parse_args() 102 | 103 | 104 | def test_cam(): 105 | inargs = namedtuple('IN_ARGS', ['bsize', 'label', 'nclass', 'type']) 106 | FLAGS = inargs(bsize=1, label=-1, nclass=1, type='.jpg') 107 | 108 | # FLAGS = get_args() 109 | config = get_config(FLAGS) 110 | SimpleFeedTrainer(config).train() 111 | tf.reset_default_graph() 112 | # 113 | # if FLAGS.train: 114 | # config = get_config(FLAGS) 115 | # SimpleFeedTrainer(config).train() 116 | # if FLAGS.predict: 117 | # config = get_predict_config(FLAGS) 118 | # SimpleFeedPredictor(config).run_predict() 119 | 120 | # 0.6861924529075623 121 | 
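
if __name__ == '__main__':
    # Editor's note -- not part of the original test file. test_cam() can also
    # be run directly (outside of nose), provided the working directory is the
    # repository root so that the relative paths set in setup_test_env.py
    # ('data/', 'data/class_test/') and the 'lib/' import path resolve.
    test_cam()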
-------------------------------------------------------------------------------- /test/test_gradcam.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: test_gradcam.py 4 | # Author: Qian Ge 5 | 6 | from itertools import count 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | from tensorcv.dataflow.image import ImageFromFile 11 | from tensorcv.utils.viz import image_overlay 12 | 13 | from setup_test_env import * 14 | from nets.vgg import VGG19_FCN 15 | from models.guided_backpro import GuideBackPro 16 | from models.grad_cam import ClassifyGradCAM 17 | from utils.viz import image_weight_mask 18 | 19 | 20 | def test_gradcam(): 21 | 22 | # merge several output images in one large image 23 | merge_im = 1 24 | grid_size = np.ceil(merge_im**0.5).astype(int) 25 | 26 | # class label for Grad-CAM generation 27 | # 355 llama 543 dumbbell 605 iPod 515 hat 99 groose 283 tiger cat 28 | # 282 tabby cat 233 border collie 242 boxer 29 | # class_id = [355, 543, 605, 515] 30 | class_id = [283, 242] 31 | 32 | # initialize Grad-CAM 33 | # using VGG19 34 | gcam = ClassifyGradCAM( 35 | vis_model=VGG19_FCN(is_load=False, is_rescale=True)) 36 | gbackprob = GuideBackPro( 37 | vis_model=VGG19_FCN(is_load=False, is_rescale=True)) 38 | 39 | # placeholder for input image 40 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 41 | 42 | # create VGG19 model 43 | gcam.create_model(image) 44 | gcam.setup_graph() 45 | 46 | # generate class map and prediction label ops 47 | map_op = gcam.get_visualization(class_id=class_id) 48 | label_op = gcam.pre_label 49 | 50 | back_pro_op = gbackprob.get_visualization(image) 51 | 52 | # initialize input dataflow 53 | # change '.png' to other image types if other types of images are used 54 | input_im = ImageFromFile('.png', data_dir=IMPATH, 55 | num_channel=3, shuffle=False) 56 | input_im.set_batch_size(1) 57 | 58 | with tf.Session() as sess: 59 | 60 | sess.run(tf.global_variables_initializer()) 61 | 62 | cnt = 0 63 | merge_cnt = 0 64 | o_im_list = [] 65 | im = input_im.next_batch()[0] 66 | gcam_map, b_map, label, o_im =\ 67 | sess.run([map_op, back_pro_op, label_op, gcam.input_im], 68 | feed_dict={image: im}) 69 | print(label) 70 | o_im_list.extend(o_im) 71 | for idx, cid, cmap in zip(count(), gcam_map[1], gcam_map[0]): 72 | overlay_im = image_overlay(cmap, o_im) 73 | weight_im = image_weight_mask(b_map[0], cmap) 74 | try: 75 | weight_im_list[idx].append(weight_im) 76 | overlay_im_list[idx].append(overlay_im) 77 | except NameError: 78 | gcam_class_id = gcam_map[1] 79 | weight_im_list = [[] for i in range(len(gcam_class_id))] 80 | overlay_im_list = [[] for i in range(len(gcam_class_id))] 81 | weight_im_list[idx].append(weight_im) 82 | overlay_im_list[idx].append(overlay_im) 83 | tf.reset_default_graph() 84 | -------------------------------------------------------------------------------- /test/test_guided_backprop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: test_guided_backprop.py 4 | # Author: Qian Ge 5 | 6 | import tensorflow as tf 7 | 8 | from tensorcv.dataflow.image import ImageFromFile 9 | 10 | from setup_test_env import * 11 | from nets.vgg import VGG19_FCN 12 | from models.guided_backpro import GuideBackPro 13 | 14 | 15 | def test_guided_backprop(): 16 | # placeholder for input image 17 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 18 
| # initialize input dataflow 19 | # change '.png' to other image types if other types of images are used 20 | input_im = ImageFromFile('.png', data_dir=IMPATH, 21 | num_channel=3, shuffle=False) 22 | # batch size has to be one 23 | input_im.set_batch_size(1) 24 | 25 | # initialize guided back propagation class 26 | # use VGG19 as an example 27 | # images will be rescaled to smallest side = 224 if is_rescale=True 28 | model = GuideBackPro(vis_model=VGG19_FCN(is_load=False, 29 | is_rescale=True)) 30 | 31 | # get op to compute guided back propagation map 32 | # final output with respect to the input image 33 | back_pro_op = model.get_visualization(image) 34 | 35 | with tf.Session() as sess: 36 | sess.run(tf.global_variables_initializer()) 37 | 38 | im = input_im.next_batch()[0] 39 | guided_backpro, label, o_im =\ 40 | sess.run([back_pro_op, model.pre_label, 41 | model.input_im], 42 | feed_dict={image: im}) 43 | print(label) 44 | tf.reset_default_graph() 45 | 46 | --------------------------------------------------------------------------------
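
Editor's appendix (not a file in the repository): test_guided_backprop.py above computes the guided back-propagation maps but only prints the predicted label. The fragment below is a minimal, hedged sketch of how those maps could be written to disk for inspection; it is meant to sit inside test_guided_backprop(), right after the sess.run(...) call, it reuses norm_range from lib/utils/normalize.py, and the output filename is illustrative only.

    import numpy as np
    import scipy.misc
    from utils.normalize import norm_range  # 'lib/' is already on sys.path via setup_test_env

    # guided_backpro[0] holds one gradient map per inspected class, each wrapped
    # in the single-element list returned by tf.gradients; squeeze to [H, W, 3].
    gbp_map = np.squeeze(guided_backpro[0][0])
    scipy.misc.imsave(SAVE_DIR + 'guided_backprop_0.png', norm_range(gbp_map))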