├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── data
│   ├── cat.jpg
│   ├── class_test
│   │   └── 003.backpack
│   │       └── 003_0020.jpg
│   ├── im_0.png
│   └── test_0.png
├── doc
│   ├── cam
│   │   ├── README.md
│   │   └── figs
│   │       ├── celtech_change.png
│   │       ├── celtech_diff.png
│   │       └── celtech_result.png
│   ├── deconv
│   │   ├── README.md
│   │   └── figs
│   │       ├── dog
│   │       │   ├── conv1_2_feat.png
│   │       │   ├── conv1_2_im.png
│   │       │   ├── conv2_2_feat.png
│   │       │   ├── conv2_2_im.png
│   │       │   ├── conv3_4_feat.png
│   │       │   ├── conv3_4_im.png
│   │       │   ├── conv4_4_feat.png
│   │       │   ├── conv4_4_feat_171.png
│   │       │   ├── conv4_4_feat_349.png
│   │       │   ├── conv4_4_im.png
│   │       │   ├── conv4_4_im_171.png
│   │       │   ├── conv4_4_im_349.png
│   │       │   ├── conv5_2_feat.png
│   │       │   └── conv5_2_im.png
│   │       └── people
│   │           ├── conv1_2_feat.png
│   │           ├── conv1_2_im.png
│   │           ├── conv2_2_feat.png
│   │           ├── conv2_2_feat_59.png
│   │           ├── conv2_2_im.png
│   │           ├── conv2_2_im_59.png
│   │           ├── conv3_4_feat.png
│   │           ├── conv3_4_feat_166.png
│   │           ├── conv3_4_im.png
│   │           ├── conv3_4_im_166.png
│   │           ├── conv4_4_feat.png
│   │           ├── conv4_4_feat_171.png
│   │           ├── conv4_4_im.png
│   │           ├── conv4_4_im_171.png
│   │           ├── conv5_2_feat.png
│   │           └── conv5_2_im.png
│   ├── firstfilter
│   │   ├── README.md
│   │   └── figs
│   │       ├── GoogLeNet.png
│   │       ├── GoogLeNet_filter.png
│   │       ├── GoogLeNet_inception3a.png
│   │       ├── GoogLeNet_inception3b.png
│   │       ├── GoogLeNet_inception4a.png
│   │       ├── GoogLeNet_inception4b.png
│   │       ├── GoogLeNet_inception4c.png
│   │       ├── GoogLeNet_inception4d.png
│   │       ├── GoogLeNet_inception4e.png
│   │       ├── GoogLeNet_inception5a.png
│   │       ├── GoogLeNet_inception5b.png
│   │       └── bk
│   │           ├── GoogLeNet_inception3a.png
│   │           ├── GoogLeNet_inception3b.png
│   │           ├── GoogLeNet_inception4a.png
│   │           ├── GoogLeNet_inception4b.png
│   │           ├── GoogLeNet_inception4c.png
│   │           ├── GoogLeNet_inception4d.png
│   │           ├── GoogLeNet_inception4e.png
│   │           ├── GoogLeNet_inception5a.png
│   │           └── GoogLeNet_inception5b.png
│   ├── grad_cam
│   │   ├── README.md
│   │   └── figs
│   │       ├── comparecam.png
│   │       ├── ex1.png
│   │       └── ex2.png
│   └── guided_backpropagation
│       ├── README.md
│       └── figs
│           ├── gbp.png
│           ├── gbp1.png
│           ├── gbp2.png
│           ├── gbp3.png
│           └── gbp4.png
├── example
│   ├── cam.py
│   ├── config_cam.py
│   ├── config_path.py
│   ├── deconv.py
│   ├── gap.py
│   ├── gradcam.py
│   ├── guided_backpropagation.py
│   ├── invert.py
│   ├── run.sh
│   ├── setup_env.py
│   └── vizfilter.py
├── lib
│   ├── __init__.py
│   ├── dataflow
│   │   ├── __init__.py
│   │   ├── cifar.py
│   │   └── image.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── cam.py
│   │   ├── gap.py
│   │   ├── grad_cam.py
│   │   ├── guided_backpro.py
│   │   └── invert.py
│   ├── nets
│   │   ├── __init__.py
│   │   ├── googlenet.py
│   │   ├── layers.py
│   │   └── vgg.py
│   └── utils
│       ├── __init__.py
│       ├── image.py
│       ├── normalize.py
│       └── viz.py
├── requirements.txt
└── test
    ├── setup_test_env.py
    ├── test_cam.py
    ├── test_gradcam.py
    └── test_guided_backprop.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | # lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.4"
4 | install:
5 | - pip install -r requirements.txt
6 | - pip install flake8
7 | - pip install coveralls
8 | - pip install tensorflow
9 | - pip install -U git+https://github.com/conan7882/DeepVision-tensorflow.git
10 | branches:
11 | only:
12 | - master
13 | # env:
14 | # - TESTCASE=test/test_cam.py
15 | # - TESTCASE=test/test_gradcam.py
16 | # - TESTCASE=test/test_guided_backprop.py
17 | script:
18 | - nosetests test/ --with-coverage --cover-package=. --debug=show
19 | # - coverage combine --append
20 | # - flake8 lib/dataflow/ lib/model/ --ignore=F405,F403,F401,E402,E501
21 | # - COVERAGE_FILE=.coverage_cam coverage run --source=. --omit=*vgg.py test/test_cam.py
22 | # - COVERAGE_FILE=.coverage_gradcam coverage run --source=. --omit=*vgg.py test/test_gradcam.py
23 | # - COVERAGE_FILE=.coverage_guidedback coverage run --source=. --omit=*vgg.py test/test_guided_backprop.py
24 | # # - coverage run --source=. test/test.py
25 | # - coverage combine --append
26 | after_success:
27 | - coveralls
28 | # - codecov
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Qian Ge
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Visualization of Deep Convolutional Neural Networks
2 |
3 |
7 |
8 | - This repository contains implementations of CNN visualization methods from recent papers.
9 | - The source code in the repository can be used to demonstrate the algorithms as well as to test them on your own data.
10 |
11 | ## Requirements
12 | - Python 3.3+
13 | - [Tensorflow 1.3](https://www.tensorflow.org/)
14 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow)
15 |
16 |
17 | ## Algorithms
18 |
19 | - [Visualization of filters and feature maps of GoogLeNet](https://github.com/conan7882/CNN-Visualization/tree/master/doc/firstfilter#visualization-of-filters-and-feature-maps-of-googlenet)
20 | - [Deconvolutional Networks](https://github.com/conan7882/CNN-Visualization/blob/master/doc/deconv/README.md#cnn-feature-visuallization-via-deconvnet-transposed-convolutional-layers) (ECCV'14)
21 | - [Guided back propagation](https://github.com/conan7882/CNN-Visualization/tree/master/doc/guided_backpropagation#guided-backpropagation) (2014)
22 | - [Class Activation Mapping (CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#class-activation-mapping-cam) (CVPR'16)
23 | - [Gradient-weighted Class Activation Mapping (Grad-CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/grad_cam#gradient-weighted-class-activation-mapping-grad-cam) (ICCV'17)
24 |
25 | ## [Visualization of filters and feature maps of GoogLeNet](https://github.com/conan7882/CNN-Visualization/tree/master/doc/firstfilter#visualization-of-filters-and-feature-maps-of-googlenet)
26 | - The most straightforward approach to visualize a CNN is to show the feature maps (activations) and filters.
27 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/firstfilter#visualization-of-filters-and-feature-maps-of-googlenet)
28 |
29 |
30 |
31 |
32 |
33 | ## [Deconvnet](https://github.com/conan7882/CNN-Visualization/blob/master/doc/deconv/README.md#cnn-feature-visuallization-via-deconvnet-transposed-convolutional-layers)
34 | - Pick a specific activation on a feature map, set all other activations to zero, then reconstruct an image by mapping this new feature map back to the input pixel space.
35 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/blob/master/doc/deconv/README.md#cnn-feature-visuallization-via-deconvnet-transposed-convolutional-layers). Some results:
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | ## [Guided back propagation](https://github.com/conan7882/CNN-Visualization/tree/master/doc/guided_backpropagation#guided-backpropagation)
49 |
50 |
51 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/guided_backpropagation#guided-backpropagation). Some results:
52 |
53 | 
54 |
55 | ## [Class Activation Mapping (CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#class-activation-mapping-cam)
56 | - The class activation map highlights the most informative image regions relevant to the predicted class. This map can be obtained by adding a global average pooling layer at the end of convolutional layers.
57 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#class-activation-mapping-cam). Some results:
58 |
59 | 
60 |
61 | ## [Gradient-weighted Class Activation Mapping (Grad-CAM)](https://github.com/conan7882/CNN-Visualization/tree/master/doc/grad_cam#gradient-weighted-class-activation-mapping-grad-cam)
62 | - Grad-CAM generates a class heatmap similar to CAM, but it does not require re-training the model for visualization.
63 | - Details of the implementation and more results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/grad_cam#gradient-weighted-class-activation-mapping-grad-cam). Some results:
64 |
65 | 
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/data/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/cat.jpg
--------------------------------------------------------------------------------
/data/class_test/003.backpack/003_0020.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/class_test/003.backpack/003_0020.jpg
--------------------------------------------------------------------------------
/data/im_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/im_0.png
--------------------------------------------------------------------------------
/data/test_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/data/test_0.png
--------------------------------------------------------------------------------
/doc/cam/README.md:
--------------------------------------------------------------------------------
1 | # Class Activation Mapping (CAM)
2 |
3 | - TensorFlow implementation of [Learning Deep Features for Discriminative Localization](https://arxiv.org/abs/1512.04150) (CVPR'16).
4 | - Caffe implementation by the authors is [here](https://github.com/metalbubble/CAM).
5 | - The class activation map highlights the most informative image regions relevant to the predicted class. This map can be obtained by adding a global average pooling layer at the end of convolutional layers.
6 | - This implementation has been tested on the [Caltech-256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) dataset and can be tested on your own dataset as well.
7 |
8 |
9 |
10 | ## Requirements
11 | - Python 3.3+
12 | - [Tensorflow 1.3](https://www.tensorflow.org/)
13 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow)
14 |
15 | ## Implementation Details
16 |
17 |
18 |
19 | - CAM for the Caltech-256 dataset is obtained by fine-tuning [VGG19](https://arxiv.org/abs/1409.1556).
20 | - CAM models are defined in [`CNN-Visualization/lib/models/cam.py`](../../lib/models/cam.py).
21 | - Example usage of CAM is in [`CNN-Visualization/example/cam.py`](../../example/cam.py) (used for Caltech-256 or other natural image datasets). Directories are set up in [`CNN-Visualization/example/config_cam.py`](../../example/config_cam.py). A minimal sketch of how the class activation map itself is computed is shown below.
22 |
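The map itself is a weighted sum of the final convolutional feature maps, where the channel weights are the fully-connected weights (after global average pooling) for the chosen class. Below is a minimal NumPy sketch of this computation; the function and argument names are illustrative, not the `VGGCAM` API in this repository.

```python
import numpy as np

def class_activation_map(conv_feats, fc_weights, class_idx):
    """Weighted sum of the last conv feature maps for one class.

    conv_feats: [H, W, C] activations of the last conv layer
    fc_weights: [C, num_classes] weights of the fully-connected layer
                that follows the global average pooling layer
    class_idx:  index of the class to visualize
    """
    # weight each channel by its contribution to the chosen class score
    cam = np.tensordot(conv_feats, fc_weights[:, class_idx], axes=([2], [0]))
    # normalize to [0, 1] so the map can be rescaled and overlaid on the input
    cam -= cam.min()
    cam /= cam.max() + 1e-8
    return cam  # [H, W]; upsample to the input size before overlaying
```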
23 | ## Results
24 |
25 |
26 | ### Caltech-256
27 |
28 | Class activation map for class llama at different steps during training
29 | 
30 |
31 | Class activation map for class duck and people after 25 epochs
32 | 
33 |
34 | Class activation map of same image for different classes
35 | 
36 |
37 | ## Observations
38 |
39 | ## Preparation
40 |
41 | 1. Set up directories in `config_cam.py`.
42 |
43 | - Training
44 |
45 | `config.vgg_dir` - directory of pre-trained VGG19 parameters
46 |
47 | `config.data_dir` - directory of training image data
48 |
49 | `config.infer_data_dir` - directory of the image used to infer the class activation map during training (put only one image)
50 |
51 | `config.checkpoint_dir` - directory for saving the trained model (saved every 100 training steps)
52 |
53 | `config.summary_dir` - directory for saving summaries (saved every 10 training steps)
54 |
55 | `config.infer_dir` - directory for saving inference results (saved every 100 training steps)
56 |
57 | - Testing
58 |
59 | `config.model_dir` - directory of trained model parameters
60 |
61 | `config.test_data_dir` - directory of testing images
62 |
63 | `config.result_dir` - directory for saving the testing results
64 |
65 | 2. Download dataset and pre-trained VGG parameters
66 |
67 | - Download [Caltech-256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) dataset and put it in `config.data_dir`.
68 | - Download pre-trained VGG19 model [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19) and put it in `config.vgg_dir`.
69 |
70 | ## Train and test on Caltech-256:
71 |
72 | Go to `CNN-Visualization/example/`, then
73 |
74 | Finetuning pre-trained VGG19 for Caltech-256:
75 |
76 | ```
77 | python cam.py --train --bsize BATCH_SIZE --label INFER_CLASS_LABEL
78 | ```
79 |
80 |
81 | Generate the class activation map using trained parameters
82 |
83 | ```
84 | python cam.py --predict --bsize BATCH_SIZE --model SAVED_MODEL_NAME --label INFER_CLASS_LABEL
85 | ```
86 |
87 | **INFER_CLASS_LABEL** is the label of the class used to generate the inference class activation map.
88 |
89 | - The scaled class activation map will be saved in `config.result_dir` along with a .mat file containing raw data of the map.
90 | - If batch size is greater than 1, the result images of one mini-batch will be saved as one image.
91 | - **Batch size has to be 1 during testing if the testing images have different sizes.** Alternatively, you can resize the images to 224 x 224 by uncommenting `resize = 224,` (line 83). Please refer to the code comments for more detailed parameter settings.
92 |
93 |
94 | ## Train and test on your own dataset:
95 |
96 | Go to `CNN-Visualization/example/`, then
97 |
98 | Dataset requirements:
99 |
100 | 1. Put training images in `config.data_dir`. Images of different classes go in different folders. Uncomment `print(dataset_train.label_dict)` to check the image class labels and the corresponding label indices used for training and testing.
101 | 2. The images have to be color images with 3 channels.
102 | 3. It may not work well on low-resolution images, since all the images are rescaled to 224 x 224 for training.
103 |
104 |
105 | Finetuning pre-trained VGG19 for your own dataset:
106 |
107 | - The number of image classes and the image file type need to be specified:
108 |
109 | ```
110 | python cam.py --train --bsize BATCH_SIZE --label INFER_CLASS_LABEL --nclass NUM_IMAGE_CLASS\
111 | --type IMAGE_FILE_EXTENSION(start with '.')
112 | ```
113 |
114 | Generate the class activation map using trained parameters
115 |
116 | ```
117 | python cam.py --predict --bsize BATCH_SIZE --model SAVED_MODEL_NAME --label INFER_CLASS_LABEL\
118 | --type IMAGE_FILE_EXTENSION(start with '.')
119 | ```
120 |
121 |
122 | ## Author
123 | Qian Ge
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/doc/cam/figs/celtech_change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/cam/figs/celtech_change.png
--------------------------------------------------------------------------------
/doc/cam/figs/celtech_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/cam/figs/celtech_diff.png
--------------------------------------------------------------------------------
/doc/cam/figs/celtech_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/cam/figs/celtech_result.png
--------------------------------------------------------------------------------
/doc/deconv/README.md:
--------------------------------------------------------------------------------
1 | # CNN Feature Visualization via Deconvnet
2 |
3 | - TensorFlow implementation of [
4 | Visualizing and Understanding Convolutional Networks](https://arxiv.org/abs/1311.2901) (ECCV'14).
5 | - The features are visualized by mapping activations back to the input pixel space through several unpooling layers and deconvolutional layers.
6 | - This implementation takes [VGG19](https://arxiv.org/abs/1409.1556) as an example.
7 |
8 | ## Requirements
9 | - Python 3.3+
10 | - [Tensorflow 1.3](https://www.tensorflow.org/)
11 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow)
12 |
13 | ## Implementation Details
14 |
15 | - Deconvnet for VGG19 is defined in [`CNN-Visualization/lib/nets/vgg.py`](https://github.com/conan7882/CNN-Visualization/blob/master/lib/nets/vgg.py#L325).
16 | - Code of example usage of deconvnet visualization is in [`CNN-Visualization/example/deconv.py`](../../example/deconv.py).
17 | - Images are rescaled to a proper size to get rid of size-inconsistency issues when unpooling.
18 | - Feature maps at each level of the CNN are first computed for a single image. Then one activation at a certain level is picked and all other activations at that level are set to zero. Finally, this new feature map is mapped back to the input pixel space.
19 | - Filter weights are shared between the convnet and the deconvnet. The filters of the deconvnet are transposed versions of the convnet filters.
20 | - Switches are stored during the forward pass. A reference implementation of the unpooling layer is [here](https://github.com/tensorflow/tensorflow/issues/2169); a minimal sketch of the idea is shown below.
21 |
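Below is a minimal TensorFlow 1.x sketch of the switch-based pooling/unpooling idea from the linked issue. The shapes and names are illustrative; this is not the exact code used in `lib/nets/vgg.py`.

```python
import tensorflow as tf

def pool_with_switches(x):
    # forward max pooling that also records the argmax "switches"
    return tf.nn.max_pool_with_argmax(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def unpool_with_switches(pooled, switches, output_shape):
    # place each pooled value back at the position stored in its switch;
    # every other position of the reconstructed map stays zero
    flat_updates = tf.reshape(pooled, [-1])
    flat_indices = tf.reshape(switches, [-1, 1])            # int64 flat indices
    flat_size = tf.reduce_prod(tf.cast(output_shape, tf.int64))
    flat_output = tf.scatter_nd(flat_indices, flat_updates,
                                tf.reshape(flat_size, [1]))
    return tf.reshape(flat_output, output_shape)
```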
22 |
23 | ## Results
24 | ### Caltech-256
25 | Left: reconstruction; right: original image. Both are cropped based on the receptive fields of the activations.
26 | #### Top 9 activations of all the feature maps for each layer
27 | #### Across 'people' class images
28 |
29 | Layer 1_2
30 |
31 |
32 |
33 |
34 | Layer 2_2
35 |
36 |
37 |
38 |
39 | Layer 3_4
40 |
41 |
42 |
43 |
44 | Layer 4_4
45 |
46 |
47 |
48 |
49 | Layer 5_2
50 |
51 |
52 |
53 |
54 |
55 | #### Across 'dog' class images
56 |
57 | Layer 1_2
58 |
59 |
60 |
61 |
62 | Layer 2_2
63 |
64 |
65 |
66 |
67 | Layer 3_4
68 |
69 |
70 |
71 |
72 | Layer 4_4 (The network is sensitive to both dog and human faces at this level.)
73 |
74 |
75 |
76 |
77 | Layer 5_2 (The network is sensitive to both dog and human faces at this level.)
78 |
79 |
80 |
81 |
82 |
83 |
84 | #### Top 9 activations of a fixed feature map for each layer
85 | Layer 2_2, Feature map 59, People (This feature map has high activations for curves.)
86 |
87 |
88 |
89 |
90 | Layer 3_4, Feature map 166, People (This feature map is sensitive to 'O' shape patterns.)
91 |
92 |
93 |
94 |
95 |
96 | Layer 4_4, Feature map 349, Dog (It seems like this feature map is sensitive to faces (eyes with a nose).)
97 |
98 |
99 |
100 |
101 | Layer 4_4, Feature map 171, Dog (This feature map is sensitive to dog noses.)
102 |
103 |
104 |
105 |
106 |
107 | Layer 4_4, Feature map 171, People (This is the same feature map as in the figure above, but on 'people' images. Besides noses on dog faces, this feature map is also sensitive to some parts of human faces, such as eyes and mouths.)
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | ## Preparation
117 |
118 | 1. Set up directories in `CNN-Visualization/example/config_path.py`.
119 |
120 | - `im_path` - directory of testing image data
121 | - `vgg_path` - directory of pre-trained VGG19 parameters
122 | - `save_path` - directory of saving result images
123 |
124 | 2. Download the pre-trained VGG parameters
125 |
126 | - Download the pre-trained VGG19 model [here](https://www.dropbox.com/sh/dad57t3hl60jeb0/AADlcUshCnmN2bAZdgdkmxDna?dl=0). It was originally downloaded from [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19). Put it in `vgg_path`.
127 |
128 |
129 | ## Run Script:
130 |
131 | Go to `CNN-Visualization/example/`
132 |
133 | To get the reconstruction image of top 9 activations of a specific layer (layer 4_4) across the test set (JPEG files):
134 |
135 | ```
136 | python deconv.py --feat conv4_4 --imtype .jpg
137 | ```
138 |
139 | - All the test images will be rescaled to 224 x 224 before being fed into VGG19.
140 | - `--feat` specifies the name of the feature layer. It can be `conv1_1`, `conv2_1`, etc. The full list can be found [here](https://github.com/conan7882/CNN-Visualization/blob/master/lib/nets/vgg.py#L374).
141 | - `--imtype` specifies the test image type. It can be `.jpg`, `.png` or another image type.
142 | - The cropped and merged reconstruction and original images are saved in `save_path` as
143 | **LAYER_NAME_feat.png**
144 | and **LAYER_NAME_im.png**
145 |
146 | To get the reconstruction image of top 9 activations of a specific feature map (350) at a specific layer (layer 4_4) across the test set (JPEG files):
147 |
148 | ```
149 | python deconv.py --feat conv4_4 --imtype .jpg --id 350
150 | ```
151 |
152 | - `--id` specifies which feature map to check.
153 |
154 | ## Author
155 | Qian Ge
156 |
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv1_2_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv1_2_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv1_2_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv1_2_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv2_2_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv2_2_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv2_2_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv2_2_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv3_4_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv3_4_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv3_4_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv3_4_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv4_4_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv4_4_feat_171.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_feat_171.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv4_4_feat_349.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_feat_349.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv4_4_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv4_4_im_171.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_im_171.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv4_4_im_349.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv4_4_im_349.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv5_2_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv5_2_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/dog/conv5_2_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/dog/conv5_2_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv1_2_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv1_2_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv1_2_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv1_2_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv2_2_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv2_2_feat_59.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_feat_59.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv2_2_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv2_2_im_59.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv2_2_im_59.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv3_4_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv3_4_feat_166.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_feat_166.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv3_4_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv3_4_im_166.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv3_4_im_166.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv4_4_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv4_4_feat_171.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_feat_171.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv4_4_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_im.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv4_4_im_171.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv4_4_im_171.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv5_2_feat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv5_2_feat.png
--------------------------------------------------------------------------------
/doc/deconv/figs/people/conv5_2_im.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/deconv/figs/people/conv5_2_im.png
--------------------------------------------------------------------------------
/doc/firstfilter/README.md:
--------------------------------------------------------------------------------
1 | # Visualization of filters and feature maps of GoogLeNet
2 |
3 | - The most straightforward approach to visualize a CNN is to show the feature maps (activations) and filters.
4 | - Here are some examples of visualizations of the first-layer filters (7 x 7 x 3 x 64) and feature maps of the pre-trained GoogLeNet; a minimal sketch of how such a filter grid can be assembled is shown below.
5 |
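The filter image is essentially the first conv layer's weight tensor, normalized per filter and tiled into a grid. Below is a minimal NumPy sketch of that tiling; the names are illustrative, not the repository's viz utilities.

```python
import numpy as np

def filter_grid(weights, pad=1):
    """Tile first-layer filters [7, 7, 3, 64] into a single grid image."""
    h, w, c, n = weights.shape
    rows = cols = int(np.ceil(np.sqrt(n)))              # 8 x 8 grid for 64 filters
    grid = np.ones((pad + rows * (h + pad), pad + cols * (w + pad), c))
    for idx in range(n):
        f = weights[..., idx]
        f = (f - f.min()) / (f.max() - f.min() + 1e-8)  # normalize each filter
        r, col = divmod(idx, cols)
        y, x = pad + r * (h + pad), pad + col * (w + pad)
        grid[y:y + h, x:x + w, :] = f
    return grid  # save with any image library after scaling to [0, 255]
```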
6 | ## Requirements
7 | - Python 3.3+
8 | - [Tensorflow 1.3](https://www.tensorflow.org/)
9 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow)
10 |
11 | ## Results
12 |
13 |
14 | ### First layer filters (7 x 7 x 3 x 64)
15 |
16 | We can see some high-frequency grayscale features as well as some low-frequency color features.
17 |
18 |
19 |
20 |
21 |
22 | ### Feature maps with ReLU (Randomly choose 100 feature maps at each layer for display)
23 |
24 | - Input image
25 |
26 |
27 |
28 |
29 | - Layer 3 (Inception3a and Inception3b)
30 |
31 |
32 |
33 |
34 |
35 | - Layer 4 (Inception4a - Inception4e)
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 | - Layer 5 (Inception5a and Inception5b)
45 |
46 |
47 |
48 |
49 |
50 |
51 | ## Usage
52 | ### Download pre-trained model
53 | Download the pre-trained parameters [here](http://www.deeplearningmodel.net/).
54 | ### Config path
55 | All directories are set up in [`example/config_path.py`](../../example/config_path.py).
56 |
57 | - `googlenet_path` is the path of the pre-trained model.
58 | - `im_path` is the directory of testing images.
59 | - `save_path` is the directory for saving the filter images.
60 |
61 | ## Run the code:
62 |
63 | Go to `CNN-Visualization/example/`, then
64 |
65 |
66 | ```
67 | python vizfilter.py --feature --filter --im IMAGE_FILENAME
68 | ```
69 |
70 | Image will be saved in `config.save_path`
71 |
72 |
73 |
74 | ## Author
75 | Qian Ge
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_filter.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception3a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception3a.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception3b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception3b.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception4a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4a.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception4b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4b.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception4c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4c.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4d.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception4e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception4e.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception5a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception5a.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/GoogLeNet_inception5b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/GoogLeNet_inception5b.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception3a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception3a.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception3b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception3b.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception4a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4a.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception4b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4b.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception4c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4c.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4d.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception4e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception4e.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception5a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception5a.png
--------------------------------------------------------------------------------
/doc/firstfilter/figs/bk/GoogLeNet_inception5b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/firstfilter/figs/bk/GoogLeNet_inception5b.png
--------------------------------------------------------------------------------
/doc/grad_cam/README.md:
--------------------------------------------------------------------------------
1 | # Gradient-weighted Class Activation Mapping (Grad-CAM)
2 |
3 | - TensorFlow implementation of [Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization](https://arxiv.org/abs/1610.02391) (ICCV'17).
4 | - Torch implementation by the authors is [here](https://github.com/ramprs/grad-cam).
5 | - Grad-CAM generates a heatmap similar to [CAM](https://arxiv.org/abs/1512.04150), but it does not require re-training the model.
6 | - This implementation takes [VGG19](https://arxiv.org/abs/1409.1556) as an example.
7 |
8 | ## Requirements
9 | - Python 3.3+
10 | - [Tensorflow 1.3](https://www.tensorflow.org/)
11 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow)
12 |
13 | ## Implementation Details
14 |
15 |
16 |
17 | - [VGG19](https://arxiv.org/abs/1409.1556) is used for visualization. The model is defined in [`CNN-Visualization/lib/nets/vgg.py`](../../lib/nets/vgg.py).
18 | - Grad-CAM model is defined in [`CNN-Visualization/lib/models/grad_cam.py`](../../lib/models/grad_cam.py).
19 | - Example usage of Grad-CAM is in [`CNN-Visualization/example/gradcam.py`](../../example/gradcam.py).
20 |
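The computation itself is short: the gradients of the class score with respect to a conv layer's feature maps are global-average-pooled into channel weights, and the weighted sum of the feature maps is passed through a ReLU. Below is a minimal TensorFlow 1.x sketch; the tensor names are illustrative, not the code in `lib/models/grad_cam.py`.

```python
import tensorflow as tf

def grad_cam_map(class_logits, conv_feats, class_idx):
    """Grad-CAM heatmap for one class.

    class_logits: [batch, num_classes] pre-softmax scores
    conv_feats:   [batch, H, W, C] activations of the chosen conv layer
    class_idx:    index of the class to explain
    """
    class_score = class_logits[:, class_idx]
    # gradients of the class score w.r.t. the conv feature maps
    grads = tf.gradients(class_score, conv_feats)[0]
    # channel weights: global average pooling of the gradients (TF 1.x keep_dims)
    weights = tf.reduce_mean(grads, axis=[1, 2], keep_dims=True)   # [batch,1,1,C]
    # weighted sum over channels followed by ReLU
    return tf.nn.relu(tf.reduce_sum(weights * conv_feats, axis=-1))  # [batch,H,W]
```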
21 | ## Results
22 |
23 | Left to right: Original Image, Grad-CAM, Guided Grad-CAM, Grad-CAM, Guided Grad-CAM
24 | 
25 | 
26 |
27 | Comparison with CAM using the same images (Caltech-256); the CAM results can be found [here](https://github.com/conan7882/CNN-Visualization/tree/master/doc/cam#caltech-256):
28 | **ImageNet1000 does not have a class 'top hat', so the class 'cowboy hat' is used here, which performs relatively poorly at finding the hat. Also note that the pre-trained VGG19 is not trained on Caltech-256.**
29 |
30 | 
31 |
32 |
33 |
34 |
35 |
36 | ## Preparation
37 |
38 | 1. Set up directories in `CNN-Visualization/example/gradcam.py`.
39 |
40 | - `IM_PATH` - directory of testing image data
41 | - `VGG_PATH` - directory of pre-trained VGG19 parameters
42 | - `SAVE_DIR` - directory of saving result images
43 |
44 | 2. Download the pre-trained VGG parameters
45 |
46 | - Download pre-trained VGG19 model [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19) and put it in `VGG_PATH`.
47 |
48 |
49 | 3. Testing images
50 |
51 | - Put testing images in `IM_PATH`.
52 | - Set class labels on line 56 in `CNN-Visualization/example/gradcam.py`. For example, the setting below will generate Grad-CAM and Guided Grad-CAM for class 355 (llama), 543 (dumbbell), 605 (iPod) and 515 (hat). More labels for ImageNet1000 can be found [here](https://github.com/conan7882/VGG-tensorflow/blob/master/imageNetLabel.txt).
53 |
54 | ```
55 | class_id = [355, 543, 605, 515]
56 | ```
57 |
58 | - Change the image type to the corresponding type in the function below (line 84 in `CNN-Visualization/example/gradcam.py`) if the testing images are not JPEG files.
59 |
60 | ```
61 | input_im = ImageFromFile('.jpg', data_dir=IM_PATH, num_channel=3, shuffle=False)
62 | ```
63 |
64 |
65 | ## Run Script:
66 |
67 | To get the Grad-CAM maps for all the images in `IM_PATH`, go to `CNN-Visualization/example/` and run:
68 |
69 | ```
70 | python gradcam.py
71 | ```
72 |
73 | - All the test images will be rescaled so that the smallest side is 224 before being fed into VGG19.
74 | - Grad-CAM and Guided Grad-CAM will be saved in `SAVE_DIR` as **gradcam_IDX_class_CLASSLABEL.png** and **guided_gradcam_IDX_class_CLASSLABEL.png**
75 |
76 |
77 | ## Author
78 | Qian Ge
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/doc/grad_cam/figs/comparecam.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/grad_cam/figs/comparecam.png
--------------------------------------------------------------------------------
/doc/grad_cam/figs/ex1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/grad_cam/figs/ex1.png
--------------------------------------------------------------------------------
/doc/grad_cam/figs/ex2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/grad_cam/figs/ex2.png
--------------------------------------------------------------------------------
/doc/guided_backpropagation/README.md:
--------------------------------------------------------------------------------
1 | # Guided Backpropagation
2 |
3 | - TensorFlow implementation of [Striving for Simplicity: The All Convolutional Net](https://arxiv.org/abs/1412.6806) (2014).
4 | - Guided backpropagation generates clearer visualizations than [deconvnet](https://arxiv.org/abs/1311.2901) for higher layers.
5 |
6 |
7 | ## Requirements
8 | - Python 3.3+
9 | - [Tensorflow 1.3](https://www.tensorflow.org/)
10 | - [TensorCV](https://github.com/conan7882/DeepVision-tensorflow)
11 |
12 | ## Implementation Details
13 |
14 |
15 |
16 | - [VGG19](https://arxiv.org/abs/1409.1556) is used for visualization. The model is defined in [`CNN-Visualization/lib/nets/vgg.py`](../../lib/nets/vgg.py).
17 | - The guided backpropagation computation class is defined in [`CNN-Visualization/lib/models/guided_backpro.py`](../../lib/models/guided_backpro.py).
18 | - Example usage is in [`CNN-Visualization/example/guided_backpropagation.py`](../../example/guided_backpropagation.py).
19 |
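At its core, guided backpropagation is a standard backward pass with a modified ReLU gradient: the gradient is propagated only where both the forward activation and the incoming gradient are positive. Below is a minimal TensorFlow 1.x sketch of the gradient override; the tiny stand-in network is illustrative, the repository builds the VGG19 graph inside this context instead.

```python
import tensorflow as tf

@tf.RegisterGradient('GuidedRelu')
def _guided_relu_grad(op, grad):
    # pass the gradient only where the forward ReLU output is positive
    # and the incoming gradient is positive
    return tf.where(op.outputs[0] > 0., tf.nn.relu(grad), tf.zeros_like(grad))

graph = tf.Graph()
with graph.as_default(), graph.gradient_override_map({'Relu': 'GuidedRelu'}):
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    # tiny stand-in network; every ReLU built here uses the guided gradient
    feat = tf.layers.conv2d(images, 8, 3, activation=tf.nn.relu)
    score = tf.reduce_max(tf.layers.dense(tf.reduce_mean(feat, axis=[1, 2]), 10),
                          axis=1)
    # gradient of the top class score w.r.t. the input is the saliency map
    saliency = tf.gradients(score, images)[0]
```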
20 |
21 | ## Results
22 |
28 |
29 | ## Observations
30 |
31 | ## Preparation
32 |
33 | 1. Set up directories in `CNN-Visualization/example/guided_backpropagation.py`.
34 |
35 | - `IM_PATH` - directory of testing image data
36 | - `VGG_PATH` - directory of pre-trained VGG19 parameters
37 | - `SAVE_DIR` - directory of saving result images
38 |
39 | 2. Download the pre-trained VGG parameters
40 |
41 | - Download pre-trained VGG19 model [here](https://github.com/machrisaa/tensorflow-vgg#tensorflow-vgg16-and-vgg19) and put it in `VGG_PATH`.
42 |
43 |
44 | 3. Testing images
45 |
46 | - Put testing images in `IM_PATH`.
47 | - Change the image type to the corresponding type if the testing images are not JPEG files:
48 |
49 | ```
50 | input_im = ImageFromFile('.jpg', data_dir=IM_PATH, num_channel=3, shuffle=False)
51 | ```
52 |
53 |
54 | ## Run Script:
55 |
56 | To get the guided backpropagation maps for all the images in `IM_PATH`, go to `CNN-Visualization/example/` and run:
57 |
58 | ```
59 | python guided_backpropagation.py
60 | ```
61 |
62 | - Results will be saved in `SAVE_DIR` as **map_IDX_class_PREDICT_LABEL.png**.
63 |
64 |
65 | ## Author
66 | Qian Ge
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/doc/guided_backpropagation/figs/gbp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp.png
--------------------------------------------------------------------------------
/doc/guided_backpropagation/figs/gbp1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp1.png
--------------------------------------------------------------------------------
/doc/guided_backpropagation/figs/gbp2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp2.png
--------------------------------------------------------------------------------
/doc/guided_backpropagation/figs/gbp3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp3.png
--------------------------------------------------------------------------------
/doc/guided_backpropagation/figs/gbp4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/doc/guided_backpropagation/figs/gbp4.png
--------------------------------------------------------------------------------
/example/cam.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: cam.py
4 | # Author: Qian Ge
5 |
6 | import argparse
7 |
8 | from tensorcv.dataflow.image import ImageLabelFromFolder, ImageFromFile
9 | from tensorcv.callbacks import *
10 | from tensorcv.train.config import TrainConfig
11 | from tensorcv.train.simple import SimpleFeedTrainer
12 | from tensorcv.predicts.config import PridectConfig
13 | from tensorcv.predicts.simple import SimpleFeedPredictor
14 | from tensorcv.predicts import *
15 |
16 | import setup_env
17 | import config_cam as config_path
18 | from models.cam import VGGCAM
19 |
20 | NUM_CHANNEL = 3
21 |
22 |
23 | def get_config(FLAGS):
24 | # data for training
25 | dataset_train = ImageLabelFromFolder(FLAGS.type,
26 | data_dir=config_path.data_dir,
27 | num_class=FLAGS.nclass,
28 | resize=224,
29 | num_channel=NUM_CHANNEL)
30 |
31 | # Print image class name and label
32 | # print(dataset_train.label_dict)
33 |
34 | # Since the aim of training is visualization of the class map, all the images
35 | # are used for training. Using the training set as validation set is just
36 | # for checking whether the training works correctly.
37 | dataset_val = ImageLabelFromFolder(FLAGS.type,
38 | data_dir=config_path.data_dir,
39 | num_class=FLAGS.nclass,
40 | resize=224,
41 | num_channel=NUM_CHANNEL)
42 |
43 | # Check accuracy during training using training set
44 | inference_list_validation = InferScalars('accuracy/result',
45 | 'test_accuracy')
46 |
47 | training_callbacks = [
48 | ModelSaver(periodic=100),
49 | TrainSummary(key='train', periodic=50),
50 | FeedInferenceBatch(dataset_val, batch_count=10, periodic=100,
51 | inferencers=inference_list_validation),
52 | CheckScalar(['accuracy/result', 'loss/result'], periodic=10)]
53 |
54 | inspect_class = None
55 | if FLAGS.label > 0:
56 | inspect_class = FLAGS.label
57 | # Image used for inferring the class activation map during training
58 | dataset_test = ImageFromFile(FLAGS.type,
59 | data_dir=config_path.infer_data_dir,
60 | shuffle=False,
61 | resize=224,
62 | num_channel=NUM_CHANNEL)
63 | # Check the class activation map during training
64 | inference_list_test = [
65 | InferOverlay(['classmap/result', 'image'], ['map', 'image'],
66 | color=True),
67 | InferImages('classmap/result', 'map', color=True)]
68 | training_callbacks += FeedInference(dataset_test, periodic=50,
69 | infer_batch_size=1,
70 | inferencers=inference_list_test),
71 |
72 | return TrainConfig(
73 | dataflow=dataset_train,
74 | model=VGGCAM(num_class=FLAGS.nclass,
75 | inspect_class=inspect_class,
76 | learning_rate=0.001, is_load=True,
77 | pre_train_path=config_path.vgg_dir),
78 | monitors=TFSummaryWriter(),
79 | callbacks=training_callbacks,
80 | batch_size=FLAGS.bsize,
81 | max_epoch=25,
82 | summary_periodic=50,
83 | default_dirs=config_path)
84 |
85 |
86 | def get_predict_config(FLAGS):
87 | dataset_test = ImageFromFile(FLAGS.type,
88 | data_dir=config_path.test_data_dir,
89 | shuffle=False,
90 | resize=224,
91 | num_channel=NUM_CHANNEL)
92 | # dataset_test = ImageLabelFromFolder('.jpg',
93 | # data_dir = config_path.data_dir,
94 | # num_class = FLAGS.nclass,
95 | # resize = 224,
96 | # num_channel = NUM_CHANNEL)
97 | prediction_list = [
98 | # PredictionScalar(['pre_label'], ['label']),
99 | # PredictionMeanScalar('accuracy/result', 'test_accuracy'),
100 | PredictionMat('classmap/result', ['test']),
101 | PredictionOverlay(['classmap/result', 'image'], ['map', 'image'],
102 | color=True, merge_im=True),
103 | PredictionImage(['image'], ['image'], color=True, merge_im=True)]
104 |
105 | return PridectConfig(
106 | dataflow=dataset_test,
107 | model=VGGCAM(num_class=FLAGS.nclass, inspect_class=FLAGS.label,
108 | is_load=True, pre_train_path=config_path.vgg_dir),
109 | model_name=FLAGS.model,
110 | predictions=prediction_list,
111 | batch_size=FLAGS.bsize,
112 | default_dirs=config_path)
113 |
114 |
115 | def get_args():
116 | parser = argparse.ArgumentParser()
117 | parser.add_argument('--bsize', default=32, type=int)
118 | parser.add_argument('--label', default=-1, type=int,
119 | help='Label of inspect class.')
120 | parser.add_argument('--nclass', default=257, type=int,
121 | help='number of image class')
122 |
123 | parser.add_argument('--predict', action='store_true',
124 | help='Run prediction')
125 | parser.add_argument('--train', action='store_true',
126 | help='Train the model')
127 |
128 | parser.add_argument('--type', default='.jpg', type=str,
129 | help='image type for training and testing')
130 |
131 | parser.add_argument('--model', type=str,
132 | help='file name of the trained model')
133 |
134 | return parser.parse_args()
135 |
136 |
137 | if __name__ == '__main__':
138 | FLAGS = get_args()
139 | if FLAGS.train:
140 | config = get_config(FLAGS)
141 | SimpleFeedTrainer(config).train()
142 | if FLAGS.predict:
143 | config = get_predict_config(FLAGS)
144 | SimpleFeedPredictor(config).run_predict()
145 |
146 | # 0.6861924529075623
147 |
--------------------------------------------------------------------------------
/example/config_cam.py:
--------------------------------------------------------------------------------
1 | # File: config_cam.py
2 | # Author: Qian Ge
3 |
4 | # directory of pre-trained vgg parameters
5 | vgg_dir = '../../data/pretrain/vgg/vgg19.npy'
6 |
7 | # directory of training data
8 | data_dir = '../../data/dataset/256_ObjectCategories/'
9 |
10 | # directory of testing data
11 | test_data_dir = '../data/'
12 |
13 | # directory of inference data
14 | infer_data_dir = '../data/'
15 |
16 | # directory for saving inference data
17 | infer_dir = '../../data/tmp/'
18 |
19 | # directory for saving summary
20 | summary_dir = '../../data/tmp/'
21 |
22 | # directory for saving checkpoint
23 | checkpoint_dir = '../../data/tmp/'
24 |
25 | # directory for restoring checkpoint
26 | model_dir = '../../data/tmp/'
27 |
28 | # directory for saving prediction results
29 | result_dir = '../../data/tmp/'
30 |
--------------------------------------------------------------------------------
/example/config_path.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: config_path.py
4 | # Author: Qian Ge
5 |
6 | # directory of testing images
7 | # im_path = '../data/'
8 | im_path = '../data/dataset/256_ObjectCategories/159.people/'
9 |
10 | # folder for saving results
11 | save_path = ''
12 |
13 | # directory of pre-trained googlenet parameters
14 | googlenet_path = '../pretrained/googlenet.npy'
15 | vgg_path = '../pretrain/vgg/vgg19.npy'
16 |
--------------------------------------------------------------------------------
/example/deconv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: deconv.py
4 | # Author: Qian Ge
5 |
6 | import os
7 | import scipy.misc
8 | import argparse
9 | import numpy as np
10 | import tensorflow as tf
11 | from tensorcv.dataflow.image import ImageFromFile
12 |
13 | import config_path as config
14 |
15 | import sys
16 | sys.path.append('../')
17 | from lib.nets.vgg import DeconvBaseVGG19, BaseVGG19
18 | import lib.utils.viz as viz
19 | import lib.utils.normalize as normlize
20 | import lib.utils.image as uim
21 |
22 |
23 | IM_SIZE = 224
24 |
25 | def get_parse():
26 | parser = argparse.ArgumentParser()
27 |
28 | parser.add_argument('--imtype', type=str, default='.jpg',
29 | help='Image type')
30 | parser.add_argument('--feat', type=str, required=True,
31 | help='Choice of feature map layer')
32 | parser.add_argument('--id', type=int, default=None,
33 | help='feature map id')
34 |
35 | return parser.parse_args()
36 |
37 | def im_scale(im):
38 | return uim.im_rescale(im, [IM_SIZE, IM_SIZE])
39 |
40 | if __name__ == '__main__':
41 | FLAGS = get_parse()
42 |
43 | input_im = ImageFromFile(FLAGS.imtype,
44 | data_dir=config.im_path,
45 | num_channel=3,
46 | shuffle=False,
47 | pf=im_scale,
48 | )
49 | input_im.set_batch_size(1)
50 |
51 | vizmodel = DeconvBaseVGG19(config.vgg_path,
52 | feat_key=FLAGS.feat,
53 | pick_feat=FLAGS.id)
54 |
55 | vizmap = vizmodel.layers['deconvim']
56 | feat_op = vizmodel.feats
57 | max_act_op = vizmodel.max_act
58 |
59 | act_size = vizmodel.receptive_size[FLAGS.feat]
60 | act_scale = vizmodel.stride[FLAGS.feat]
61 |
62 | with tf.Session() as sess:
63 | sess.run(tf.global_variables_initializer())
64 |
65 | max_act_list = []
66 | while input_im.epochs_completed < 1:
67 | im = input_im.next_batch()[0]
68 | max_act = sess.run(max_act_op, feed_dict={vizmodel.im: im})
69 | max_act_list.append(max_act)
70 |
71 | max_list = np.argsort(max_act_list)[::-1]
72 | im_file_list = input_im.get_data_list()[0]
73 |
74 | feat_list = []
75 | im_list = []
76 | for i in range(0, 10):
77 | im = input_im.next_batch()[0]
78 | file_path = os.path.join(config.im_path, im_file_list[max_list[i]])
79 | im = np.array([im_scale(scipy.misc.imread(file_path, mode='RGB'))])
80 |
81 | cur_vizmap, feat_map, max_act = sess.run(
82 | [vizmap, feat_op, max_act_op], feed_dict={vizmodel.im: im})
83 |
84 | act_ind = np.nonzero((feat_map))
85 | print('Location of max activation {}'.format(act_ind))
86 | # get only the first nonzero element
87 | act_c = (act_ind[1][0], act_ind[2][0])
88 | min_x = max(0, int(act_c[0] * act_scale - act_size / 2))
89 | max_x = min(IM_SIZE, int(act_c[0] * act_scale + act_size / 2))
90 | min_y = max(0, int(act_c[1] * act_scale - act_size / 2))
91 | max_y = min(IM_SIZE, int(act_c[1] * act_scale + act_size / 2))
92 |
93 | im_crop = im[0, min_x:max_x, min_y:max_y, :]
94 | act_crop = cur_vizmap[0, min_x:max_x, min_y:max_y, :]
95 |
96 | pad_size = (act_size - im_crop.shape[0], act_size - im_crop.shape[1])
97 | im_crop = np.pad(im_crop,
98 | ((0, pad_size[0]), (0, pad_size[1]), (0, 0)),
99 | 'constant',
100 | constant_values=0)
101 | act_crop = np.pad(act_crop,
102 | ((0, pad_size[0]),(0, pad_size[1]), (0, 0)),
103 | 'constant',
104 | constant_values=0)
105 |
106 | feat_list.append(act_crop)
107 | im_list.append(im_crop)
108 |
109 | viz.viz_filters(np.transpose(feat_list, (1, 2, 3, 0)),
110 | [3, 3],
111 | os.path.join(config.save_path, '{}_feat.png'.format(FLAGS.feat)),
112 | gap=2,
113 | gap_color=0,
114 | nf=normlize.indentity,
115 | shuffle=False)
116 | viz.viz_filters(np.transpose(im_list, (1, 2, 3, 0)),
117 | [3, 3],
118 | os.path.join(config.save_path, '{}_im.png'.format(FLAGS.feat)),
119 | gap=2,
120 | gap_color=0,
121 | nf=normlize.indentity,
122 | shuffle=False)
123 |
124 |
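125 |
126 | # The min/max window computed above maps a feature-map coordinate back to an
127 | # input-image patch: the patch is centered at act_c * stride and spans the
128 | # layer's receptive-field size. A minimal standalone restatement (the function
129 | # name and the example numbers are illustrative only):
130 | #
131 | # def receptive_window(act_c, stride, receptive_size, im_size=IM_SIZE):
132 | #     min_r = max(0, int(act_c * stride - receptive_size / 2))
133 | #     max_r = min(im_size, int(act_c * stride + receptive_size / 2))
134 | #     return min_r, max_r
135 | #
136 | # e.g. receptive_window(act_c=5, stride=8, receptive_size=100) returns (0, 90)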
--------------------------------------------------------------------------------
/example/gap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: gap.py
4 | # Author: Qian Ge
5 |
6 | import os
7 | import argparse
8 | import numpy as np
9 | import tensorflow as tf
10 | # from tensorcv.dataflow.dataset.CIFAR import CIFAR
11 | import sys
12 | sys.path.append('../')
13 | from lib.dataflow.cifar import CIFAR
14 | from lib.models.gap import GAPNet
15 | import lib.utils.viz as viz
16 | import lib.utils.normalize as normlize
17 |
18 | # data_path = '/Users/gq/workspace/Dataset/cifar-10-batches-py/'
19 | # save_path = '/Users/gq/workspace/Tmp/test/'
20 |
21 | data_path = '/home/qge2/workspace/data/dataset/cifar/'
22 | save_path = '/home/qge2/workspace/data/out/gap/'
23 |
24 | def get_args():
25 | parser = argparse.ArgumentParser()
26 | parser.add_argument('--lr', default=0.0005, type=float)
27 | parser.add_argument('--dropout', default=0.5, type=float)
28 | parser.add_argument('--wd', default=0, type=float)
29 | parser.add_argument('--epoch', default=150, type=int)
30 |
31 | parser.add_argument('--train', action='store_true')
32 | parser.add_argument('--viz', action='store_true')
33 |
34 | return parser.parse_args()
35 |
36 |
37 | if __name__ == '__main__':
38 | FLAGS = get_args()
39 | max_epoch = FLAGS.epoch
40 | lr = FLAGS.lr
41 | dropout = FLAGS.dropout
42 |
43 | train_data = CIFAR(data_dir=data_path,
44 | batch_dict_name=['im', 'label'],
45 | data_type='train',
46 | substract_mean=False)
47 | train_data.setup(epoch_val=0, batch_size=128)
48 | valid_data = CIFAR(data_dir=data_path,
49 | shuffle=False,
50 | batch_dict_name=['im', 'label'],
51 | data_type='valid',
52 | # channel_mean=train_data.channel_mean,
53 | substract_mean=False)
54 | valid_data.setup(epoch_val=0, batch_size=128)
55 |
56 | # print(train_data.next_batch_dict())
57 |
58 | im = tf.placeholder(tf.float32, [None, 32, 32, 3], name='im')
59 | label = tf.placeholder(tf.int64, [None], name='label')
60 | input_dict = {'input': im, 'label': label}
61 |
62 | model = GAPNet(num_class=10, wd=FLAGS.wd)
63 | model.create_model(input_dict)
64 |
65 | train_op = model.get_train_op()
66 | loss_op = model.get_loss()
67 | accuracy_op = model.get_accuracy()
68 |
69 | saver = tf.train.Saver()
70 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
71 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
72 | sess.run(tf.global_variables_initializer())
73 |
74 | if FLAGS.viz:
75 | saver.restore(sess, '{}epoch_{}'.format(save_path, 69))
76 | valid_data.setup(epoch_val=0, batch_size=50)
77 | batch_data = valid_data.next_batch_dict()
78 | maps = sess.run(model.layer['feature'],
79 | feed_dict={im: batch_data['im']})
80 | print(batch_data['label'])
81 |
82 | viz.viz_filters(
83 | batch_data['im'].transpose(1, 2, 3, 0),
84 | [10, 1],
85 | os.path.join(save_path, 'im.png'),
86 | gap=2,
87 | gap_color=10,
88 | shuffle=False,
89 | # nf=normlize.norm_range
90 | )
91 |
92 | for idx, cur_map in enumerate(maps):
93 | viz.viz_filters(
94 | cur_map,
95 | [1, 10],
96 | os.path.join(save_path, 'maps_{}.png'.format(idx)),
97 | gap=2,
98 | gap_color=10,
99 | shuffle=False,
100 | # nf=normlize.norm_range
101 | )
102 |
103 | if FLAGS.train:
104 | loss_sum = 0
105 | acc_sum = 0
106 | epoch_id = 0
107 | # for epoch_id in range(0, max_epoch):
108 | epoch_step = 0
109 | while epoch_id < max_epoch:
110 | epoch_step += 1
111 | cur_epoch = train_data.epochs_completed
112 | if epoch_step % int(train_data.batch_step / 10) == 0:
113 | print('loss: {}, acc: {}'\
114 | .format(
115 | loss_sum * 1.0 / epoch_step,
116 | acc_sum * 1.0 / epoch_step))
117 | if cur_epoch > epoch_id:
118 | saver.save(sess, '{}epoch_{}'.format(save_path, epoch_id))
119 | print('epoch: {}, lr: {}, loss: {}, acc: {}'\
120 | .format(epoch_id,
121 | lr,
122 | loss_sum * 1.0 / epoch_step,
123 | acc_sum * 1.0 / epoch_step))
124 | loss_sum = 0
125 | acc_sum = 0
126 | epoch_step = 0
127 | epoch_id = cur_epoch
128 |
129 | if cur_epoch >= 50:
130 | lr = FLAGS.lr / 10
131 | if cur_epoch >= 100:
132 | lr = FLAGS.lr / 100
133 |
134 | model.set_is_training(False)
135 | valid_acc_sum = 0
136 | valid_step = 0
137 | while valid_data.epochs_completed < 1:
138 | valid_step += 1
139 | batch_data = valid_data.next_batch_dict()
140 | acc = sess.run(accuracy_op,
141 | feed_dict={model.dropout: 1.0,
142 | im: batch_data['im'],
143 | label: batch_data['label'],})
144 | valid_acc_sum += acc
145 | print('valid acc: {}'.format(valid_acc_sum * 1.0 / valid_step))
146 | model.set_is_training(True)
147 | valid_data.setup(epoch_val=0, batch_size=128)
148 |
149 |
150 | batch_data = train_data.next_batch_dict()
151 | _, loss, acc = sess.run([train_op, loss_op, accuracy_op],
152 | feed_dict={model.lr: lr,
153 | model.dropout: dropout,
154 | im: batch_data['im'],
155 | label: batch_data['label']})
156 | loss_sum += loss
157 | acc_sum += acc
158 |
--------------------------------------------------------------------------------
/example/gradcam.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: gradcam.py
4 | # Author: Qian Ge
5 |
6 | from itertools import count
7 |
8 | import tensorflow as tf
9 | import numpy as np
10 | from tensorcv.dataflow.image import ImageFromFile
11 | from tensorcv.utils.viz import image_overlay, save_merge_images
12 |
13 | import setup_env
14 | from nets.vgg import VGG19_FCN
15 | from models.guided_backpro import GuideBackPro
16 | from models.grad_cam import ClassifyGradCAM
17 | from utils.viz import image_weight_mask
18 |
19 | IM_PATH = '../data/'
20 | SAVE_DIR = '../../data/tmp/'
21 | VGG_PATH = '../../data/pretrain/vgg/vgg19.npy'
22 |
23 |
24 | # def image_weight_mask(image, mask):
25 | # """
26 | # Args:
27 | # image: image with size [HEIGHT, WIDTH, CHANNEL]
28 | # mask: image with size [HEIGHT, WIDTH, 1] or [HEIGHT, WIDTH]
29 | # """
30 | # image = np.array(np.squeeze(image))
31 | # mask = np.array(np.squeeze(mask))
32 | # assert len(mask.shape) == 2
33 | # assert len(image.shape) < 4
34 | # mask.astype('float32')
35 | # mask = np.reshape(mask, (mask.shape[0], mask.shape[1]))
36 | # mask = mask / np.amax(mask)
37 |
38 | # if len(image.shape) == 2:
39 | # return np.multiply(image, mask)
40 | # else:
41 | # for c in range(0, image.shape[2]):
42 | # image[:, :, c] = np.multiply(image[:, :, c], mask)
43 | # return image
44 |
45 |
46 | if __name__ == '__main__':
47 |
48 | # merge several output images in one large image
49 | merge_im = 1
50 | grid_size = np.ceil(merge_im**0.5).astype(int)
51 |
52 | # class label for Grad-CAM generation
53 | # 355 llama 543 dumbbell 605 iPod 515 hat 99 goose 283 tiger cat
54 | # 282 tabby cat 233 border collie 242 boxer
55 | # class_id = [355, 543, 605, 515]
56 | class_id = [283, 242]
57 |
58 | # initialize Grad-CAM
59 | # using VGG19
60 | gcam = ClassifyGradCAM(
61 | vis_model=VGG19_FCN(is_load=True,
62 | pre_train_path=VGG_PATH,
63 | is_rescale=True))
64 | gbackprob = GuideBackPro(
65 | vis_model=VGG19_FCN(is_load=True,
66 | pre_train_path=VGG_PATH,
67 | is_rescale=True))
68 |
69 | # placeholder for input image
70 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3])
71 |
72 | # create VGG19 model
73 | gcam.create_model(image)
74 | gcam.setup_graph()
75 |
76 | # generate class map and prediction label ops
77 | map_op = gcam.get_visualization(class_id=class_id)
78 | label_op = gcam.pre_label
79 |
80 | back_pro_op = gbackprob.get_visualization(image)
81 |
82 | # initialize input dataflow
83 | # change '.png' to other image types if other types of images are used
84 | input_im = ImageFromFile('.png', data_dir=IM_PATH,
85 | num_channel=3, shuffle=False)
86 | input_im.set_batch_size(1)
87 |
88 | writer = tf.summary.FileWriter(SAVE_DIR)
89 | with tf.Session() as sess:
90 |
91 | sess.run(tf.global_variables_initializer())
92 | writer.add_graph(sess.graph)
93 |
94 | cnt = 0
95 | merge_cnt = 0
96 | # weight_im_list = [[] for i in range(len(class_id))]
97 | o_im_list = []
98 | while input_im.epochs_completed < 1:
99 | im = input_im.next_batch()[0]
100 | gcam_map, b_map, label, o_im =\
101 | sess.run([map_op, back_pro_op, label_op, gcam.input_im],
102 | feed_dict={image: im})
103 | print(label)
104 | o_im_list.extend(o_im)
105 | for idx, cid, cmap in zip(count(), gcam_map[1], gcam_map[0]):
106 | overlay_im = image_overlay(cmap, o_im)
107 | weight_im = image_weight_mask(b_map[0], cmap)
108 | try:
109 | weight_im_list[idx].append(weight_im)
110 | overlay_im_list[idx].append(overlay_im)
111 | except NameError:
112 | gcam_class_id = gcam_map[1]
113 | weight_im_list = [[] for i in range(len(gcam_class_id))]
114 | overlay_im_list = [[] for i in range(len(gcam_class_id))]
115 | weight_im_list[idx].append(weight_im)
116 | overlay_im_list[idx].append(overlay_im)
117 | merge_cnt += 1
118 |
119 | # Merging results
120 | if merge_cnt == merge_im:
121 | save_path = '{}oim_{}.png'.format(SAVE_DIR, cnt)
122 | save_merge_images(np.array(o_im_list),
123 | [grid_size, grid_size],
124 | save_path)
125 | for w_im, over_im, cid in zip(weight_im_list,
126 | overlay_im_list,
127 | gcam_class_id):
128 | # save grad-cam results
129 | save_path = '{}gradcam_{}_class_{}.png'.\
130 | format(SAVE_DIR, cnt, cid)
131 | save_merge_images(
132 | np.array(over_im), [grid_size, grid_size], save_path)
133 | # save guided grad-cam results
134 | save_path = '{}guided_gradcam_{}_class_{}.png'.\
135 | format(SAVE_DIR, cnt, cid)
136 | save_merge_images(
137 | np.array(w_im), [grid_size, grid_size], save_path)
138 | weight_im_list = [[] for i in range(len(gcam_class_id))]
139 | overlay_im_list = [[] for i in range(len(gcam_class_id))]
140 | o_im_list = []
141 | merge_cnt = 0
142 | cnt += 1
143 |
144 | # Save any results left over after the last full merge group
145 | if merge_cnt > 0:
146 | save_path = '{}oim_{}.png'.format(SAVE_DIR, cnt)
147 | save_merge_images(np.array(o_im_list),
148 | [grid_size, grid_size],
149 | save_path)
150 | for w_im, over_im, cid in zip(weight_im_list,
151 | overlay_im_list,
152 | gcam_class_id):
153 | # save grad-cam results
154 | save_path = '{}gradcam_{}_class_{}.png'.\
155 | format(SAVE_DIR, cnt, cid)
156 | save_merge_images(
157 | np.array(over_im), [grid_size, grid_size], save_path)
158 | # save guided grad-cam results
159 | save_path = '{}guided_gradcam_{}_class_{}.png'.\
160 | format(SAVE_DIR, cnt, cid)
161 | save_merge_images(
162 | np.array(w_im), [grid_size, grid_size], save_path)
163 | writer.close()
164 |
--------------------------------------------------------------------------------
/example/guided_backpropagation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: guided_backpropagation.py
4 | # Author: Qian Ge
5 |
6 | from scipy import misc
7 | import scipy.io
8 |
9 | import tensorflow as tf
10 | import numpy as np
11 |
12 | from tensorcv.dataflow.image import ImageFromFile
13 |
14 | import setup_env
15 | from nets.vgg import VGG19_FCN
16 | from models.guided_backpro import GuideBackPro
17 |
18 | IM_PATH = '../data/'
19 | SAVE_DIR = '../../data/tmp/'
20 | VGG_PATH = '../../data/pretrain/vgg/vgg19.npy'
21 |
22 | if __name__ == '__main__':
23 | # placeholder for input image
24 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3])
25 | # initialize input dataflow
26 | # change '.png' to other image types if other types of images are used
27 | input_im = ImageFromFile('.png', data_dir=IM_PATH,
28 | num_channel=3, shuffle=False)
29 | # batch size has to be one
30 | input_im.set_batch_size(1)
31 |
32 | # initialize guided back propagation class
33 | # use VGG19 as an example
34 | # images will be rescaled so that the smallest side is 224 if is_rescale=True
35 | model = GuideBackPro(vis_model=VGG19_FCN(is_load=True,
36 | pre_train_path=VGG_PATH,
37 | is_rescale=True))
38 |
39 | # get op to compute guided back propagation map
40 | # of the final output with respect to the input image
41 | back_pro_op = model.get_visualization(image)
42 |
43 | writer = tf.summary.FileWriter(SAVE_DIR)
44 | with tf.Session() as sess:
45 | sess.run(tf.global_variables_initializer())
46 | writer.add_graph(sess.graph)
47 |
48 | cnt = 0
49 | while input_im.epochs_completed < 1:
50 | im = input_im.next_batch()[0]
51 | guided_backpro, label, o_im =\
52 | sess.run([back_pro_op, model.pre_label,
53 | model.input_im],
54 | feed_dict={image: im})
55 | print(label)
56 | for cid, guided_map in zip(guided_backpro[1], guided_backpro[0]):
57 | scipy.misc.imsave(
58 | '{}map_{}_class_{}.png'.format(SAVE_DIR, cnt, cid),
59 | np.squeeze(guided_map))
60 | scipy.misc.imsave('{}im_{}.png'.format(SAVE_DIR, cnt),
61 | np.squeeze(o_im))
62 | # scipy.io.savemat(
63 | # '{}map_1_class_{}.mat'.format(SAVE_DIR, cid),
64 | # {'mat': np.squeeze(guided_map)*255})
65 | cnt += 1
66 |
67 | writer.close()
68 |
--------------------------------------------------------------------------------
/example/invert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: invert.py
4 | # Author: Qian Ge
5 |
6 | import os
7 | import argparse
8 | import scipy
9 | import numpy as np
10 | import tensorflow as tf
11 |
12 | import config_path as config
13 |
14 | import sys
15 | sys.path.append('../')
16 | import lib.utils.viz as viz
17 | import lib.utils.normalize as normlize
18 | from lib.nets.googlenet import BaseGoogLeNet
19 | from lib.models.invert import InvertCNN
20 | # scipy.misc provides the imread/imsave calls used below
21 | import scipy.misc
22 |
23 |
24 | file_path = os.path.join(config.im_path, 'im_0.png')
25 | MEAN = [103.939, 116.779, 123.68]
26 |
27 | if __name__ == '__main__':
28 | im = [scipy.misc.imread(file_path)]
29 | input_mean = np.mean(im)
30 | input_std = np.std(im)
31 | layer_key = 'inception5b'
32 | cnn_model = BaseGoogLeNet(config.googlenet_path)
33 | invert_model = InvertCNN(
34 | 224, 224, 3,
35 | input_mean=input_mean,
36 | input_std=input_std,
37 | mean_list=MEAN)
38 |
39 | input_im = tf.placeholder(tf.float32, [1, 224, 224, 3], name='input')
40 |
41 |
42 | feat_im = cnn_model.get_feature_map(input_im, layer_key)
43 | feat_invert = cnn_model.get_feature_map(invert_model.invert_im, layer_key)
44 |
45 | train_op = invert_model.optimize_image(feat_invert, feat_im)
46 | result_op = invert_model.get_opt_im()
47 |
48 | writer = tf.summary.FileWriter(config.save_path)
49 | with tf.Session() as sess:
50 | sess.run(tf.global_variables_initializer())
51 | # writer.add_graph(sess.graph)
52 |
53 | for step in range(0, 1000):
54 |
55 | _, loss, loss1, loss2 = sess.run(
56 | [train_op,
57 | invert_model.loss,
58 | invert_model.mse_loss,
59 | invert_model.vt_loss],
60 | feed_dict={input_im:im})
61 | print(step, loss, loss1, loss2)
62 | opt_im = sess.run(result_op)
63 | if step % 10 == 0:
64 | # opt_im = np.clip(opt_im, 0, 255)
65 | #
66 | # opt_im = opt_im * input_std + input_mean
67 | # print(opt_im)
68 | scipy.misc.imsave(os.path.join(config.save_path, 'test_{}.png'.format(step)),
69 | np.squeeze(opt_im))
70 |
71 |
--------------------------------------------------------------------------------
/example/run.sh:
--------------------------------------------------------------------------------
1 | python3 deconv.py --feat conv1_2
2 | python3 deconv.py --feat conv2_2
3 | python3 deconv.py --feat conv3_4
4 | python3 deconv.py --feat conv4_4
5 | python3 deconv.py --feat conv5_2
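6 |
7 | # A single feature map can also be selected with the optional --id flag of
8 | # deconv.py; the id below is illustrative only:
9 | # python3 deconv.py --feat conv4_4 --id 171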
--------------------------------------------------------------------------------
/example/setup_env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: setup_env.py
4 | # Author: Qian Ge
5 |
6 | import sys
7 | sys.path.append('../lib/')
8 |
--------------------------------------------------------------------------------
/example/vizfilter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: vizfilter.py
4 | # Author: Qian Ge
5 |
6 | import os
7 | import scipy
8 | import argparse
9 | import tensorflow as tf
10 |
11 | import config_path as config
12 |
13 | import sys
14 | sys.path.append('../')
15 | from lib.nets.googlenet import BaseGoogLeNet
16 | import lib.utils.viz as viz
17 | import lib.utils.normalize as normlize
18 |
19 |
20 |
21 | def get_parse():
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument('--filter', action='store_true',
24 | help='Visualize filters')
25 | parser.add_argument('--feature', action='store_true',
26 | help='Visualize feature maps')
27 |
28 | parser.add_argument('--im', type=str,
29 | help='Image file name')
30 |
31 | return parser.parse_args()
32 |
33 |
34 | if __name__ == '__main__':
35 | FLAGES = get_parse()
36 |
37 | map_list = ['inception4a', 'inception4b', 'inception4c',
38 | 'inception4d', 'inception4e', 'inception3a',
39 | 'inception3b', 'inception5a', 'inception5b']
40 |
41 | model = BaseGoogLeNet(config.googlenet_path)
42 | filters = tf.get_default_graph().get_tensor_by_name(
43 | 'conv1_7x7_s2/weights:0')
44 |
45 | if FLAGES.feature:
46 | feature_map = []
47 | for c_map in map_list:
48 | feature_map.append(model.conv_layer[c_map])
49 | assert FLAGES.im is not None, 'File name cannot be None!'
50 | file_path = os.path.join(config.im_path, FLAGES.im)
51 | assert os.path.isfile(file_path),\
52 | 'File does not exist! {}'.format(file_path)
53 | im = scipy.misc.imread(file_path)
54 |
55 | with tf.Session() as sess:
56 | sess.run(tf.global_variables_initializer())
57 | if FLAGES.filter:
58 | learned_filter = sess.run(filters)
59 | viz.viz_filters(
60 | learned_filter,
61 | [8, 8],
62 | os.path.join(config.save_path, 'GoogLeNet_filter.png'),
63 | gap=2,
64 | nf=normlize.norm_std)
65 |
66 | if FLAGES.feature:
67 | maps = sess.run(feature_map, feed_dict={model.inputs: [im]})
68 |
69 | for key, c_map in zip(map_list, maps):
70 | viz.viz_filters(
71 | c_map[0],
72 | [10, 10],
73 | os.path.join(config.save_path, 'GoogLeNet_{}.png'.format(key)),
74 | gap=2,
75 | gap_color=10,
76 | # nf=normlize.norm_range
77 | )
78 |
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/__init__.py
--------------------------------------------------------------------------------
/lib/dataflow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/dataflow/__init__.py
--------------------------------------------------------------------------------
/lib/dataflow/cifar.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: cifar.py
4 | # Author: Qian Ge
5 |
6 | import os
7 | import pickle
8 |
9 | import numpy as np
10 |
11 | from tensorcv.dataflow.base import RNGDataFlow
12 |
13 |
14 | class CIFAR(RNGDataFlow):
15 | def __init__(self,
16 | data_dir='',
17 | shuffle=True,
18 | batch_dict_name=None,
19 | data_type='train',
20 | channel_mean=None,
21 | substract_mean=True):
22 | self._mean = channel_mean
23 | self._substract = substract_mean
24 | self.num_channels = 3
25 | self.im_size = [32, 32]
26 |
27 | assert os.path.isdir(data_dir)
28 | self.data_dir = data_dir
29 |
30 | assert batch_dict_name is not None
31 | if not isinstance(batch_dict_name, list):
32 | batch_dict_name = [batch_dict_name]
33 | self._batch_dict_name = batch_dict_name
34 |
35 | if data_type == 'train':
36 | self._file_list = [os.path.join(data_dir, 'data_batch_{}'.format(i)) for i in range(1, 6)]
37 | else:
38 | self._file_list = [os.path.join(data_dir, 'test_batch')]
39 |
40 | self.shuffle = shuffle
41 |
42 | self.setup(epoch_val=0, batch_size=1)
43 | # if not isinstance(batch_file_list, list):
44 | # batch_file_list = [batch_file_list]
45 | # self._file_list = [os.path.join(data_dir, 'data_batch_' + str(batch_id)) for batch_id in batch_file_list]
46 |
47 | # self._load_files()
48 | self._num_image = self.size()
49 |
50 | self._image_id = 0
51 | self._batch_file_id = -1
52 | self._image = []
53 | self._next_batch_file()
54 |
55 | # self._comp_channel_mean()
56 |
57 | print('Data Loaded! Size of data: {}'.format(self.size()))
58 |
59 | def _next_batch_file(self):
60 | if self._batch_file_id >= len(self._file_list) - 1:
61 | self._batch_file_id = 0
62 | self._epochs_completed += 1
63 | else:
64 | self._batch_file_id += 1
65 | data_dict = unpickle(self._file_list[self._batch_file_id])
66 | self._image = np.array(data_dict['image'])
67 | self._label = np.array(data_dict['label'])
68 |
69 | if self.shuffle:
70 | self._suffle_files()
71 |
72 | def _suffle_files(self):
73 | idxs = np.arange(len(self._image))
74 |
75 | self.rng.shuffle(idxs)
76 | self._image = self._image[idxs]
77 | self._label = self._label[idxs]
78 |
79 | @property
80 | def batch_step(self):
81 | return int(self.size() * 1.0 / self._batch_size)
82 |
83 | @property
84 | def channel_mean(self):
85 | if self._mean is None:
86 | self._mean = self._comp_channel_mean()
87 | return self._mean
88 |
89 | def substract_mean(self, im_list):
90 | """
91 | Args:
92 | im_list: [batch, h, w, c]
93 | """
94 | mean = self.channel_mean
95 | for c_id in range(0, im_list.shape[-1]):
96 | im_list[:, :, :, c_id] = im_list[:, :, :, c_id] - mean[c_id]
97 | return im_list
98 |
99 | def _comp_channel_mean(self):
100 | im_list = []
101 | for k in range(len(self._file_list)):
102 | cur_im = unpickle(self._file_list[k])['image']
103 | im_list.extend(cur_im)
104 | im_list = np.array(im_list)
105 |
106 | mean_list = []
107 | for c_id in range(0, im_list.shape[-1]):
108 | mean_list.append(np.mean(im_list[:,:,:,c_id]))
109 | return mean_list
110 |
111 | def size(self):
112 | try:
113 | return self.data_size
114 | except AttributeError:
115 | data_size = 0
116 | for k in range(len(self._file_list)):
117 | tmp_image = unpickle(self._file_list[k])['image']
118 | data_size += len(tmp_image)
119 | self.data_size = data_size
120 | return self.data_size
121 |
122 | def next_batch(self):
123 | assert self._batch_size <= self.size(), \
124 | "batch_size {} cannot be larger than data size {}".\
125 | format(self._batch_size, self.size())
126 |
127 | start = self._image_id
128 | self._image_id += self._batch_size
129 | end = self._image_id
130 | batch_image = np.array(self._image[start:end])
131 | batch_label = np.array(self._label[start:end])
132 |
133 | if self._image_id + self._batch_size > len(self._image):
134 | self._next_batch_file()
135 | self._image_id = 0
136 | if self.shuffle:
137 | self._suffle_files()
138 | if self._substract:
139 | batch_image = self.substract_mean(batch_image)
140 | return batch_image, batch_label
141 |
142 | def next_batch_dict(self):
143 | re_dict = {}
144 | batch_data = self.next_batch()
145 | for key, data in zip(self._batch_dict_name, batch_data):
146 | re_dict[key] = data
147 | return re_dict
148 |
149 |
150 | def unpickle(file):
151 | with open(file, 'rb') as fo:
152 | data_dict = pickle.load(fo, encoding='bytes')
153 | image = data_dict[b'data']
154 | labels = data_dict[b'labels']
155 |
156 | r = image[:,:32*32].reshape(-1,32,32)
157 | g = image[:,32*32: 2*32*32].reshape(-1,32,32)
158 | b = image[:,2*32*32:].reshape(-1,32,32)
159 |
160 | image = np.stack((r,g,b),axis=-1)
161 |
162 | return {'image': image.astype(float), 'label': labels}
163 |
164 | if __name__ == '__main__':
165 | a = CIFAR('D:\\Qian\\GitHub\\workspace\\tensorflow-DCGAN\\cifar-10-python.tar\\', batch_dict_name=['im', 'label'])
166 | t = a.next_batch()[0]
167 | print(t)
168 | print(t.shape)
169 | print(a.size())
170 | # print(a.next_batch()[0])
171 | # print(a.next_batch()[0])
--------------------------------------------------------------------------------
/lib/dataflow/image.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: image.py
4 | # Author: Qian Ge
5 |
6 | import os
7 | import numpy as np
8 | from tensorcv.dataflow.base import RNGDataFlow
9 | from tensorcv.dataflow.normalization import identity
10 |
11 | class DataFromFile(RNGDataFlow):
12 | """ Base class for image from files """
13 | def __init__(self,
14 | ext_name,
15 | data_dir='',
16 | num_channel=None,
17 | shuffle=True,
18 | normalize=None,
19 | batch_dict_name=None,
20 | normalize_fnc=identity):
21 |
22 | check_dir(data_dir)
23 | self.data_dir = data_dir
24 | self._shuffle = shuffle
25 | self._normalize = normalize
26 | self._normalize_fnc = normalize_fnc
27 |
28 | if not isinstance(batch_dict_name, list):
29 | batch_dict_name = [batch_dict_name]
30 | self._batch_dict_name = batch_dict_name
31 |
32 | self.setup(epoch_val=0, batch_size=1)
33 |
34 | self._load_file_list(ext_name.lower())
35 | if self.size() == 0:
36 | print_warning('No {} files in folder {}'.\
37 | format(ext_name, data_dir))
38 | self.num_channels, self.im_size = self._get_im_size()
39 | self._data_id = 0
40 |
41 | def _load_file_list(self):
42 | raise NotImplementedError()
43 |
44 | def _suffle_file_list(self):
45 | pass
46 |
47 | def next_batch(self):
48 | assert self._batch_size <= self.size(), \
49 | "batch_size cannot be larger than data size"
50 |
51 | if self._data_id + self._batch_size > self.size():
52 | start = self._data_id
53 | end = self.size()
54 | else:
55 | start = self._data_id
56 | self._data_id += self._batch_size
57 | end = self._data_id
58 | # batch_file_range = range(start, end)
59 | batch_data = self._load_data(start, end)
60 |
61 | if end == self.size():
62 | self._epochs_completed += 1
63 | self._data_id = 0
64 | if self._shuffle:
65 | self._suffle_file_list()
66 | return batch_data
67 |
68 | def next_batch_dict(self):
69 | batch_data = self.next_batch()
70 | batch_dict = {name: data for name, data in zip(self._batch_dict_name, batch_data)}
71 | return batch_dict
72 |
73 | def _load_data(self, start, end):
74 | raise NotImplementedError()
75 |
76 |
77 | class ImageFromFile(DataFromFile):
78 | def __init__(self,
79 | ext_name,
80 | data_dir='',
81 | num_channel=None,
82 | shuffle=True,
83 | normalize=None,
84 | normalize_fnc=identity,
85 | batch_dict_name=None,
86 | resize=None, resize_crop=None, pf=identity):
87 |
88 | if num_channel is not None:
89 | self.num_channels = num_channel
90 | self._read_channel = num_channel
91 | else:
92 | self._read_channel = None
93 |
94 | self._resize = get_shape2D(resize)
95 | self._resize_crop = resize_crop
96 | self._pf = pf
97 |
98 | super(ImageFromFile, self).__init__(ext_name,
99 | data_dir=data_dir,
100 | shuffle=shuffle,
101 | normalize=normalize,
102 | batch_dict_name=batch_dict_name,
103 | normalize_fnc=normalize_fnc)
104 |
105 | def _load_file_list(self, ext_name):
106 | im_dir = os.path.join(self.data_dir)
107 | self._im_list = get_file_list(im_dir, ext_name)
108 | if self._shuffle:
109 | self._suffle_file_list()
110 |
111 | def _suffle_file_list(self):
112 | idxs = np.arange(self.size())
113 | self.rng.shuffle(idxs)
114 | self._im_list = self._im_list[idxs]
115 |
116 | def _load_data(self, start, end):
117 | input_im_list = []
118 | for k in range(start, end):
119 | im_path = self._im_list[k]
120 | im = load_image(im_path, read_channel=self._read_channel,
121 | resize=self._resize,
122 | resize_crop=self._resize_crop,
123 | pf=self._pf)
124 | input_im_list.extend(im)
125 |
126 | # TODO to be modified
127 | input_im_list = self._normalize_fnc(np.array(input_im_list),
128 | self._get_max_in_val(),
129 | self._get_half_in_val())
130 | return [input_im_list]
131 |
132 | def size(self):
133 | return self._im_list.shape[0]
134 |
135 | def get_data_list(self):
136 | return [self._im_list]
137 |
138 | def set_data_list(self, new_data_list):
139 | assert isinstance(new_data_list, list)
140 | assert len(new_data_list) == 1
141 | self._im_list = np.array(new_data_list[0])
142 |
143 | def set_pf(self, pf):
144 | self._pf = pf
145 |
146 | def suffle_data(self):
147 | self._suffle_file_list()
148 |
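149 |
150 | # A minimal usage sketch (illustrative only; the directory is a placeholder):
151 | #
152 | # loader = ImageFromFile('.jpg', data_dir='../data/', num_channel=3,
153 | #                        shuffle=False, batch_dict_name=['im'])
154 | # loader.set_batch_size(1)
155 | # batch = loader.next_batch_dict()  # {'im': array of shape [1, h, w, 3]}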
--------------------------------------------------------------------------------
/lib/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/models/__init__.py
--------------------------------------------------------------------------------
/lib/models/cam.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: cam.py
4 | # Author: Qian Ge
5 |
6 | import tensorflow as tf
7 | import numpy as np
8 |
9 | from tensorcv.models.layers import new_weights, dropout, global_avg_pool, conv, max_pool
10 | from tensorcv.models.base import BaseModel
11 |
12 |
13 | class BaseCAM(BaseModel):
14 | """ base of class activation map class """
15 | def __init__(self, num_class=10,
16 | inspect_class=None,
17 | num_channels=1,
18 | learning_rate=0.0001):
19 |
20 | self._learning_rate = learning_rate
21 | self._num_channels = num_channels
22 | self._num_class = num_class
23 | self._inspect_class = inspect_class
24 |
25 | self.set_is_training(True)
26 |
27 | def _create_input(self):
28 | self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
29 | self.image = tf.placeholder(
30 | tf.float32, name='image',
31 | shape=[None, None, None, self._num_channels])
32 | self.label = tf.placeholder(tf.int64, [None], 'label')
33 |
34 | self.set_model_input([self.image, self.keep_prob])
35 | self.set_dropout(self.keep_prob, keep_prob=0.5)
36 | self.set_train_placeholder([self.image, self.label])
37 | self.set_prediction_placeholder([self.image, self.label])
38 |
39 | def _create_conv(self, input_im):
40 | raise NotImplementedError()
41 |
42 | def _get_loss(self):
43 | with tf.name_scope('loss'):
44 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
45 | logits=self.output, labels=self.label)
46 | cross_entropy_loss = tf.reduce_mean(
47 | cross_entropy, name='cross_entropy_loss')
48 | tf.add_to_collection('losses', cross_entropy_loss)
49 | return tf.add_n(tf.get_collection('losses'), name='result')
50 |
51 | def _get_optimizer(self):
52 | return tf.train.AdamOptimizer(
53 | beta1=0.5, learning_rate=self._learning_rate)
54 |
55 | def _ex_setup_graph(self):
56 | with tf.name_scope('accuracy'):
57 | correct_prediction = tf.equal(self.prediction, self.label)
58 | self.accuracy = tf.reduce_mean(
59 | tf.cast(correct_prediction, tf.float32), name='result')
60 |
61 | def _setup_summary(self):
62 | tf.summary.scalar("train_accuracy", self.accuracy,
63 | collections=['train'])
64 |
65 | def get_classmap(self, label, conv_out, input_im):
66 | """
67 | Compute class activation map of class = label with name 'classmap'
68 |
69 | Args:
70 | label (int): a scalar int indicate the class label
71 | conv_out (tf.tensor): 4-D Tensor of shape
72 | [batch, height, width, channels]. Output of
73 | convolutional layers.
74 | input_im (tf.tensor): A 4-D Tensor image.
75 | The original model input image patch.
76 | """
77 | # Get original image size used for interpolation
78 | o_height = tf.shape(input_im)[1]
79 | o_width = tf.shape(input_im)[2]
80 |
81 | # Get shape of output of convolution layers
82 | conv_out_channel = tf.shape(conv_out)[-1]
83 | conv_height = tf.shape(conv_out)[1]
84 | conv_width = tf.shape(conv_out)[2]
85 |
86 | # Get weights corresponding to class = label
87 | with tf.variable_scope('cam') as scope:
88 | scope.reuse_variables()
89 | label_w = tf.gather(
90 | tf.transpose(tf.get_variable('weights')), label)
91 | label_w = tf.reshape(label_w, [-1, conv_out_channel, 1])
92 | label_w = tf.tile(label_w, [tf.shape(conv_out)[0], 1, 1])
93 |
94 | conv_reshape = tf.reshape(
95 | conv_out, [-1, conv_height * conv_width, conv_out_channel])
96 | classmap = tf.matmul(conv_reshape, label_w)
97 |
98 | # Interpolate to original size
99 | classmap = tf.reshape(classmap, [-1, conv_height, conv_width, 1])
100 | classmap = tf.image.resize_bilinear(classmap,
101 | [o_height, o_width],
102 | name='result')
103 |
104 | class VGGCAM(BaseCAM):
105 | def __init__(self, num_class=1000,
106 | inspect_class=None,
107 | num_channels=3,
108 | learning_rate=0.0001,
109 | is_load=True,
110 | pre_train_path=None):
111 |
112 | self._is_load = is_load
113 | if self._is_load and pre_train_path is None:
114 | raise ValueError('pre_train_path can not be None!')
115 | self._pre_train_path = pre_train_path
116 |
117 | super(VGGCAM, self).__init__(num_class=num_class,
118 | inspect_class=inspect_class,
119 | num_channels=num_channels,
120 | learning_rate=learning_rate)
121 |
122 | def _create_conv(self, input_im):
123 |
124 | VGG_MEAN = [103.939, 116.779, 123.68]
125 |
126 | red, green, blue = tf.split(axis=3, num_or_size_splits=3,
127 | value=input_im)
128 | input_bgr = tf.concat(axis=3, values=[
129 | blue - VGG_MEAN[0],
130 | green - VGG_MEAN[1],
131 | red - VGG_MEAN[2],
132 | ])
133 |
134 | data_dict = {}
135 | if self._is_load:
136 | data_dict = np.load(self._pre_train_path,
137 | encoding='latin1').item()
138 |
139 | arg_scope = tf.contrib.framework.arg_scope
140 | with arg_scope([conv], nl=tf.nn.relu,
141 | trainable=False, data_dict=data_dict):
142 | conv1_1 = conv(input_bgr, 3, 64, 'conv1_1')
143 | conv1_2 = conv(conv1_1, 3, 64, 'conv1_2')
144 | pool1 = max_pool(conv1_2, 'pool1', padding='SAME')
145 |
146 | conv2_1 = conv(pool1, 3, 128, 'conv2_1')
147 | conv2_2 = conv(conv2_1, 3, 128, 'conv2_2')
148 | pool2 = max_pool(conv2_2, 'pool2', padding='SAME')
149 |
150 | conv3_1 = conv(pool2, 3, 256, 'conv3_1')
151 | conv3_2 = conv(conv3_1, 3, 256, 'conv3_2')
152 | conv3_3 = conv(conv3_2, 3, 256, 'conv3_3')
153 | conv3_4 = conv(conv3_3, 3, 256, 'conv3_4')
154 | pool3 = max_pool(conv3_4, 'pool3', padding='SAME')
155 |
156 | conv4_1 = conv(pool3, 3, 512, 'conv4_1')
157 | conv4_2 = conv(conv4_1, 3, 512, 'conv4_2')
158 | conv4_3 = conv(conv4_2, 3, 512, 'conv4_3')
159 | conv4_4 = conv(conv4_3, 3, 512, 'conv4_4')
160 | pool4 = max_pool(conv4_4, 'pool4', padding='SAME')
161 |
162 | conv5_1 = conv(pool4, 3, 512, 'conv5_1')
163 | conv5_2 = conv(conv5_1, 3, 512, 'conv5_2')
164 | conv5_3 = conv(conv5_2, 3, 512, 'conv5_3')
165 | conv5_4 = conv(conv5_3, 3, 512, 'conv5_4')
166 |
167 | return conv5_4
168 |
169 | def _create_model(self):
170 |
171 | input_im = self.model_input[0]
172 | keep_prob = self.model_input[1]
173 |
174 | conv_out = self._create_conv(input_im)
175 |
176 | init_b = tf.truncated_normal_initializer(stddev=0.01)
177 | conv_cam = conv(conv_out, 3, 1024, 'conv_cam',
178 | nl=tf.nn.relu, wd=0.01, init_b=init_b)
179 | gap = global_avg_pool(conv_cam)
180 | dropout_gap = dropout(gap, keep_prob, self.is_training)
181 |
182 | with tf.variable_scope('cam'):
183 | init = tf.truncated_normal_initializer(stddev=0.01)
184 | fc_w = new_weights(
185 | 'weights', 1,
186 | [gap.get_shape().as_list()[-1], self._num_class],
187 | initializer=init, wd=0.01)
188 | fc_cam = tf.matmul(dropout_gap, fc_w, name='output')
189 |
190 | self.output = tf.identity(fc_cam, 'model_output')
191 | self.prediction = tf.argmax(fc_cam, name='pre_label', axis=-1)
192 | self.prediction_pro = tf.nn.softmax(fc_cam, name='pre_pro')
193 |
194 | if self._inspect_class is not None:
195 | with tf.name_scope('classmap'):
196 | self.get_classmap(self._inspect_class, conv_cam, input_im)
197 |
198 |
199 | # if __name__ == '__main__':
200 | # num_class = 257
201 | # num_channels = 3
202 |
203 | # vgg_cam_model = VGGCAM(num_class=num_class,
204 | # inspect_class=None,
205 | # num_channels=num_channels,
206 | # learning_rate=0.0001,
207 | # is_load=True,
208 | # pre_train_path='E:\\GITHUB\\workspace\\CNN\pretrained\\vgg19.npy')
209 |
210 | # vgg_cam_model.create_graph()
211 |
212 | # grads = vgg_cam_model.get_grads()
213 | # opt = vgg_cam_model.get_optimizer()
214 | # train_op = opt.apply_gradients(grads, name='train')
215 |
216 | # writer = tf.summary.FileWriter('E:\\GITHUB\\workspace\\CNN\\other\\')
217 | # with tf.Session() as sess:
218 | # sess.run(tf.global_variables_initializer())
219 | # writer.add_graph(sess.graph)
220 | # writer.close()
221 |
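222 |
223 |
224 | def np_classmap(conv_out, class_weights):
225 |     """A NumPy restatement of get_classmap() above, kept here only for
226 |     reference; this helper is not used anywhere else in the repo.
227 |
228 |     Args:
229 |         conv_out: [h, w, c] output of conv_cam for one image.
230 |         class_weights: [c] column of the 'cam' fc weights for the inspected class.
231 |
232 |     Returns:
233 |         [h, w] un-normalized class activation map (before bilinear upsampling).
234 |     """
235 |     return np.tensordot(conv_out, class_weights, axes=([-1], [0]))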
--------------------------------------------------------------------------------
/lib/models/gap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: gap.py
4 | # Author: Qian Ge
5 |
6 | import tensorflow as tf
7 |
8 | from tensorcv.models.base import BaseModel
9 | from tensorcv.models.layers import new_weights, global_avg_pool, conv, dropout, max_pool
10 | from tensorcv.models.layers import batch_norm
11 |
12 | from lib.nets.vgg import BaseVGG19
13 | from lib.nets.googlenet import BaseGoogLeNet
14 |
15 |
16 | def mlpconv(inputs, filter_size, hidden_size, wd=0, name='mlpconv'):
17 | if not isinstance(hidden_size, list):
18 | hidden_size = [hidden_size]
19 | with tf.variable_scope(name):
20 | l_out = conv(inputs,
21 | filter_size,
22 | hidden_size[0],
23 | 'microlayer_0',
24 | nl=tf.nn.relu,
25 | wd=wd)
26 | for layer_id in range(1, len(hidden_size)):
27 | l_out = conv(l_out,
28 | 1,
29 | hidden_size[layer_id],
30 | 'microlayer_{}'.format(layer_id),
31 | nl=tf.nn.relu,
32 | wd=wd)
33 |
34 | return l_out
35 |
36 |
37 | class GAPNet(BaseModel):
38 | def __init__(self, num_class=10, wd=0):
39 | self._n_class = num_class
40 | self._wd = wd
41 | # self._pre_train_path = pre_train_path
42 |
43 | self.set_is_training(True)
44 | self.layer = {}
45 |
46 | def set_is_training(self, is_training):
47 | self._is_traing = is_training
48 |
49 | def create_model(self, input_dict):
50 | self._input_dict = input_dict
51 | self._create_model()
52 |
53 | def _create_conv(self, inputs):
54 | self.dropout = tf.placeholder(tf.float32, name='dropout')
55 | mlpconv_1 = mlpconv(
56 | inputs,
57 | filter_size=8,
58 | hidden_size=[96, 96],
59 | name='mlpconv_1',
60 | wd=self._wd)
61 | # mlpconv_1 = mlpconv(
62 | # inputs,
63 | # filter_size=5,
64 | # hidden_size=[192, 160, 96],
65 | # name='mlpconv_1',
66 | # wd=self._wd)
67 | mlpconv_1 = max_pool(mlpconv_1, 'pool1', padding='SAME')
68 | mlpconv_1 = dropout(mlpconv_1, self.dropout, self._is_traing)
69 | mlpconv_1 = batch_norm(mlpconv_1, train=self._is_traing, name='bn_1')
70 |
71 | mlpconv_2 = mlpconv(
72 | mlpconv_1,
73 | filter_size=8,
74 | hidden_size=[192, 192],
75 | name='mlpconv_2',
76 | wd=self._wd)
77 | # mlpconv_2 = mlpconv(
78 | # mlpconv_1,
79 | # filter_size=5,
80 | # hidden_size=[192, 192, 192],
81 | # name='mlpconv_2',
82 | # wd=self._wd)
83 | mlpconv_2 = max_pool(mlpconv_2, 'pool2', padding='SAME')
84 | mlpconv_2 = dropout(mlpconv_2, self.dropout, self._is_traing)
85 | mlpconv_2 = batch_norm(mlpconv_2, train=self._is_traing, name='bn_2')
86 |
87 | mlpconv_3 = mlpconv(
88 | mlpconv_2,
89 | filter_size=5,
90 | hidden_size=[192, self._n_class],
91 | name='mlpconv_3',
92 | wd=self._wd)
93 | # mlpconv_3 = mlpconv(
94 | # mlpconv_2,
95 | # filter_size=3,
96 | # hidden_size=[192, 192, self._n_class],
97 | # name='mlpconv_3',
98 | # wd=self._wd)
99 | # mlpconv_3 = max_pool(mlpconv_3, 'pool3', padding='SAME')
100 | # mlpconv_3 = dropout(pool3, 0.5, self._is_traing)
101 |
102 | return mlpconv_3
103 |
104 | def _create_model(self):
105 | inputs = self._input_dict['input']
106 | conv_out = self._create_conv(inputs)
107 |
108 | # init_b = tf.truncated_normal_initializer(stddev=0.01)
109 | # conv_gap = conv(conv_out, 3, self._n_class, 'conv_gap',
110 | # nl=tf.nn.relu, wd=0, init_b=init_b)
111 | gap = global_avg_pool(conv_out)
112 |
113 | self.layer['logits'] = gap
114 | self.layer['feature'] = conv_out
115 | self.layer['pred'] = tf.argmax(gap, name='pred', axis=-1)
116 | self.layer['prob'] = tf.nn.softmax(gap, name='prob')
117 |
118 | def _get_loss(self):
119 | label = self._input_dict['label']
120 | with tf.name_scope('loss'):
121 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
122 | logits= self.layer['logits'], labels=label)
123 | cross_entropy_loss = tf.reduce_mean(
124 | cross_entropy, name='cross_entropy_loss')
125 | tf.add_to_collection('losses', cross_entropy_loss)
126 | return tf.add_n(tf.get_collection('losses'), name='result')
127 |
128 | def get_loss(self):
129 | try:
130 | return self.loss
131 | except AttributeError:
132 | self.loss = self._get_loss()
133 | return self.loss
134 |
135 | def get_train_op(self):
136 | self.lr = tf.placeholder(tf.float32, name='lr')
137 | # opt = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
138 | opt = tf.train.AdamOptimizer(
139 | beta1=0.5, learning_rate=self.lr)
140 | loss = self.get_loss()
141 | return opt.minimize(loss)
142 |
143 | def get_accuracy(self):
144 | label = self._input_dict['label']
145 | pred = self.layer['pred']
146 |
147 | correct = tf.equal(label, pred)
148 | accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
149 | return accuracy
150 |
151 |
152 |
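153 |
154 | def np_gap_logits(feature_maps):
155 |     """Sketch of what global_avg_pool does in _create_model() above: the last
156 |     mlpconv emits one feature map per class, and spatially averaging each map
157 |     yields the class logits directly. For illustration only; not used elsewhere.
158 |
159 |     Args:
160 |         feature_maps: [batch, h, w, num_class] numpy array.
161 |
162 |     Returns:
163 |         [batch, num_class] logits.
164 |     """
165 |     return feature_maps.mean(axis=(1, 2))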
--------------------------------------------------------------------------------
/lib/models/grad_cam.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: grad_cam.py
4 | # Author: Qian Ge
5 |
6 | import tensorflow as tf
7 |
8 | from tensorcv.models.layers import global_avg_pool
9 |
10 |
11 | class BaseGradCAM(object):
12 | def __init__(self, vis_model=None, num_channel=3):
13 | self._vis_model = vis_model
14 | self._nchannel = num_channel
15 |
16 | def create_model(self, inputs):
17 | self._create_model(inputs)
18 |
19 | def _create_model(self, inputs):
20 | pass
21 |
22 | def setup_graph(self):
23 | pass
24 |
25 | def _comp_feature_importance_weight(self, class_id):
26 | if not isinstance(class_id, list):
27 | class_id = [class_id]
28 |
29 | with tf.name_scope('feature_weight'):
30 | self._feature_w_list = []
31 | for idx, cid in enumerate(class_id):
32 | one_hot = tf.sparse_to_dense(
33 | [[cid, 0]], [self._nclass, 1], 1.0)
34 | out_act = tf.reshape(self._out_act, [1, self._nclass])
35 | class_act = tf.matmul(out_act, one_hot,
36 | name='class_act_{}'.format(idx))
37 | feature_grad = tf.gradients(class_act, self._conv_out,
38 | name='grad_{}'.format(idx))
39 | feature_grad = tf.squeeze(
40 | tf.convert_to_tensor(feature_grad), axis=0)
41 | feature_w = global_avg_pool(
42 | feature_grad, name='feature_w_{}'.format(idx))
43 | self._feature_w_list.append(feature_w)
44 |
45 | def get_visualization(self, class_id=None):
46 | assert class_id is not None, 'class_id cannot be None!'
47 |
48 | with tf.name_scope('grad_cam'):
49 | self._comp_feature_importance_weight(class_id)
50 | conv_out = self._conv_out
51 | conv_c = tf.shape(conv_out)[-1]
52 | conv_h = tf.shape(conv_out)[1]
53 | conv_w = tf.shape(conv_out)[2]
54 | conv_reshape = tf.reshape(conv_out, [conv_h * conv_w, conv_c])
55 |
56 | o_h = tf.shape(self.input_im)[1]
57 | o_w = tf.shape(self.input_im)[2]
58 |
59 | classmap_list = []
60 | for idx, feature_w in enumerate(self._feature_w_list):
61 | feature_w = tf.reshape(feature_w, [conv_c, 1])
62 | classmap = tf.matmul(conv_reshape, feature_w)
63 | classmap = tf.reshape(classmap, [-1, conv_h, conv_w, 1])
64 | classmap = tf.nn.relu(
65 | tf.image.resize_bilinear(classmap, [o_h, o_w]),
66 | name='grad_cam_{}'.format(idx))
67 | classmap_list.append(tf.squeeze(classmap))
68 |
69 | return classmap_list, tf.convert_to_tensor(class_id)
70 |
71 |
72 | class ClassifyGradCAM(BaseGradCAM):
73 | def _create_model(self, inputs):
74 | keep_prob = 1
75 | self._vis_model.create_model([inputs, keep_prob])
76 |
77 | def setup_graph(self):
78 | self.input_im = self._vis_model.layer['input']
79 | self._out_act = global_avg_pool(self._vis_model.layer['output'])
80 | self._conv_out = self._vis_model.layer['conv_out']
81 | self._nclass = self._out_act.shape.as_list()[-1]
82 | self.pre_label = tf.nn.top_k(tf.nn.softmax(self._out_act),
83 | k=5, sorted=True)
84 |
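85 |
86 | def np_grad_cam(conv_out, conv_grad):
87 |     """A NumPy restatement of the Grad-CAM map built above, kept for reference
88 |     only; this helper is not used anywhere else in the repo.
89 |
90 |     Args:
91 |         conv_out: [h, w, c] activations of the last conv layer for one image.
92 |         conv_grad: [h, w, c] gradient of the class score w.r.t. conv_out.
93 |
94 |     Returns:
95 |         [h, w] Grad-CAM map (before bilinear upsampling to the input size).
96 |     """
97 |     import numpy as np
98 |     weights = conv_grad.mean(axis=(0, 1))    # global average pooling of gradients
99 |     cam = (conv_out * weights).sum(axis=-1)  # weighted sum over feature channels
100 |     return np.maximum(cam, 0)                # ReLU, as in the graph version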
--------------------------------------------------------------------------------
/lib/models/guided_backpro.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: guided_backpro.py
4 | # Author: Qian Ge
5 |
6 | import tensorflow as tf
7 |
8 | from tensorcv.models.layers import global_avg_pool
9 |
10 |
11 | @tf.RegisterGradient("GuidedRelu")
12 | def _GuidedReluGrad(op, grad):
13 | gate_g = tf.cast(grad > 0, "float32")
14 | gate_y = tf.cast(op.outputs[0] > 0, "float32")
15 | return grad * gate_g * gate_y
16 |
17 |
18 | class GuideBackPro(object):
19 | def __init__(self, vis_model=None, class_id=None):
20 | assert vis_model is not None, 'vis_model cannot be None!'
21 | # assert not class_id is None, 'class_id cannot be None!'
22 |
23 | self._vis_model = vis_model
24 | if class_id is not None and not isinstance(class_id, list):
25 | class_id = [class_id]
26 | self._class_id = class_id
27 |
28 | def _create_model(self, image):
29 | keep_prob = 1
30 | self._vis_model.create_model([image, keep_prob])
31 | self.input_im = self._vis_model.layer['input']
32 |
33 | self._out_act = global_avg_pool(self._vis_model.layer['output'])
34 | self.pre_label = tf.nn.top_k(
35 | tf.nn.softmax(self._out_act), k=5, sorted=True)
36 |
37 | def _get_activation(self):
38 | with tf.name_scope('activation'):
39 | nclass = self._out_act.shape.as_list()[-1]
40 | act_list = []
41 | if self._class_id is None:
42 | class_list = [self.pre_label.indices[0][0]]
43 | act_list = [tf.reduce_max(self._out_act)]
44 | else:
45 | class_list = self._class_id
46 | for cid in class_list:
47 | one_hot = tf.sparse_to_dense([[cid, 0]], [nclass, 1], 1.0)
48 | self._out_act = tf.reshape(self._out_act, [1, nclass])
49 | class_act = tf.matmul(self._out_act, one_hot)
50 | act_list.append(class_act)
51 |
52 | return act_list, tf.convert_to_tensor(class_list)
53 |
54 | def get_visualization(self, image):
55 | g = tf.get_default_graph()
56 |
57 | with g.gradient_override_map({'Relu': 'GuidedRelu'}):
58 | try:
59 | self._create_model(image)
60 | except ValueError:
61 | with tf.variable_scope(tf.get_variable_scope()) as scope:
62 | scope.reuse_variables()
63 | self._create_model(image)
64 | act_list, class_list = self._get_activation()
65 |
66 | with tf.name_scope('guided_back_pro_map'):
67 | guided_back_pro_list = []
68 | for class_act in act_list:
69 | guided_back_pro = tf.gradients(
70 | class_act, self._vis_model.layer['input'])
71 | guided_back_pro_list.append(guided_back_pro)
72 |
73 | self.visual_map = guided_back_pro_list
74 | self.class_list = class_list
75 | return guided_back_pro_list, class_list
76 |
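77 |
78 | def np_guided_relu_grad(grad, relu_output):
79 |     """NumPy restatement of the GuidedRelu gradient registered above, for
80 |     reference only: the incoming gradient is passed through only where both the
81 |     ReLU output and the gradient itself are positive.
82 |
83 |     Args:
84 |         grad: numpy array, gradient flowing back into the ReLU.
85 |         relu_output: numpy array, forward-pass output of the same ReLU.
86 |     """
87 |     return grad * (grad > 0) * (relu_output > 0)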
--------------------------------------------------------------------------------
/lib/models/invert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: invert.py
4 | # Author: Qian Ge
5 |
6 | import tensorflow as tf
7 | import numpy as np
8 |
9 | from tensorcv.models.base import BaseModel
10 |
11 |
12 | class InvertCNN(BaseModel):
13 | def __init__(self, im_h, im_w, im_c, input_mean=0, input_std=1.0, mean_list=None):
14 | init = tf.random_normal([1, im_h, im_w, im_c])
15 | self.invert_im = tf.get_variable('invert_im',
16 | initializer=init,
17 | # shape=[1, im_h, im_w, im_c],
18 | trainable=True)
19 |
20 |
21 | self._mean = mean_list
22 | self._input_std = input_std
23 |
24 | def _total_variation(self, image):
25 | var_x = tf.pow(image[:, 1:, :-1, :] - image[:, :-1, :-1, :], 2)
26 | var_y = tf.pow(image[:, :-1, 1:, :] - image[:, :-1, :-1, :], 2)
27 | return tf.reduce_sum(var_x + var_y)
28 |
29 | def get_loss(self, feat_invert, feat_im):
30 | self.mse_loss = 5e-4 * tf.losses.mean_squared_error(feat_invert, feat_im)
31 | self.vt_loss = 0.0000005 * self._total_variation(self.invert_im)
32 | self.loss = 1000 * self.mse_loss + 0*self.vt_loss
33 | return self.loss
34 |
35 | def optimize_image(self, feat_invert, feat_im):
36 | loss = self.get_loss(feat_invert, feat_im)
37 | # opt = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
38 | opt = tf.train.AdamOptimizer(learning_rate=0.1)
39 | return opt.minimize(loss)
40 |
41 | def get_opt_im(self):
42 | im = self.invert_im
43 | # if self._mean is not None:
44 | # im = self._add_mean(im)
45 | return im
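46 |
47 |
48 | def np_total_variation(image):
49 |     """NumPy restatement of InvertCNN._total_variation() above, for reference
50 |     only: the sum of squared differences between neighboring pixels, which
51 |     penalizes high-frequency noise in the inverted image.
52 |
53 |     Args:
54 |         image: [batch, h, w, c] numpy array.
55 |     """
56 |     var_x = (image[:, 1:, :-1, :] - image[:, :-1, :-1, :]) ** 2
57 |     var_y = (image[:, :-1, 1:, :] - image[:, :-1, :-1, :]) ** 2
58 |     return (var_x + var_y).sum()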
--------------------------------------------------------------------------------
/lib/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/nets/__init__.py
--------------------------------------------------------------------------------
/lib/nets/googlenet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: googlenet.py
4 | # Author: Qian Ge
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 | from tensorflow.contrib.framework import add_arg_scope
9 |
10 | from tensorcv.models.layers import conv, fc, global_avg_pool, dropout, max_pool
11 | from tensorcv.models.base import BaseModel
12 |
13 |
14 | MEAN = [103.939, 116.779, 123.68]
15 |
16 | @add_arg_scope
17 | def inception_layer(inputs,
18 | conv_11_size,
19 | conv_33_reduce_size, conv_33_size,
20 | conv_55_reduce_size, conv_55_size,
21 | pool_size,
22 | data_dict={},
23 | trainable=False,
24 | name='inception'):
25 |
26 | arg_scope = tf.contrib.framework.arg_scope
27 | with arg_scope([conv], nl=tf.nn.relu, trainable=trainable,
28 | data_dict=data_dict):
29 | conv_11 = conv(inputs, 1, conv_11_size, '{}_1x1'.format(name))
30 |
31 | conv_33_reduce = conv(inputs, 1, conv_33_reduce_size,
32 | '{}_3x3_reduce'.format(name))
33 | conv_33 = conv(conv_33_reduce, 3, conv_33_size, '{}_3x3'.format(name))
34 |
35 | conv_55_reduce = conv(inputs, 1, conv_55_reduce_size,
36 | '{}_5x5_reduce'.format(name))
37 | conv_55 = conv(conv_55_reduce, 5, conv_55_size, '{}_5x5'.format(name))
38 |
39 | pool = max_pool(inputs, '{}_pool'.format(name), stride=1,
40 | padding='SAME', filter_size=3)
41 | convpool = conv(pool, 1, pool_size, '{}_pool_proj'.format(name))
42 |
43 | return tf.concat([conv_11, conv_33, conv_55, convpool],
44 | 3, name='{}_concat'.format(name))
45 |
46 | class BaseGoogLeNet(BaseModel):
47 | def __init__(self, pre_train_path, is_load=True):
48 | self.data_dict = {}
49 | if is_load:
50 | assert pre_train_path is not None
51 | self.data_dict = np.load(pre_train_path,
52 | encoding='latin1').item()
53 |
54 | self.inputs = tf.placeholder(tf.float32,
55 | [None, None, None, 3],
56 | name='input')
57 |
58 |
59 | input_bgr = self._sub_mean(self.inputs)
60 | self._creat_googlenet(input_bgr, self.data_dict)
61 |
62 | def _sub_mean(self, inputs):
63 | with tf.name_scope('input'):
64 | input_im = inputs
65 |
66 | # Convert RGB image to BGR image
67 | red, green, blue = tf.split(axis=3,
68 | num_or_size_splits=3,
69 | value=input_im)
70 |
71 | input_bgr = tf.concat(axis=3, values=[
72 | blue - MEAN[0],
73 | green - MEAN[1],
74 | red - MEAN[2],
75 | ])
76 | return input_bgr
77 |
78 | def get_feature_map(self, inputs, layer_key):
79 | assert layer_key in self.conv_layer
80 | with tf.variable_scope(tf.get_variable_scope()) as scope:
81 | # print(tf.get_default_graph().get_name_scope())
82 | scope.reuse_variables()
83 | inputs = self._sub_mean(inputs)
84 | self._creat_googlenet(inputs, self.data_dict)
85 | return self.conv_layer[layer_key]
86 |
87 | def _creat_googlenet(self,
88 | inputs,
89 | data_dict,
90 | trainable=False):
91 | self.conv_layer = {}
92 |
93 | arg_scope = tf.contrib.framework.arg_scope
94 | with arg_scope([conv], trainable=trainable,
95 | data_dict=data_dict, nl=tf.nn.relu):
96 | conv1 = conv(inputs, 7, 64, name='conv1_7x7_s2', stride=2)
97 | padding1 = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
98 | conv1_pad = tf.pad(conv1, padding1, 'CONSTANT')
99 | pool1 = max_pool(
100 | conv1_pad, 'pool1', padding='VALID', filter_size=3, stride=2)
101 | pool1_lrn = tf.nn.local_response_normalization(
102 | pool1, depth_radius=2, alpha=2e-05, beta=0.75,
103 | name='pool1_lrn')
104 |
105 | conv2_reduce = conv(pool1_lrn, 1, 64, name='conv2_3x3_reduce')
106 | conv2 = conv(conv2_reduce, 3, 192, name='conv2_3x3')
107 | padding2 = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
108 | conv2_pad = tf.pad(conv2, padding2, 'CONSTANT')
109 | pool2 = max_pool(
110 | conv2_pad, 'pool2', padding='VALID', filter_size=3, stride=2)
111 | pool2_lrn = tf.nn.local_response_normalization(
112 | pool2, depth_radius=2, alpha=2e-05, beta=0.75,
113 | name='pool2_lrn')
114 |
115 | with arg_scope([inception_layer],
116 | trainable=trainable,
117 | data_dict=data_dict):
118 | inception3a = inception_layer(
119 | pool2_lrn, 64, 96, 128, 16, 32, 32, name='inception_3a')
120 | inception3b = inception_layer(
121 | inception3a, 128, 128, 192, 32, 96, 64, name='inception_3b')
122 | pool3 = max_pool(
123 | inception3b, 'pool3', padding='SAME', filter_size=3, stride=2)
124 |
125 | inception4a = inception_layer(
126 | pool3, 192, 96, 208, 16, 48, 64, name='inception_4a')
127 | inception4b = inception_layer(
128 | inception4a, 160, 112, 224, 24, 64, 64, name='inception_4b')
129 | inception4c = inception_layer(
130 | inception4b, 128, 128, 256, 24, 64, 64, name='inception_4c')
131 | inception4d = inception_layer(
132 | inception4c, 112, 144, 288, 32, 64, 64, name='inception_4d')
133 | inception4e = inception_layer(
134 | inception4d, 256, 160, 320, 32, 128, 128, name='inception_4e')
135 | pool4 = max_pool(
136 | inception4e, 'pool4', padding='SAME', filter_size=3, stride=2)
137 |
138 | inception5a = inception_layer(
139 | pool4, 256, 160, 320, 32, 128, 128, name='inception_5a')
140 | inception5b = inception_layer(
141 | inception5a, 384, 192, 384, 48, 128, 128, name='inception_5b')
142 |
143 | self.conv_layer['conv1_7x7_s2'] = conv1
144 | self.conv_layer['conv2_3x3'] = conv2
145 | self.conv_layer['inception3a'] = inception3a
146 | self.conv_layer['inception3b'] = inception3b
147 | self.conv_layer['inception4a'] = inception4a
148 | self.conv_layer['inception4b'] = inception4b
149 | self.conv_layer['inception4c'] = inception4c
150 | self.conv_layer['inception4d'] = inception4d
151 | self.conv_layer['inception4e'] = inception4e
152 | self.conv_layer['inception5a'] = inception5a
153 | self.conv_layer['inception5b'] = inception5b
154 |
155 | return inception5b
156 |
157 |
--------------------------------------------------------------------------------
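A minimal usage sketch (not part of the repository) for BaseGoogLeNet above: build the graph, then fetch one of the feature maps registered in conv_layer. The weight file name is a placeholder for the converted Caffe GoogLeNet parameters stored as an .npy dict.

    import numpy as np
    import tensorflow as tf
    from lib.nets.googlenet import BaseGoogLeNet

    net = BaseGoogLeNet(pre_train_path='googlenet.npy')  # hypothetical path
    feat_op = net.conv_layer['inception4e']  # registered in __init__

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        im = np.random.rand(1, 224, 224, 3) * 255.0  # stand-in RGB image
        feat = sess.run(feat_op, feed_dict={net.inputs: im})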
/lib/nets/layers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: layers.py
4 | # Author: Qian Ge
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 | from tensorflow.contrib.framework import add_arg_scope
9 |
10 | from tensorcv.models.layers import *
11 |
12 |
13 | @add_arg_scope
14 | def transpose_conv(x,
15 | filter_size,
16 | out_dim,
17 | data_dict,
18 | out_shape=None,
19 | use_bias=True,
20 | reuse=False,
21 | stride=2,
22 | padding='SAME',
23 | trainable=False,
24 | nl=tf.identity,
25 | name='dconv'):
26 |
27 | stride = get_shape4D(stride)
28 |
29 | in_dim = x.get_shape().as_list()[-1]
30 |
31 | # TODO other ways to determine the output shape
32 | x_shape = tf.shape(x)
33 | # assume output shape is input_shape*stride
34 | if out_shape is None:
35 | out_shape = tf.stack([x_shape[0],
36 | tf.multiply(x_shape[1], stride[1]),
37 | tf.multiply(x_shape[2], stride[2]),
38 | out_dim])
39 |
40 | filter_shape = get_shape2D(filter_size) + [out_dim, in_dim]
41 |
42 | with tf.variable_scope(name) as scope:
43 | if reuse == True:
44 | scope.reuse_variables()
45 | init_w = None
46 | init_b = None
47 | else:
48 | try:
49 | load_data = data_dict[name][0]
50 | except KeyError:
51 | load_data = data_dict[name]['weights']
52 | print('Load {} weights!'.format(name))
53 | # load_data = np.reshape(load_data, shape)
54 | # load_data = tf.nn.l2_normalize(
55 | # tf.transpose(load_data, perm=[1, 0, 2, 3]))
56 | # load_data = tf.transpose(load_data, perm=[1, 0, 2, 3])
57 | init_w = tf.constant_initializer(load_data)
58 |
59 | if use_bias:
60 | try:
61 | load_data = data_dict[name][1]
62 | except KeyError:
63 | load_data = data_dict[name]['biases']
64 | print('Load {} biases!'.format(name))
65 | init_b = tf.constant_initializer(load_data)
66 |
67 | weights = tf.get_variable('weights',
68 | filter_shape,
69 | initializer=init_w,
70 | trainable=trainable)
71 | if use_bias:
72 | biases = tf.get_variable('biases',
73 | [in_dim],
74 | initializer=init_b,
75 | trainable=trainable)
76 | x = tf.nn.bias_add(x, -biases)
77 |
78 | output = tf.nn.conv2d_transpose(x,
79 | weights,
80 | output_shape=out_shape,
81 | strides=stride,
82 | padding=padding,
83 | name=scope.name)
84 |
85 | # if use_bias:
86 | # output = tf.nn.bias_add(output, biases)
87 | # TODO need test
88 | output.set_shape([None, None, None, out_dim])
89 |
90 | output = nl(output, name='output')
91 | return output
92 |
93 |
94 | # https://github.com/tensorflow/tensorflow/pull/16885
95 | def unpool_2d(pool,
96 | ind,
97 | stride=[1, 2, 2, 1],
98 | scope='unpool_2d'):
99 | """Adds a 2D unpooling op.
100 | https://arxiv.org/abs/1505.04366
101 | Unpooling layer after max_pool_with_argmax.
102 | Args:
103 | pool: max pooled output tensor
104 | ind: argmax indices
105 | stride: stride is the same as for the pool
106 | Return:
107 | unpool: unpooling tensor
108 | """
109 |
110 | with tf.variable_scope(scope):
111 | ind_shape = tf.shape(ind)
112 | # pool = pool[:, :ind_shape[1], :ind_shape[2], :]
113 |
114 | input_shape = tf.shape(pool)
115 | output_shape = [input_shape[0],
116 | input_shape[1] * stride[1],
117 | input_shape[2] * stride[2],
118 | input_shape[3]]
119 |
120 | flat_input_size = tf.reduce_prod(input_shape)
121 | flat_output_shape = [output_shape[0],
122 | output_shape[1] * output_shape[2] * output_shape[3]]
123 |
124 | pool_ = tf.reshape(pool, [flat_input_size])
125 | batch_range = tf.reshape(
126 | tf.range(tf.cast(output_shape[0], tf.int64), dtype=ind.dtype),
127 | shape=[input_shape[0], 1, 1, 1])
128 | b = tf.ones_like(ind) * batch_range
129 | b1 = tf.reshape(b, [flat_input_size, 1])
130 | ind_ = tf.reshape(ind, [flat_input_size, 1])
131 | ind_ = tf.concat([b1, ind_], 1)
132 |
133 | ret = tf.scatter_nd(ind_, pool_, shape=tf.cast(flat_output_shape, tf.int64))
134 | ret = tf.reshape(ret, output_shape)
135 |
136 | set_input_shape = pool.get_shape()
137 | set_output_shape = [set_input_shape[0],
138 | set_input_shape[1] * stride[1],
139 | set_input_shape[2] * stride[2],
140 | set_input_shape[3]]
141 | ret.set_shape(set_output_shape)
142 | return ret
143 |
144 |
145 | def max_pool(x,
146 | name='max_pool',
147 | filter_size=2,
148 | stride=None,
149 | padding='VALID',
150 | switch=False):
151 | """
152 | Max pooling layer
153 | Args:
154 | x (tf.tensor): a tensor
155 | name (str): name scope of the layer
156 | filter_size (int or list with length 2): size of filter
157 | stride (int or list with length 2): Default to be the same as shape
158 | padding (str): 'VALID' or 'SAME'. Use 'SAME' for FCN.
159 | Returns:
160 | a tuple (pooled tf.tensor named 'name', argmax switches if switch=True else None)
161 | """
162 |
163 | padding = padding.upper()
164 | filter_shape = get_shape4D(filter_size)
165 | if stride is None:
166 | stride = filter_shape
167 | else:
168 | stride = get_shape4D(stride)
169 |
170 | if switch == True:
171 | return tf.nn.max_pool_with_argmax(
172 | x,
173 | ksize=filter_shape,
174 | strides=stride,
175 | padding=padding,
176 | Targmax=tf.int64,
177 | name=name)
178 | else:
179 | return tf.nn.max_pool(
180 | x,
181 | ksize=filter_shape,
182 | strides=stride,
183 | padding=padding,
184 | name=name), None
185 |
186 |
187 |
188 |
--------------------------------------------------------------------------------
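A small sketch (not part of the repository) of the max_pool / unpool_2d pair defined above: pooling with switch=True also returns the argmax locations, and unpool_2d scatters each pooled value back to its recorded position (zeros elsewhere), which is how the deconvnet reverses pooling.

    import tensorflow as tf
    import lib.nets.layers as L

    x = tf.placeholder(tf.float32, [None, 8, 8, 16])
    # switch=True returns (pooled tensor, argmax switches)
    pooled, switches = L.max_pool(x, 'pool_demo', filter_size=2, stride=2,
                                  padding='SAME', switch=True)
    # place each pooled value back at its recorded location, zeros elsewhere
    unpooled = L.unpool_2d(pooled, switches, stride=[1, 2, 2, 1])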
/lib/nets/vgg.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: vgg.py
4 | # Author: Qian Ge
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 |
9 | from tensorcv.models.layers import *
10 | from tensorcv.models.base import BaseModel
11 |
12 | import lib.nets.layers as L
13 |
14 |
15 | VGG_MEAN = [103.939, 116.779, 123.68]
16 |
17 |
18 | def resize_tensor_image_with_smallest_side(image, small_size):
19 | """
20 | Resize image tensor with smallest side = small_size and
21 | keep the original aspect ratio.
22 |
23 | Args:
24 | image (tf.tensor): 4-D Tensor of shape
25 | [batch, height, width, channels]
26 | or 3-D Tensor of shape [height, width, channels].
27 | small_size (int): The smallest side of the resized image.
28 |
29 | Returns:
30 | Image tensor with the original aspect ratio and
31 | smallest side = small_size.
32 | If the input was 4-D, a 4-D float Tensor of shape
33 | [batch, new_height, new_width, channels].
34 | If the input was 3-D, a 3-D float Tensor of shape
35 | [new_height, new_width, channels].
36 | """
37 | im_shape = tf.shape(image)
38 | shape_dim = image.get_shape()
39 | if len(shape_dim) <= 3:
40 | height = tf.cast(im_shape[0], tf.float32)
41 | width = tf.cast(im_shape[1], tf.float32)
42 | else:
43 | height = tf.cast(im_shape[1], tf.float32)
44 | width = tf.cast(im_shape[2], tf.float32)
45 |
46 | height_smaller_than_width = tf.less_equal(height, width)
47 |
48 | new_shorter_edge = tf.constant(small_size, tf.float32)
49 | new_height, new_width = tf.cond(
50 | height_smaller_than_width,
51 | lambda: (new_shorter_edge, (width / height) * new_shorter_edge),
52 | lambda: ((height / width) * new_shorter_edge, new_shorter_edge))
53 |
54 | return tf.image.resize_images(
55 | tf.cast(image, tf.float32),
56 | [tf.cast(new_height, tf.int32), tf.cast(new_width, tf.int32)])
57 |
58 |
59 | class BaseVGG(BaseModel):
60 | """ base of VGG class """
61 | def __init__(self, num_class=1000,
62 | num_channels=3,
63 | im_height=224, im_width=224,
64 | learning_rate=0.0001,
65 | is_load=False,
66 | pre_train_path=None,
67 | is_rescale=False):
68 | """
69 | Args:
70 | num_class (int): number of image classes
71 | num_channels (int): number of input channels
72 | im_height, im_width (int): size of input image
73 | Can be unknown when testing.
74 | learning_rate (float): learning rate of training
75 | """
76 |
77 | self.learning_rate = learning_rate
78 | self.num_channels = num_channels
79 | self.im_height = im_height
80 | self.im_width = im_width
81 | self.num_class = num_class
82 | self._is_rescale = is_rescale
83 |
84 | self.layer = {}
85 |
86 | self._is_load = is_load
87 | if self._is_load and pre_train_path is None:
88 | raise ValueError('pre_train_path can not be None!')
89 | self._pre_train_path = pre_train_path
90 |
91 | self.set_is_training(True)
92 |
93 | def _create_input(self):
94 | self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
95 | self.image = tf.placeholder(
96 | tf.float32, name='image',
97 | shape=[None, self.im_height, self.im_width, self.num_channels])
98 |
99 | self.label = tf.placeholder(tf.int64, [None], 'label')
100 |
101 | self.set_model_input([self.image, self.keep_prob])
102 | self.set_dropout(self.keep_prob, keep_prob=0.5)
103 | self.set_train_placeholder([self.image, self.label])
104 | self.set_prediction_placeholder(self.image)
105 |
106 |
107 | class VGG19(BaseVGG):
108 |
109 | def _create_conv(self, input_im, data_dict):
110 |
111 | arg_scope = tf.contrib.framework.arg_scope
112 | with arg_scope([conv], nl=tf.nn.relu,
113 | trainable=True, data_dict=data_dict):
114 | conv1_1 = conv(input_im, 3, 64, 'conv1_1')
115 | conv1_2 = conv(conv1_1, 3, 64, 'conv1_2')
116 | pool1 = max_pool(conv1_2, 'pool1', padding='SAME')
117 |
118 | conv2_1 = conv(pool1, 3, 128, 'conv2_1')
119 | conv2_2 = conv(conv2_1, 3, 128, 'conv2_2')
120 | pool2 = max_pool(conv2_2, 'pool2', padding='SAME')
121 |
122 | conv3_1 = conv(pool2, 3, 256, 'conv3_1')
123 | conv3_2 = conv(conv3_1, 3, 256, 'conv3_2')
124 | conv3_3 = conv(conv3_2, 3, 256, 'conv3_3')
125 | conv3_4 = conv(conv3_3, 3, 256, 'conv3_4')
126 | pool3 = max_pool(conv3_4, 'pool3', padding='SAME')
127 |
128 | conv4_1 = conv(pool3, 3, 512, 'conv4_1')
129 | conv4_2 = conv(conv4_1, 3, 512, 'conv4_2')
130 | conv4_3 = conv(conv4_2, 3, 512, 'conv4_3')
131 | conv4_4 = conv(conv4_3, 3, 512, 'conv4_4')
132 | pool4 = max_pool(conv4_4, 'pool4', padding='SAME')
133 |
134 | conv5_1 = conv(pool4, 3, 512, 'conv5_1')
135 | conv5_2 = conv(conv5_1, 3, 512, 'conv5_2')
136 | conv5_3 = conv(conv5_2, 3, 512, 'conv5_3')
137 | conv5_4 = conv(conv5_3, 3, 512, 'conv5_4')
138 | pool5 = max_pool(conv5_4, 'pool5', padding='SAME')
139 |
140 | self.layer['conv1_2'] = conv1_2
141 | self.layer['conv2_2'] = conv2_2
142 | self.layer['conv3_4'] = conv3_4
143 | self.layer['conv4_4'] = conv4_4
144 | self.layer['pool5'] = pool5
145 | self.layer['conv_out'] = self.layer['conv5_4'] = conv5_4
146 |
147 | return pool5
148 |
149 | def _create_model(self):
150 |
151 | with tf.name_scope('input'):
152 | input_im = self.model_input[0]
153 | keep_prob = self.model_input[1]
154 |
155 | input_im = tf.reshape(input_im, [-1, 224, 224, 3])
156 |
157 | self.layer['input'] = input_im
158 | # Convert RGB image to BGR image
159 | red, green, blue = tf.split(axis=3,
160 | num_or_size_splits=3,
161 | value=input_im)
162 |
163 | input_bgr = tf.concat(axis=3, values=[
164 | blue - VGG_MEAN[0],
165 | green - VGG_MEAN[1],
166 | red - VGG_MEAN[2],
167 | ])
168 |
169 | data_dict = {}
170 | if self._is_load:
171 | data_dict = np.load(self._pre_train_path,
172 | encoding='latin1').item()
173 |
174 | conv_output = self._create_conv(input_bgr, data_dict)
175 |
176 | arg_scope = tf.contrib.framework.arg_scope
177 | with arg_scope([fc], trainable=True, data_dict=data_dict):
178 | fc6 = fc(conv_output, 4096, 'fc6', nl=tf.nn.relu)
179 | dropout_fc6 = dropout(fc6, keep_prob, self.is_training)
180 |
181 | fc7 = fc(dropout_fc6, 4096, 'fc7', nl=tf.nn.relu)
182 | dropout_fc7 = dropout(fc7, keep_prob, self.is_training)
183 |
184 | fc8 = fc(dropout_fc7, self.num_class, 'fc8')
185 |
186 | self.layer['fc6'] = fc6
187 | self.layer['fc7'] = fc7
188 | self.layer['fc8'] = self.layer['output'] = fc8
189 |
190 |
191 | class VGG19_FCN(VGG19):
192 |
193 | def _create_model(self):
194 |
195 | with tf.name_scope('input'):
196 | input_im = self.model_input[0]
197 | keep_prob = self.model_input[1]
198 |
199 | if self._is_rescale:
200 | input_im =\
201 | resize_tensor_image_with_smallest_side(input_im, 224)
202 | self.layer['input'] = input_im
203 |
204 | # Convert rgb image to bgr image
205 | red, green, blue = tf.split(axis=3, num_or_size_splits=3,
206 | value=input_im)
207 |
208 | input_bgr = tf.concat(axis=3, values=[
209 | blue - VGG_MEAN[0],
210 | green - VGG_MEAN[1],
211 | red - VGG_MEAN[2],
212 | ])
213 |
214 | data_dict = {}
215 | if self._is_load:
216 | data_dict = np.load(self._pre_train_path,
217 | encoding='latin1').item()
218 |
219 | conv_output = self._create_conv(input_bgr, data_dict)
220 |
221 | arg_scope = tf.contrib.framework.arg_scope
222 | with arg_scope([conv], trainable=True,
223 | data_dict=data_dict, padding='VALID'):
224 |
225 | fc6 = conv(conv_output, 7, 4096, 'fc6', nl=tf.nn.relu)
226 | dropout_fc6 = dropout(fc6, keep_prob, self.is_training)
227 |
228 | fc7 = conv(dropout_fc6, 1, 4096, 'fc7', nl=tf.nn.relu)
229 | dropout_fc7 = dropout(fc7, keep_prob, self.is_training)
230 |
231 | fc8 = conv(dropout_fc7, 1, self.num_class, 'fc8')
232 |
233 | self.layer['fc6'] = fc6
234 | self.layer['fc7'] = fc7
235 | self.layer['fc8'] = self.layer['output'] = fc8
236 |
237 | self.output = tf.identity(fc8, 'model_output')
238 |
239 | self.avg_output = global_avg_pool(fc8)
240 |
241 |
242 | class BaseVGG19(BaseModel):
243 | def __init__(self):
244 |
245 | self._trainable = False
246 | self._switch = False
247 |
248 | def _sub_mean(self, inputs):
249 | VGG_MEAN = [103.939, 116.779, 123.68]
250 | red, green, blue = tf.split(axis=3,
251 | num_or_size_splits=3,
252 | value=inputs)
253 | input_bgr = tf.concat(axis=3, values=[
254 | blue - VGG_MEAN[0],
255 | green - VGG_MEAN[1],
256 | red - VGG_MEAN[2],
257 | ])
258 | return input_bgr
259 |
260 | def _creat_conv(self, inputs, layer_dict, data_dict={}):
261 |
262 | self.receptive_s = 1
263 | self.stride_t = 1
264 | self.receptive_size = {}
265 | self.stride = {}
266 | self.cur_input = inputs
267 |
268 | def conv_layer(filter_size, out_dim, name):
269 | init_w = tf.keras.initializers.he_normal()
270 | # init_w = None
271 | layer_dict[name] = conv(self.cur_input, filter_size, out_dim, name, init_w=init_w)
272 | self.receptive_s = self.receptive_s + (filter_size - 1) * self.stride_t
273 | self.receptive_size[name] = self.receptive_s
274 | self.stride[name] = self.stride_t
275 | self.cur_input = layer_dict[name]
276 |
277 | def pool_layer(name, switch=True, padding='SAME'):
278 | layer_dict[name], layer_dict['switch_{}'.format(name)] =\
279 | L.max_pool(self.cur_input, name, padding=padding, switch=switch)
280 | self.receptive_s = self.receptive_s + self.stride_t
281 | self.receptive_size[name] = self.receptive_s
282 | self.stride_t = self.stride_t * 2
283 | self.stride[name] = self.stride_t
284 | self.cur_input = layer_dict[name]
285 |
286 | arg_scope = tf.contrib.framework.arg_scope
287 | with arg_scope([conv], nl=tf.nn.relu,
288 | trainable=self._trainable, data_dict=data_dict):
289 |
290 | conv_layer(3, 64, 'conv1_1')
291 | conv_layer(3, 64, 'conv1_2')
292 | pool_layer('pool1', switch=self._switch)
293 |
294 | conv_layer(3, 128, 'conv2_1')
295 | conv_layer(3, 128, 'conv2_2')
296 | pool_layer('pool2', switch=self._switch)
297 |
298 | conv_layer(3, 256, 'conv3_1')
299 | conv_layer(3, 256, 'conv3_2')
300 | conv_layer(3, 256, 'conv3_3')
301 | conv_layer(3, 256, 'conv3_4')
302 | pool_layer('pool3', switch=self._switch)
303 |
304 | conv_layer(3, 512, 'conv4_1')
305 | conv_layer(3, 512, 'conv4_2')
306 | conv_layer(3, 512, 'conv4_3')
307 | conv_layer(3, 512, 'conv4_4')
308 | pool_layer('pool4', switch=self._switch)
309 |
310 | conv_layer(3, 512, 'conv5_1')
311 | conv_layer(3, 512, 'conv5_2')
312 | conv_layer(3, 512, 'conv5_3')
313 | conv_layer(3, 512, 'conv5_4')
314 | pool_layer('pool5', switch=self._switch)
315 |
316 | return self.cur_input
317 |
318 |
319 | def threshold_tensor(x, thr, thr_type):
320 | cond = thr_type(x, tf.ones(tf.shape(x)) * thr)
321 | out = tf.where(cond, x, tf.zeros(tf.shape(x)))
322 |
323 | return out
324 |
325 | class DeconvBaseVGG19(BaseVGG19):
326 | def __init__(self, pre_train_path, feat_key, pick_feat=None):
327 |
328 | self.data_dict = np.load(pre_train_path,
329 | encoding='latin1').item()
330 |
331 | self.im = tf.placeholder(tf.float32,
332 | [None, None, None, 3],
333 | name='im')
334 |
335 | self._feat_key = feat_key
336 | self._pick_feat = pick_feat
337 | self._trainable = False
338 | self._switch = True
339 | self.layers = {}
340 | self._create_model()
341 |
342 | def _create_model(self):
343 | input_im = self._sub_mean(self.im)
344 | self._creat_conv(input_im, self.layers, data_dict=self.data_dict)
345 |
346 | cur_feats = self.layers[self._feat_key]
347 | try:
348 | self.max_act = tf.reduce_max(cur_feats[:, :, :, self._pick_feat])
349 | self.feats = threshold_tensor(cur_feats, self.max_act, tf.equal)
350 | except ValueError:
351 | # else:
352 | self.max_act = tf.reduce_max(cur_feats)
353 | self.feats = threshold_tensor(cur_feats, self.max_act, tf.greater_equal)
354 |
355 | self.layers['de{}'.format(self._feat_key)] = self.feats
356 | self._create_deconv(self.layers, data_dict=self.data_dict)
357 |
358 | def _create_deconv(self, layer_dict, data_dict={}):
359 | def deconv_block(input_key, output_key, n_feat, name):
360 | try:
361 | layer_dict[output_key] =\
362 | L.transpose_conv(layer_dict[input_key],
363 | out_dim=n_feat,
364 | name=name,
365 | )
366 | except KeyError:
367 | pass
368 |
369 | def unpool_block(input_key, output_key, switch_key, name):
370 | try:
371 | layer_dict[output_key] =\
372 | L.unpool_2d(layer_dict[input_key],
373 | layer_dict[switch_key],
374 | stride=[1, 2, 2, 1],
375 | scope=name)
376 | except KeyError:
377 | pass
378 |
379 | arg_scope = tf.contrib.framework.arg_scope
380 | with arg_scope([L.transpose_conv],
381 | filter_size=3,
382 | nl=tf.nn.relu,
383 | trainable=False,
384 | data_dict=data_dict,
385 | use_bias=False,
386 | stride=1,
387 | reuse=True):
388 |
389 | deconv_block('deconv5_4', 'deconv5_3', 512, 'conv5_4')
390 | deconv_block('deconv5_3', 'deconv5_2', 512, 'conv5_3')
391 | deconv_block('deconv5_2', 'deconv5_1', 512, 'conv5_2')
392 | deconv_block('deconv5_1', 'depool4', 512, 'conv5_1')
393 | unpool_block('depool4', 'deconv4_4', 'switch_pool4', 'unpool4')
394 |
395 | deconv_block('deconv4_4', 'deconv4_3', 512, 'conv4_4')
396 | deconv_block('deconv4_3', 'deconv4_2', 512, 'conv4_3')
397 | deconv_block('deconv4_2', 'deconv4_1', 512, 'conv4_2')
398 | deconv_block('deconv4_1', 'depool3', 256, 'conv4_1')
399 | unpool_block('depool3', 'deconv3_4', 'switch_pool3', 'unpool3')
400 |
401 | deconv_block('deconv3_4', 'deconv3_3', 256, 'conv3_4')
402 | deconv_block('deconv3_3', 'deconv3_2', 256, 'conv3_3')
403 | deconv_block('deconv3_2', 'deconv3_1', 256, 'conv3_2')
404 | deconv_block('deconv3_1', 'depool2', 128, 'conv3_1')
405 | unpool_block('depool2', 'deconv2_2', 'switch_pool2', 'unpool2')
406 |
407 | deconv_block('deconv2_2', 'deconv2_1', 128, 'conv2_2')
408 | deconv_block('deconv2_1', 'depool1', 64, 'conv2_1')
409 | unpool_block('depool1', 'deconv1_2', 'switch_pool1', 'unpool1')
410 |
411 | deconv_block('deconv1_2', 'deconv1_1', 64, 'conv1_2')
412 |
413 | layer_dict['deconvim'] =\
414 | L.transpose_conv(layer_dict['deconv1_1'],
415 | 3,
416 | 3,
417 | trainable=False,
418 | data_dict=data_dict,
419 | reuse=True,
420 | use_bias=False,
421 | stride=1,
422 | name='conv1_1')
423 |
424 |
--------------------------------------------------------------------------------
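A minimal sketch (not part of the repository) of driving DeconvBaseVGG19 above; the full pipeline appears to live in example/deconv.py. It builds the forward and deconv graph for one layer, feeds an image, and reads back the reconstruction at the input resolution. The weight file name is a placeholder for the pre-trained VGG19 .npy file.

    import numpy as np
    import tensorflow as tf
    from lib.nets.vgg import DeconvBaseVGG19

    # 'vgg19.npy' is a hypothetical path to the pre-trained VGG19 weights
    model = DeconvBaseVGG19('vgg19.npy', feat_key='conv4_4', pick_feat=None)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        im = np.random.rand(1, 224, 224, 3) * 255.0  # stand-in RGB image
        deconv_im = sess.run(model.layers['deconvim'],
                             feed_dict={model.im: im})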
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/conan7882/CNN-Visualization/e08650cc126a3a489f7d633dc18bf3f0009792b1/lib/utils/__init__.py
--------------------------------------------------------------------------------
/lib/utils/image.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: image.py
4 | # Author: Qian Ge
5 |
6 | from scipy import misc
7 |
8 | def im_rescale(im, resize):
9 | im_shape = im.shape
10 | im = misc.imresize(im, (resize[0], resize[1], im_shape[-1]))
11 | return im
--------------------------------------------------------------------------------
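im_rescale is a thin wrapper around scipy.misc.imresize (available in scipy < 1.2 with pillow installed); a quick sketch of the expected call:

    import numpy as np
    from lib.utils.image import im_rescale

    im = (np.random.rand(480, 640, 3) * 255).astype('uint8')  # stand-in image
    small = im_rescale(im, [224, 224])  # -> (224, 224, 3)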
/lib/utils/normalize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: normalize.py
4 | # Author: Qian Ge
5 |
6 | import numpy as np
7 |
8 | def indentity(filter_in):
9 | return filter_in
10 |
11 | def norm_std(filter_in):
12 | """ Normalization of conv2d filters for visualization
13 | https://github.com/jacobgil/keras-filter-visualization/blob/master/utils.py
14 |
15 | Args:
16 | filter_in: [size_x, size_y, n_channel]
17 |
18 | """
19 | x = filter_in
20 | x -= x.mean()
21 | x /= (x.std() + 1e-5)
22 | # keep most values within [-0.5, 0.5]
23 | x *= 0.1
24 | # move to [0, 1]
25 | x += 0.5
26 | x *= 255
27 | x = np.clip(x, 0, 255).astype('uint8')
28 | return x
29 |
30 | def norm_range(filter_in):
31 | f_min = np.amin(filter_in)
32 | f_max = np.amax(filter_in)
33 |
34 | return (filter_in - f_min) * 1.0 / (f_max - f_min + 1e-5) * 255.0
35 |
--------------------------------------------------------------------------------
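Both normalizers above map an arbitrary filter to displayable pixel values; a small sketch with a random filter as a stand-in (note that norm_std modifies its input in place):

    import numpy as np
    import lib.utils.normalize as normalize

    f = np.random.randn(7, 7, 3).astype('float32')  # stand-in 7x7 RGB filter
    vis_std = normalize.norm_std(f.copy())  # uint8, roughly centered at 128
    vis_rng = normalize.norm_range(f)       # float, rescaled into [0, 255]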
/lib/utils/viz.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: viz.py
4 | # Author: Qian Ge
5 |
6 | import numpy as np
7 | import scipy.misc
8 |
9 | import lib.utils.normalize as normlize
10 |
11 |
12 | def image_weight_mask(image, mask):
13 | """
14 | Args:
15 | image: image with size [HEIGHT, WIDTH, CHANNEL]
16 | mask: image with size [HEIGHT, WIDTH, 1] or [HEIGHT, WIDTH]
17 | """
18 | image = np.array(np.squeeze(image))
19 | mask = np.array(np.squeeze(mask))
20 | assert len(mask.shape) == 2
21 | assert len(image.shape) < 4
22 | mask = mask.astype('float32')
23 | mask = np.reshape(mask, (mask.shape[0], mask.shape[1]))
24 | mask = mask / np.amax(mask)
25 |
26 | if len(image.shape) == 2:
27 | return np.multiply(image, mask)
28 | else:
29 | for c in range(0, image.shape[2]):
30 | image[:, :, c] = np.multiply(image[:, :, c], mask)
31 | return image
32 |
33 | # def save_merge_images(images, merge_grid, save_path, color=False, tanh=False):
34 | # """Save multiple images with same size into one larger image.
35 | # The best size number is
36 | # int(max(sqrt(image.shape[0]),sqrt(image.shape[1]))) + 1
37 | # Args:
38 | # images (np.ndarray): A batch of image array to be merged with size
39 | # [BATCH_SIZE, HEIGHT, WIDTH, CHANNEL].
40 | # merge_grid (list): List of length 2. The grid size for merge images.
41 | # save_path (str): Path for saving the merged image.
42 | # color (bool): Whether convert intensity image to color image.
43 | # tanh (bool): If True, will normalize the image in range [-1, 1]
44 | # to [0, 1] (for GAN models).
45 | # Example:
46 | # The batch_size is 64, then the size is recommended [8, 8].
47 | # The batch_size is 32, then the size is recommended [6, 6].
48 | # """
49 |
50 | # # normalization of tanh output
51 | # img = images
52 |
53 | # if tanh:
54 | # img = (img + 1.0) / 2.0
55 |
56 | # if color:
57 | # # TODO
58 | # img_list = []
59 | # for im in np.squeeze(img):
60 | # im = intensity_to_rgb(np.squeeze(im), normalize=True)
61 | # img_list.append(im)
62 | # img = np.array(img_list)
63 | # # img = np.expand_dims(img, 0)
64 |
65 | # if len(img.shape) == 2 or (len(img.shape) == 3 and img.shape[2] <= 4):
66 | # img = np.expand_dims(img, 0)
67 | # # img = images
68 | # h, w = img.shape[1], img.shape[2]
69 | # merge_img = np.zeros((h * merge_grid[0], w * merge_grid[1], 3))
70 | # if len(img.shape) < 4:
71 | # img = np.expand_dims(img, -1)
72 |
73 | # for idx, image in enumerate(img):
74 | # i = idx % merge_grid[1]
75 | # j = idx // merge_grid[1]
76 | # merge_img[j*h:j*h+h, i*w:i*w+w, :] = image
77 |
78 | # scipy.misc.imsave(save_path, merge_img)
79 |
80 | def viz_filters(filters,
81 | grid_size,
82 | save_path,
83 | gap=0,
84 | gap_color=0,
85 | nf=normlize.indentity,
86 | shuffle=True):
87 | """ Visualize conv2d filters
88 |
89 | Args:
90 | filters: [size_x, size_y, n_channel, n_features]
91 | or [size_x, size_y, n_features]
92 |
93 | """
94 | filters = np.array(filters)
95 | if len(filters.shape) == 4:
96 | n_channel = filters.shape[2]
97 | elif len(filters.shape) == 3:
98 | n_channel = 1
99 | filters = np.expand_dims(filters, axis=2)
100 | # assert len(filters.shape) == 4
101 | assert len(grid_size) == 2
102 |
103 | h = filters.shape[0]
104 | w = filters.shape[1]
105 |
106 | merge_im = np.zeros((h * grid_size[0] + (grid_size[0] + 1) * gap,
107 | w * grid_size[1] + (grid_size[1] + 1) * gap,
108 | n_channel)) + gap_color
109 |
110 | n_viz_filter = min(filters.shape[-1], grid_size[0] * grid_size[1])
111 | if shuffle == True:
112 | pick_id = np.random.permutation(filters.shape[-1])
113 | else:
114 | pick_id = range(0, filters.shape[-1])
115 | for idx in range(0, n_viz_filter):
116 | i = idx % grid_size[1]
117 | j = idx // grid_size[1]
118 | cur_filter = filters[:, :, :, pick_id[idx]]
119 | merge_im[j * (h + gap) + gap: j * (h + gap) + h + gap,
120 | i * (w + gap) + gap: i * (w + gap) + w + gap, :]\
121 | = nf(cur_filter)
122 | scipy.misc.imsave(save_path, np.squeeze(merge_im))
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
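A minimal sketch (not part of the repository) of viz_filters, which tiles conv filters into a single image; the filter array is random as a stand-in for real weights, and saving relies on scipy.misc.imsave (scipy < 1.2).

    import numpy as np
    import lib.utils.normalize as normalize
    from lib.utils.viz import viz_filters

    filters = np.random.randn(7, 7, 3, 64)  # stand-in: 64 filters of 7x7x3
    viz_filters(filters, grid_size=[8, 8], save_path='filters.png',
                gap=2, gap_color=255, nf=normalize.norm_std, shuffle=False)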
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy
2 | pillow
3 | numpy
4 | matplotlib
--------------------------------------------------------------------------------
/test/setup_test_env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: setup_test_env.py
4 | # Author: Qian Ge
5 |
6 | import sys
7 | sys.path.append('lib/')
8 |
9 | IMPATH = 'data/'
10 | CLASS_IMPATH = 'data/class_test/'
11 | SAVE_DIR = 'data/'
12 |
--------------------------------------------------------------------------------
/test/test_cam.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: test_cam.py
4 | # Author: Qian Ge
5 |
6 | # import argparse
7 | from collections import namedtuple
8 | import tensorflow as tf
9 |
10 | from tensorcv.dataflow.image import ImageLabelFromFolder
11 | from tensorcv.callbacks import *
12 | from tensorcv.train.config import TrainConfig
13 | from tensorcv.train.simple import SimpleFeedTrainer
14 | from tensorcv.predicts.config import PridectConfig
15 | from tensorcv.predicts.simple import SimpleFeedPredictor
16 | from tensorcv.predicts import *
17 |
18 | from setup_test_env import *
19 | from models.cam import VGGCAM
20 |
21 | configpath = namedtuple('CONFIG_PATH', ['summary_dir'])
22 | config_path = configpath(summary_dir=SAVE_DIR)
23 |
24 | NUM_CHANNEL = 3
25 |
26 |
27 | def get_config(FLAGS):
28 | # data for training
29 | dataset_train = ImageLabelFromFolder(FLAGS.type,
30 | data_dir=CLASS_IMPATH,
31 | num_class=FLAGS.nclass,
32 | resize=224,
33 | num_channel=NUM_CHANNEL)
34 |
35 | # Print image class name and label
36 | # print(dataset_train.label_dict)
37 |
38 | training_callbacks = [
39 | # TrainSummary(key='train', periodic=1),
40 | CheckScalar(['accuracy/result', 'loss/result'], periodic=1)]
41 |
42 | inspect_class = None
43 |
44 | return TrainConfig(
45 | dataflow=dataset_train,
46 | model=VGGCAM(num_class=FLAGS.nclass,
47 | inspect_class=inspect_class,
48 | learning_rate=0.001,
49 | is_load=False),
50 | monitors=TFSummaryWriter(),
51 | callbacks=training_callbacks,
52 | batch_size=FLAGS.bsize,
53 | max_epoch=1,
54 | # summary_periodic=1,
55 | default_dirs=config_path)
56 |
57 |
58 | # def get_predict_config(FLAGS):
59 | # dataset_test = ImageFromFile(FLAGS.type,
60 | # data_dir=config_path.test_data_dir,
61 | # shuffle=False,
62 | # resize=224,
63 | # num_channel=NUM_CHANNEL)
64 | # # dataset_test = ImageLabelFromFolder('.jpg',
65 | # # data_dir = CLASS_IMPATH,
66 | # # num_class = FLAGS.nclass,
67 | # # resize = 224,
68 | # # num_channel = NUM_CHANNEL)
69 | # prediction_list = [
70 | # # PredictionScalar(['pre_label'], ['label']),
71 | # # PredictionMeanScalar('accuracy/result', 'test_accuracy'),
72 | # PredictionMat('classmap/result', ['test']),
73 | # PredictionOverlay(['classmap/result', 'image'], ['map', 'image'],
74 | # color=True, merge_im=True),
75 | # PredictionImage(['image'], ['image'], color=True, merge_im=True)]
76 |
77 | # return PridectConfig(
78 | # dataflow=dataset_test,
79 | # model=VGGCAM(num_class=FLAGS.nclass, inspect_class=FLAGS.label,
80 | # is_load=True, pre_train_path=config_path.vgg_dir),
81 | # model_name=FLAGS.model,
82 | # predictions=prediction_list,
83 | # batch_size=FLAGS.bsize,
84 | # default_dirs=config_path)
85 |
86 |
87 | # def get_args():
88 | # parser = argparse.ArgumentParser()
89 | # parser.add_argument('--bsize', default=1, type=int)
90 | # parser.add_argument('--label', default=-1, type=int,
91 | # help='Label of inspect class.')
92 | # parser.add_argument('--nclass', default=1, type=int,
93 | # help='number of image class')
94 |
95 | # parser.add_argument('--type', default='.jpg', type=str,
96 | # help='image type for training and testing')
97 |
98 | # parser.add_argument('--model', type=str,
99 | # help='file name of the trained model')
100 |
101 | # return parser.parse_args()
102 |
103 |
104 | def test_cam():
105 | inargs = namedtuple('IN_ARGS', ['bsize', 'label', 'nclass', 'type'])
106 | FLAGS = inargs(bsize=1, label=-1, nclass=1, type='.jpg')
107 |
108 | # FLAGS = get_args()
109 | config = get_config(FLAGS)
110 | SimpleFeedTrainer(config).train()
111 | tf.reset_default_graph()
112 | #
113 | # if FLAGS.train:
114 | # config = get_config(FLAGS)
115 | # SimpleFeedTrainer(config).train()
116 | # if FLAGS.predict:
117 | # config = get_predict_config(FLAGS)
118 | # SimpleFeedPredictor(config).run_predict()
119 |
120 | # 0.6861924529075623
121 |
--------------------------------------------------------------------------------
/test/test_gradcam.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: test_gradcam.py
4 | # Author: Qian Ge
5 |
6 | from itertools import count
7 |
8 | import tensorflow as tf
9 | import numpy as np
10 | from tensorcv.dataflow.image import ImageFromFile
11 | from tensorcv.utils.viz import image_overlay
12 |
13 | from setup_test_env import *
14 | from nets.vgg import VGG19_FCN
15 | from models.guided_backpro import GuideBackPro
16 | from models.grad_cam import ClassifyGradCAM
17 | from utils.viz import image_weight_mask
18 |
19 |
20 | def test_gradcam():
21 |
22 | # merge several output images in one large image
23 | merge_im = 1
24 | grid_size = np.ceil(merge_im**0.5).astype(int)
25 |
26 | # class label for Grad-CAM generation
27 | # 355 llama 543 dumbbell 605 iPod 515 hat 99 goose 283 tiger cat
28 | # 282 tabby cat 233 border collie 242 boxer
29 | # class_id = [355, 543, 605, 515]
30 | class_id = [283, 242]
31 |
32 | # initialize Grad-CAM
33 | # using VGG19
34 | gcam = ClassifyGradCAM(
35 | vis_model=VGG19_FCN(is_load=False, is_rescale=True))
36 | gbackprob = GuideBackPro(
37 | vis_model=VGG19_FCN(is_load=False, is_rescale=True))
38 |
39 | # placeholder for input image
40 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3])
41 |
42 | # create VGG19 model
43 | gcam.create_model(image)
44 | gcam.setup_graph()
45 |
46 | # generate class map and prediction label ops
47 | map_op = gcam.get_visualization(class_id=class_id)
48 | label_op = gcam.pre_label
49 |
50 | back_pro_op = gbackprob.get_visualization(image)
51 |
52 | # initialize input dataflow
53 | # change '.png' to other image types if other types of images are used
54 | input_im = ImageFromFile('.png', data_dir=IMPATH,
55 | num_channel=3, shuffle=False)
56 | input_im.set_batch_size(1)
57 |
58 | with tf.Session() as sess:
59 |
60 | sess.run(tf.global_variables_initializer())
61 |
62 | cnt = 0
63 | merge_cnt = 0
64 | o_im_list = []
65 | im = input_im.next_batch()[0]
66 | gcam_map, b_map, label, o_im =\
67 | sess.run([map_op, back_pro_op, label_op, gcam.input_im],
68 | feed_dict={image: im})
69 | print(label)
70 | o_im_list.extend(o_im)
71 | for idx, cid, cmap in zip(count(), gcam_map[1], gcam_map[0]):
72 | overlay_im = image_overlay(cmap, o_im)
73 | weight_im = image_weight_mask(b_map[0], cmap)
74 | try:
75 | weight_im_list[idx].append(weight_im)
76 | overlay_im_list[idx].append(overlay_im)
77 | except NameError:
78 | gcam_class_id = gcam_map[1]
79 | weight_im_list = [[] for i in range(len(gcam_class_id))]
80 | overlay_im_list = [[] for i in range(len(gcam_class_id))]
81 | weight_im_list[idx].append(weight_im)
82 | overlay_im_list[idx].append(overlay_im)
83 | tf.reset_default_graph()
84 |
--------------------------------------------------------------------------------
/test/test_guided_backprop.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # File: test_guided_backprop.py
4 | # Author: Qian Ge
5 |
6 | import tensorflow as tf
7 |
8 | from tensorcv.dataflow.image import ImageFromFile
9 |
10 | from setup_test_env import *
11 | from nets.vgg import VGG19_FCN
12 | from models.guided_backpro import GuideBackPro
13 |
14 |
15 | def test_guided_backprop():
16 | # placeholder for input image
17 | image = tf.placeholder(tf.float32, shape=[None, None, None, 3])
18 | # initialize input dataflow
19 | # change '.png' to other image types if other types of images are used
20 | input_im = ImageFromFile('.png', data_dir=IMPATH,
21 | num_channel=3, shuffle=False)
22 | # batch size has to be one
23 | input_im.set_batch_size(1)
24 |
25 | # initialize guided back propagation class
26 | # use VGG19 as an example
27 | # images will be rescaled to smallest side = 224 if is_rescale=True
28 | model = GuideBackPro(vis_model=VGG19_FCN(is_load=False,
29 | is_rescale=True))
30 |
31 | # get op to compute guided back propagation map
32 | # of the final output with respect to the input image
33 | back_pro_op = model.get_visualization(image)
34 |
35 | with tf.Session() as sess:
36 | sess.run(tf.global_variables_initializer())
37 |
38 | im = input_im.next_batch()[0]
39 | guided_backpro, label, o_im =\
40 | sess.run([back_pro_op, model.pre_label,
41 | model.input_im],
42 | feed_dict={image: im})
43 | print(label)
44 | tf.reset_default_graph()
45 |
46 |
--------------------------------------------------------------------------------