├── LICENSE
├── README.md
├── conv_demo.py
├── densenet.py
├── googlenet.py
├── lenet.py
├── mobilenetv1.py
├── mobilenetv2.py
├── requirements.txt
├── resnet.py
├── senet.py
├── shufflenetv1.py
├── shufflenetv2.py
├── utils.py
├── vgg.py
└── xception.py

/LICENSE:
--------------------------------------------------------------------------------

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE.
      You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [2021] [Mofan Zhou]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Tutorials of Computer Vision

This repo includes some implementations of Computer Vision algorithms using tf2+. The code is easy to read and follow.
If you can read Chinese, I have a teaching website for studying AI models.
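Every script below follows the same training scaffold on MNIST. A minimal sketch of that shared pattern (the stand-in `Dense` model here is illustrative only; swap in any `build_model()` from the scripts that follow):

```python
from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist  # downloads mnist.npz on first use

(x_train, y_train), (x_test, y_test) = load_mnist()

# stand-in model; each script replaces this with its own architecture
model = keras.Sequential([
    layers.Flatten(input_shape=(28, 28, 1)),
    layers.Dense(10),  # raw logits, no softmax
])
model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
model.fit(x_train, y_train, batch_size=32, epochs=3,
          validation_data=(x_test, y_test))
```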
All toy implementations are organised as follows:

- CNN
  - [Numpy Convolution mechanism](#ConvMechanism)
  - [LeNet](#LeNet)
  - [VGG](#VGG)
  - [GoogLeNet](#GoogLeNet)
  - [ResNet](#ResNet)
  - [DenseNet](#DenseNet)
  - [SENet](#SENet)
  - [MobileNetV1](#MobileNetV1)
  - [MobileNetV2](#MobileNetV2)
  - [Xception](#Xception)
  - [ShuffleNetV1](#ShuffleNetV1)
  - [ShuffleNetV2](#ShuffleNetV2)

# Installation
```shell script
$ git clone https://github.com/MorvanZhou/Computer-Vision
$ cd Computer-Vision
$ pip install -r requirements.txt
```
# ConvMechanism
Convolution mechanism and feature map

[code](/conv_demo.py) - [gif result](https://mofanpy.com/static/results/cv/conv_mechanism.gif)

# LeNet
[Gradient-Based Learning Applied to Document Recognition](http://www.dengfanxin.cn/wp-content/uploads/2016/03/1998Lecun.pdf)

[code](/lenet.py) - [net structure](https://mofanpy.com/static/results/cv/LeNet_structure.png)

# VGG
[Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)

Deep stacked CNN.

[code](/vgg.py) - [net structure](https://mofanpy.com/static/results/cv/VGG_structure.png)

# GoogLeNet
[Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842)

Multiple kernel sizes capture different local information.

[code](/googlenet.py) - [net structure](https://mofanpy.com/static/results/cv/GoogleLeNet_structure.png)

# ResNet
[Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)

Adds residual connections for better gradients.

[code](/resnet.py) - [net structure](https://mofanpy.com/static/results/cv/ResNet_structure.png)

# DenseNet
[Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)

Compared with ResNet, each conv has fewer filters but sees more of the previous inputs.

[code](/densenet.py) - [net structure](https://mofanpy.com/static/results/cv/DenseNet_structure.png)

# SENet
[Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507)

SE is a module that learns to scale each feature map and can be plugged into many CNN blocks;
a larger reduction_ratio shrinks the parameter count of its FC layers with limited accuracy drop.

[code](/senet.py) - [net structure](https://mofanpy.com/static/results/cv/SENet_structure.png)

# MobileNetV1
[MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861)

Decomposes a classical conv into two operations (dw+pw): a small but effective CNN optimized for mobile (CPU).
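A minimal sketch of the dw+pw idea (shapes are illustrative; the full model is in mobilenetv1.py): the depthwise conv applies one spatial filter per channel, then the pointwise 1x1 conv mixes information across channels.

```python
from tensorflow.keras import layers

def separable_conv(x, filters):
    x = layers.DepthwiseConv2D(kernel_size=3, strides=1, padding="same")(x)  # dw: per-channel spatial filter
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, kernel_size=1, strides=1)(x)  # pw: 1x1 cross-channel mixing
    return layers.ReLU()(x)
```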
[code](/mobilenetv1.py) - [net structure](https://mofanpy.com/static/results/cv/MobileNetV1_structure.png)

# MobileNetV2
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)

MobileNet v2 is v1 with a residual block and rearranged layers (residual+pw+dw+pw):

- mobilenet v1: dw > pw
- mobilenet v2: pw > dw > pw, which lets the dw see more feature maps

[code](/mobilenetv2.py) - [net structure](https://mofanpy.com/static/results/cv/MobileNetV2_structure.png)

# Xception
[Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)

Like MobileNetV2 but without the last pw (residual+pw+dw).

[code](/xception.py) - [net structure](https://mofanpy.com/static/results/cv/Xception_structure.png)

# ShuffleNetV1
[ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083)

Shuffles the output of the 1x1 conv and uses group conv to reduce connections and speed up computation.
But MobileNet is better in this case, which may be because group conv cuts off some communication between feature maps.

[code](/shufflenetv1.py) - [net structure](https://mofanpy.com/static/results/cv/ShuffleNetV1_structure.png)

# ShuffleNetV2
[ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164)

Further reduces parameters by replacing group conv with split+concat and performing the shuffle at the end of the block, which speeds up computation.
But MobileNet is better in this case, which may be because group conv cuts off some communication between feature maps.
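The split+shuffle trick in a minimal sketch (shapes are illustrative; the full block is in shufflenetv2.py): half of the channels pass through untouched, and a channel shuffle at the end lets the two halves communicate in the next block.

```python
import tensorflow as tf
from tensorflow.keras import layers

def shuffle(x, groups=2):
    # reshape -> transpose -> reshape interleaves the channel groups
    _, h, w, c = x.shape
    x = layers.Reshape((h, w, groups, c // groups))(x)
    x = layers.Permute((1, 2, 4, 3))(x)
    return layers.Reshape((h, w, c))(x)

def split_unit(x):
    x1, x2 = tf.split(x, 2, axis=-1)            # x1: untouched identity branch
    x2 = layers.Conv2D(x2.shape[-1], 1, 1)(x2)  # stand-in for the pw+dw+pw branch
    return shuffle(layers.concatenate([x1, x2], axis=-1))
```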
[code](/shufflenetv2.py) - [net structure](https://mofanpy.com/static/results/cv/ShuffleNetV2_structure.png)

--------------------------------------------------------------------------------
/conv_demo.py:
--------------------------------------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
from utils import load_mnist


def save_gif(imgs_dir):
    # collect the per-step frames in creation order and pack them into one gif
    imgs_path = [os.path.join(imgs_dir, p) for p in os.listdir(imgs_dir) if p.endswith(".png")]
    imgs = [Image.open(f) for f in sorted(imgs_path, key=os.path.getmtime)]
    path = "{}/conv_mechanism.gif".format(os.path.dirname(imgs_dir))
    if os.path.exists(path):
        os.remove(path)
    imgs[0].save(path, append_images=imgs[1:], optimize=False, save_all=True, duration=20, loop=0)
    print("saved ", path)


def show_conv(save_dir, image):
    kernel = np.array([
        [1, 1, 1],
        [0, 0, 0],
        [-1, -1, -1]])
    plt.figure(0, figsize=(9, 5))
    ax1 = plt.subplot(121)
    ax1.imshow(image, cmap='gray_r')
    plt.xticks(())
    plt.yticks(())
    ax2 = plt.subplot(122)
    texts = []
    feature_map = np.zeros((26, 26))
    for i in range(26):
        for j in range(26):
            if texts:
                fm.remove()  # drop the previous feature-map image before redrawing
            # draw the kernel values over the input patch currently being convolved
            for n in range(3):
                for m in range(3):
                    if len(texts) != 9:
                        texts.append(ax1.text(j + m, i + n, kernel[n, m], color='k', size=8, ha='center', va='center'))
                    else:
                        texts[n * 3 + m].set_position((j + m, i + n))

            feature_map[i, j] = np.sum(kernel * image[i:i + 3, j:j + 3])
            fm = ax2.imshow(feature_map, cmap='gray', vmax=3, vmin=-3)
            plt.xticks(())
            plt.yticks(())
            plt.tight_layout()
            plt.savefig(os.path.join(save_dir, "i{}j{}.png".format(i, j)))


def show_feature_map(save_dir, image):
    filters = [
        np.array([
            [1, 1, 1],
            [0, 0, 0],
            [-1, -1, -1]]),
        np.array([
            [-1, -1, -1],
            [0, 0, 0],
            [1, 1, 1]]),
        np.array([
            [1, 0, -1],
            [1, 0, -1],
            [1, 0, -1]]),
        np.array([
            [-1, 0, 1],
            [-1, 0, 1],
            [-1, 0, 1]])
    ]

    plt.figure(1)
    plt.title('Original image')
    plt.imshow(image, cmap='gray_r')
    plt.xticks(())
    plt.yticks(())
    plt.savefig(os.path.join(save_dir, "original_img.png"))

    plt.figure(2)
    for n in range(4):
        feature_map = np.zeros((26, 26))

        for i in range(26):
            for j in range(26):
                feature_map[i, j] = np.sum(image[i:i + 3, j:j + 3] * filters[n])

        plt.subplot(3, 4, 1 + n)
        plt.title('Filter%i' % n)
        plt.imshow(filters[n], cmap='gray', vmax=3, vmin=-3)
        plt.xticks(())
        plt.yticks(())

        plt.subplot(3, 4, 5 + n)
        plt.title('Conv%i' % n)
        plt.imshow(feature_map, cmap='gray', vmax=3, vmin=-3)
        plt.xticks(())
        plt.yticks(())

        plt.subplot(3, 4, 9 + n)
        plt.title('ReLU%i' % n)
        feature_map = np.maximum(0, feature_map)
        plt.imshow(feature_map, cmap='gray', vmax=3, vmin=-3)
        plt.xticks(())
        plt.yticks(())

    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, "feature_map.png"))


if __name__ == "__main__":
    result_dir = "visual/basic"
    conv_dir = os.path.join(result_dir, "conv")
    os.makedirs(conv_dir, exist_ok=True)
    (x_train, y_train), (x_test, y_test) = load_mnist()

    data = x_train[7].squeeze(axis=-1)
    show_feature_map(result_dir, data)
    show_conv(conv_dir, data)
    save_gif(conv_dir)

--------------------------------------------------------------------------------
/densenet.py:
--------------------------------------------------------------------------------

# [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# compared with resnet, each conv has fewer filters but sees more previous inputs
def bottleneck(xs, growth_rate):
    if len(xs) == 1:
        o = xs[0]
    else:
        o = layers.concatenate(xs, axis=-1)  # [n, h, w, c * len(xs)]
    o = layers.ReLU()(o)
    o = layers.Conv2D(growth_rate, kernel_size=1, strides=1)(o)  # [n, h, w, c]
    o = layers.ReLU()(o)
    o = layers.Conv2D(growth_rate, kernel_size=3, strides=1, padding="same")(o)  # [n, h, w, c]
    return o


def block(x, growth_rate, n_bottleneck=2):
    outs = [bottleneck([x], growth_rate)]  # [n, h, w, c]
    for i in range(1, n_bottleneck):
        o = bottleneck(outs, growth_rate)  # [n, h, w, c]
        outs.append(o)
    return layers.concatenate(outs, axis=-1)  # [n, h, w, c * n_bottleneck]


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, growth_rate=8, n_bottleneck=2)  # [n, 14, 14, 8*2]
    x = layers.MaxPool2D(2)(x)  # [n, 7, 7, 8*2]
    x = block(x, 8, 3)  # [n, 7, 7, 8*3]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 24]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="DenseNet")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/googlenet.py:
--------------------------------------------------------------------------------

# [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers, activations
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# multiple kernel sizes capture different local information
def inception(x, f1, f2, f3, f4, name):
    act = activations.relu
    inputs = layers.Input(shape=x.shape[1:])

    p1 = layers.Conv2D(filters=f1, kernel_size=1, strides=1, activation=act, name="p1")(inputs)

    p2 = layers.Conv2D(f2[0], 1, 1, name="p21")(inputs)
    p2 = layers.Conv2D(f2[1], 3, 1, padding="same", activation=act, name="p22")(p2)

    p3 = layers.Conv2D(f3[0], 1, 1, name="p31")(inputs)
    p3 = layers.Conv2D(f3[1], 5, 1, padding="same", activation=act, name="p32")(p3)

    p4 = layers.MaxPool2D(pool_size=3, strides=1, padding="same", name="p41")(inputs)
    p4 = layers.Conv2D(f4, 1, activation=act, name="p42")(p4)

    p = layers.concatenate((p1, p2, p3, p4), axis=-1)  # concat the four branches along channels
    m = keras.Model(inputs, p, name=name)
    return m(x)


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = inception(x, f1=4, f2=(2, 4), f3=(2, 4), f4=4, name="inception1")  # [n, 14, 14, 4*4]
    x = layers.MaxPool2D(2, 2)(x)  # [n, 7, 7, 4*4]
    x = inception(x, f1=8, f2=(4, 8), f3=(4, 8), f4=8, name="inception2")  # [n, 7, 7, 8*4]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 8*4]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="GoogLeNet")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/lenet.py:
--------------------------------------------------------------------------------

# [Gradient-Based Learning Applied to Document Recognition](http://www.dengfanxin.cn/wp-content/uploads/2016/03/1998Lecun.pdf)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()

# define model
model = keras.Sequential([
    layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same", input_shape=(28, 28, 1)),  # [n, 28, 28, 8]
    layers.MaxPool2D(pool_size=2, strides=2),  # [n, 14, 14, 8]
    layers.Conv2D(16, 3, 1, "same"),  # [n, 14, 14, 16]
    layers.MaxPool2D(2, 2),  # [n, 7, 7, 16]
    layers.Flatten(),  # [n, 7*7*16]
    layers.Dense(10)  # [n, 10]
], name="LeNet")

# show model
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/mobilenetv1.py:
--------------------------------------------------------------------------------

# [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# (dw+pw). small but effective cnn optimized for mobile (cpu)
def block(x, filters):
    # depthwise separable convolution
    o = layers.DepthwiseConv2D(kernel_size=3, strides=1, padding="same")(x)  # [n, h, w, c] dw
    o = layers.ReLU()(o)
    o = layers.Conv2D(filters, 1, 1)(o)  # [n, h, w, f] pw
    o = layers.ReLU()(o)
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=20)  # [n, 14, 14, 20]
    x = layers.MaxPool2D(2, 2)(x)  # [n, 7, 7, 20]
    x = block(x, 40)  # [n, 7, 7, 40]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 40]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="MobileNetV1")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/mobilenetv2.py:
--------------------------------------------------------------------------------

# [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# (residual+pw+dw+pw). mobilenet v1 with residual blocks and rearranged layers
def block(x, filters, expand_ratio=4):
    # mobilenet v1: dw > pw
    # mobilenet v2: pw > dw > pw lets dw see more feature maps
    o = layers.Conv2D(int(filters * expand_ratio), 1, 1)(x)  # [n, h, w, f*e] pw expansion
    o = layers.ReLU()(o)
    o = layers.DepthwiseConv2D(kernel_size=3, strides=1, padding="same")(o)  # [n, h, w, f*e] dw
    o = layers.ReLU()(o)
    o = layers.Conv2D(filters, 1, 1)(o)  # [n, h, w, f] pw, no ReLU (linear bottleneck)
    if x.shape[-1] == filters:
        o = layers.add((o, x))  # residual connection
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=5)  # [n, 14, 14, 5]
    x = layers.MaxPool2D(2, 2)(x)  # [n, 7, 7, 5]
    x = block(x, 10)  # [n, 7, 7, 10]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 10]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="MobileNetV2")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

matplotlib==3.3.3
numpy==1.18.5
Pillow==8.0.1
tensorflow==2.3.0
pydot==1.4.1

--------------------------------------------------------------------------------
/resnet.py:
--------------------------------------------------------------------------------

# [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# add residual connections for better gradients
def bottleneck(x, filters, strides=1):
    c = x.shape[-1]
    if c != filters:
        shortcut = layers.Conv2D(filters, 1, strides, "same")(x)  # projection to match the output shape
    else:
        shortcut = x  # identity (assumes strides == 1 whenever the channels already match)
    o = layers.ReLU()(x)
    o = layers.Conv2D(c, kernel_size=3, strides=1, padding="same")(o)  # [n, h, w, c]
    o = layers.ReLU()(o)
    o = layers.Conv2D(filters, 3, strides, "same")(o)  # [n, h/s, w/s, f]
    o = layers.add((o, shortcut))
    return o


def block(x, filters, strides=1, n_bottleneck=2):
    o = bottleneck(x, filters, strides)  # [n, h/s, w/s, f]
    for i in range(1, n_bottleneck):
        o = bottleneck(o, filters, 1)  # [n, h/s, w/s, f]
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=8, strides=1, n_bottleneck=2)  # [n, 14, 14, 8]
    x = block(x, 16, 2, 1)  # [n, 7, 7, 16]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 16]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="ResNet")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/senet.py:
--------------------------------------------------------------------------------

# [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# SE is a module that learns to scale each feature map; here it is plugged into a resnet block.
# a larger reduction_ratio reduces the parameter size of the FC layers with limited accuracy drop
def se(x, reduction_ratio=4):
    c = x.shape[-1]
    s = layers.GlobalAveragePooling2D()(x)  # squeeze: [n, c]
    s = layers.Dense(c // reduction_ratio)(s)
    s = layers.ReLU()(s)
    s = layers.Dense(c, activation=keras.activations.sigmoid)(s)  # excitation: per-channel gate in (0, 1)
    return x * layers.Reshape((1, 1, c))(s)  # broadcast the gate over h and w


def bottleneck(x, filters, strides=1, reduction_ratio=4):
    in_channels = x.shape[-1]
    if in_channels != filters:
        # projection shortcut; left unnamed so repeated projections don't collide on layer names
        shortcut = layers.Conv2D(filters, 1, strides, "same")(x)
    else:
        shortcut = x
    o = layers.ReLU()(x)
    o = layers.Conv2D(in_channels, kernel_size=3, strides=1, padding="same")(o)  # [n, h, w, c]
    o = layers.ReLU()(o)
    o = layers.Conv2D(filters, 3, strides, "same")(o)  # [n, h/s, w/s, f]
    o = se(o, reduction_ratio)
    o = layers.add((o, shortcut))
    return o


def block(x, filters, strides=1, n_bottleneck=2):
    o = bottleneck(x, filters, strides)  # [n, h/s, w/s, f]
    for i in range(1, n_bottleneck):
        o = bottleneck(o, filters, 1)  # [n, h/s, w/s, f]
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=8, strides=1, n_bottleneck=2)  # [n, 14, 14, 8]
    x = block(x, 16, 2, 1)  # [n, 7, 7, 16]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 16]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="SENet")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])
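
# note: the SE gates (the two Dense layers in se()) need no loss of their own;
# they are trained jointly with the conv weights by the same cross-entropy below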
# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/shufflenetv1.py:
--------------------------------------------------------------------------------

# [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# grouped 1x1 conv reduces parameters; shuffling the grouped feature maps mixes the isolated groups
def shuffle(x, groups):
    _, h, w, c = x.shape
    gc = c // groups
    o = layers.Reshape((h, w, groups, gc))(x)  # [n, h, w, g, gc]
    o = layers.Permute((1, 2, 4, 3))(o)  # shuffle in groups
    o = layers.Reshape((h, w, c))(o)
    return o


def group_conv(x, filters, groups):
    assert x.shape[-1] % groups == 0
    assert filters % groups == 0
    # check for an actually available gpu; Conv2D with groups=groups has no cpu kernel in tf 2.3.0
    if tf.config.list_physical_devices("GPU"):
        o = layers.Conv2D(filters, 1, 1, groups=groups)(x)  # [n, h, w, f] gpw
    else:
        # cpu fallback: split the channels, convolve each group separately, concat back
        o = layers.concatenate([
            layers.Conv2D(filters // groups, 1, 1)(g) for g in tf.split(x, groups, axis=-1)
        ], axis=-1)  # [n, h, w, f]
    return o


def block(x, filters, groups=4):
    o = group_conv(x, x.shape[-1], groups)  # [n, h, w, c] gpw
    o = shuffle(o, groups)
    o = layers.ReLU()(o)
    o = layers.DepthwiseConv2D(kernel_size=3, strides=1, padding="same")(o)  # [n, h, w, c] dw
    o = group_conv(o, filters, groups)  # [n, h, w, f] gpw
    if x.shape[-1] != filters:
        x = group_conv(x, filters, groups)  # [n, h, w, f]
    o = layers.add((o, x))  # residual connection
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=32, groups=4)  # [n, 14, 14, 32]
    x = layers.MaxPool2D(2, 2)(x)  # [n, 7, 7, 32]
    x = block(x, 64, 4)  # [n, 7, 7, 64]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 64]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="ShuffleNetV1")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/shufflenetv2.py:
--------------------------------------------------------------------------------

# [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# further reduces parameters by replacing group conv with split+concat and shuffling
# at the end of the block, which speeds up computation
def shuffle(x, groups):
    _, h, w, c = x.shape
    gc = c // groups
    o = layers.Reshape((h, w, groups, gc))(x)  # [n, h, w, g, gc]
    o = layers.Permute((1, 2, 4, 3))(o)  # shuffle in groups
    o = layers.Reshape((h, w, c))(o)
    return o


def block(x, filters):
    x1, x2 = tf.split(x, 2, axis=-1)  # x1 [n, h, w, c/2], x2 [n, h, w, c/2]
    c = x.shape[-1]
    o = layers.Conv2D(x2.shape[-1], 1, 1)(x2)  # [n, h, w, c/2]
    o = layers.ReLU()(o)
    o = layers.DepthwiseConv2D(kernel_size=3, strides=1, padding="same")(o)  # [n, h, w, c/2]
    o = layers.ReLU()(o)
    o = layers.Conv2D(filters // 2, 1, 1)(o)  # [n, h, w, f/2]
    if filters != c:
        x1 = layers.Conv2D(filters // 2, 1, 1)(x1)  # [n, h, w, f/2]
    o = layers.concatenate([x1, o], axis=-1)  # [n, h, w, f]
    o = shuffle(o, 2)  # mix the two branches so they communicate in the next block
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=32)  # [n, 14, 14, 32]
    x = layers.MaxPool2D(2, 2)(x)  # [n, 7, 7, 32]
    x = block(x, 64)  # [n, 7, 7, 64]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 64]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="ShuffleNetV2")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------

import numpy as np
import os
from urllib.request import urlretrieve
from tensorflow import keras

MNIST_PATH = "./mnist.npz"


def load_mnist(path="./mnist.npz", norm=True):
    if not os.path.isfile(path):
        print("no mnist data found, downloading...")
        urlretrieve("https://s3.amazonaws.com/img-datasets/mnist.npz", path)
    with np.load(path, allow_pickle=True) as f:
        x_train = f['x_train'].astype(np.float32)[:, :, :, None]
        y_train = f['y_train'].astype(np.float32)[:, None]
        x_test = f['x_test'].astype(np.float32)[:, :, :, None]
        y_test = f['y_test'].astype(np.float32)[:, None]
    if norm:
        x_train /= 255
        x_test /= 255
    return (x_train, y_train), (x_test, y_test)


def save_model_structure(model: keras.Model, path=None):
    if path is None:
        path = "visual/{}/{}_structure.png".format(model.name, model.name)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    try:
        keras.utils.plot_model(model, show_shapes=True, expand_nested=True, dpi=150, to_file=path)
    except Exception as e:
        print(e)


def save_model_weights(model: keras.Model, path=None):
    if path is None:
        path = "visual/{}/model/net".format(model.name)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    model.save_weights(path)


def load_model_weights(model: keras.Model, path=None):
    if path is None:
        path = "visual/{}/model/net".format(model.name)
    model.load_weights(path)

--------------------------------------------------------------------------------
/vgg.py:
--------------------------------------------------------------------------------

# [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()

# define model
# like LeNet with more layers and activations
model = keras.Sequential([
    layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same", input_shape=(28, 28, 1)),  # [n, 28, 28, 8]
    layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same"),  # [n, 28, 28, 8]
    layers.ReLU(),
    layers.MaxPool2D(pool_size=2, strides=2),  # [n, 14, 14, 8]
    layers.Conv2D(16, 3, 1, "same"),  # [n, 14, 14, 16]
    layers.Conv2D(16, 3, 1, "same"),  # [n, 14, 14, 16]
    layers.ReLU(),
    layers.MaxPool2D(2, 2),  # [n, 7, 7, 16]
    layers.Flatten(),  # [n, 7*7*16]
    layers.Dense(32),  # [n, 32]
    layers.ReLU(),
    layers.Dense(10)  # [n, 10]
], name="VGG")

# show model
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------
/xception.py:
--------------------------------------------------------------------------------

# [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)
# dependency file: https://github.com/MorvanZhou/Computer-Vision/requirements.txt

from tensorflow import keras
from tensorflow.keras import layers
from utils import load_mnist, save_model_structure, save_model_weights

# get data
(x_train, y_train), (x_test, y_test) = load_mnist()


# (residual+pw+dw) just like mobilenetv2 without the last pw
def block(x, filters):
    if x.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1, 1)(x)  # projection to match channels
    else:
        shortcut = x
    o = layers.ReLU()(x)
    o = layers.Conv2D(filters, 1, 1)(o)  # [n, h, w, f] pw
    o = layers.ReLU()(o)
    o = layers.DepthwiseConv2D(3, 1, "same")(o)  # [n, h, w, f] dw
    o = layers.add((o, shortcut))  # residual connection
    return o


def build_model():
    inputs = layers.Input(shape=(28, 28, 1), name="img")
    x = layers.Conv2D(filters=8, kernel_size=3, strides=1, padding="same")(inputs)  # [n, 28, 28, 8]
    x = layers.MaxPool2D(pool_size=2, strides=2)(x)  # [n, 14, 14, 8]
    x = block(x, filters=16)  # [n, 14, 14, 16]
    x = layers.MaxPool2D(2, 2)(x)  # [n, 7, 7, 16]
    x = block(x, 32)  # [n, 7, 7, 32]
    x = layers.GlobalAveragePooling2D()(x)  # [n, 32]
    o = layers.Dense(10)(x)  # [n, 10]
    return keras.Model(inputs, o, name="Xception")


# show model
model = build_model()
model.summary()
save_model_structure(model)

# define loss and optimizer
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(0.001)
accuracy = keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=opt, loss=loss, metrics=[accuracy])

# training and validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=3, validation_data=(x_test, y_test))

# save model
save_model_weights(model)

--------------------------------------------------------------------------------