├── .gitignore
├── LICENSE
├── README.md
├── data_helper.py
├── img_cnn.py
├── pre_train.py
└── train.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
103 | # custom
104 | inputs/
105 | log/
106 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2018 Lei Zhang
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # cnn-classification-dog-vs-cat
 2 | 基于CNN的图像分类器，使用Kaggle的猫狗图片数据。
 3 | 
 4 | ## 1 Requirements
 5 | - python3
 6 | - numpy >= 1.14.2
 7 | - keras >= 2.1.6
 8 | - tensorflow >= 1.6.0
 9 | - h5py >= 2.7.0
10 | - python-gflags >= 3.1.2
11 | - opencv-python >= 3.4.0.12
12 | 
13 | ## 2 Description of files
14 | - inputs: 猫狗图片样本数据，[[下载地址]](https://www.kaggle.com/c/dogs-vs-cats/data)，使用keras库中的[ImageDataGenerator](https://keras.io/preprocessing/image/)类读取，需要将每个类的图片放在单独命名的文件夹中存放；
15 | - train.py: 自建的简单CNN，训练后测试集精度约83%；
16 | - pre_train.py: 利用已训练的常用网络(基于[ImageNet](http://www.image-net.org/)数据集训练)，进行迁移学习，测试集精度可达95%以上；
17 | - data_helper.py: 数据读取和预处理模块；
18 | - img_cnn.py: 基于TensorFlow的自定义简单卷积神经网络。
19 | 
20 | ## 3 Start training
21 | - ### 训练自定义的小型CNN
22 |     ```shell
23 |     python train.py
24 |     ```
25 | - ### 在VGG16的基础上进行迁移学习
26 |     ```shell
27 |     python pre_train.py
28 |     ```
29 | 
30 | ## 4 Visualizing results in TensorBoard
31 | ```shell
32 | tensorboard --logdir /"PATH_OF_CODE"/log/"TIMESTAMP"/summaries/
33 | ```
34 | 
35 | ## 5 References
36 | [1]. 猫狗图像数据来源：
37 | https://www.kaggle.com/c/dogs-vs-cats/data
38 | 
39 | [2]. keras中载入已训练网络的方法：
40 | https://keras.io/applications/
41 | 
42 | [3]. keras中图像预处理的相关功能介绍：
43 | https://keras.io/preprocessing/image/


--------------------------------------------------------------------------------
/data_helper.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import numpy as np
 5 | import tensorflow as tf
 6 | import cv2
 7 | 
 8 | 
 9 | def get_filenames_and_labels(dir_path, folder_names=['cat', 'dog'], shuffle=True):
10 |     img_path_list = []
11 |     label_list = []
12 |     img_count = 0
13 |     for folder_name in folder_names:
14 |         filenames = os.listdir(os.path.join(dir_path, folder_name))
15 |         for f in filenames:
16 |             img_path_list.append(os.path.join(dir_path, folder_name, f))
17 |             label_list.append([0,1] if 'cat' in f else [1,0])
18 |             img_count += 1
19 |     img_path_list = np.array(img_path_list)
20 |     label_list = np.array(label_list)
21 |     if shuffle == True:
22 |         index = np.random.permutation(np.arange(0, img_count, 1))
23 |         img_path_list_shuffled = img_path_list[index]
24 |         label_list_shuffled = label_list[index]
25 |     else:
26 |         img_path_list_shuffled = img_path_list
27 |         label_list_shuffled = label_list
28 |     return img_path_list_shuffled, label_list_shuffled
29 | 
def img_resize(img_path, img_height, img_width):
    '''
    Load an image from disk and resize it with bicubic interpolation.

    Args:
        img_path: Path of the image file to read with OpenCV.
        img_height: Target number of rows of the returned image.
        img_width: Target number of columns of the returned image.

    Returns:
        The resized image array as produced by `cv2.resize`.

    Raises:
        IOError: If OpenCV cannot read an image from `img_path`.
    '''
    img_src = cv2.imread(img_path)
    if img_src is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Failed to read image: {}'.format(img_path))
    # cv2.resize takes the target size as (width, height), not (height, width).
    return cv2.resize(img_src, (img_width, img_height), interpolation=cv2.INTER_CUBIC)
34 | 
def rgb2gray(img_rgb):
    '''
    Convert a colour image to one grayscale channel scaled by 1/255.

    The first three channels of `img_rgb` are combined with the weights
    0.299, 0.587 and 0.114 (in that channel order); any further channels
    are ignored.

    Args:
        img_rgb: Array whose first two axes are the image rows and columns
            and whose last axis holds at least three channels.

    Returns:
        Array of shape `(rows, cols, 1)` with the weighted sum divided by 255.
    '''
    channel_weights = [0.299, 0.587, 0.114]
    weighted_sum = np.dot(img_rgb[..., :3], channel_weights)
    n_rows = img_rgb.shape[0]
    n_cols = img_rgb.shape[1]
    scaled = weighted_sum / 255.0
    return scaled.reshape(n_rows, n_cols, 1)
39 | 
def batch_iter(batch_size, num_epochs, img_path_list, label_list,
        img_height, img_width, shuffle=True):
    '''
    Generate grayscale image batches with labels for a fixed number of epochs.

    Images are read and resized lazily, one batch at a time, so only the
    paths and labels are held in memory for the whole dataset.

    Args:
        batch_size: Maximum number of samples per batch; must be positive.
        num_epochs: Number of passes over the dataset.
        img_path_list: Sequence of image file paths.
        label_list: Sequence of labels aligned with `img_path_list`.
        img_height: Height passed to `img_resize`.
        img_width: Width passed to `img_resize`.
        shuffle: When truthy, draw a new random order at every epoch.

    Yields:
        Tuples `(images, labels)` where `images` stacks the outputs of
        `rgb2gray` for the batch and `labels` is the matching label slice.
        The last batch of an epoch may be smaller than `batch_size`. An
        empty dataset yields nothing.

    Raises:
        ValueError: If `batch_size` is not positive.
    '''
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))
    img_path_list = np.array(img_path_list)
    label_list = np.array(label_list)
    data_size = len(label_list)
    # Integer ceiling division; unlike the float formula it gives 0 batches
    # (instead of one empty batch) when the dataset is empty.
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    for _ in range(num_epochs):
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            img_path_list_shuffled = img_path_list[shuffle_indices]
            label_list_shuffled = label_list[shuffle_indices]
        else:
            img_path_list_shuffled = img_path_list
            label_list_shuffled = label_list
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min(start_index + batch_size, data_size)
            img_list_shuffled = []
            for img_path in img_path_list_shuffled[start_index:end_index]:
                img_data = img_resize(img_path=img_path, img_height=img_height, img_width=img_width)
                # img_resize loads with cv2.imread, which returns channels in
                # B, G, R order; reverse them so rgb2gray's R/G/B weights are
                # applied to the right channels.
                img_list_shuffled.append(rgb2gray(img_data[..., ::-1]))
            yield np.array(img_list_shuffled), label_list_shuffled[start_index:end_index]
69 | 
def generate_arrays_from_file(batch_size, img_path_list, label_list,
        img_height, img_width, shuffle=True):
    '''
    Endlessly generate resized image batches as input/output dictionaries.

    Unlike `batch_iter`, this generator never terminates and does not
    convert the images to grayscale or rescale their values.

    Args:
        batch_size: Maximum number of samples per batch; must be positive.
        img_path_list: Sequence of image file paths.
        label_list: Sequence of labels aligned with `img_path_list`.
        img_height: Height passed to `img_resize`.
        img_width: Width passed to `img_resize`.
        shuffle: When truthy, draw a new random order at every pass.

    Yields:
        Tuples `({'input_1': images}, {'output': labels})`. The keys are
        presumably the input/output layer names of the consuming Keras
        model -- verify against the caller.

    Raises:
        ValueError: If `batch_size` is not positive or the dataset is empty.
    '''
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))
    img_path_list = np.array(img_path_list)
    label_list = np.array(label_list)
    data_size = len(label_list)
    if data_size == 0:
        # Without this guard the endless loop below could never produce a
        # non-empty batch.
        raise ValueError('Cannot generate batches from an empty dataset.')
    # Integer ceiling division: number of batches needed to cover the data.
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    while True:
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            img_path_list_shuffled = img_path_list[shuffle_indices]
            label_list_shuffled = label_list[shuffle_indices]
        else:
            img_path_list_shuffled = img_path_list
            label_list_shuffled = label_list
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min(start_index + batch_size, data_size)
            # NOTE(review): images keep the channel order returned by
            # img_resize (cv2.imread loads B, G, R) -- confirm that this is
            # what the consuming model expects.
            img_list_shuffled = np.array([
                img_resize(img_path=img_path, img_height=img_height, img_width=img_width)
                for img_path in img_path_list_shuffled[start_index:end_index]
            ])
            yield ({'input_1': img_list_shuffled}, {'output': label_list_shuffled[start_index:end_index]})
98 | 


--------------------------------------------------------------------------------
/img_cnn.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | 
 6 | 
class ImgCNN(object):
    '''
    A small convolutional network for image classification (TensorFlow graph mode).

    The graph built by the constructor consists of three
    convolution + ReLU + max-pooling stages (8, 16 and 32 filters), a
    128-unit fully connected ReLU layer, dropout, and a final fully
    connected layer that produces `n_classes` logits.

    Attributes set by the constructor that callers typically feed or fetch:
        input_x: float32 placeholder of shape [None, img_height, img_width, img_channel].
        input_y: float32 placeholder of shape [None, n_classes].
        dropout_keep_prob: float32 placeholder used as `keep_prob` of the dropout op.
        output: logits returned by the last fully connected layer.
        y_pred: argmax of `output` along axis 1.
        loss: mean softmax cross-entropy between `input_y` and `output`.
        accuracy: mean of the element-wise equality between `y_pred` and
            the argmax of `input_y`.
    '''
    def __init__(self, n_classes, img_height, img_width, img_channel, device_name='/cpu:0'):
        '''
        Build the whole graph.

        Args:
            n_classes: Number of output classes (width of `input_y` and of the logits).
            img_height: Height of the input images.
            img_width: Width of the input images.
            img_channel: Number of channels of the input images.
            device_name: Device string passed to `tf.device` for the network layers.
        '''
        # Placeholders are created outside of any explicit device scope.
        self.input_x = tf.placeholder(dtype=tf.float32, shape=[None, img_height, img_width, img_channel], name='input_x')
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='input_y')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')

        with tf.device(device_name):
            # NOTE(review): input_image is not consumed by the layers below;
            # the first convolution reads input_x directly.
            self.input_image = tf.reshape(self.input_x, [-1,img_height,img_width,img_channel])
            # Stage 1: 5x5 convolution with 8 filters (no bias term), ReLU, 2x2 max-pooling.
            with tf.name_scope('conv_layer_1'):
                filter_shape_1 = [5,5,img_channel,8]
                self.h_conv_1 = self.conv2d(x=self.input_x, W=self.w_variable(shape=filter_shape_1), stride=1, padding='SAME')
                self.h_conv_1 = tf.nn.relu(features=self.h_conv_1, name='relu_conv_1')
            with tf.name_scope('pooling_layer_1'):
                self.h_pool_1 = self.max_pool(x=self.h_conv_1, ksize=2, stride=2, padding='SAME')   # shape: [112 * 112 * 8] for a 224 * 224 input

            # Stage 2: 3x3 convolution with 16 filters (no bias term), ReLU, 2x2 max-pooling.
            with tf.name_scope('conv_layer_2'):
                filter_shape_2 = [3,3,8,16]
                self.h_conv_2 = self.conv2d(x=self.h_pool_1, W=self.w_variable(shape=filter_shape_2), stride=1, padding='SAME')
                self.h_conv_2 = tf.nn.relu(features=self.h_conv_2, name='relu_conv_2')
            with tf.name_scope('pooling_layer_2'):
                self.h_pool_2 = self.max_pool(x=self.h_conv_2, ksize=2, stride=2, padding='SAME')   # shape: [56 * 56 * 16] for a 224 * 224 input

            # Stage 3: 3x3 convolution with 32 filters (no bias term), ReLU, 2x2 max-pooling.
            with tf.name_scope('conv_layer_3'):
                filter_shape_3 = [3,3,16,32]
                self.h_conv_3 = self.conv2d(x=self.h_pool_2, W=self.w_variable(shape=filter_shape_3), stride=1, padding='SAME')
                self.h_conv_3 = tf.nn.relu(features=self.h_conv_3, name='relu_conv_3')
            with tf.name_scope('pooling_layer_3'):
                self.h_pool_3 = self.max_pool(x=self.h_conv_3, ksize=2, stride=2, padding='SAME')   # shape: [28 * 28 * 32] for a 224 * 224 input

            # Flatten every non-batch dimension of the last pooling output
            # into one feature vector per sample.
            num_total_unit = self.h_pool_3.get_shape()[1:4].num_elements()
            self.h_pool_3_flat = tf.reshape(self.h_pool_3, shape=[-1, num_total_unit])

            with tf.name_scope('fc_layer_1'):
                self.h_fc_1 = self.fc_layer(self.h_pool_3_flat, num_total_unit, 128, activation_function=tf.nn.relu)

            # Dropout on the hidden layer; the keep probability is fed at run time.
            with tf.name_scope('dropout'):
                 self.h_drop = tf.nn.dropout(self.h_fc_1, keep_prob=self.dropout_keep_prob, name='h_drop')

            # Output layer: raw logits, no activation (softmax is applied inside the loss).
            with tf.name_scope('fc_layer_2'):
                self.output = self.fc_layer(self.h_drop, 128, n_classes, activation_function=None)

        # Prediction, loss and accuracy ops are pinned to the CPU regardless of device_name.
        with tf.device('/cpu:0'):
            with tf.name_scope('prediction'):
                self.y_pred = tf.argmax(input=self.output, axis=1, name='y_pred')

            with tf.name_scope('loss'):
                self.loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y, logits=self.output), name='cross_entropy_loss')

            with tf.name_scope('accuracy'):
                correct_predictions = tf.equal(self.y_pred, tf.argmax(self.input_y, axis=1))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')

    def w_variable(self, shape):
        '''Create a float32 weight variable named 'W', initialised from a truncated normal (mean 0.0, stddev 0.1).'''
        return tf.Variable(initial_value=tf.truncated_normal(shape=shape, mean=0.0, stddev=0.1), dtype=tf.float32, name='W')

    def b_variable(self, shape):
        '''Create a float32 bias variable named 'b', initialised to the constant 0.1.'''
        return tf.Variable(initial_value=tf.constant(value=0.1, shape=shape), dtype=tf.float32, name='b')

    def conv2d(self, x, W, stride, padding='SAME'):
        '''Apply a 2-D convolution of `x` with filter `W`, using `stride` along both spatial axes.'''
        return tf.nn.conv2d(input=x, filter=W, strides=[1,stride,stride,1], padding=padding, name='conv')

    def max_pool(self, x, ksize, stride, padding='VALID'):
        '''Apply max-pooling with a `ksize` x `ksize` window and `stride` along both spatial axes.'''
        return tf.nn.max_pool(value=x, ksize=[1,ksize,ksize,1], strides=[1,stride,stride,1], padding=padding, name='max-pool')

    def fc_layer(self, x, in_size, out_size, activation_function=None):
        '''
        Build a fully connected layer `x * w + b` with fresh variables.

        Args:
            x: Input tensor fed to `tf.nn.xw_plus_b`.
            in_size: Number of input features (first dimension of the weight matrix).
            out_size: Number of output units.
            activation_function: Optional callable applied to the affine
                output; when None the affine output is returned unchanged.

        Returns:
            The layer output tensor.
        '''
        w = self.w_variable(shape=[in_size, out_size])
        b = self.b_variable(shape=[out_size])
        z = tf.nn.xw_plus_b(x, w, b, name='Wx_plus_b')
        if activation_function is None:
            outputs = z
        else:
            outputs = activation_function(z)
        return outputs
84 | 


--------------------------------------------------------------------------------
/pre_train.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | 
import os
import sys

import gflags
import keras
import matplotlib.pyplot as plt
  7 | 
  8 | ### parameters ###
  9 | # ===============================================
 10 | FLAGS = gflags.FLAGS
 11 | 
 12 | # data loading parameters
 13 | gflags.DEFINE_string('train_data_dir', './inputs/train/',
 14 |                      'Directory of the training data.')
 15 | gflags.DEFINE_string('dev_data_dir', './inputs/dev/',
 16 |                      'Directory of the dev data.')
 17 | # gflags.DEFINE_float('dev_sample_percentage', 0.02, 'Percentage of the training data to user for validation (dev set).')
 18 | 
 19 | # model parameters
 20 | gflags.DEFINE_integer('img_height', 224,
 21 |                       'The height of the image for training (default: 227).')
 22 | gflags.DEFINE_integer('img_width', 224,
 23 |                       'The width of the image for training (default: 227).')
 24 | gflags.DEFINE_integer(
 25 |     'img_channels', 3,
 26 |     'The number of channels of the image for training (default: 3).')
 27 | gflags.DEFINE_float('dropout_keep_prob', 0.7,
 28 |                     'Dropout keep probability (default: 0.7).')
 29 | 
 30 | # training parameters
 31 | gflags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training.')
 32 | gflags.DEFINE_integer('batch_size', 32, 'The batch size for each train step.')
 33 | gflags.DEFINE_integer('num_epochs', 10,
 34 |                       'Number of training epochs (default: 200).')
 35 | 
 36 | FLAGS(sys.argv)
 37 | # show parameters
 38 | print('\nPARAMETERS:')
 39 | print('================================')
 40 | for attr, value in FLAGS.flag_values_dict().items():
 41 |     print('{0}: {1}'.format(attr.lower(), value))
 42 | print('================================\n\n')
 43 | 
 44 | ### use the pre-trained model
 45 | # create the base pre-trained model
 46 | base_model = keras.applications.VGG16(
 47 |     weights='imagenet',
 48 |     include_top=False,
 49 |     input_shape=(FLAGS.img_height, FLAGS.img_width, FLAGS.img_channels))
 50 | 
 51 | # add a global spatial average pooling layer
 52 | add_model = keras.Sequential(name='additional_layers')
 53 | add_model.add(keras.layers.Flatten(input_shape=base_model.output_shape[1:]))
 54 | add_model.add(keras.layers.Dense(128, activation='relu'))
 55 | add_model.add(keras.layers.Dense(2, activation='softmax'))
 56 | 
 57 | model = keras.models.Model(
 58 |     inputs=base_model.input, outputs=add_model(base_model.output))
 59 | 
 60 | # freeze all VGG16 layers
 61 | for layer in model.layers[:-1]:
 62 |     layer.trainable = False
 63 | 
 64 | model.compile(
 65 |     loss='categorical_crossentropy',
 66 |     optimizer=keras.optimizers.SGD(lr=FLAGS.learning_rate, momentum=0.9),
 67 |     metrics=['accuracy'])
 68 | 
 69 | model.summary()
 70 | 
 71 | train_datagen = keras.preprocessing.image.ImageDataGenerator(
 72 |     rescale=1. / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
 73 | validation_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
 74 | train_generator = train_datagen.flow_from_directory(
 75 |     directory=FLAGS.train_data_dir,
 76 |     target_size=(FLAGS.img_height, FLAGS.img_width),
 77 |     batch_size=FLAGS.batch_size,
 78 |     class_mode='categorical',
 79 |     seed=272)
 80 | validation_generator = validation_datagen.flow_from_directory(
 81 |     directory=FLAGS.dev_data_dir,
 82 |     target_size=(FLAGS.img_height, FLAGS.img_width),
 83 |     batch_size=FLAGS.batch_size,
 84 |     class_mode='categorical')
 85 | 
 86 | # train the model on the new data for a few epochs
 87 | history = model.fit_generator(
 88 |     # data_helper.generate_arrays_from_file(
 89 |     #     batch_size=FLAGS.batch_size, img_path_list=x_path_train, label_list=y_train, img_height=224, img_width=224),
 90 |     train_generator,
 91 |     steps_per_epoch=train_generator.n // FLAGS.batch_size,
 92 |     epochs=FLAGS.num_epochs,
 93 |     validation_data=validation_generator,
 94 |     verbose=1,
 95 |     callbacks=[
 96 |         keras.callbacks.ModelCheckpoint(
 97 |             './log/VGG16-transfer-learning.model',
 98 |             monitor='val_loss',
 99 |             save_best_only=True,
100 |             verbose=1)
101 |     ])
102 | 
# Keras releases before 2.3 record the accuracy metric under 'acc' /
# 'val_acc'; later releases use 'accuracy' / 'val_accuracy'. Pick whichever
# key the history actually contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
val_acc_key = 'val_' + acc_key

# summarize history for accuracy
plt.plot(history.history[acc_key])
plt.plot(history.history[val_acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# Report the metrics of the last epoch.
print("Training loss: {:.2f} / Validation loss: {:.2f}".format(
    history.history['loss'][-1], history.history['val_loss'][-1]))
print("Training accuracy: {:.2f}% / Validation accuracy: {:.2f}%".format(
    100 * history.history[acc_key][-1], 100 * history.history[val_acc_key][-1]))
125 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import sys
  4 | import os
  5 | import time
  6 | import datetime
  7 | import gflags
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | import data_helper
 11 | from img_cnn import ImgCNN
 12 | 
 13 | 
 14 | ### parameters ###
 15 | # ===============================================
 16 | FLAGS = gflags.FLAGS
 17 | 
 18 | # data loading parameters
 19 | gflags.DEFINE_string('train_data_dir', './inputs/train/', 'Directory of the training data.')
 20 | gflags.DEFINE_float('dev_sample_percentage', 0.01, 'Percentage of the training data to user for validation (dev set).')
 21 | 
 22 | # model parameters
 23 | gflags.DEFINE_integer('img_height', 224, 'The height of the image for training (default: 227).')
 24 | gflags.DEFINE_integer('img_width', 224, 'The width of the image for training (default: 227).')
 25 | gflags.DEFINE_integer('img_channels', 1, 'The number of channels of the image for training (default: 3).')
 26 | gflags.DEFINE_float('dropout_keep_prob', 0.7, 'Dropout keep probability (default: 0.7).')
 27 | 
 28 | # training parameters
 29 | gflags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training.')
 30 | gflags.DEFINE_integer('batch_size', 32, 'The batch size for each train step.')
 31 | gflags.DEFINE_integer('num_epochs', 200, 'Number of training epochs (default: 200).')
 32 | gflags.DEFINE_integer('evaluate_every', 100, 'Evaluate model on dev set after this many of steps (default: 100).')
 33 | gflags.DEFINE_integer('checkpoint_every', 100, 'Save model after this many steps (default: 100).')
 34 | gflags.DEFINE_integer('num_checkpoints', 5, 'Number of checkpoints to store (default: 5).')
 35 | 
 36 | # device parameters
 37 | gflags.DEFINE_string('device_name', '/cpu:0', 'Device name for training.')
 38 | gflags.DEFINE_bool('allow_soft_placement', True, 'Allow device soft device placement.')
 39 | gflags.DEFINE_bool('log_device_placement', False, 'Log placement of ops on devices.')
 40 | 
 41 | FLAGS(sys.argv)
 42 | # show parameters
 43 | print('\nPARAMETERS:')
 44 | print('================================')
 45 | for attr, value in FLAGS.flag_values_dict().items():
 46 |     print('{0}: {1}'.format(attr.lower(), value))
 47 | print('================================\n\n')
 48 | 
 49 | 
 50 | ### data preparation ###
 51 | # ===============================================
 52 | 
 53 | # load data
 54 | print('Loading data...\n')
 55 | x_path, y = data_helper.get_filenames_and_labels(FLAGS.train_data_dir)
 56 | 
 57 | # split train/dev set
 58 | split_index = -int(float(len(y)) * FLAGS.dev_sample_percentage)
 59 | x_path_train, x_path_dev = x_path[:split_index], x_path[split_index:]
 60 | y_train, y_dev = y[:split_index], y[split_index:]
 61 | 
 62 | del x_path, y
 63 | 
 64 | x_dev = []
 65 | for i in range(len(x_path_dev)):
 66 |     img_data = data_helper.img_resize(img_path=x_path_dev[i], img_height=FLAGS.img_height, img_width=FLAGS.img_width)
 67 |     #img_data_min, img_data_max = np.min(img_data), np.max(img_data)
 68 |     #img_data = (img_data - img_data_min) / (img_data_max - img_data_min)
 69 |     img_data = data_helper.rgb2gray(img_data)
 70 |     x_dev.append(img_data)
 71 | x_dev = np.array(x_dev)
 72 | y_dev = np.array(y_dev)
 73 | 
 74 | 
 75 | input('press enter to start training...\n\n')
 76 | ### training
 77 | # ===============================================
 78 | print('start training...\n')
 79 | with tf.Graph().as_default():
 80 |     session_conf = tf.ConfigProto(
 81 |         allow_soft_placement=FLAGS.allow_soft_placement,
 82 |         log_device_placement=FLAGS.log_device_placement)
 83 |     sess = tf.Session(config=session_conf)
 84 |     with sess.as_default():
 85 |         cnn = ImgCNN(
 86 |             n_classes=y_train.shape[1],
 87 |             img_height=FLAGS.img_height,
 88 |             img_width=FLAGS.img_width,
 89 |             img_channel=FLAGS.img_channels,
 90 |             device_name=FLAGS.device_name
 91 |             )
 92 | 
 93 |         # define training procedure
 94 |         global_step = tf.Variable(0, trainable=False, name='global_step')
 95 |         optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
 96 |         grads_and_vars = optimizer.compute_gradients(cnn.loss)
 97 |         train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
 98 | 
 99 |         # output directory for models and summaries
100 |         timestamp = str(int(time.time()))
101 |         out_dir = os.path.abspath(os.path.join(os.curdir, 'log', timestamp))
102 |         print('Writing log to {}\n'.format(out_dir))
103 | 
104 |         # summary the input images
105 |         tf.summary.image('input_image', cnn.input_image, max_outputs=FLAGS.batch_size)
106 | 
107 |         # summary all the trainable variables
108 |         for var in tf.trainable_variables():
109 |             tf.summary.histogram(name=var.name, values=var)
110 | 
111 |         # summary loss and accuracy
112 |         loss_summary = tf.summary.scalar('loss', cnn.loss)
113 |         acc_summary = tf.summary.scalar('accuracy', cnn.accuracy)
114 | 
115 |         # train summaries
116 |         # train_summary_op = tf.summary.merge([loss_summary, acc_summary])
117 |         train_summary_op = tf.summary.merge_all()
118 |         train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
119 |         train_summary_writer = tf.summary.FileWriter(train_summary_dir, tf.get_default_graph())
120 | 
121 |         # test summaries
122 |         # dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
123 |         dev_summary_op = tf.summary.merge_all()
124 |         dev_summary_dir = os.path.join(out_dir, 'summaries', 'dev')
125 |         dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, tf.get_default_graph())
126 | 
127 |         # checkpointing, tensorflow assumes this directory already existed, so we need to create it
128 |         checkpoint_dir = os.path.join(out_dir, 'checkpoints')
129 |         checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
130 |         if not os.path.exists(checkpoint_dir):
131 |             os.makedirs(checkpoint_dir)
132 |         saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)
133 | 
134 |         def train_step(x_batch, y_batch, writer=None):
135 |             '''
136 |             A single training step.
137 |             '''
138 |             feed_dict = {
139 |                 cnn.input_x: x_batch,
140 |                 cnn.input_y: y_batch,
141 |                 cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
142 |             }
143 |             _, step, summaries, loss, accuracy = sess.run(
144 |                 [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
145 |                 feed_dict)
146 |             timestr = datetime.datetime.now().isoformat()
147 |             print('{}: step {}, loss {:g}, acc {:g}'.format(timestr, step, loss, accuracy))
148 |             if writer:
149 |                 writer.add_summary(summaries, step)
150 | 
151 |         def dev_step(x_batch, y_batch, writer=None):
152 |             '''
153 |             Evaluate the model on test set.
154 |             '''
155 |             feed_dict = {
156 |                 cnn.input_x: x_batch,
157 |                 cnn.input_y: y_batch,
158 |                 cnn.dropout_keep_prob: 1.0
159 |             }
160 |             step, summaries, loss, accuracy = sess.run(
161 |                 [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
162 |                 feed_dict)
163 |             timestr = datetime.datetime.now().isoformat()
164 |             print('{}: step {}, loss {:g}, acc {:g}'.format(timestr, step, loss, accuracy))
165 |             if writer:
166 |                 writer.add_summary(summaries, step)
167 | 
168 |         ### training loop
169 |         # train loop, for each batch
170 |         sess.run(tf.global_variables_initializer())
171 |         batches = data_helper.batch_iter(batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs, img_path_list=x_path_train, label_list=y_train,
172 |             img_height=FLAGS.img_height, img_width=FLAGS.img_width)
173 |         for x_batch, y_batch in batches:
174 |             train_step(x_batch, y_batch, writer=train_summary_writer)
175 |             current_step = tf.train.global_step(sess, global_step)
176 |             if current_step % FLAGS.evaluate_every == 0:
177 |                 print('\nEvaluation on dev set:')
178 |                 dev_step(x_dev, y_dev, writer=dev_summary_writer)
179 |                 print('')
180 |             if current_step % FLAGS.checkpoint_every == 0:
181 |                 path = saver.save(sess=sess, save_path=checkpoint_prefix, global_step=global_step)
182 |                 print('\nSaved model checkpoint to {}\n'.format(path))
183 | 
184 | # end
185 | print('\n--- Done! ---\n')
186 | 


--------------------------------------------------------------------------------