├── .gitignore
├── LICENSE
├── README.md
├── README_CN.md
├── config.py
├── data
    └── rssrai_sense_cls
    │   └── ClsName2id.txt
├── examples
    ├── __init__.py
    ├── accuracy.py
    ├── multi_gpus_train.py
    └── submit.py
├── models
    ├── __init__.py
    ├── losses.py
    ├── network.py
    ├── resnet_utils.py
    └── run_net.py
├── prepare_data
    ├── __init__.py
    ├── gen_data_batch.py
    └── gen_tf_records.py
└── tools
    └── img_encode.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # sense_classification
 2 | 
 3 | [中文版本](README_CN.md)
 4 | 
 5 | ## Introduction
  6 | This repository contains code for the [RSCUP remote sensing image scene classification challenge (遥感图像场景分类)](http://rscup.bjxintong.com.cn/#/theme/1).
 7 | 
 8 | 
 9 | 
10 | This repo is organized as follows:
11 | 
12 | ```
13 | sense_classification/
14 |     |->examples
15 |     |->models
16 |     |->prepare_data
17 |     |->data
18 |     |   |->rssrai_sense_cls
19 |     |   |   |->train
20 |     |   |   |->val
21 |     |   |   |->test
22 |     |   |->tf_records
23 |     |   |->train_list
24 |     |->ckpt
25 |     |->tools
26 | ```
27 | 
28 | ## Requirements
 29 | 1. tensorflow-gpu==1.12.0 (only tested with TensorFlow 1.12.0)
30 | 2. python==3.4.3
31 | 3. numpy
32 | 4. easydict
33 | 5. opencv==3.4.1
 34 | 6. Some Python packages may be missing from this list; install them according to the error messages.
35 | 
36 | ## Installation, Prepare data, Training, Val, Generate submit
37 | ### Installation
 38 | 1. Clone the sense_classification repository. The directory you cloned it into is referred to below as `${sense_classification_ROOT}`.
39 | 
40 | ```
41 | git clone https://github.com/vicwer/sense_classification.git
42 | ```
43 | 
44 | ### Prepare data
 45 | The data directory should be organized as follows:
46 | 
47 | ```
48 | data/
49 |     |->rssrai_sense_cls
50 |     |   |->train
51 |     |   |->val
52 |     |   |->test
53 |     |   |->ClsName2id.txt
54 |     |->train_list/train.txt
55 |     |->tf_records
56 | ```
57 | 1. Download dataset and unzip: train.zip, val.zip, test.zip, ClsName2id.txt
58 | 
59 | 2. Generate tf_records:
60 | 
61 | ```
62 | cd tools
63 | python3 img_encode.py
64 | ```
65 | 
66 | ### Training
67 | 
 68 | A commonly used `config.py` is provided in `${sense_classification_ROOT}`; edit it to set the hyperparameters.
69 | 
70 | e.g.
71 | ```
72 | cd ${sense_classification_ROOT}
73 | vim config.py
74 | cfg.train.num_gpus = {your gpu nums}
75 | etc.
76 | 
77 | cd ${sense_classification_ROOT}/examples/
78 | python3 multi_gpus_train.py
79 | ```
80 | 
81 | ### Val
82 | 
83 | ```
84 | cd ${sense_classification_ROOT}/examples/
85 | python3 accuracy.py
86 | ```
87 | 
88 | ### Generate submit
89 | 
90 | ```
91 | cd ${sense_classification_ROOT}/examples/
92 | python3 submit.py
93 | ```
94 | 
95 | ## Result:
96 | 
 97 | - Val: 0.908+
 98 | - Test: 0.90509
99 | 


--------------------------------------------------------------------------------
/README_CN.md:
--------------------------------------------------------------------------------
 1 | # 遥感图像场景分类
 2 | 
 3 | [English Version](README.md)
 4 | 
 5 | ## 简介
 6 | [RSCUP: 遥感图像场景分类](http://rscup.bjxintong.com.cn/#/theme/1)
 7 | 
 8 | 
 9 | 
10 | 仓库路径应该组织成如下结构:
11 | 
12 | ```
13 | sense_classification/
14 |     |->examples
15 |     |->models
16 |     |->prepare_data
17 |     |->data
18 |     |   |->rssrai_sense_cls
19 |     |   |   |->train
20 |     |   |   |->val
21 |     |   |   |->test
22 |     |   |->tf_records
23 |     |   |->train_list
24 |     |->ckpt
25 |     |->tools
26 | ```
27 | 
28 | ## 环境依赖
 29 | 1. tensorflow-gpu==1.12.0 (仅在 tensorflow 1.12.0 上测试过)
30 | 2. python==3.4.3
31 | 3. numpy
32 | 4. easydict
33 | 5. opencv==3.4.1
34 | 6. 有些包可能没列出来,根据错误提示安装
35 | 
36 | ## 安装, 准备数据, 训练, 验证, 生成提交文件
37 | ### 安装
38 | 1. 下载代码
39 | 
40 | ```
41 | git clone https://github.com/vicwer/sense_classification.git
42 | ```
43 | 
44 | ### 准备数据
45 | data目录结构:
46 | 
47 | ```
48 | data/
49 |     |->rssrai_sense_cls
50 |     |   |->train
51 |     |   |->val
52 |     |   |->test
53 |     |   |->ClsName2id.txt
54 |     |->train_list/train.txt
55 |     |->tf_records
56 | ```
57 | 1. 下载数据集并解压: train.zip, val.zip, test.zip, ClsName2id.txt
58 | 
59 | 2. 生成 tf_records:
60 | 
61 | ```
62 | cd tools
63 | python3 img_encode.py
64 | ```
65 | 
66 | ### 训练
67 | 
68 | ${sense_classification_ROOT}目录提供了config.py, 可设置超参数
69 | 
70 | 例如
71 | ```
72 | cd ${sense_classification_ROOT}
73 | vim config.py
74 | cfg.train.num_gpus = {your gpu nums}
75 | etc.
76 | 
77 | cd ${sense_classification_ROOT}/examples/
78 | python3 multi_gpus_train.py
79 | ```
80 | 
81 | ### 验证
82 | 
83 | ```
84 | cd ${sense_classification_ROOT}/examples/
85 | python3 accuracy.py
86 | ```
87 | 
88 | ### 生成提交文件
89 | 
90 | ```
91 | cd ${sense_classification_ROOT}/examples/
92 | python3 submit.py
93 | ```
94 | 
95 | ## 结果:
96 | 
 97 | - 验证集: 0.908+
 98 | - 测试集: 0.90509
99 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | from easydict import EasyDict as edict
 2 | import numpy as np
 3 | import tensorflow as tf
 4 | import os
 5 | 
 6 | cfg = edict()
 7 | 
 8 | cfg.classes = 45
 9 | cfg.batch_size = 128
10 | # cfg.batch_size = 256
11 | 
12 | cfg.data_path = '../data/tf_records/train.records'
13 | cfg.ckpt_path = '../ckpt/'
14 | 
15 | # training options
16 | cfg.train = edict()
17 | 
18 | cfg.train.ignore_thresh = .5
19 | cfg.train.ohem_ratio = 0.8
20 | cfg.train.momentum = 0.9
21 | cfg.train.bn_training = True
22 | cfg.train.weight_decay = 0.0001
23 | cfg.train.learning_rate = [1e-3, 1e-4, 1e-5]
24 | cfg.train.max_batches = 30000
25 | cfg.train.lr_steps = [10000., 20000.]
26 | cfg.train.lr_scales = [.1, .1]
27 | cfg.train.num_gpus = 1
28 | cfg.train.tower = 'tower'
29 | 
30 | cfg.train.learn_rate = 0.001
31 | cfg.train.learn_rate_decay = 0.8
32 | cfg.train.learn_rate_decay_epoch = 2
33 | cfg.train.num_samples = 177408
34 | cfg.epochs = 100
35 | cfg.PRINT_LAYER_LOG = True
36 | 


--------------------------------------------------------------------------------
/data/rssrai_sense_cls/ClsName2id.txt:
--------------------------------------------------------------------------------
 1 | 旱地:dry-field:1
 2 | 水田:paddy-field:2
 3 | 梯田:terraced-field:3
 4 | 草地:meadow:4
 5 | 林地:forest:5
 6 | 商业区:commercial-area:6
 7 | 油田:oil-field:7
 8 | 油罐区:storage-tank:8
 9 | 工厂:works:9
10 | 矿区:mine:10
11 | 太阳能发电厂:solar-power-plant:11
12 | 风力发电站:wind-turbine:12
13 | 公园:park:13
14 | 游泳池:swimming-pool:14
15 | 教堂:church:15
16 | 墓地:cemetery:16
17 | 棒球场:baseball-field:17
18 | 篮球场:basketball-court:18
19 | 高尔夫球场:golf-course:19
20 | 足球场:soccer-field:20
21 | 温室:greenhouse:21
22 | 网球场:tennis-court:22
23 | 居民区:residential-area:23
24 | 岛屿:island:24
25 | 河流:river:25
26 | 停机坪:apron:26
27 | 直升机场:helipad:27
28 | 机场跑道:runway:28
29 | 桥梁:bridge:29
30 | 停车场:parking-lot:30
31 | 公路:road:31
32 | 路边停车区:roadside-parking-lot:32
33 | 转盘:roundabout:33
34 | 立交桥:viaduct:34
35 | 港口:port:35
36 | 铁路:railway:36
37 | 火车站:train-station:37
38 | 裸地:bare-land:38
39 | 沙漠:desert:39
40 | 冰岛:ice-land:40
41 | 山地:mountain:41
42 | 石质地:rock-land:42
43 | 稀疏灌木地:sparse-shrub-land:43
44 | 海滩:beach:44
45 | 湖泊:lake:45
46 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vicwer/sense_classification/687ae9196d1d89da1d309f764f6993e1523c74c5/examples/__init__.py


--------------------------------------------------------------------------------
/examples/accuracy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | 
 4 | import numpy as np
 5 | import tensorflow as tf
 6 | import sys
 7 | sys.path.append('..')
 8 | from models.run_net import SenseClsNet
 9 | from config import cfg
10 | import cv2
11 | import os
12 | from tqdm import tqdm
13 | import zipfile
14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
15 | 
def _load_label_dict(label_file):
    """Parse the class-name file into a ``{first_field: third_field}`` dict.

    Every non-blank line is split on ``':'`` and fields 0 and 2 are kept, so a
    line such as ``name_cn:name_en:7`` yields ``{'name_cn': '7'}``.
    """
    label_dict = dict()
    # The file contains non-ASCII class names, so do not rely on the locale's
    # default encoding; the context manager also guarantees the file is closed.
    with open(label_file, 'r', encoding='utf-8') as labels_f:
        for line in labels_f:
            line = line.strip()
            if not line:
                continue
            key_value = line.split(':')[0::2]
            label_dict[key_value[0]] = key_value[1]
    return label_dict


def accuracy(img_path, label_file, epoch):
    """Evaluate checkpoint ``senceCls-<epoch>`` on a folder-per-class image set.

    Each sub-folder of ``img_path`` is looked up in ``label_file`` to get its
    1-based class id; every image inside is resized to 224x224, run through
    ``SenseClsNet`` one at a time, and the predicted index (+1) is compared
    with that id. The image count and accuracy are printed.

    Args:
        img_path: Directory whose sub-directories are named after classes.
        label_file: Path of the ``name:english_name:id`` mapping file.
        epoch: Checkpoint number appended to ``cfg.ckpt_path + 'senceCls-'``.

    Side effects:
        Sets ``cfg.batch_size`` to 1 and resets the default TensorFlow graph
        when done (also when evaluation fails), so the function can be called
        repeatedly in one process.
    """
    label_dict = _load_label_dict(label_file)
    forders = os.listdir(img_path)

    is_training = False
    cfg.batch_size = 1
    ckpt_dir = cfg.ckpt_path

    correct = 0
    wrong = 0

    configer = tf.ConfigProto()
    configer.gpu_options.per_process_gpu_memory_fraction = 0.1
    try:
        with tf.Session(config=configer) as sess:
            imgs_holder = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
            model = SenseClsNet(imgs_holder, None, is_training)
            classes, scores = model.predict()

            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())

            saver.restore(sess, ckpt_dir + 'senceCls-' + str(epoch))
            sess.run(tf.local_variables_initializer())

            for f in tqdm(forders):
                if f not in label_dict:
                    # Stray entries (hidden files, unknown folders) have no
                    # ground-truth id, so they cannot be scored.
                    print("Skip entry without label: {}".format(f))
                    continue
                label = float(label_dict[f])
                class_dir = os.path.join(img_path, f)
                for img_name in os.listdir(class_dir):
                    path = os.path.join(class_dir, img_name)
                    img = cv2.imread(path)
                    if img is None:
                        # cv2.imread returns None instead of raising when the
                        # file is missing or is not a decodable image.
                        print("Skip unreadable image: {}".format(path))
                        continue

                    image = cv2.resize(img, (224, 224))
                    # NOTE(review): this scales pixels to [0, 2]; presumably it
                    # mirrors the training-time preprocessing -- confirm.
                    img_data = image.astype(np.float32) / 255.0 * 2.0

                    classes_index = sess.run(classes, feed_dict={imgs_holder: np.reshape(img_data, [1, 224, 224, 3])})
                    # Predicted indices are 0-based, class ids in the label
                    # file are 1-based.
                    if classes_index[0] + 1 == label:
                        correct += 1
                    else:
                        wrong += 1

            total = correct + wrong
            print("All images:\n {}".format(int(total)))
            if total == 0:
                print("Accuracy: n/a (no images evaluated)")
            else:
                print("Accuracy: {:.4f}".format(float(correct) / float(total)))
    finally:
        tf.reset_default_graph()
69 | 
if __name__ == '__main__':
    # Score the saved checkpoints from number 12 down to number 2, newest first.
    label_file = '../data/rssrai_sense_cls/ClsName2id.txt'
    img_path = '../data/rssrai_sense_cls/train'

    epoch = np.arange(12, 1, -1)
    print(epoch)
    for ckpt_idx in epoch:
        banner = '================{}================'.format(ckpt_idx)
        print(banner)
        accuracy(img_path, label_file, ckpt_idx)
79 | 


--------------------------------------------------------------------------------
/examples/multi_gpus_train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | 
  4 | import numpy as np
  5 | import tensorflow as tf
  6 | import sys
  7 | sys.path.append('..')
  8 | from models.run_net import SenseClsNet
  9 | from prepare_data.gen_data_batch import gen_data_batch
 10 | from config import cfg
 11 | import os
 12 | import re
 13 | import tensorflow.contrib.slim as slim
 14 | 
 15 | gpu_list = np.arange(cfg.train.num_gpus)
 16 | os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(i) for i in gpu_list)
 17 | 
 18 | def get_variables_to_restore(include_vars=[], exclude_global_pool=False):
 19 |     variables_to_restore = []
 20 |     for var in slim.get_model_variables():
 21 |         if exclude_global_pool and 'global_pool' in var.op.name:
 22 |             #print(var)
 23 |             continue
 24 |         variables_to_restore.append(var)
 25 |     for var in slim.get_variables_to_restore(include=include_vars):
 26 |         if exclude_global_pool and 'global_pool' in var.op.name:
 27 |             #print(var)
 28 |             continue
 29 |         variables_to_restore.append(var)
 30 |     return variables_to_restore
 31 | 
 32 | def average_gradients(tower_grads):
 33 |     """Calculate the average gradient for each shared variable across all towers.
 34 | 
 35 |     Note that this function provides a synchronization point across all towers.
 36 | 
 37 |     Args:
 38 |           tower_grads: List of lists of (gradient, variable) tuples. The outer list
 39 |         is over individual gradients. The inner list is over the gradient
 40 |         calculation for each tower.
 41 |     Returns:
 42 |           List of pairs of (gradient, variable) where the gradient has been averaged
 43 |         across all towers.
 44 |     """
 45 |     average_grads = []
 46 |     for grad_and_vars in zip(*tower_grads):
 47 |         # Note that each grad_and_vars looks like the following:
 48 |         #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
 49 |         grads = []
 50 |         for g, _ in grad_and_vars:
 51 |             # Add 0 dimension to the gradients to represent the tower.
 52 |             expanded_g = tf.expand_dims(g, 0)
 53 | 
 54 |             # Append on a 'tower' dimension which we will average over below.
 55 |             grads.append(expanded_g)
 56 | 
 57 |         # Average over the 'tower' dimension.
 58 |         grad = tf.concat(axis=0, values=grads)
 59 |         grad = tf.reduce_mean(grad, 0)
 60 | 
 61 |         # Keep in mind that the Variables are redundant because they are shared
 62 |         # across towers. So .. we will just return the first tower's pointer to
 63 |         # the Variable.
 64 |         v = grad_and_vars[0][1]
 65 |         grad_and_var = (grad, v)
 66 |         average_grads.append(grad_and_var)
 67 |     return average_grads
 68 | 
 69 | def train(finetune):
 70 |     is_training = True
 71 | 
 72 |     # data pipeline
 73 |     imgs, true_boxes = gen_data_batch(cfg.data_path, cfg.batch_size*cfg.train.num_gpus)
 74 |     imgs_split = tf.split(imgs, cfg.train.num_gpus)
 75 |     true_boxes_split = tf.split(true_boxes, cfg.train.num_gpus)
 76 | 
 77 |     global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0.), trainable=False)
 78 |     #lr = tf.train.piecewise_constant(global_step, cfg.train.lr_steps, cfg.train.learning_rate)
 79 |     #optimizer = tf.train.AdamOptimizer(learning_rate=lr)
 80 |     learn_rate_decay_step = int(cfg.train.num_samples / cfg.batch_size / cfg.train.num_gpus * cfg.train.learn_rate_decay_epoch)
 81 |     learning_rate = tf.train.exponential_decay(cfg.train.learn_rate, global_step, learn_rate_decay_step, cfg.train.learn_rate_decay, staircase=True)
 82 |     #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
 83 |     optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
 84 | 
 85 |     # Calculate the gradients for each model tower.
 86 |     tower_grads = []
 87 |     with tf.variable_scope(tf.get_variable_scope()):
 88 |         for i in range(cfg.train.num_gpus):
 89 |             with tf.device('/gpu:%d' % i):
 90 |                 with tf.name_scope('%s_%d' % (cfg.train.tower, i)) as scope:
 91 |                     model = SenseClsNet(imgs_split[i], true_boxes_split[i], is_training)
 92 |                     loss = model.compute_loss()
 93 |                     tf.get_variable_scope().reuse_variables()
 94 |                     grads = optimizer.compute_gradients(loss)
 95 |                     tower_grads.append(grads)
 96 |                     if i == 0:
 97 |                         current_loss = loss
 98 |                         update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
 99 |                         # print(tf.GraphKeys.UPDATE_OPS)
100 |                         # print(update_op)
101 |                         # print(grads)
102 |                         # vars_det = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="BioRecNet")
103 |     grads = average_gradients(tower_grads)
104 |     with tf.control_dependencies(update_op):
105 |         apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
106 |         train_op = tf.group(apply_gradient_op,*update_op)
107 | 
108 |     # GPU config
109 |     config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
110 |     config.gpu_options.allow_growth = True
111 |     sess = tf.Session(config=config)
112 | 
113 |     # Create a saver
114 |     saver = tf.train.Saver(max_to_keep=1000)
115 |     ckpt_dir = cfg.ckpt_path
116 |     if not os.path.exists(ckpt_dir):
117 |         os.makedirs(ckpt_dir)
118 | 
119 |     # init
120 |     sess.run(tf.global_variables_initializer())
121 |     if finetune:
122 |         checkpoint = './pre_train.ckpt'
123 | 
124 |         # variables_to_restore = slim.get_variables_to_restore()
125 |         # init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
126 |         # sess.run(init_assign_op, init_feed_dict)
127 | 
128 |         variables_to_restore = get_variables_to_restore(exclude_global_pool=True)
129 |         init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
130 |         sess.run(init_assign_op, init_feed_dict)
131 | 
132 |     # running
133 |     cnt_epoch = 0
134 | 
135 |     for i in range(1, cfg.train.max_batches):
136 |         _, loss_, lr_ = sess.run([train_op, current_loss, learning_rate])
137 |         if(i % 5 == 0):
138 |             print(i,': ', loss_, '          lr: ', lr_)
139 |         if int(i) % int(cfg.train.num_samples / cfg.train.num_gpus / cfg.batch_size) == 0:
140 |             cnt_epoch += 1
141 |             saver.save(sess, ckpt_dir+'senceCls', global_step=cnt_epoch, write_meta_graph=True)
142 | 
# Script entry point: start a training run without restoring the
# pre-trained checkpoint (the `finetune` branch of train() is skipped).
if __name__ == '__main__':
    train(finetune=False)
145 | 


--------------------------------------------------------------------------------
/examples/submit.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | 
 4 | import numpy as np
 5 | import tensorflow as tf
 6 | import sys
 7 | sys.path.append('..')
 8 | from models.run_net import SenseClsNet
 9 | from config import cfg
10 | import cv2
11 | import os
12 | from tqdm import tqdm
13 | import zipfile
14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
15 | 
def submit(img_path, submit_file, epoch):
    """Predict a class for every test image and write a zipped submission.

    The checkpoint ``cfg.ckpt_path + 'senceCls-<epoch>'`` is restored, images
    ``00001.jpg`` .. ``89233.jpg`` under *img_path* are classified one by one,
    and a ``<file name> <class id>`` line is written to *submit_file* for each
    of them.  The text file is then packed into
    ``./submit/classification.zip``.

    Args:
        img_path: Directory holding the zero-padded ``NNNNN.jpg`` test images.
        submit_file: Path of the plain-text result file to create.
        epoch: Checkpoint index appended to the ``senceCls-`` prefix.

    Raises:
        IOError: If OpenCV cannot read one of the expected test images.
    """
    is_training = False
    cfg.batch_size = 1
    ckpt_dir = cfg.ckpt_path

    configer = tf.ConfigProto()
    configer.gpu_options.per_process_gpu_memory_fraction = 0.1
    # Context managers guarantee the result file and the session are released
    # even when prediction aborts half-way through.
    with open(submit_file, 'w') as submit_f, tf.Session(config=configer) as sess:
        imgs_holder = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
        model = SenseClsNet(imgs_holder, None, is_training)
        classes, scores = model.predict()

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        saver.restore(sess, ckpt_dir + 'senceCls-' + str(epoch))
        sess.run(tf.local_variables_initializer())

        for idx in tqdm(range(1, 89234)):
            img_name = str(idx).zfill(5) + '.jpg'
            img_file = os.path.join(img_path, img_name)
            img = cv2.imread(img_file)
            if img is None:
                # cv2.imread signals failure with None; fail with a clear
                # message instead of an opaque error inside cv2.resize.
                raise IOError('cannot read image: {}'.format(img_file))
            image = cv2.resize(img, (224, 224))
            # Scale pixel values to [0, 2].
            img_data = image.astype(np.float32) / 255.0 * 2.0

            classes_index, scores_0 = sess.run(
                [classes, scores],
                feed_dict={imgs_holder: np.reshape(img_data, [1, 224, 224, 3])})
            # The written class id is the predicted index shifted by one.
            submit_f.write(img_name + ' ' + str(classes_index[0] + 1) + '\n')

    # ZIP_DEFLATED is the documented constant for deflate compression.
    with zipfile.ZipFile('./submit/classification.zip', 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(submit_file)

    tf.reset_default_graph()
49 | 
if __name__ == '__main__':
    # Test images to classify and the directory receiving the submission.
    test_dir = '../data/rssrai_sense_cls/test'
    out_dir = './submit'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    result_txt = './submit/classification.txt'

    # Checkpoint indices to evaluate; every run rewrites the same output files.
    epochs = [80]
    print(epochs)
    for ckpt_epoch in epochs:
        submit(test_dir, result_txt, ckpt_epoch)
60 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vicwer/sense_classification/687ae9196d1d89da1d309f764f6993e1523c74c5/models/__init__.py


--------------------------------------------------------------------------------
/models/losses.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | 
 4 | import tensorflow as tf
 5 | import sys
 6 | sys.path.append('..')
 7 | from config import cfg
 8 | 
 9 | PRINT_LAYER_LOG = cfg.PRINT_LAYER_LOG
10 | 
def loss(preds, labels):
    """Return the mean softmax cross-entropy of *preds* against *labels*.

    Args:
        preds: Logits tensor passed to the cross-entropy op.
        labels: Class indices; cast to int64, reshaped to
            ``(cfg.batch_size, -1)`` and one-hot encoded with ``cfg.classes``
            classes.

    Returns:
        Scalar tensor: the cross-entropy averaged over all entries.
    """
    class_ids = tf.cast(labels, tf.int64)
    if PRINT_LAYER_LOG:
        print('pre labels', class_ids.get_shape())

    class_ids = tf.reshape(class_ids, (cfg.batch_size, -1))
    if PRINT_LAYER_LOG:
        print('labels', class_ids.get_shape())

    one_hot_targets = tf.one_hot(class_ids, cfg.classes)
    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=preds, labels=one_hot_targets)
    return tf.reduce_mean(per_example)
22 | 
def loss_ohem(preds, labels):
    """Return the mean cross-entropy over the hardest examples of the batch.

    The per-example softmax cross-entropy is computed for the whole batch and
    only the ``cfg.batch_size * cfg.train.ohem_ratio`` largest values are
    averaged.

    Args:
        preds: Logits tensor passed to the cross-entropy op.
        labels: Class indices holding exactly ``cfg.batch_size`` values.

    Returns:
        Scalar tensor: mean loss of the selected examples.
    """
    labels = tf.cast(labels, tf.int64)
    labels = tf.reshape(labels, (cfg.batch_size,))
    if PRINT_LAYER_LOG:
        print('pre labels', labels.get_shape())
    labels = tf.one_hot(labels, cfg.classes)
    if PRINT_LAYER_LOG:
        print('labels', labels.get_shape())

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=preds, labels=labels)
    if PRINT_LAYER_LOG:
        print('cross_entropy', cross_entropy.get_shape())

    # Number of examples kept; the cast truncates the product to an integer.
    keep_num = tf.cast(cfg.batch_size * cfg.train.ohem_ratio, tf.int32)
    cross_entropy = tf.reshape(cross_entropy, (cfg.batch_size,))
    if PRINT_LAYER_LOG:
        print('cross_entropy', cross_entropy.get_shape())

    # Indices of the keep_num largest losses.
    _, k_index = tf.nn.top_k(cross_entropy, keep_num)
    # Named hard_losses so the module-level loss() function is not shadowed.
    hard_losses = tf.gather(cross_entropy, k_index)
    if PRINT_LAYER_LOG:
        print('ohem loss', hard_losses.get_shape())

    return tf.reduce_mean(hard_losses)
39 | 


--------------------------------------------------------------------------------
/models/network.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | 
  4 | import tensorflow as tf
  5 | import tensorflow.contrib.slim as slim
  6 | from tensorflow.python.framework import ops
  7 | import sys
  8 | sys.path.append('..')
  9 | import numpy as np
 10 | from config import cfg
 11 | from models import resnet_utils
 12 | from tensorflow.contrib import layers as layers_lib
 13 | from tensorflow.contrib.framework.python.ops import add_arg_scope
 14 | from tensorflow.contrib.framework.python.ops import arg_scope
 15 | from tensorflow.contrib.layers.python.layers import initializers
 16 | from tensorflow.contrib.layers.python.layers import layers
 17 | from tensorflow.contrib.layers.python.layers import regularizers
 18 | from tensorflow.contrib.layers.python.layers import utils
 19 | from tensorflow.python.framework import ops
 20 | from tensorflow.python.ops import array_ops
 21 | from tensorflow.python.ops import nn_ops
 22 | from tensorflow.python.ops import variable_scope
 23 | 
 24 | PRINT_LAYER_LOG = cfg.PRINT_LAYER_LOG
 25 | 
 26 | def network_arg_scope(is_training=True,
 27 |                       weight_decay=cfg.train.weight_decay,
 28 |                       batch_norm_decay=0.997,
 29 |                       batch_norm_epsilon=1e-5,
 30 |                       batch_norm_scale=True):
 31 |     batch_norm_params = {
 32 |         'is_training': is_training, 'decay': batch_norm_decay,
 33 |         'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale,
 34 |         'updates_collections': ops.GraphKeys.UPDATE_OPS,
 35 |         #'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
 36 |         'trainable': cfg.train.bn_training,
 37 |     }
 38 | 
 39 |     with slim.arg_scope(
 40 |             [slim.conv2d, slim.separable_convolution2d],
 41 |             weights_regularizer=slim.l2_regularizer(weight_decay),
 42 |             weights_initializer=slim.variance_scaling_initializer(),
 43 |             trainable=is_training,
 44 |             activation_fn=tf.nn.relu6,
 45 |             #activation_fn=tf.nn.relu,
 46 |             normalizer_fn=slim.batch_norm,
 47 |             normalizer_params=batch_norm_params,
 48 |             padding='SAME'):
 49 |         with slim.arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc:
 50 |             return arg_sc
 51 | 
 52 | class Network(object):
 53 |     def __init__(self):
 54 |         pass
 55 | 
 56 |     def inference(self, mode, inputs, scope='SenseCls'):
 57 |         is_training = mode
 58 |         with slim.arg_scope(network_arg_scope(is_training=is_training)):
 59 |             with tf.variable_scope(scope, reuse=False):
 60 |                 conv0 = slim.conv2d(inputs,
 61 |                                     num_outputs=64,
 62 |                                     kernel_size=[7,7],
 63 |                                     stride=2,
 64 |                                     scope='conv0')
 65 |                 if PRINT_LAYER_LOG:
 66 |                     print(conv0.name, conv0.get_shape())
 67 |                 pool0 = slim.max_pool2d(conv0, kernel_size=[3, 3], stride=2, scope='pool0')
 68 |                 if PRINT_LAYER_LOG:
 69 |                     print('pool0', pool0.get_shape())
 70 | 
 71 |                 block0_0 = block(pool0, 64, 1, 'block0_0')
 72 |                 block0_1 = block(block0_0, 64, 1, 'block0_1')
 73 |                 block0_2 = block(block0_1, 64, 1, 'block0_2')
 74 | 
 75 |                 block1_0 = block(block0_2, 128, 2, 'block1_0')
 76 |                 block1_1 = block(block1_0, 128, 1, 'block1_1')
 77 |                 block1_2 = block(block1_1, 128, 1, 'block1_2')
 78 |                 block1_3 = block(block1_2, 128, 1, 'block1_3')
 79 | 
 80 |                 block2_0 = block(block1_3, 256, 2, 'block2_0')
 81 |                 block2_1 = block(block2_0, 256, 1, 'block2_1')
 82 |                 block2_2 = block(block2_1, 256, 1, 'block2_2')
 83 |                 block2_3 = block(block2_2, 256, 1, 'block2_3')
 84 |                 block2_4 = block(block2_3, 256, 1, 'block2_4')
 85 |                 block2_5 = block(block2_4, 256, 1, 'block2_5')
 86 | 
 87 |                 block3_0 = block(block2_5, 512, 2, 'block3_0')
 88 |                 block3_1 = block(block3_0, 512, 1, 'block3_1')
 89 |                 block3_2 = block(block3_1, 512, 1, 'block3_2')
 90 | 
 91 |                 net = tf.reduce_mean(block3_2, [1, 2], keepdims=True, name='global_pool_v4')
 92 |                 if PRINT_LAYER_LOG:
 93 |                     print('avg_pool', net.get_shape())
 94 |                 net = slim.flatten(net, scope='PreLogitsFlatten')
 95 |                 net = slim.dropout(net, 0.8, is_training=is_training, scope='dropout')
 96 |                 logits = fully_connected(net, cfg.classes, name='fc')
 97 |                 if PRINT_LAYER_LOG:
 98 |                     print('logits', logits.get_shape())
 99 |                 if is_training:
100 |                     l2_loss = tf.add_n(tf.losses.get_regularization_losses())
101 |                     return logits, l2_loss
102 |                 else:
103 |                     return logits
104 | 
def block(inputs, c_outputs, s, name):
    """Two stacked 3x3 convolutions with an optional gated identity shortcut.

    When ``s == 2`` the ReLU of the second convolution is returned directly
    and no shortcut is added.  Otherwise the convolution output is rescaled
    per channel by a squeeze-and-excitation gate (reduction factor 16) and
    added to *inputs* before the final ReLU.

    Args:
        inputs: Input feature map.
        c_outputs: Number of output channels of both convolutions.
        s: Stride of the first convolution.
        name: Prefix for the scopes of the layers created here.

    Returns:
        The activated output feature map.
    """
    use_se = True

    first = slim.conv2d(inputs,
                        num_outputs=c_outputs,
                        kernel_size=[3,3],
                        stride=s,
                        scope=name+'_0')
    if PRINT_LAYER_LOG:
        print(name+'_0', first.get_shape())

    # No activation here: ReLU is applied after the (optional) shortcut add.
    residual = slim.conv2d(first,
                           num_outputs=c_outputs,
                           kernel_size=[3,3],
                           stride=1,
                           activation_fn=None,
                           scope=name+'_1')
    if PRINT_LAYER_LOG:
        print(name+'_1', residual.get_shape())

    if s == 2:
        return nn_ops.relu(residual)

    if use_se:
        squeeze = tf.reduce_mean(residual, [1, 2], keepdims=True, name='global_pool_v4')
        if PRINT_LAYER_LOG:
            print('squeeze', squeeze.get_shape())

        channels = squeeze.get_shape()[-1]
        # The gate layers are plain 1x1 convs: no batch norm, no weight decay.
        gate_kwargs = dict(normalizer_fn=None,
                           normalizer_params=None,
                           weights_regularizer=None,
                           kernel_size=[1,1],
                           stride=1)

        reduced = slim.conv2d(squeeze,
                              num_outputs=channels // 16,
                              activation_fn=tf.nn.relu,
                              scope=name+'_fc1',
                              **gate_kwargs)
        if PRINT_LAYER_LOG:
            print('fc1', reduced.get_shape())

        gate = slim.conv2d(reduced,
                           num_outputs=channels,
                           activation_fn=tf.sigmoid,
                           scope=name+'_fc2',
                           **gate_kwargs)
        if PRINT_LAYER_LOG:
            print('fc2', gate.get_shape())

        residual = residual * gate

    activated = nn_ops.relu(inputs + residual)
    if PRINT_LAYER_LOG:
        print(name, activated.get_shape())
    return activated
156 | 
def dense_block(inputs, depth, depth_bottleneck, stride, name, rate=1):
    """Bottleneck residual unit: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    Args:
        inputs: Input feature map; its last (4th) dimension is the input depth.
        depth: Number of output channels of the unit.
        depth_bottleneck: Number of channels of the two inner convolutions.
        stride: Stride applied by the 3x3 convolution and by the shortcut.
        name: Prefix for the scopes of the layers created here.
        rate: Atrous rate of the 3x3 convolution.

    Returns:
        ReLU of the sum of the shortcut and the residual branch.
    """
    depth_in = inputs.get_shape()[3]
    if depth == depth_in:
        if stride == 1:
            shortcut = inputs
        else:
            # Subsample the identity shortcut so it matches the strided
            # residual branch.  (The original referenced an undefined name
            # `factor` here and raised NameError.)
            shortcut = layers.max_pool2d(inputs, [1, 1], stride=stride, scope=name+'_shortcut')
    else:
        # Channel counts differ: project the input with a 1x1 convolution.
        shortcut = layers.conv2d(
            inputs,
            depth, [1, 1],
            stride=stride,
            activation_fn=None,
            scope=name+'_shortcut')
    if PRINT_LAYER_LOG:
        print(name+'_shortcut', shortcut.get_shape())

    residual = layers.conv2d(
        inputs, depth_bottleneck, [1, 1], stride=1, scope=name+'_conv1')
    if PRINT_LAYER_LOG:
        print(name+'_conv1', residual.get_shape())
    residual = resnet_utils.conv2d_same(
        residual, depth_bottleneck, 3, stride, rate=rate, scope=name+'_conv2')
    if PRINT_LAYER_LOG:
        print(name+'_conv2', residual.get_shape())
    residual = layers.conv2d(
        residual, depth, [1, 1], stride=1, activation_fn=None, scope=name+'_conv3')
    if PRINT_LAYER_LOG:
        print(name+'_conv3', residual.get_shape())
    output = nn_ops.relu(shortcut + residual)
    return output
188 | 
def conv2d(inputs, c_outputs, s, name):
    """Apply a 3x3 `slim.conv2d` with *c_outputs* filters and stride *s*."""
    net = slim.conv2d(
        inputs,
        num_outputs=c_outputs,
        kernel_size=[3,3],
        stride=s,
        scope=name)
    if PRINT_LAYER_LOG:
        print(name, net.get_shape())
    return net
194 | 
def maxpool2x2(input, name):
    """Apply a 2x2 max pooling with stride 2 under scope *name*."""
    pooled = slim.max_pool2d(
        input,
        kernel_size=[2, 2],
        stride=2,
        scope=name)
    if PRINT_LAYER_LOG:
        print(name, pooled.get_shape())
    return pooled
200 | 
def fully_connected(input, c_outputs, name):
    """Apply a linear (no activation) fully connected layer with *c_outputs* units."""
    dense = slim.fully_connected(
        input,
        c_outputs,
        activation_fn=None,
        scope=name)
    if PRINT_LAYER_LOG:
        print(name, dense.get_shape())
    return dense
206 | 
def d_p_conv(inputs, c_outputs, s, name):
    """Apply a 3x3 separable convolution followed by a 1x1 convolution.

    Args:
        inputs: Input feature map.
        c_outputs: Number of output channels of the 1x1 convolution.
        s: Stride of the separable (first) convolution.
        name: Prefix for the ``_d_conv`` and ``_p_conv`` scopes.

    Returns:
        Output of the 1x1 convolution.
    """
    # num_outputs=None together with depth_multiplier=1 is passed to slim's
    # separable convolution; channel mixing is left to the 1x1 conv below.
    depthwise = slim.separable_convolution2d(
        inputs,
        num_outputs=None,
        stride=s,
        depth_multiplier=1,
        kernel_size=[3, 3],
        normalizer_fn=slim.batch_norm,
        scope=name+'_d_conv')
    if PRINT_LAYER_LOG:
        print(name, depthwise.get_shape())

    pointwise = slim.conv2d(
        depthwise,
        num_outputs=c_outputs,
        kernel_size=[1,1],
        stride=1,
        scope=name+'_p_conv')
    if PRINT_LAYER_LOG:
        print(name, pointwise.get_shape())
    return pointwise
226 | 
def route(input_list, name):
    """Concatenate the tensors of *input_list* along axis 3 under scope *name*."""
    with tf.name_scope(name):
        merged = tf.concat(input_list, 3, name='concat')
    if PRINT_LAYER_LOG:
        print(name, merged.get_shape())
    return merged
233 | 


--------------------------------------------------------------------------------
/models/resnet_utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """Contains building blocks for various versions of Residual Networks.
 16 | Residual networks (ResNets) were proposed in:
 17 |   Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
 18 |   Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015
 19 | More variants were introduced in:
 20 |   Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
 21 |   Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016
 22 | We can obtain different ResNet variants by changing the network depth, width,
 23 | and form of residual unit. This module implements the infrastructure for
 24 | building them. Concrete ResNet units and full ResNet networks are implemented in
 25 | the accompanying resnet_v1.py and resnet_v2.py modules.
 26 | Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
 27 | implementation we subsample the output activations in the last residual unit of
 28 | each block, instead of subsampling the input activations in the first residual
 29 | unit of each block. The two implementations give identical results but our
 30 | implementation is more memory efficient.
 31 | """
 32 | 
 33 | from __future__ import absolute_import
 34 | from __future__ import division
 35 | from __future__ import print_function
 36 | 
 37 | import collections
 38 | 
 39 | from tensorflow.contrib import layers as layers_lib
 40 | from tensorflow.contrib.framework.python.ops import add_arg_scope
 41 | from tensorflow.contrib.framework.python.ops import arg_scope
 42 | from tensorflow.contrib.layers.python.layers import initializers
 43 | from tensorflow.contrib.layers.python.layers import layers
 44 | from tensorflow.contrib.layers.python.layers import regularizers
 45 | from tensorflow.contrib.layers.python.layers import utils
 46 | from tensorflow.python.framework import ops
 47 | from tensorflow.python.ops import array_ops
 48 | from tensorflow.python.ops import nn_ops
 49 | from tensorflow.python.ops import variable_scope
 50 | 
 51 | def subsample(inputs, factor, scope=None):
 52 |   """Subsamples the input along the spatial dimensions.
 53 |   Args:
 54 |     inputs: A `Tensor` of size [batch, height_in, width_in, channels].
 55 |     factor: The subsampling factor.
 56 |     scope: Optional variable_scope.
 57 |   Returns:
 58 |     output: A `Tensor` of size [batch, height_out, width_out, channels] with the
 59 |       input, either intact (if factor == 1) or subsampled (if factor > 1).
 60 |   """
 61 |   if factor == 1:
 62 |     return inputs
 63 |   else:
 64 |     return layers.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
 65 | 
 66 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
 67 |   """Strided 2-D convolution with 'SAME' padding.
 68 |   When stride > 1, then we do explicit zero-padding, followed by conv2d with
 69 |   'VALID' padding.
 70 |   Note that
 71 |      net = conv2d_same(inputs, num_outputs, 3, stride=stride)
 72 |   is equivalent to
 73 |      net = tf.contrib.layers.conv2d(inputs, num_outputs, 3, stride=1,
 74 |      padding='SAME')
 75 |      net = subsample(net, factor=stride)
 76 |   whereas
 77 |      net = tf.contrib.layers.conv2d(inputs, num_outputs, 3, stride=stride,
 78 |      padding='SAME')
 79 |   is different when the input's height or width is even, which is why we add the
 80 |   current function. For more details, see ResnetUtilsTest.testConv2DSameEven().
 81 |   Args:
 82 |     inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
 83 |     num_outputs: An integer, the number of output filters.
 84 |     kernel_size: An int with the kernel_size of the filters.
 85 |     stride: An integer, the output stride.
 86 |     rate: An integer, rate for atrous convolution.
 87 |     scope: Scope.
 88 |   Returns:
 89 |     output: A 4-D tensor of size [batch, height_out, width_out, channels] with
 90 |       the convolution output.
 91 |   """
 92 |   if stride == 1:
 93 |     return layers_lib.conv2d(
 94 |         inputs,
 95 |         num_outputs,
 96 |         kernel_size,
 97 |         stride=1,
 98 |         rate=rate,
 99 |         padding='SAME',
100 |         scope=scope)
101 |   else:
102 |     kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
103 |     pad_total = kernel_size_effective - 1
104 |     pad_beg = pad_total // 2
105 |     pad_end = pad_total - pad_beg
106 |     inputs = array_ops.pad(
107 |         inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
108 |     return layers_lib.conv2d(
109 |         inputs,
110 |         num_outputs,
111 |         kernel_size,
112 |         stride=stride,
113 |         rate=rate,
114 |         padding='VALID',
115 |         scope=scope)
116 | 


--------------------------------------------------------------------------------
/models/run_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | 
 4 | import tensorflow as tf
 5 | import sys
 6 | sys.path.append('..')
 7 | from models.network import Network
 8 | from config import cfg
 9 | from models.losses import loss, loss_ohem
10 | 
class SenseClsNet:
    """Wraps the `Network` backbone with loss computation and prediction."""

    def __init__(self, img, truth, is_training, batcn_norm_decay=0.997):
        """Build the backbone graph on *img*.

        Args:
            img: Image batch tensor fed to the backbone.
            truth: Ground-truth labels used by `compute_loss` (may be None
                when only `predict` is used).
            is_training: Truthy to build the training graph, which also
                yields the regularization loss.
            batcn_norm_decay: Stored as ``self.batch_norm_decay``.
        """
        self.img = img
        self.truth = truth
        self.is_training = is_training
        self.batch_norm_decay = batcn_norm_decay
        self.img_shape = tf.shape(self.img)
        # Only the training graph returns a regularization term; keep an
        # explicit None so compute_loss() can report misuse clearly.
        self.l2_loss = None
        backbone = Network()
        if is_training:
            self.head, self.l2_loss = backbone.inference(self.is_training, self.img)
        else:
            self.head = backbone.inference(self.is_training, self.img)

    def compute_loss(self):
        """Return classification loss plus regularization loss.

        Raises:
            RuntimeError: If the model was built with ``is_training`` falsy,
                in which case no regularization loss exists.
        """
        if self.l2_loss is None:
            raise RuntimeError(
                'compute_loss() requires a model built with is_training=True')
        with tf.name_scope('loss_0'):
            cls_loss = loss(self.head, self.truth)
            self.all_loss = cls_loss + self.l2_loss
        return self.all_loss

    def predict(self):
        """Return ``(class_index, score)`` for the input batch.

        ``score`` is the softmax over the logits reshaped to
        ``(-1, cfg.classes)``; ``class_index`` is the argmax of those logits
        along axis 1.
        """
        # Reshape once and reuse it for both outputs.
        pred_score = tf.reshape(self.head, (-1, cfg.classes))
        score = tf.nn.softmax(pred_score)
        class_index = tf.argmax(pred_score, 1)
        return class_index, score
38 | 


--------------------------------------------------------------------------------
/prepare_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vicwer/sense_classification/687ae9196d1d89da1d309f764f6993e1523c74c5/prepare_data/__init__.py


--------------------------------------------------------------------------------
/prepare_data/gen_data_batch.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | 
  4 | import tensorflow as tf
  5 | import numpy as np
  6 | import sys
  7 | sys.path.append('..')
  8 | from config import cfg
  9 | import os
 10 | import re
 11 | import cv2
 12 | import math
 13 | 
 14 | def distorted_bounding_box_crop(image,
 15 |                                 bbox,
 16 |                                 min_object_covered=0.1,
 17 |                                 aspect_ratio_range=(0.75, 1.33),
 18 |                                 area_range=(0.9, 1.0),
 19 |                                 max_attempts=100,
 20 |                                 scope=None):
 21 | 
 22 |     with tf.name_scope(scope, 'distort_image', [image, image.shape[0], image.shape[1], bbox]):
 23 |         if bbox is None:
 24 |             bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
 25 |                                 dtype=tf.float32,
 26 |                                 shape=[1, 1, 4])
 27 |     if image.dtype != tf.float32:
 28 |         img = tf.image.convert_image_dtype(image, dtype=tf.float32)
 29 | 
 30 |     with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
 31 |         sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
 32 |             tf.shape(image),
 33 |             bounding_boxes=bbox,
 34 |             min_object_covered=min_object_covered,
 35 |             aspect_ratio_range=aspect_ratio_range,
 36 |             area_range=area_range,
 37 |             max_attempts=max_attempts,
 38 |             use_image_if_no_bounding_boxes=True)
 39 |         bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
 40 | 
 41 |         # Crop the image to the specified bounding box.
 42 |         cropped_image = tf.slice(image, bbox_begin, bbox_size)
 43 |         return cropped_image, distort_bbox
 44 | 
 45 | def parser(example):
 46 |     feats = tf.parse_single_example(example, features={'label' : tf.FixedLenFeature([1], tf.float32),
 47 |                                                        'feature': tf.FixedLenFeature([], tf.string)})
 48 |     coord = feats['label']
 49 | 
 50 |     img = tf.decode_raw(feats['feature'], tf.uint8)
 51 |     img = tf.reshape(img, [256, 256, 3])
 52 |     img = tf.contrib.image.rotate(img, tf.random_uniform([], minval=-10*math.pi/180., maxval=10*math.pi/180.))
 53 | 
 54 |     rot_img = tf.image.rot90(img)
 55 |     rot_seed = tf.random_uniform([], maxval=1.0)
 56 |     img = tf.cond(rot_seed > 0.5, lambda: img, lambda: rot_img)
 57 |     img = tf.image.random_flip_left_right(img)
 58 |     img = tf.image.random_flip_up_down(img)
 59 | 
 60 |     crop_img = tf.random_crop(img, [224, 224, 3])
 61 |     img = tf.image.resize_images(img, [224, 224])
 62 |     img = tf.cast(img, tf.uint8)
 63 |     # img, _ = distorted_bounding_box_crop(img, bbox=None)
 64 |     # crop_img = tf.image.resize_images(crop_img, [224, 224])
 65 | 
 66 |     rand_seed = tf.random_uniform([], maxval=1.0)
 67 |     img = tf.cond(rand_seed > 0.5, lambda: img, lambda: crop_img)
 68 | 
 69 |     img = tf.cast(img, tf.float32) / 255.0
 70 |     img = tf.image.random_hue(img, max_delta=0.1)
 71 |     img = tf.image.random_contrast(img, lower=0.9, upper=1.1)
 72 |     img = tf.image.random_brightness(img, max_delta=0.05)
 73 |     img = tf.image.random_saturation(img, lower=0.8, upper=1.2)
 74 |     img = tf.clip_by_value(img, 0.0, 1.0)
 75 |     img = tf.multiply(img, 2.0)
 76 |     return img, coord
 77 | 
 78 | def gen_data_batch(tf_records_filename, batch_size):
 79 |     dt = tf.data.TFRecordDataset(tf_records_filename)
 80 |     dt = dt.map(parser, num_parallel_calls=4)
 81 |     dt = dt.prefetch(batch_size)
 82 |     dt = dt.shuffle(buffer_size=8*batch_size)
 83 |     dt = dt.repeat()
 84 |     dt = dt.batch(batch_size)
 85 |     iterator = dt.make_one_shot_iterator()
 86 |     imgs, true_boxes = iterator.get_next()
 87 | 
 88 |     return imgs, true_boxes
 89 | 
 90 | if __name__ == '__main__':
 91 |     os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
 92 |     tf_records_filename = cfg.data_path
 93 | 
 94 |     imgs, true_boxes = gen_data_batch(tf_records_filename, cfg.batch_size)
 95 |     imgs_split = tf.split(imgs, cfg.train.num_gpus)
 96 |     true_boxes_split = tf.split(true_boxes, cfg.train.num_gpus)
 97 |     configer = tf.ConfigProto()
 98 |     configer.gpu_options.per_process_gpu_memory_fraction = 0.3
 99 |     sess=tf.Session(config=configer)
100 |     for i in range(2):
101 |         for j in range(cfg.train.num_gpus):
102 |             imgs_, true_boxes_ = sess.run([imgs_split[j], true_boxes_split[j]])
103 |             print(true_boxes_.shape)
104 | 
105 |             for k in range(imgs_.shape[0]):
106 |                 cv2.imshow('img', imgs_[k].astype(np.uint8))
107 |                 cv2.waitKey(0)
108 | 
109 | 


--------------------------------------------------------------------------------
/prepare_data/gen_tf_records.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | import os
 6 | import cv2
 7 | from tqdm import tqdm
 8 | import re
 9 | import sys
10 | sys.path.append('..')
11 | from config import cfg
12 | 
def load_file(file_path):
    '''
    Load image paths and zero-based labels from a list file.

    Every non-blank line is expected to be ``<image path> <label>``.  The
    label is the last whitespace-separated field, so image paths that
    contain spaces are read correctly; blank lines are skipped.

    Args:
        file_path: Path of the list file.

    Returns:
        (imgs_path, labels): two numpy arrays; each label is the value from
        the file minus 1, stored as float.
    '''
    imgs_path = []
    labels = []
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split from the right: only the final field is the label.
            img_path, label = line.rsplit(None, 1)
            imgs_path.append(img_path)
            labels.append(float(label) - 1)
    return np.asarray(imgs_path), np.asarray(labels)
27 | 
def extract_image(image_path, height, width, is_resize=True):
    '''
    Read an image with OpenCV (b->g->r channel order) as a uint8 array.

    Args:
        image_path: Path of the image file.
        height: Target height used when is_resize is True.
        width: Target width used when is_resize is True.
        is_resize: Resize the image to (height, width) unless it already has
            that size.

    Returns:
        The image data as a uint8 numpy array.

    Raises:
        IOError: If the image cannot be read.
    '''
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports unreadable files by returning None.
        raise IOError('cannot read image: {}'.format(image_path))

    if is_resize:
        h, w = img.shape[:2]
        if h != height or w != width:
            # cv2.resize expects the target size as (width, height); the
            # original passed (height, height) and ignored `width`.
            img = cv2.resize(img, (width, height))

    return np.array(img, dtype='uint8')
47 | 
def run_encode(file_path, tf_records_filename):
    '''
    Encode every image listed in file_path into a TFRecord file.

    Each record stores the raw bytes of the 256x256 image under 'feature'
    and its label as a one-element float list under 'label'.

    Args:
        file_path: List file understood by load_file.
        tf_records_filename: Path of the TFRecord file to write.
    '''
    print('generate records...')
    imgs_path, labels = load_file(file_path)
    height, width = 256, 256
    # The context manager closes the writer even if an image fails to encode.
    with tf.python_io.TFRecordWriter(tf_records_filename) as writer:
        for img_path, label in tqdm(zip(imgs_path, labels), total=len(imgs_path)):
            img = extract_image(img_path, height, width, is_resize=True)
            # tobytes() replaces the deprecated tostring() alias.
            img_bytes = img.tobytes()
            label_values = label.flatten().tolist()
            example = tf.train.Example(features=tf.train.Features(feature={
                          'label' : tf.train.Feature(float_list = tf.train.FloatList(value=label_values)),
                          'feature': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img_bytes]))
                      }))
            writer.write(example.SerializeToString())
67 | 
if __name__ == '__main__':
    # NOTE(review): the output path is absolute (filesystem root) while the
    # input list is relative to the working directory -- confirm this is
    # intended.
    run_encode('./train.txt', '/train.records')
73 | 


--------------------------------------------------------------------------------
/tools/img_encode.py:
--------------------------------------------------------------------------------
 1 | #-*- coding:utf-8 -*-
 2 | 
 3 | import sys
 4 | import os
 5 | import numpy as np
 6 | import numpy.random as npr
 7 | sys.path.append('..')
 8 | from prepare_data.gen_tf_records import run_encode
 9 | import cv2
10 | from tqdm import tqdm
11 | 
def gen_img_list(img_path, label_file, img_list_path):
    '''
    Write train.txt listing every readable image with its class label.

    label_file holds ':'-separated lines; the first field is the class
    folder name and the third field its label.  Every sub-folder of img_path
    is scanned and each image OpenCV can decode is written to
    <img_list_path>/train.txt as "<absolute path> <label>".  Unreadable
    images are reported and skipped.

    Args:
        img_path: Directory with one sub-folder per class.
        label_file: Class-name to label mapping file.
        img_list_path: Directory receiving train.txt.
    '''
    forders = os.listdir(img_path)
    print(forders)

    label_dict = dict()
    with open(label_file, 'r') as labels_f:
        for l in labels_f:
            l = l.strip()
            if not l:
                continue
            # Fields 0 and 2 are the folder name and its label.
            key_value = l.split(':')[0::2]
            label_dict.update({key_value[0] : key_value[1]})
    print(label_dict)

    print('remove dead img')
    cnt_path = os.getcwd()
    with open(os.path.join(img_list_path, 'train.txt'), 'w') as img_f:
        for f in tqdm(forders):
            label = label_dict[f]
            for img_name in os.listdir(os.path.join(img_path, f)):
                path = os.path.join(cnt_path, img_path, f, img_name)
                # cv2.imread returns None for files it cannot decode; test
                # that explicitly instead of hiding every error behind a
                # bare except.
                if cv2.imread(path) is None:
                    print('dead img: {}'.format(path))
                    continue
                img_f.write(path + ' ' + label + '\n')
39 | 
def shuffle_list(img_list_path):
    '''
    Rewrite ``<img_list_path>/train.txt`` with its lines in random order.

    The order is drawn with ``numpy.random.choice`` without replacement over
    all line indices, so every line is kept exactly once.
    '''
    list_file = os.path.join(img_list_path, 'train.txt')

    with open(list_file, 'r') as src:
        entries = src.readlines()

    with open(list_file, "w") as dst:
        total = len(entries)
        order = npr.choice(total, size=int(total), replace=False)
        dst.writelines(entries[idx] for idx in order)
50 | 
def encode(img_path, label_file, img_list_path, records_path):
    '''
    Build the training image list, shuffle it and encode it as TFRecords.

    Steps: create ``img_list_path`` and ``records_path`` when they do not
    exist, generate ``train.txt`` with ``gen_img_list``, shuffle it ten times
    with ``shuffle_list``, then pass it to ``run_encode`` to write
    ``<records_path>/train.records``.
    '''
    for out_dir in (img_list_path, records_path):
        if os.path.exists(out_dir):
            continue
        os.makedirs(out_dir)

    gen_img_list(img_path, label_file, img_list_path)

    shuffle_rounds = 10
    for _ in range(shuffle_rounds):
        shuffle_list(img_list_path)

    list_file = os.path.join(img_list_path, 'train.txt')
    records_file = os.path.join(records_path, 'train.records')
    run_encode(list_file, records_file)
62 | 
if __name__ == '__main__':
    # Script entry point. All paths are relative to the current working
    # directory, so the script has to be launched from inside tools/.
    encode(
        img_path='../data/rssrai_sense_cls/train',
        label_file='../data/rssrai_sense_cls/ClsName2id.txt',
        img_list_path='../data/train_list/',
        records_path='../data/tf_records',
    )
69 | 


--------------------------------------------------------------------------------