├── .gitattributes ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── images ├── 0000.png └── 00001673.jpg ├── rcnn ├── PY_OP │ ├── __init__.py │ └── rpn_fpn_ohem3.py ├── __init__.py ├── config.py ├── core │ ├── __init__.py │ ├── callback.py │ ├── loader.py │ ├── metric.py │ ├── module.py │ ├── module_bak.py │ └── tester.py ├── cython │ ├── .gitignore │ ├── __init__.py │ ├── anchors.pyx │ ├── bbox.pyx │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── setup.py ├── dataset │ ├── __init__.py │ ├── ds_utils.py │ ├── imdb.py │ └── retinaface.py ├── io │ ├── __init__.py │ ├── image.py │ ├── rcnn.py │ └── rpn.py ├── logger.py ├── processing │ ├── __init__.py │ ├── assign_levels.py │ ├── bbox_regression.py │ ├── bbox_transform.py │ ├── generate_anchor.py │ └── nms.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── mask.py │ ├── maskApi.c │ ├── maskApi.h │ └── setup.py ├── sample_config.py ├── symbol │ ├── __init__.py │ ├── pyramidbox.py │ ├── symbol_common.py │ ├── symbol_common.py.bak │ ├── symbol_mnet.py │ ├── symbol_mnet.py.bak │ ├── symbol_resnet.py │ └── symbol_ssh.py ├── tools │ ├── __init__.py │ ├── demo_images.py │ ├── demo_single_image.py │ ├── reeval.py │ ├── test_rcnn.py │ ├── test_rpn.py │ ├── train_maskrcnn.py │ ├── train_rcnn.py │ └── train_rpn.py └── utils │ ├── __init__.py │ ├── combine_model.py │ ├── load_data.py │ ├── load_model.py │ └── save_model.py ├── retinaface.py ├── test.py ├── test_widerface.py ├── train.py └── train_model.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 89 | # install all needed dependencies. 90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	cd rcnn/cython/; python setup.py build_ext --inplace; rm -rf build; cd ../../
3 | 	cd rcnn/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../
4 | clean:
5 | 	cd rcnn/cython/; rm *.so *.c *.cpp; cd ../../
6 | 	cd rcnn/pycocotools/; rm *.so; cd ../../
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RetinaDetection Object Detector
2 | 
3 | ## Introduction
4 | 
5 | RetinaDetector is a detection method modified from RetinaFace. The original work is a practical single-stage [SOTA](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) face detector, initially described in an [arXiv technical report](https://arxiv.org/abs/1905.00641).
6 | 
7 | ## Data
8 | 
9 | 0. Organise the dataset directory as follows:
10 | 
11 | ```Shell
12 |   data/retinaface/
13 |     train/
14 |       images/
15 |       label.txt
16 |     val/
17 |       images/
18 |       label.txt
19 |     test/
20 |       images/
21 |       label.txt
22 | ```
23 | 
24 | ## Install
25 | 
26 | 1. Install MXNet with GPU support.
27 | 2. Install the Deformable Convolution V2 operator from [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets) if you use the DCN based backbone.
28 | 3. Type ``make`` to build cxx tools.
29 | 
30 | ## Training
31 | 
32 | Please check ``train.py`` for training.
33 | 
34 | 1. Copy ``rcnn/sample_config.py`` to ``rcnn/config.py``
35 | 
36 | For better training results, the following parameters can be tuned to your data:
37 | 
38 | ```python
39 | config.TRAIN.MIN_BOX_SIZE = 10   # minimum bbox size
40 | config.FACE_LANDMARK = False     # use landmarks
41 | config.USE_BLUR = False
42 | config.BBOX_MASK_THRESH = 0
43 | config.COLOR_MODE = 2            # color augmentation
44 | config.COLOR_JITTERING = 0.125
45 | ```
46 | 
47 | Invalid faces are filtered out as follows:
48 | ```python
49 | if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or (y2 - y1) < config.TRAIN.MIN_BOX_SIZE:
50 |     continue
51 | if self._split.startswith('train'):
52 |     blur[ix] = values[19]
53 |     if blur[ix] < 0.25:
54 |         continue
55 | if config.BBOX_MASK_THRESH > 0:
56 |     if (x2 - x1) < config.BBOX_MASK_THRESH or (y2 - y1) < config.BBOX_MASK_THRESH:
57 |         boxes_mask.append(np.array([x1, y1, x2, y2], np.float))
58 |         continue
59 |     if self._split.startswith('train'):
60 |         if blur[ix] < 0.35:
61 |             boxes_mask.append(np.array([x1, y1, x2, y2], np.float))
62 |             continue
63 | ```
64 | 
65 | 2. Download pretrained models and put them into ``model/``.
66 | 
67 | 	ImageNet ResNet50 ([baidu cloud](https://pan.baidu.com/s/1WAkU9ZA_j-OmzO-sdk9whA) and [dropbox](https://www.dropbox.com/s/48b850vmnaaasfl/imagenet-resnet-50.zip?dl=0)).
68 | 
69 | 	ImageNet ResNet152 ([baidu cloud](https://pan.baidu.com/s/1nzQ6CzmdKFzg8bM8ChZFQg) and [dropbox](https://www.dropbox.com/s/8ypcra4nqvm32v6/imagenet-resnet-152.zip?dl=0)).
70 | 
71 | 3. Start training with ``sh train_model.sh``.
72 | Before training, you can check the ``resnet`` network configuration (e.g. pretrained model path, anchor setting, learning rate policy, etc.) in ``rcnn/config.py``.
73 | 
74 | ## Testing
75 | 
76 | Please check ``test.py`` for testing.
77 | 
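For a quick check on a single image, the ``RetinaFace`` wrapper in ``retinaface.py`` can also be called directly. The snippet below is a minimal sketch that assumes the upstream RetinaFace interface; the model prefix, epoch, GPU id and network mode are illustrative placeholders:

```python
import cv2
from retinaface import RetinaFace

# prefix, epoch, GPU id and network mode are placeholders --
# point them at your own trained checkpoint
detector = RetinaFace('./model/retina', 0, 0, 'net3')

img = cv2.imread('images/00001673.jpg')
# faces is an (N, 5) array of [x1, y1, x2, y2, score]; landmarks are
# only returned when the model was trained with config.FACE_LANDMARK
faces, landmarks = detector.detect(img, threshold=0.8, scales=[1.0], do_flip=False)
print(faces)
```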
78 | ## Result
79 | 
80 | 1. Defect detection
81 | 
82 | ![MASK1](https://github.com/bleakie/RetinaDetection/blob/master/images/00001673.jpg)
83 | 
84 | 2. Face detection + face alignment
85 | 
86 | ![MASK1](https://github.com/bleakie/RetinaDetection/blob/master/images/0000.png)
87 | 
88 | ## Models
89 | 
90 | Face detection model with fewer false detections than the original. Faces at large angles, or with a blur score above 0.6, are ignored automatically, which makes the model better suited to face recognition applications: click [here](http://www.multcloud.com/share/5079e926-283b-4833-a216-b3de42eea0fe).
91 | 
92 | ## ToDo
93 | 
94 | Since the defect detection data is confidential, the defect detection models will not be released for now.
95 | 
96 | ## References
97 | 
98 | ```
99 | @article{deng2019retinaface,
100 |   title={RetinaFace: Single-stage Dense Face Localisation in the Wild},
101 |   author={Deng, Jiankang and Guo, Jia and Zhou, Yuxiang and Yu, Jinke and Kotsia, Irene and Zafeiriou, Stefanos},
102 |   journal={arXiv preprint arXiv:1905.00641},
103 |   year={2019}
104 | }
105 | ```
106 | 
107 | 
--------------------------------------------------------------------------------
/images/0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/images/0000.png
--------------------------------------------------------------------------------
/images/00001673.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/images/00001673.jpg
--------------------------------------------------------------------------------
/rcnn/PY_OP/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/PY_OP/__init__.py
--------------------------------------------------------------------------------
/rcnn/PY_OP/rpn_fpn_ohem3.py:
--------------------------------------------------------------------------------
1 | 2 | from __future__ import print_function 3 | import sys 4 | import mxnet as mx 5 | import numpy as np 6 | from distutils.util import strtobool 7 | from ..config import config, generate_config 8 | 9 | 10 | STAT = {0:0} 11 | STEP = 28800 12 | 13 | class RPNFPNOHEM3Operator(mx.operator.CustomOp): 14 | def __init__(self, stride=0, network='', dataset='', prefix=''): 15 | super(RPNFPNOHEM3Operator, self).__init__() 16 | self.stride = int(stride) 17 | self.prefix = prefix 18 | generate_config(network, dataset) 19 | self.mode = 0 20 | if self.prefix!='face': 21 | self.mode = 0 22 | if network=='pbox': 23 | self.mode = 2 24 | global STAT 25 | for k in config.RPN_FEAT_STRIDE: 26 | STAT[k] = [0,0,0] 27 | 28 | def forward(self, is_train, req, in_data, out_data, aux): 29 | global STAT 30 | 31 | cls_score = in_data[0].asnumpy() #BS, 2, ANCHORS 32 | labels_raw = in_data[1].asnumpy() # BS, ANCHORS 33 | 34 | A = config.NUM_ANCHORS 35 | anchor_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1],1), dtype=np.float32 ) 36 | valid_count = np.zeros( (labels_raw.shape[0],1), dtype=np.float32 ) 37 | #print('anchor_weight', anchor_weight.shape) 38 | 39 | #assert labels.shape[0]==1 40 | #assert cls_score.shape[0]==1 41 | #assert bbox_weight.shape[0]==1 42 | #print('shape', cls_score.shape, labels.shape, file=sys.stderr) 43 | #print('bbox_weight 0', bbox_weight.shape, file=sys.stderr) 44 | #bbox_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1], 4), dtype=np.float32) 45 | _stat = [0,0,0] 46 | for ibatch in range(labels_raw.shape[0]): 47 | _anchor_weight = np.zeros( (labels_raw.shape[1],1), dtype=np.float32) 48 | labels = labels_raw[ibatch] 49 | fg_score = cls_score[ibatch,1,:] - cls_score[ibatch,0,:] 50 | 51 | 52 | 53 | fg_inds = np.where(labels>0)[0] 54 | num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE) 55 | origin_num_fg = len(fg_inds) 56 | #print(len(fg_inds), num_fg, file=sys.stderr) 57 | if len(fg_inds) > num_fg: 58 | if self.mode>=1: 59 | 
disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 60 | labels[disable_inds] = -1 61 | else: 62 | pos_ohem_scores = fg_score[fg_inds] 63 | order_pos_ohem_scores = pos_ohem_scores.ravel().argsort() 64 | sampled_inds = fg_inds[order_pos_ohem_scores[:num_fg]] 65 | labels[fg_inds] = -1 66 | labels[sampled_inds] = 1 67 | 68 | n_fg = np.sum(labels>0) 69 | fg_inds = np.where(labels>0)[0] 70 | num_bg = config.TRAIN.RPN_BATCH_SIZE - n_fg 71 | #num_bg = max(10, num_fg*int(1.0/config.TRAIN.RPN_FG_FRACTION-1)) 72 | #if self.mode==2: 73 | # num_bg = num_fg*int(1.0/config.TRAIN.RPN_FG_FRACTION-1) 74 | 75 | bg_inds = np.where(labels == 0)[0] 76 | origin_num_bg = len(bg_inds) 77 | if num_bg==0: 78 | labels[bg_inds] = -1 79 | elif len(bg_inds) > num_bg: 80 | # sort ohem scores 81 | 82 | if self.mode>=1: 83 | disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 84 | labels[disable_inds] = -1 85 | else: 86 | neg_ohem_scores = fg_score[bg_inds] 87 | order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1] 88 | sampled_inds = bg_inds[order_neg_ohem_scores[:num_bg]] 89 | #print('sampled_inds_bg', sampled_inds, file=sys.stderr) 90 | labels[bg_inds] = -1 91 | labels[sampled_inds] = 0 92 | 93 | if n_fg>0: 94 | order0_labels = labels.reshape( (1, A, -1) ).transpose( (0, 2, 1) ).reshape( (-1,) ) 95 | bbox_fg_inds = np.where(order0_labels>0)[0] 96 | #print('bbox_fg_inds, order0 ', bbox_fg_inds, file=sys.stderr) 97 | _anchor_weight[bbox_fg_inds,:] = 1.0 98 | anchor_weight[ibatch] = _anchor_weight 99 | valid_count[ibatch][0] = n_fg 100 | 101 | if self.prefix=='face': 102 | #print('fg-bg', self.stride, n_fg, num_bg) 103 | STAT[0]+=1 104 | STAT[self.stride][0] += config.TRAIN.RPN_BATCH_SIZE 105 | STAT[self.stride][1] += n_fg 106 | STAT[self.stride][2] += np.sum(fg_score[fg_inds]>=0) 107 | #_stat[0] += config.TRAIN.RPN_BATCH_SIZE 108 | #_stat[1] += n_fg 109 | #_stat[2] += np.sum(fg_score[fg_inds]>=0) 110 | #print('stride num_fg', self.stride, n_fg, file=sys.stderr) 111 | #ACC[self.stride] += np.sum(fg_score[fg_inds]>=0) 112 | #x = float(labels_raw.shape[0]*len(config.RPN_FEAT_STRIDE)) 113 | x = 1.0 114 | if STAT[0]%STEP==0: 115 | _str = ['STAT'] 116 | STAT[0] = 0 117 | for k in config.RPN_FEAT_STRIDE: 118 | acc = float(STAT[k][2])/STAT[k][1] 119 | acc0 = float(STAT[k][1])/STAT[k][0] 120 | #_str.append("%d: all-fg(%d, %d, %.4f), fg-fgcorrect(%d, %d, %.4f)"%(k,STAT[k][0], STAT[k][1], acc0, STAT[k][1], STAT[k][2], acc)) 121 | _str.append("%d: (%d, %d, %.4f)"%(k, STAT[k][1], STAT[k][2], acc)) 122 | STAT[k] = [0,0,0] 123 | _str = ' | '.join(_str) 124 | print(_str, file=sys.stderr) 125 | #if self.stride==4 and num_fg>0: 126 | # print('_stat_', self.stride, num_fg, num_bg, file=sys.stderr) 127 | 128 | #labels_ohem = mx.nd.array(labels_raw) 129 | #anchor_weight = mx.nd.array(anchor_weight) 130 | #print('valid_count', self.stride, np.sum(valid_count)) 131 | #print('_stat', _stat, valid_count) 132 | 133 | for ind, val in enumerate([labels_raw, anchor_weight, valid_count]): 134 | val = mx.nd.array(val) 135 | self.assign(out_data[ind], req[ind], val) 136 | 137 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 138 | for i in range(len(in_grad)): 139 | self.assign(in_grad[i], req[i], 0) 140 | 141 | 142 | @mx.operator.register('rpn_fpn_ohem3') 143 | class RPNFPNOHEM3Prop(mx.operator.CustomOpProp): 144 | def __init__(self, stride=0, network='', dataset='', prefix=''): 145 | super(RPNFPNOHEM3Prop, self).__init__(need_top_grad=False) 146 | 
self.stride = stride 147 | self.network=network 148 | self.dataset=dataset 149 | self.prefix = prefix 150 | 151 | def list_arguments(self): 152 | return ['cls_score', 'labels'] 153 | 154 | def list_outputs(self): 155 | return ['labels_ohem', 'anchor_weight', 'valid_count'] 156 | 157 | def infer_shape(self, in_shape): 158 | labels_shape = in_shape[1] 159 | #print('in_rpn_ohem', in_shape[0], in_shape[1], in_shape[2], file=sys.stderr) 160 | anchor_weight_shape = [labels_shape[0], labels_shape[1], 1] 161 | #print('in_rpn_ohem', labels_shape, anchor_weight_shape) 162 | 163 | return in_shape, \ 164 | [labels_shape, anchor_weight_shape, [labels_shape[0], 1]] 165 | 166 | def create_operator(self, ctx, shapes, dtypes): 167 | return RPNFPNOHEM3Operator(self.stride, self.network, self.dataset, self.prefix) 168 | 169 | def declare_backward_dependency(self, out_grad, in_data, out_data): 170 | return [] 171 | 172 | 173 | -------------------------------------------------------------------------------- /rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | 4 | config = edict() 5 | 6 | # network related params 7 | config.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 8 | config.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 9 | config.PIXEL_SCALE = 1.0 10 | config.IMAGE_STRIDE = 0 11 | 12 | # dataset related params 13 | config.NUM_CLASSES = 2 14 | config.PRE_SCALES = [(1200, 1600)] # first is scale (the shorter side); second is max size 15 | config.SCALES = [(640, 640)] # first is scale (the shorter side); second is max size 16 | #config.SCALES = [(800, 800)] # first is scale (the shorter side); second is max size 17 | config.ORIGIN_SCALE = False 18 | 19 | _ratio = (1.,) 20 | 21 | RAC_SSH = { 22 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 23 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 24 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 25 | } 26 | 27 | _ratio = (1.,1.5) 28 | RAC_SSH2 = { 29 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 30 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 31 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 32 | } 33 | 34 | _ratio = (1.,1.5) 35 | RAC_SSH3 = { 36 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 37 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 38 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 39 | '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 40 | } 41 | 42 | RAC_RETINA = {} 43 | _ratios = (1.0,) 44 | _ass = 2.0**(1.0/3) 45 | _basescale = 1.0 46 | for _stride in [4, 8, 16, 32, 64]: 47 | key = str(_stride) 48 | value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999} 49 | scales = [] 50 | for _ in range(3): 51 | scales.append(_basescale) 52 | _basescale *= _ass 53 | value['SCALES'] = tuple(scales) 54 | RAC_RETINA[key] = value 55 | 56 | 57 | config.RPN_ANCHOR_CFG = 
RAC_SSH #default 58 | 59 | config.NET_MODE = 2 60 | # config.HEAD_MODULE = 'SSH' 61 | config.HEAD_MODULE = 'RF' 62 | config.LR_MODE = 0 63 | config.LANDMARK_LR_MULT = 2.0 64 | config.HEAD_FILTER_NUM = 256 65 | config.CONTEXT_FILTER_RATIO = 1 66 | config.max_feat_channel = 9999 67 | 68 | config.USE_CROP = True 69 | config.USE_DCN = 0 70 | config.FACE_LANDMARK = False 71 | config.USE_OCCLUSION = False 72 | config.USE_BLUR = False 73 | config.MORE_SMALL_BOX = True 74 | 75 | config.LAYER_FIX = False 76 | 77 | config.HEAD_BOX = False 78 | config.DENSE_ANCHOR = False 79 | config.USE_MAXOUT = 0 80 | config.SHARE_WEIGHT_BBOX = False 81 | config.SHARE_WEIGHT_LANDMARK = False 82 | 83 | config.RANDOM_FEAT_STRIDE = False 84 | config.NUM_CPU = 4 85 | config.MIXUP = 0.0 86 | config.USE_3D = False 87 | 88 | config.BBOX_MASK_THRESH = 0 89 | config.COLOR_MODE = 2 90 | config.COLOR_JITTERING = 0.125 91 | 92 | 93 | config.TRAIN = edict() 94 | 95 | config.TRAIN.IMAGE_ALIGN = 0 96 | config.TRAIN.MIN_BOX_SIZE = 5 97 | # R-CNN and RPN 98 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 99 | config.TRAIN.BATCH_IMAGES = 8 100 | # e2e changes behavior of anchor loader and metric 101 | config.TRAIN.END2END = True 102 | # group images with similar aspect ratio 103 | config.TRAIN.ASPECT_GROUPING = False 104 | 105 | # RPN anchor loader 106 | # rpn anchors batch size 107 | config.TRAIN.RPN_ENABLE_OHEM = 2 108 | config.TRAIN.RPN_BATCH_SIZE = 256 109 | # rpn anchors sampling params 110 | config.TRAIN.RPN_FG_FRACTION = 0.25 111 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5 112 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 113 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 114 | config.TRAIN.RPN_FORCE_POSITIVE = False 115 | # rpn bounding box regression params 116 | 117 | config.TEST = edict() 118 | 119 | # R-CNN testing 120 | # use rpn to generate proposal 121 | config.TEST.HAS_RPN = False 122 | # size of images for each device 123 | config.TEST.BATCH_IMAGES = 1 124 | 125 | # RPN proposal 126 | config.TEST.CXX_PROPOSAL = True 127 | config.TEST.RPN_NMS_THRESH = 0.3 128 | config.TEST.RPN_PRE_NMS_TOP_N = 1000 129 | config.TEST.RPN_POST_NMS_TOP_N = 3000 130 | # RCNN nms 131 | config.TEST.NMS = 0.3 132 | 133 | config.TEST.PYRAMID_SCALES = [0.5, 1.0, 1.5] 134 | config.TEST.SCORE_THRESH = 0.5 135 | config.TEST.IOU_THRESH = 0.5 136 | 137 | 138 | # network settings 139 | network = edict() 140 | 141 | # network.ssh = edict() 142 | 143 | network.mnet = edict() 144 | network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 145 | network.mnet.BATCH_IMAGES = 16 146 | network.mnet.HEAD_FILTER_NUM = 64 147 | network.mnet.CONTEXT_FILTER_RATIO = 1 148 | 149 | network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 150 | network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 151 | network.mnet.PIXEL_SCALE = 1.0 152 | network.mnet.pretrained = 'model/mobilenet025fd0' #78 153 | network.mnet.pretrained_epoch = 0 154 | network.mnet.max_feat_channel = 8888 155 | network.mnet.COLOR_MODE = 1 156 | network.mnet.USE_CROP = True 157 | network.mnet.RPN_ANCHOR_CFG = RAC_SSH 158 | network.mnet.LAYER_FIX = True 159 | network.mnet.LANDMARK_LR_MULT = 2.5 160 | 161 | 162 | network.resnet = edict() 163 | network.resnet.pretrained_epoch = 0 164 | network.resnet.lr_step = '1,2,3,4,5,55,68,80' 165 | network.resnet.lr = 0.004 166 | network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 167 | network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 168 | network.resnet.PIXEL_SCALE = 1.0 169 | network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 170 | 
network.resnet.BATCH_IMAGES = 8 171 | network.resnet.HEAD_FILTER_NUM = 256 172 | network.resnet.CONTEXT_FILTER_RATIO = 1 173 | network.resnet.USE_DCN = 2 174 | network.resnet.RPN_BATCH_SIZE = 256 175 | network.resnet.RPN_ANCHOR_CFG = RAC_RETINA 176 | 177 | network.resnet.USE_DCN = 0 178 | network.resnet.pretrained = 'model/pretrain-model/imagenet-resnet-50/resnet-50' 179 | network.resnet.RPN_ANCHOR_CFG = RAC_SSH 180 | 181 | 182 | # dataset settings 183 | dataset = edict() 184 | 185 | dataset.retinaface = edict() 186 | dataset.retinaface.dataset = 'retinaface' 187 | dataset.retinaface.image_set = 'train' 188 | dataset.retinaface.test_image_set = 'val' 189 | dataset.retinaface.root_path = 'data' 190 | dataset.retinaface.dataset_path = 'data/retinaface' 191 | dataset.retinaface.NUM_CLASSES = 2 192 | 193 | # default settings 194 | default = edict() 195 | 196 | config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling'] 197 | 198 | # default network 199 | default.network = 'resnet' 200 | default.pretrained = 'model/imagenet-resnet-50' 201 | default.pretrained_epoch = 0 202 | # default dataset 203 | default.dataset = 'retinaface' 204 | default.image_set = 'train' 205 | default.test_image_set = 'val' 206 | default.root_path = 'data' 207 | default.dataset_path = 'data/retinaface' 208 | # default training 209 | default.frequent = 20 210 | default.kvstore = 'device' 211 | # default e2e 212 | default.prefix = 'model/defect' 213 | default.end_epoch = 10000 214 | default.lr_step = '55,68,80' 215 | default.lr = 0.01 216 | 217 | def generate_config(_network, _dataset): 218 | for k, v in network[_network].items(): 219 | if k in config: 220 | config[k] = v 221 | elif k in default: 222 | default[k] = v 223 | if k in config.TRAIN: 224 | config.TRAIN[k] = v 225 | for k, v in dataset[_dataset].items(): 226 | if k in config: 227 | config[k] = v 228 | elif k in default: 229 | default[k] = v 230 | if k in config.TRAIN: 231 | config.TRAIN[k] = v 232 | config.network = _network 233 | config.dataset = _dataset 234 | config.RPN_FEAT_STRIDE = [] 235 | num_anchors = [] 236 | for k in config.RPN_ANCHOR_CFG: 237 | config.RPN_FEAT_STRIDE.append( int(k) ) 238 | _num_anchors = len(config.RPN_ANCHOR_CFG[k]['SCALES'])*len(config.RPN_ANCHOR_CFG[k]['RATIOS']) 239 | if config.DENSE_ANCHOR: 240 | _num_anchors *= 2 241 | config.RPN_ANCHOR_CFG[k]['NUM_ANCHORS'] = _num_anchors 242 | num_anchors.append(_num_anchors) 243 | config.RPN_FEAT_STRIDE = sorted(config.RPN_FEAT_STRIDE, reverse=True) 244 | for j in range(1,len(num_anchors)): 245 | assert num_anchors[0]==num_anchors[j] 246 | config.NUM_ANCHORS = num_anchors[0] 247 | 248 | -------------------------------------------------------------------------------- /rcnn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/core/__init__.py -------------------------------------------------------------------------------- /rcnn/core/callback.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def do_checkpoint(prefix, means, stds): 5 | def _callback(iter_no, sym, arg, aux): 6 | if 'bbox_pred_weight' in arg: 7 | arg['bbox_pred_weight_test'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T 8 | arg['bbox_pred_bias_test'] = arg['bbox_pred_bias'] * mx.nd.array(stds) + mx.nd.array(means) 9 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 10 | if 
'bbox_pred_weight' in arg: 11 | arg.pop('bbox_pred_weight_test') 12 | arg.pop('bbox_pred_bias_test') 13 | return _callback 14 | -------------------------------------------------------------------------------- /rcnn/core/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | from rcnn.config import config 7 | 8 | 9 | def get_rpn_names(): 10 | pred = ['rpn_cls_prob', 'rpn_bbox_loss', 'rpn_label', 'rpn_bbox_weight'] 11 | label = ['rpn_label', 'rpn_bbox_target', 'rpn_bbox_weight'] 12 | return pred, label 13 | 14 | 15 | 16 | class RPNAccMetric(mx.metric.EvalMetric): 17 | def __init__(self, pred_idx=-1, label_idx=-1,name='RPNAcc'): 18 | super(RPNAccMetric, self).__init__(name) 19 | self.pred, self.label = get_rpn_names() 20 | #self.name = 'RPNAcc' 21 | self.name = [name, name+'_BG', name+'_FG'] 22 | self.pred_idx = pred_idx 23 | self.label_idx = label_idx 24 | self.STAT = [0, 0, 0] 25 | 26 | def reset(self): 27 | """Clear the internal statistics to initial state.""" 28 | if isinstance(self.name, str): 29 | self.num_inst = 0 30 | self.sum_metric = 0.0 31 | else: 32 | #print('reset to ',len(self.name), self.name, file=sys.stderr) 33 | self.num_inst = [0] * len(self.name) 34 | self.sum_metric = [0.0] * len(self.name) 35 | 36 | 37 | def get(self): 38 | if isinstance(self.name, str): 39 | if self.num_inst == 0: 40 | return (self.name, float('nan')) 41 | else: 42 | return (self.name, self.sum_metric / self.num_inst) 43 | else: 44 | names = ['%s'%(self.name[i]) for i in range(len(self.name))] 45 | values = [x / y if y != 0 else float('nan') \ 46 | for x, y in zip(self.sum_metric, self.num_inst)] 47 | return (names, values) 48 | 49 | def update(self, labels, preds): 50 | if self.pred_idx>=0 and self.label_idx>=0: 51 | pred = preds[self.pred_idx] 52 | label = preds[self.label_idx] 53 | else: 54 | pred = preds[self.pred.index('rpn_cls_prob')] 55 | label = labels[self.label.index('rpn_label')] 56 | #label = preds[self.pred.index('rpn_label')] 57 | 58 | num_images = pred.shape[0] 59 | #print(pred.shape, label.shape, file=sys.stderr) 60 | # pred (b, c, p) or (b, c, h, w) 61 | pred_label = mx.ndarray.argmax_channel(pred).asnumpy().astype('int32') 62 | #pred_label = pred_label.reshape((pred_label.shape[0], -1)) 63 | pred_label = pred_label.reshape(-1,) 64 | # label (b, p) 65 | label = label.asnumpy().astype('int32').reshape(-1,) 66 | #print(pred_label.shape, label.shape) 67 | 68 | # filter with keep_inds 69 | keep_inds = np.where(label != -1)[0] 70 | #print('in_metric', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 71 | #print(keep_inds, file=sys.stderr) 72 | _pred_label = pred_label[keep_inds] 73 | _label = label[keep_inds] 74 | #print('in_metric2', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 75 | if isinstance(self.name, str): 76 | self.sum_metric += np.sum(_pred_label.flat == _label.flat) 77 | self.num_inst += len(_pred_label.flat) 78 | else: 79 | self.sum_metric[0] += np.sum(_pred_label.flat == _label.flat) 80 | self.num_inst[0] += len(_pred_label.flat) 81 | 82 | keep_inds = np.where(label == 0)[0] 83 | _pred_label = pred_label[keep_inds] 84 | _label = label[keep_inds] 85 | self.sum_metric[1] += np.sum(_pred_label.flat == _label.flat) 86 | self.num_inst[1] += len(_pred_label.flat) 87 | 88 | keep_inds = np.where(label == 1)[0] 89 | _pred_label = pred_label[keep_inds] 90 | _label = label[keep_inds] 91 | a = 
np.sum(_pred_label.flat == _label.flat) 92 | b = len(_pred_label.flat) 93 | self.sum_metric[2] += a 94 | self.num_inst[2] += b 95 | 96 | #self.STAT[0]+=a 97 | #self.STAT[1]+=b 98 | #self.STAT[2]+=num_images 99 | #if self.STAT[2]%400==0: 100 | # print('FG_ACC', self.pred_idx, self.STAT[2], self.STAT[0], self.STAT[1], float(self.STAT[0])/self.STAT[1], file=sys.stderr) 101 | # self.STAT = [0,0,0] 102 | 103 | 104 | class RPNLogLossMetric(mx.metric.EvalMetric): 105 | def __init__(self, pred_idx=-1, label_idx=-1): 106 | super(RPNLogLossMetric, self).__init__('RPNLogLoss') 107 | self.pred, self.label = get_rpn_names() 108 | self.pred_idx = pred_idx 109 | self.label_idx = label_idx 110 | 111 | def update(self, labels, preds): 112 | if self.pred_idx>=0 and self.label_idx>=0: 113 | pred = preds[self.pred_idx] 114 | label = preds[self.label_idx] 115 | else: 116 | pred = preds[self.pred.index('rpn_cls_prob')] 117 | label = labels[self.label.index('rpn_label')] 118 | #label = preds[self.pred.index('rpn_label')] 119 | 120 | # label (b, p) 121 | label = label.asnumpy().astype('int32').reshape((-1)) 122 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 123 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 124 | pred = pred.reshape((label.shape[0], -1)) 125 | 126 | # filter with keep_inds 127 | keep_inds = np.where(label != -1)[0] 128 | label = label[keep_inds] 129 | cls = pred[keep_inds, label] 130 | #print('in_metric log', label.shape, cls.shape, file=sys.stderr) 131 | 132 | cls += 1e-14 133 | cls_loss = -1 * np.log(cls) 134 | cls_loss = np.sum(cls_loss) 135 | self.sum_metric += cls_loss 136 | self.num_inst += label.shape[0] 137 | 138 | 139 | class RPNL1LossMetric(mx.metric.EvalMetric): 140 | def __init__(self, loss_idx=-1, weight_idx=-1, name='RPNL1Loss'): 141 | super(RPNL1LossMetric, self).__init__(name) 142 | self.pred, self.label = get_rpn_names() 143 | self.loss_idx = loss_idx 144 | self.weight_idx = weight_idx 145 | self.name = name 146 | 147 | def update(self, labels, preds): 148 | if self.loss_idx>=0 and self.weight_idx>=0: 149 | bbox_loss = preds[self.loss_idx].asnumpy() 150 | bbox_weight = preds[self.weight_idx].asnumpy() 151 | else: 152 | bbox_loss = preds[self.pred.index('rpn_bbox_loss')].asnumpy() 153 | bbox_weight = labels[self.label.index('rpn_bbox_weight')].asnumpy() 154 | #bbox_weight = preds[self.pred.index('rpn_bbox_weight')].asnumpy() 155 | 156 | #print('in_metric', self.name, bbox_weight.shape, bbox_loss.shape) 157 | 158 | # calculate num_inst (average on those fg anchors) 159 | num_inst = np.sum(bbox_weight > 0) / (bbox_weight.shape[1]/config.NUM_ANCHORS) 160 | #print('in_metric log', bbox_loss.shape, num_inst, file=sys.stderr) 161 | 162 | self.sum_metric += np.sum(bbox_loss) 163 | self.num_inst += num_inst 164 | 165 | 166 | -------------------------------------------------------------------------------- /rcnn/core/module.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implements the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module bound with maximum shape. 
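Example (a sketch only; the shapes follow this repo's default config of
BATCH_IMAGES=8 and SCALES=[(640, 640)]):

    mod = MutableModule(sym, data_names=('data',), label_names=('label',),
                        context=[mx.gpu(0)],
                        max_data_shapes=[('data', (8, 3, 640, 640))])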
4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 
100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and 
self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/core/module_bak.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implement the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module binded with maximum shape. 4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return 
self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None, grad_req='write'): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if 
self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /rcnn/cython/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/cython/__init__.py -------------------------------------------------------------------------------- /rcnn/cython/anchors.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float32 6 | ctypedef np.float32_t DTYPE_t 7 | 8 | def anchors_cython(int height, int width, int stride, np.ndarray[DTYPE_t, ndim=2] base_anchors): 9 | """ 10 | Parameters 11 | ---------- 12 | height: height of plane 13 | width: width of plane 14 | stride: stride of the original image 15 | base_anchors: (A, 4) a base set of anchors 16 | Returns 17 | ------- 18 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 19 | """ 20 | cdef unsigned int A = base_anchors.shape[0] 21 | cdef np.ndarray[DTYPE_t, ndim=4] 
all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) 22 | cdef unsigned int iw, ih 23 | cdef unsigned int k 24 | cdef unsigned int sh 25 | cdef unsigned int sw 26 | for iw in range(width): 27 | sw = iw * stride 28 | for ih in range(height): 29 | sh = ih * stride 30 | for k in range(A): 31 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 32 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 33 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 34 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 35 | return all_anchors -------------------------------------------------------------------------------- /rcnn/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps_cython( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /rcnn/cython/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, 
ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /rcnn/cython/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { 
\ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
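// (added note) CUDA_CHECK only logs failures via cudaGetErrorString instead
// of aborting, so an error in the device switch below surfaces on stdout
// rather than stopping the NMS call.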
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /rcnn/cython/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 
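Example (illustrative): with CUDAHOME=/usr/local/cuda this returns
{'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc',
'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.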
35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.items(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | 59 | 60 | # Test if cuda could be foun 61 | try: 62 | CUDA = locate_cuda() 63 | except EnvironmentError: 64 | CUDA = None 65 | 66 | 67 | # Obtain the numpy include directory. This logic works across numpy versions. 68 | try: 69 | numpy_include = np.get_include() 70 | except AttributeError: 71 | numpy_include = np.get_numpy_include() 72 | 73 | 74 | def customize_compiler_for_nvcc(self): 75 | """inject deep into distutils to customize how the dispatch 76 | to gcc/nvcc works. 77 | 78 | If you subclass UnixCCompiler, it's not trivial to get your subclass 79 | injected in, and still have the right customizations (i.e. 80 | distutils.sysconfig.customize_compiler) run on it. So instead of going 81 | the OO route, I have this. Note, it's kindof like a wierd functional 82 | subclassing going on.""" 83 | 84 | # tell the compiler it can processes .cu 85 | self.src_extensions.append('.cu') 86 | 87 | # save references to the default compiler_so and _comple methods 88 | default_compiler_so = self.compiler_so 89 | super = self._compile 90 | 91 | # now redefine the _compile method. This gets executed for each 92 | # object but distutils doesn't have the ability to change compilers 93 | # based on source extension: we add it. 
94 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 95 | if os.path.splitext(src)[1] == '.cu': 96 | # use cuda for .cu files 97 | self.set_executable('compiler_so', CUDA['nvcc']) 98 | # use only a subset of the extra_postargs, which are 1-1 translated 99 | # from the extra_compile_args in the Extension class 100 | postargs = extra_postargs['nvcc'] 101 | else: 102 | postargs = extra_postargs['gcc'] 103 | 104 | super(obj, src, ext, cc_args, postargs, pp_opts) 105 | # reset the default compiler_so, which we might have changed for cuda 106 | self.compiler_so = default_compiler_so 107 | 108 | # inject our redefined _compile method into the class 109 | self._compile = _compile 110 | 111 | 112 | # run the customize_compiler 113 | class custom_build_ext(build_ext): 114 | def build_extensions(self): 115 | customize_compiler_for_nvcc(self.compiler) 116 | build_ext.build_extensions(self) 117 | 118 | 119 | ext_modules = [ 120 | Extension( 121 | "bbox", 122 | ["bbox.pyx"], 123 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | include_dirs=[numpy_include] 125 | ), 126 | Extension( 127 | "anchors", 128 | ["anchors.pyx"], 129 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 130 | include_dirs=[numpy_include] 131 | ), 132 | Extension( 133 | "cpu_nms", 134 | ["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 136 | include_dirs=[numpy_include] 137 | ), 138 | ] 139 | 140 | if CUDA is not None: 141 | ext_modules.append( 142 | Extension('gpu_nms', 143 | ['nms_kernel.cu', 'gpu_nms.pyx'], 144 | library_dirs=[CUDA['lib64']], 145 | libraries=['cudart'], 146 | language='c++', 147 | runtime_library_dirs=[CUDA['lib64']], 148 | # this syntax is specific to this build system 149 | # we're only going to use certain compiler args with nvcc and not with 150 | # gcc; the implementation of this trick is in customize_compiler_for_nvcc() above 151 | extra_compile_args={'gcc': ["-Wno-unused-function"], 152 | 'nvcc': ['-arch=sm_35', 153 | '--ptxas-options=-v', 154 | '-c', 155 | '--compiler-options', 156 | "'-fPIC'"]}, 157 | include_dirs=[numpy_include, CUDA['include']] 158 | ) 159 | ) 160 | else: 161 | print('Skipping GPU_NMS') 162 | 163 | 164 | setup( 165 | name='frcnn_cython', 166 | ext_modules=ext_modules, 167 | # inject our custom trigger 168 | cmdclass={'build_ext': custom_build_ext}, 169 | ) 170 | -------------------------------------------------------------------------------- /rcnn/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import IMDB 2 | from .retinaface import retinaface 3 | -------------------------------------------------------------------------------- /rcnn/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v).astype(np.int) 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h >= min_size))[0] 16 | return keep 17 | -------------------------------------------------------------------------------- /rcnn/dataset/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ 
import print_function 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import cv2 7 | import os 8 | import numpy as np 9 | import json 10 | from PIL import Image 11 | 12 | from ..logger import logger 13 | from .imdb import IMDB 14 | from .ds_utils import unique_boxes, filter_small_boxes 15 | from ..config import config 16 | 17 | class retinaface(IMDB): 18 | def __init__(self, image_set, root_path, data_path): 19 | super(retinaface, self).__init__('retinaface', image_set, root_path, data_path) 20 | #assert image_set=='train' 21 | 22 | split = image_set 23 | self._split = image_set 24 | self._image_set = image_set 25 | 26 | 27 | self.root_path = root_path 28 | self.data_path = data_path 29 | 30 | 31 | self._dataset_path = self.data_path 32 | self._imgs_path = os.path.join(self._dataset_path, image_set, 'images') 33 | self._fp_bbox_map = {} 34 | label_file = os.path.join(self._dataset_path, image_set, 'label.txt')#os.path.join(self._dataset_path, image_set, 'label.txt') 35 | name = None 36 | for line in open(label_file, 'r'): 37 | line = line.strip() 38 | if line.startswith('#'): 39 | name = line[1:].strip() 40 | self._fp_bbox_map[name] = [] 41 | continue 42 | assert name is not None 43 | assert name in self._fp_bbox_map 44 | self._fp_bbox_map[name].append(line) 45 | print('origin image size', len(self._fp_bbox_map)) 46 | 47 | #self.num_images = len(self._image_paths) 48 | #self._image_index = range(len(self._image_paths)) 49 | self.classes = ['bg', 'face'] 50 | self.num_classes = len(self.classes) 51 | 52 | 53 | def gt_roidb(self): 54 | cache_file = os.path.join(self.cache_path, '{}_{}_gt_roidb.pkl'.format(self.name, self._split)) 55 | if os.path.exists(cache_file): 56 | with open(cache_file, 'rb') as fid: 57 | roidb = pickle.load(fid) 58 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 59 | self.num_images = len(roidb) 60 | return roidb 61 | 62 | roidb = [] 63 | max_num_boxes = 0 64 | nonattr_box_num = 0 65 | landmark_num = 0 66 | 67 | for fp in self._fp_bbox_map: 68 | if self._split=='test': 69 | image_path = os.path.join(self._imgs_path, fp) 70 | roi = {'image': image_path} 71 | roidb.append(roi) 72 | continue 73 | boxes = np.zeros([len(self._fp_bbox_map[fp]), 4], np.float) 74 | landmarks = np.zeros([len(self._fp_bbox_map[fp]), 5, 3], np.float) 75 | blur = np.zeros((len(self._fp_bbox_map[fp]),), np.float) 76 | boxes_mask = [] 77 | 78 | gt_classes = np.ones([len(self._fp_bbox_map[fp])], np.int32) 79 | overlaps = np.zeros([len(self._fp_bbox_map[fp]), 2], np.float) 80 | 81 | ix = 0 82 | 83 | for aline in self._fp_bbox_map[fp]: 84 | imsize = Image.open(os.path.join(self._imgs_path, fp)).size 85 | values = [float(x) for x in aline.strip().split()] 86 | bbox = [values[0], values[1], values[0]+values[2], values[1]+values[3]] 87 | 88 | x1 = bbox[0] 89 | y1 = bbox[1] 90 | x2 = min(imsize[0], bbox[2]) 91 | y2 = min(imsize[1], bbox[3]) 92 | if x1>=x2 or y1>=y2: 93 | continue 94 | 95 | if config.BBOX_MASK_THRESH>0: 96 | if (x2 - x1) < config.BBOX_MASK_THRESH or y2 - y1 < config.BBOX_MASK_THRESH: 97 | boxes_mask.append(np.array([x1, y1, x2, y2], np.float)) 98 | continue 99 | if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or y2 - y1 < config.TRAIN.MIN_BOX_SIZE: 100 | continue 101 | 102 | boxes[ix, :] = np.array([x1, y1, x2, y2], np.float) 103 | if self._split=='train': 104 | landmark = np.array( values[4:19], dtype=np.float32 ).reshape((5,3)) 105 | for li in range(5): 106 | #print(landmark) 107 | if landmark[li][0]==-1. 
and landmark[li][1]==-1.: #missing landmark 108 | assert landmark[li][2]==-1 109 | else: 110 | assert landmark[li][2]>=0 111 | if li==0: 112 | landmark_num+=1 113 | if landmark[li][2]==0.0:#visible 114 | landmark[li][2] = 1.0 115 | else: 116 | landmark[li][2] = 0.0 117 | 118 | landmarks[ix] = landmark 119 | 120 | blur[ix] = values[19] 121 | #print(aline, blur[ix]) 122 | if blur[ix]<0.0: 123 | blur[ix] = 0.3 124 | nonattr_box_num+=1 125 | 126 | cls = int(1) 127 | gt_classes[ix] = cls 128 | overlaps[ix, cls] = 1.0 129 | ix += 1 130 | max_num_boxes = max(max_num_boxes, ix) 131 | #overlaps = scipy.sparse.csr_matrix(overlaps) 132 | if self._split=='train' and ix==0: 133 | continue 134 | boxes = boxes[:ix,:] 135 | landmarks = landmarks[:ix,:,:] 136 | blur = blur[:ix] 137 | gt_classes = gt_classes[:ix] 138 | overlaps = overlaps[:ix,:] 139 | image_path = os.path.join(self._imgs_path, fp) 140 | with open(image_path, 'rb') as fin: 141 | stream = fin.read() 142 | stream = np.fromstring(stream, dtype=np.uint8) 143 | 144 | roi = { 145 | 'image': image_path, 146 | 'stream': stream, 147 | 'height': imsize[1], 148 | 'width': imsize[0], 149 | 'boxes': boxes, 150 | 'landmarks': landmarks, 151 | 'blur': blur, 152 | 'gt_classes': gt_classes, 153 | 'gt_overlaps': overlaps, 154 | 'max_classes': overlaps.argmax(axis=1), 155 | 'max_overlaps': overlaps.max(axis=1), 156 | 'flipped': False, 157 | } 158 | if len(boxes_mask)>0: 159 | boxes_mask = np.array(boxes_mask) 160 | roi['boxes_mask'] = boxes_mask 161 | roidb.append(roi) 162 | for roi in roidb: 163 | roi['max_num_boxes'] = max_num_boxes 164 | self.num_images = len(roidb) 165 | print('roidb size', len(roidb)) 166 | print('non attr box num', nonattr_box_num) 167 | print('landmark num', landmark_num) 168 | with open(cache_file, 'wb') as fid: 169 | pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) 170 | print('wrote gt roidb to {}'.format(cache_file)) 171 | 172 | return roidb 173 | 174 | def write_detections(self, all_boxes, output_dir='./output/'): 175 | pass 176 | 177 | 178 | def evaluate_detections(self, all_boxes, output_dir='./output/',method_name='insightdetection'): 179 | pass 180 | -------------------------------------------------------------------------------- /rcnn/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/io/__init__.py -------------------------------------------------------------------------------- /rcnn/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | # set up logger 4 | logging.basicConfig() 5 | logger = logging.getLogger() 6 | logger.setLevel(logging.INFO) 7 | -------------------------------------------------------------------------------- /rcnn/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/processing/__init__.py -------------------------------------------------------------------------------- /rcnn/processing/assign_levels.py: -------------------------------------------------------------------------------- 1 | from rcnn.config import config 2 | import numpy as np 3 | 4 | 5 | def compute_assign_targets(rois, threshold): 6 | rois_area = np.sqrt((rois[:, 2] - rois[:, 0] + 1) * (rois[:, 3] - rois[:, 1] + 1)) 7 | num_rois = np.shape(rois)[0] 8 | assign_levels = 
np.zeros(num_rois, dtype=np.uint8) 9 | for i, stride in enumerate(config.RCNN_FEAT_STRIDE): 10 | thd = threshold[i] 11 | idx = np.logical_and(thd[1] <= rois_area, rois_area < thd[0]) 12 | assign_levels[idx] = stride 13 | 14 | assert 0 not in assign_levels, "All rois should be assigned to specific levels." 15 | return assign_levels 16 | 17 | 18 | def add_assign_targets(roidb): 19 | """ 20 | given roidb, add ['assign_level'] 21 | :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb 22 | """ 23 | print('add assign targets') 24 | assert len(roidb) > 0 25 | assert 'boxes' in roidb[0] 26 | 27 | area_threshold = [[np.inf, 448], 28 | [448, 224], 29 | [224, 112], 30 | [112, 0]] 31 | 32 | assert len(config.RCNN_FEAT_STRIDE) == len(area_threshold) 33 | 34 | num_images = len(roidb) 35 | for im_i in range(num_images): 36 | rois = roidb[im_i]['boxes'] 37 | roidb[im_i]['assign_levels'] = compute_assign_targets(rois, area_threshold) 38 | -------------------------------------------------------------------------------- /rcnn/processing/bbox_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file has functions about generating bounding box regression targets 3 | """ 4 | 5 | from ..pycocotools.mask import encode 6 | import numpy as np 7 | 8 | from ..logger import logger 9 | from .bbox_transform import bbox_overlaps, bbox_transform 10 | from rcnn.config import config 11 | import math 12 | import cv2 13 | import PIL.Image as Image 14 | import threading 15 | import queue as Queue  # named 'Queue' in Python 2 16 | 17 | 18 | def compute_bbox_regression_targets(rois, overlaps, labels): 19 | """ 20 | given rois, overlaps, gt labels, compute bounding box regression targets 21 | :param rois: roidb[i]['boxes'] k * 4 22 | :param overlaps: roidb[i]['max_overlaps'] k * 1 23 | :param labels: roidb[i]['max_classes'] k * 1 24 | :return: targets[i][class, dx, dy, dw, dh] k * 5 25 | """ 26 | # Ensure ROIs are floats 27 | rois = rois.astype(np.float, copy=False) 28 | 29 | # Sanity check 30 | if len(rois) != len(overlaps): 31 | logger.warning('bbox regression: len(rois) != len(overlaps)') 32 | 33 | # Indices of ground-truth ROIs 34 | gt_inds = np.where(overlaps == 1)[0] 35 | if len(gt_inds) == 0: 36 | logger.warning('bbox regression: len(gt_inds) == 0') 37 | 38 | # Indices of examples for which we try to make predictions 39 | ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] 40 | 41 | # Get IoU overlap between each ex ROI and gt ROI 42 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 43 | 44 | # Find which gt ROI each ex ROI has max overlap with: 45 | # this will be the ex ROI's gt target 46 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 47 | gt_rois = rois[gt_inds[gt_assignment], :] 48 | ex_rois = rois[ex_inds, :] 49 | 50 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 51 | targets[ex_inds, 0] = labels[ex_inds] 52 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 53 | return targets 54 | 55 | 56 | def add_bbox_regression_targets(roidb): 57 | """ 58 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 59 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb 60 | :return: means, std variances of targets 61 | """ 62 | logger.info('bbox regression: add bounding box regression targets') 63 | assert len(roidb) > 0 64 | assert 'max_classes' in roidb[0] 65 | 66 | num_images = len(roidb) 67 | num_classes = roidb[0]['gt_overlaps'].shape[1] 68 | for im_i in range(num_images): 69 | rois = roidb[im_i]['boxes'] 70 | max_overlaps = roidb[im_i]['max_overlaps'] 71 | max_classes = roidb[im_i]['max_classes'] 72 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes) 73 | 74 | if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 75 | # use fixed / precomputed means and stds instead of empirical values 76 | means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1)) 77 | stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1)) 78 | else: 79 | # compute mean, std values 80 | class_counts = np.zeros((num_classes, 1)) + 1e-14 81 | sums = np.zeros((num_classes, 4)) 82 | squared_sums = np.zeros((num_classes, 4)) 83 | for im_i in range(num_images): 84 | targets = roidb[im_i]['bbox_targets'] 85 | for cls in range(1, num_classes): 86 | cls_indexes = np.where(targets[:, 0] == cls)[0] 87 | if cls_indexes.size > 0: 88 | class_counts[cls] += cls_indexes.size 89 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 90 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 91 | 92 | means = sums / class_counts 93 | # var(x) = E(x^2) - E(x)^2 94 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 95 | 96 | # normalized targets 97 | for im_i in range(num_images): 98 | targets = roidb[im_i]['bbox_targets'] 99 | for cls in range(1, num_classes): 100 | cls_indexes = np.where(targets[:, 0] == cls)[0] 101 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 102 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 103 | 104 | return means.ravel(), stds.ravel() 105 | 106 | 107 | def expand_bbox_regression_targets(bbox_targets_data, num_classes): 108 | """ 109 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 110 | :param bbox_targets_data: [k * 5] 111 | :param num_classes: number of classes 112 | :return: bbox target processed [k * 4 num_classes] 113 | bbox_weights ! only foreground boxes have bbox regression computation! 
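Example (illustrative): with num_classes = 2, an input row [1, dx, dy, dw, dh]
yields bbox_targets columns 4:8 = [dx, dy, dw, dh] (all other columns stay 0)
and bbox_weights columns 4:8 = config.TRAIN.BBOX_WEIGHTS.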
114 | """ 115 | classes = bbox_targets_data[:, 0] 116 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 117 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 118 | indexes = np.where(classes > 0)[0] 119 | for index in indexes: 120 | cls = classes[index] 121 | start = int(4 * cls) 122 | end = start + 4 123 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 124 | bbox_weights[index, start:end] = config.TRAIN.BBOX_WEIGHTS 125 | return bbox_targets, bbox_weights 126 | 127 | 128 | def compute_mask_and_label(ex_rois, ex_labels, seg, flipped): 129 | # assert os.path.exists(seg_gt), 'Path does not exist: {}'.format(seg_gt) 130 | # im = Image.open(seg_gt) 131 | # pixel = list(im.getdata()) 132 | # pixel = np.array(pixel).reshape([im.size[1], im.size[0]]) 133 | im = Image.open(seg) 134 | pixel = list(im.getdata()) 135 | ins_seg = np.array(pixel).reshape([im.size[1], im.size[0]]) 136 | if flipped: 137 | ins_seg = ins_seg[:, ::-1] 138 | rois = ex_rois 139 | n_rois = ex_rois.shape[0] 140 | label = ex_labels 141 | class_id = config.CLASS_ID 142 | mask_target = np.zeros((n_rois, 28, 28), dtype=np.int8) 143 | mask_label = np.zeros((n_rois), dtype=np.int8) 144 | for n in range(n_rois): 145 | target = ins_seg[int(rois[n, 1]): int(rois[n, 3]), int(rois[n, 0]): int(rois[n, 2])] 146 | ids = np.unique(target) 147 | ins_id = 0 148 | max_count = 0 149 | for id in ids: 150 | if math.floor(id / 1000) == class_id[int(label[int(n)])]: 151 | px = np.where(ins_seg == int(id)) 152 | x_min = np.min(px[1]) 153 | y_min = np.min(px[0]) 154 | x_max = np.max(px[1]) 155 | y_max = np.max(px[0]) 156 | x1 = max(rois[n, 0], x_min) 157 | y1 = max(rois[n, 1], y_min) 158 | x2 = min(rois[n, 2], x_max) 159 | y2 = min(rois[n, 3], y_max) 160 | iou = (x2 - x1) * (y2 - y1) 161 | iou = iou / ((rois[n, 2] - rois[n, 0]) * (rois[n, 3] - rois[n, 1]) 162 | + (x_max - x_min) * (y_max - y_min) - iou) 163 | if iou > max_count: 164 | ins_id = id 165 | max_count = iou 166 | 167 | if max_count == 0: 168 | continue 169 | # print max_count 170 | mask = np.zeros(target.shape) 171 | idx = np.where(target == ins_id) 172 | mask[idx] = 1 173 | mask = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_NEAREST) 174 | 175 | mask_target[n] = mask 176 | mask_label[n] = label[int(n)] 177 | return mask_target, mask_label 178 | 179 | 180 | def compute_bbox_mask_targets_and_label(rois, overlaps, labels, seg, flipped): 181 | """ 182 | given rois, overlaps, gt labels, seg, compute bounding box mask targets 183 | :param rois: roidb[i]['boxes'] k * 4 184 | :param overlaps: roidb[i]['max_overlaps'] k * 1 185 | :param labels: roidb[i]['max_classes'] k * 1 186 | :return: targets[i][class, dx, dy, dw, dh] k * 5 187 | """ 188 | # Ensure ROIs are floats 189 | rois = rois.astype(np.float, copy=False) 190 | 191 | # Sanity check 192 | if len(rois) != len(overlaps): 193 | print 'bbox regression: this should not happen' 194 | 195 | # Indices of ground-truth ROIs 196 | gt_inds = np.where(overlaps == 1)[0] 197 | if len(gt_inds) == 0: 198 | print 'something wrong : zero ground truth rois' 199 | # Indices of examples for which we try to make predictions 200 | ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] 201 | 202 | # Get IoU overlap between each ex ROI and gt ROI 203 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 204 | 205 | 206 | # Find which gt ROI each ex ROI has max overlap with: 207 | # this will be the ex ROI's gt target 208 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 209 | 
gt_rois = rois[gt_inds[gt_assignment], :] 210 | ex_rois = rois[ex_inds, :] 211 | 212 | mask_targets, mask_label = compute_mask_and_label(ex_rois, labels[ex_inds], seg, flipped) 213 | return mask_targets, mask_label, ex_inds 214 | 215 | def add_mask_targets(roidb): 216 | """ 217 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 218 | :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb 219 | :return: means, std variances of targets 220 | """ 221 | print 'add bounding box mask targets' 222 | assert len(roidb) > 0 223 | assert 'max_classes' in roidb[0] 224 | 225 | num_images = len(roidb) 226 | 227 | # Multi threads processing 228 | im_quene = Queue.Queue(maxsize=0) 229 | for im_i in range(num_images): 230 | im_quene.put(im_i) 231 | 232 | def process(): 233 | while not im_quene.empty(): 234 | im_i = im_quene.get() 235 | print "-----process img {}".format(im_i) 236 | rois = roidb[im_i]['boxes'] 237 | max_overlaps = roidb[im_i]['max_overlaps'] 238 | max_classes = roidb[im_i]['max_classes'] 239 | ins_seg = roidb[im_i]['ins_seg'] 240 | flipped = roidb[im_i]['flipped'] 241 | roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \ 242 | compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg, flipped) 243 | threads = [threading.Thread(target=process, args=()) for i in xrange(10)] 244 | for t in threads: t.start() 245 | for t in threads: t.join() 246 | # Single thread 247 | # for im_i in range(num_images): 248 | # print "-----processing img {}".format(im_i) 249 | # rois = roidb[im_i]['boxes'] 250 | # max_overlaps = roidb[im_i]['max_overlaps'] 251 | # max_classes = roidb[im_i]['max_classes'] 252 | # ins_seg = roidb[im_i]['ins_seg'] 253 | # # roidb[im_i]['mask_targets'] = compute_bbox_mask_targets(rois, max_overlaps, max_classes, ins_seg) 254 | # roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \ 255 | # compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg) 256 | -------------------------------------------------------------------------------- /rcnn/processing/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..cython.bbox import bbox_overlaps_cython 3 | #from rcnn.config import config 4 | 5 | 6 | def bbox_overlaps(boxes, query_boxes): 7 | return bbox_overlaps_cython(boxes, query_boxes) 8 | 9 | 10 | def bbox_overlaps_py(boxes, query_boxes): 11 | """ 12 | determine overlaps between boxes and query_boxes 13 | :param boxes: n * 4 bounding boxes 14 | :param query_boxes: k * 4 bounding boxes 15 | :return: overlaps: n * k overlaps 16 | """ 17 | n_ = boxes.shape[0] 18 | k_ = query_boxes.shape[0] 19 | overlaps = np.zeros((n_, k_), dtype=np.float) 20 | for k in range(k_): 21 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 22 | for n in range(n_): 23 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 24 | if iw > 0: 25 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 26 | if ih > 0: 27 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 28 | all_area = float(box_area + query_box_area - iw * ih) 29 | overlaps[n, k] = iw * ih / all_area 30 | return overlaps 31 | 32 | 33 | def clip_boxes(boxes, im_shape): 34 | """ 35 | Clip boxes to image boundaries. 
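Example (illustrative): with im_shape = (480, 640), a box [630., -5., 650., 100.]
is clipped to [630., 0., 639., 100.].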
36 | :param boxes: [N, 4* num_classes] 37 | :param im_shape: tuple of 2 38 | :return: [N, 4* num_classes] 39 | """ 40 | # x1 >= 0 41 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 42 | # y1 >= 0 43 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 44 | # x2 < im_shape[1] 45 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 46 | # y2 < im_shape[0] 47 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 48 | return boxes 49 | 50 | 51 | def nonlinear_transform(ex_rois, gt_rois): 52 | """ 53 | compute bounding box regression targets from ex_rois to gt_rois 54 | :param ex_rois: [N, 4] 55 | :param gt_rois: [N, 4] 56 | :return: [N, 4] 57 | """ 58 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 59 | 60 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 61 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 62 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 63 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 64 | 65 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 66 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 67 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 68 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 69 | 70 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 71 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 72 | targets_dw = np.log(gt_widths / ex_widths) 73 | targets_dh = np.log(gt_heights / ex_heights) 74 | 75 | if gt_rois.shape[1]<=4: 76 | targets = np.vstack( 77 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 78 | return targets 79 | else: 80 | targets = [targets_dx, targets_dy, targets_dw, targets_dh] 81 | #if config.USE_BLUR: 82 | # for i in range(4, gt_rois.shape[1]): 83 | # t = gt_rois[:,i] 84 | # targets.append(t) 85 | targets = np.vstack(targets).transpose() 86 | return targets 87 | 88 | def landmark_transform(ex_rois, gt_rois): 89 | 90 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 91 | 92 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 93 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 94 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 95 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 96 | 97 | 98 | targets = [] 99 | for i in range(gt_rois.shape[1]): 100 | for j in range(gt_rois.shape[2]): 101 | #if not config.USE_OCCLUSION and j==2: 102 | # continue 103 | if j==2: 104 | continue 105 | if j==0: #w 106 | target = (gt_rois[:,i,j] - ex_ctr_x) / (ex_widths + 1e-14) 107 | elif j==1: #h 108 | target = (gt_rois[:,i,j] - ex_ctr_y) / (ex_heights + 1e-14) 109 | else: #visibile 110 | target = gt_rois[:,i,j] 111 | targets.append(target) 112 | 113 | 114 | targets = np.vstack(targets).transpose() 115 | return targets 116 | 117 | 118 | def nonlinear_pred(boxes, box_deltas): 119 | """ 120 | Transform the set of class-agnostic boxes into class-specific boxes 121 | by applying the predicted offsets (box_deltas) 122 | :param boxes: !important [N 4] 123 | :param box_deltas: [N, 4 * num_classes] 124 | :return: [N 4 * num_classes] 125 | """ 126 | if boxes.shape[0] == 0: 127 | return np.zeros((0, box_deltas.shape[1])) 128 | 129 | boxes = boxes.astype(np.float, copy=False) 130 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 131 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 132 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 133 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 134 | 135 | dx = box_deltas[:, 0::4] 136 | dy = box_deltas[:, 1::4] 137 | dw = box_deltas[:, 2::4] 
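# (added note) box_deltas stores [dx, dy, dw, dh] per class side by side,
# so the strided slices 0::4 .. 3::4 gather one component across all classes;
# dx, dy are offsets in units of box width/height, while dw, dh are
# log-scale factors, inverted via exp() below.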
138 | dh = box_deltas[:, 3::4] 139 | 140 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 141 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 142 | pred_w = np.exp(dw) * widths[:, np.newaxis] 143 | pred_h = np.exp(dh) * heights[:, np.newaxis] 144 | 145 | pred_boxes = np.zeros(box_deltas.shape) 146 | # x1 147 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 148 | # y1 149 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 150 | # x2 151 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 152 | # y2 153 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 154 | 155 | return pred_boxes 156 | 157 | def landmark_pred(boxes, landmark_deltas): 158 | if boxes.shape[0] == 0: 159 | return np.zeros((0, landmark_deltas.shape[1])) 160 | boxes = boxes.astype(np.float, copy=False) 161 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 162 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 163 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 164 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 165 | preds = [] 166 | for i in range(landmark_deltas.shape[1]): 167 | if i%2==0: 168 | pred = (landmark_deltas[:,i]*widths + ctr_x) 169 | else: 170 | pred = (landmark_deltas[:,i]*heights + ctr_y) 171 | preds.append(pred) 172 | preds = np.vstack(preds).transpose() 173 | return preds 174 | 175 | def iou_transform(ex_rois, gt_rois): 176 | """ return bbox targets, IoU loss uses gt_rois as gt """ 177 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 178 | return gt_rois 179 | 180 | 181 | def iou_pred(boxes, box_deltas): 182 | """ 183 | Transform the set of class-agnostic boxes into class-specific boxes 184 | by applying the predicted offsets (box_deltas) 185 | :param boxes: !important [N 4] 186 | :param box_deltas: [N, 4 * num_classes] 187 | :return: [N 4 * num_classes] 188 | """ 189 | if boxes.shape[0] == 0: 190 | return np.zeros((0, box_deltas.shape[1])) 191 | 192 | boxes = boxes.astype(np.float, copy=False) 193 | x1 = boxes[:, 0] 194 | y1 = boxes[:, 1] 195 | x2 = boxes[:, 2] 196 | y2 = boxes[:, 3] 197 | 198 | dx1 = box_deltas[:, 0::4] 199 | dy1 = box_deltas[:, 1::4] 200 | dx2 = box_deltas[:, 2::4] 201 | dy2 = box_deltas[:, 3::4] 202 | 203 | pred_boxes = np.zeros(box_deltas.shape) 204 | # x1 205 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 206 | # y1 207 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 208 | # x2 209 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 210 | # y2 211 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 212 | 213 | return pred_boxes 214 | 215 | 216 | # define bbox_transform and bbox_pred 217 | bbox_transform = nonlinear_transform 218 | bbox_pred = nonlinear_pred 219 | -------------------------------------------------------------------------------- /rcnn/processing/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | from __future__ import print_function 5 | import sys 6 | from builtins import range 7 | import numpy as np 8 | from ..cython.anchors import anchors_cython 9 | #from ..config import config 10 | 11 | 12 | def anchors_plane(feat_h, feat_w, stride, base_anchor): 13 | return anchors_cython(feat_h, feat_w, stride, base_anchor) 14 | 15 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 16 | scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): 17 | """ 18 | Generate anchor (reference) windows by enumerating aspect ratios X 19 | scales wrt a reference (0, 0, 15, 15) window. 
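Example (illustrative): the defaults (base_size=16, ratios=[0.5, 1, 2],
scales=2**np.arange(3, 6)) produce a (9, 4) array; the ratio-1, scale-8
anchor is [-56., -56., 71., 71.], a 128x128 window centred on (7.5, 7.5).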
20 | """ 21 | 22 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 23 | ratio_anchors = _ratio_enum(base_anchor, ratios) 24 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 25 | for i in range(ratio_anchors.shape[0])]) 26 | if dense_anchor: 27 | assert stride%2==0 28 | anchors2 = anchors.copy() 29 | anchors2[:,:] += int(stride/2) 30 | anchors = np.vstack( (anchors, anchors2) ) 31 | #print('GA',base_anchor.shape, ratio_anchors.shape, anchors.shape) 32 | return anchors 33 | 34 | #def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8): 35 | # """ 36 | # Generate anchor (reference) windows by enumerating aspect ratios X 37 | # scales wrt a reference (0, 0, 15, 15) window. 38 | # """ 39 | # anchors = [] 40 | # _ratios = ratios.reshape( (len(base_size), -1) ) 41 | # _scales = scales.reshape( (len(base_size), -1) ) 42 | # for i,bs in enumerate(base_size): 43 | # __ratios = _ratios[i] 44 | # __scales = _scales[i] 45 | # #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 46 | # r = generate_anchors(bs, __ratios, __scales) 47 | # #print('anchors_fpn', r.shape, file=sys.stderr) 48 | # anchors.append(r) 49 | # return anchors 50 | 51 | def generate_anchors_fpn(dense_anchor=False, cfg = None): 52 | #assert(False) 53 | """ 54 | Generate anchor (reference) windows by enumerating aspect ratios X 55 | scales wrt a reference (0, 0, 15, 15) window. 56 | """ 57 | if cfg is None: 58 | from ..config import config 59 | cfg = config.RPN_ANCHOR_CFG 60 | RPN_FEAT_STRIDE = [] 61 | for k in cfg: 62 | RPN_FEAT_STRIDE.append( int(k) ) 63 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 64 | anchors = [] 65 | for k in RPN_FEAT_STRIDE: 66 | v = cfg[str(k)] 67 | bs = v['BASE_SIZE'] 68 | __ratios = np.array(v['RATIOS']) 69 | __scales = np.array(v['SCALES']) 70 | stride = int(k) 71 | #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 72 | r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) 73 | #print('anchors_fpn', r.shape, file=sys.stderr) 74 | anchors.append(r) 75 | 76 | return anchors 77 | 78 | def _whctrs(anchor): 79 | """ 80 | Return width, height, x center, and y center for an anchor (window). 81 | """ 82 | 83 | w = anchor[2] - anchor[0] + 1 84 | h = anchor[3] - anchor[1] + 1 85 | x_ctr = anchor[0] + 0.5 * (w - 1) 86 | y_ctr = anchor[1] + 0.5 * (h - 1) 87 | return w, h, x_ctr, y_ctr 88 | 89 | 90 | def _mkanchors(ws, hs, x_ctr, y_ctr): 91 | """ 92 | Given a vector of widths (ws) and heights (hs) around a center 93 | (x_ctr, y_ctr), output a set of anchors (windows). 94 | """ 95 | 96 | ws = ws[:, np.newaxis] 97 | hs = hs[:, np.newaxis] 98 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 99 | y_ctr - 0.5 * (hs - 1), 100 | x_ctr + 0.5 * (ws - 1), 101 | y_ctr + 0.5 * (hs - 1))) 102 | return anchors 103 | 104 | 105 | def _ratio_enum(anchor, ratios): 106 | """ 107 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 108 | """ 109 | 110 | w, h, x_ctr, y_ctr = _whctrs(anchor) 111 | size = w * h 112 | size_ratios = size / ratios 113 | ws = np.round(np.sqrt(size_ratios)) 114 | hs = np.round(ws * ratios) 115 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 116 | return anchors 117 | 118 | 119 | def _scale_enum(anchor, scales): 120 | """ 121 | Enumerate a set of anchors for each scale wrt an anchor. 
122 | """ 123 | 124 | w, h, x_ctr, y_ctr = _whctrs(anchor) 125 | ws = w * scales 126 | hs = h * scales 127 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 128 | return anchors 129 | -------------------------------------------------------------------------------- /rcnn/processing/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..cython.cpu_nms import cpu_nms 3 | try: 4 | from ..cython.gpu_nms import gpu_nms 5 | except ImportError: 6 | gpu_nms = None 7 | 8 | 9 | def py_nms_wrapper(thresh): 10 | def _nms(dets): 11 | return nms(dets, thresh) 12 | return _nms 13 | 14 | 15 | def cpu_nms_wrapper(thresh): 16 | def _nms(dets): 17 | return cpu_nms(dets, thresh) 18 | return _nms 19 | 20 | 21 | def gpu_nms_wrapper(thresh, device_id): 22 | def _nms(dets): 23 | return gpu_nms(dets, thresh, device_id) 24 | if gpu_nms is not None: 25 | return _nms 26 | else: 27 | return cpu_nms_wrapper(thresh) 28 | 29 | 30 | def nms(dets, thresh): 31 | """ 32 | greedily select boxes with high confidence and overlap with current maximum <= thresh 33 | rule out overlap >= thresh 34 | :param dets: [[x1, y1, x2, y2 score]] 35 | :param thresh: retain overlap < thresh 36 | :return: indexes to keep 37 | """ 38 | x1 = dets[:, 0] 39 | y1 = dets[:, 1] 40 | x2 = dets[:, 2] 41 | y2 = dets[:, 3] 42 | scores = dets[:, 4] 43 | 44 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 45 | order = scores.argsort()[::-1] 46 | 47 | keep = [] 48 | while order.size > 0: 49 | i = order[0] 50 | keep.append(i) 51 | xx1 = np.maximum(x1[i], x1[order[1:]]) 52 | yy1 = np.maximum(y1[i], y1[order[1:]]) 53 | xx2 = np.minimum(x2[i], x2[order[1:]]) 54 | yy2 = np.minimum(y2[i], y2[order[1:]]) 55 | 56 | w = np.maximum(0.0, xx2 - xx1 + 1) 57 | h = np.maximum(0.0, yy2 - yy1 + 1) 58 | inter = w * h 59 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | -------------------------------------------------------------------------------- /rcnn/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/336d2a27c91e3c0663d2dcf0b13574674d30f88e 2 | -------------------------------------------------------------------------------- /rcnn/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /rcnn/pycocotools/_mask.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c 2 | # distutils: sources = maskApi.c 3 | 4 | #************************************************************************** 5 | # Microsoft COCO Toolbox. version 2.0 6 | # Data, paper, and tutorials available at: http://mscoco.org/ 7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 8 | # Licensed under the Simplified BSD License [see coco/license.txt] 9 | #************************************************************************** 10 | 11 | __author__ = 'tsungyi' 12 | 13 | import sys 14 | PYTHON_VERSION = sys.version_info[0] 15 | 16 | # import both Python-level and C-level symbols of Numpy 17 | # the API uses Numpy to interface C and Python 18 | import numpy as np 19 | cimport numpy as np 20 | from libc.stdlib cimport malloc, free 21 | 22 | # intialized Numpy. must do. 
23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rleToBbox( const RLE *R, BB bb, siz n ) 49 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 50 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 51 | char* rleToString( const RLE *R ) 52 | void rleFrString( RLE *R, char *s, siz h, siz w ) 53 | 54 | # python class to wrap RLE array in C 55 | # the class handles the memory allocation and deallocation 56 | cdef class RLEs: 57 | cdef RLE *_R 58 | cdef siz _n 59 | 60 | def __cinit__(self, siz n =0): 61 | rlesInit(&self._R, n) 62 | self._n = n 63 | 64 | # free the RLE array here 65 | def __dealloc__(self): 66 | if self._R is not NULL: 67 | for i in range(self._n): 68 | free(self._R[i].cnts) 69 | free(self._R) 70 | def __getattr__(self, key): 71 | if key == 'n': 72 | return self._n 73 | raise AttributeError(key) 74 | 75 | # python class to wrap Mask array in C 76 | # the class handles the memory allocation and deallocation 77 | cdef class Masks: 78 | cdef byte *_mask 79 | cdef siz _h 80 | cdef siz _w 81 | cdef siz _n 82 | 83 | def __cinit__(self, h, w, n): 84 | self._mask = malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( &Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = 
str(obj['counts']).encode('utf8')
127 |         elif PYTHON_VERSION == 3:
128 |             py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts']
129 |         else:
130 |             raise Exception('Python version must be 2 or 3')
131 |         c_string = py_string
132 |         rleFrString( <RLE*> &Rs._R[i], <char*> c_string, obj['size'][0], obj['size'][1] )
133 |     return Rs
134 | 
135 | # encode mask to RLEs objects
136 | # list of RLE string can be generated by RLEs member function
137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
138 |     h, w, n = mask.shape[0], mask.shape[1], mask.shape[2]
139 |     cdef RLEs Rs = RLEs(n)
140 |     rleEncode(Rs._R, <byte*> mask.data, h, w, n)
141 |     objs = _toString(Rs)
142 |     return objs
143 | 
144 | # decode mask from compressed list of RLE string or RLEs object
145 | def decode(rleObjs):
146 |     cdef RLEs Rs = _frString(rleObjs)
147 |     h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n
148 |     masks = Masks(h, w, n)
149 |     rleDecode(<RLE*> Rs._R, masks._mask, n);
150 |     return np.array(masks)
151 | 
152 | def merge(rleObjs, intersect=0):
153 |     cdef RLEs Rs = _frString(rleObjs)
154 |     cdef RLEs R = RLEs(1)
155 |     rleMerge(<RLE*> Rs._R, <RLE*> R._R, <siz> Rs._n, intersect)
156 |     obj = _toString(R)[0]
157 |     return obj
158 | 
159 | def area(rleObjs):
160 |     cdef RLEs Rs = _frString(rleObjs)
161 |     cdef uint* _a = <uint*> malloc(Rs._n * sizeof(uint))
162 |     rleArea(Rs._R, Rs._n, _a)
163 |     cdef np.npy_intp shape[1]
164 |     shape[0] = <np.npy_intp> Rs._n
165 |     a = np.array((Rs._n, ), dtype=np.uint8)
166 |     a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
167 |     PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
168 |     return a
169 | 
170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox).
171 | def iou( dt, gt, pyiscrowd ):
172 |     def _preproc(objs):
173 |         if len(objs) == 0:
174 |             return objs
175 |         if type(objs) == np.ndarray:
176 |             if len(objs.shape) == 1:
177 |                 objs = objs.reshape((objs.shape[0], 1))
178 |             # check if it's Nx4 bbox
179 |             if not len(objs.shape) == 2 or not objs.shape[1] == 4:
180 |                 raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
181 |             objs = objs.astype(np.double)
182 |         elif type(objs) == list:
183 |             # check if list is in box format and convert it to np.ndarray
184 |             isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
185 |             isrle = np.all(np.array([type(obj) == dict for obj in objs]))
186 |             if isbox:
187 |                 objs = np.array(objs, dtype=np.double)
188 |                 if len(objs.shape) == 1:
189 |                     objs = objs.reshape((1,objs.shape[0]))
190 |             elif isrle:
191 |                 objs = _frString(objs)
192 |             else:
193 |                 raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
194 |         else:
195 |             raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /rcnn/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from rcnn.pycocotools import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
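# A toy pure-Python rendering of the RLE counts described above. This mirrors
# only the stated alternating-run semantics (counts start with the number of
# zeros); the real codec is the compiled C below, which additionally applies
# LEB128-style compression to the counts.
def _rle_counts(m):
    # run lengths of a 0/1 vector, always starting with the count of zeros
    counts, prev, run = [], 0, 0
    for v in m:
        if v == prev:
            run += 1
        else:
            counts.append(run)
            prev, run = v, 1
    counts.append(run)
    return counts

def _rle_expand(counts):
    # inverse: rebuild the 0/1 vector from the alternating run lengths
    out, v = [], 0
    for c in counts:
        out += [v] * c
        v = 1 - v
    return out

assert _rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]   # example from the text
assert _rle_counts([1, 1, 1, 1, 1, 1, 0]) == [0, 6, 1]
assert _rle_expand([2, 3, 1, 1]) == [0, 0, 1, 1, 1, 0, 1]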
38 | #
39 | # Usage:
40 | #  Rs     = encode( masks )
41 | #  masks  = decode( Rs )
42 | #  R      = merge( Rs, intersect=false )
43 | #  o      = iou( dt, gt, iscrowd )
44 | #  a      = area( Rs )
45 | #  bbs    = toBbox( Rs )
46 | #  Rs     = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | #  Rs      - [dict] Run-length encoding of binary masks
50 | #  R       - dict Run-length encoding of binary mask
51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | #  dt,gt   - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criterion. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | #  gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criterion for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox.      version 2.0
72 | # Data, paper, and tutorials available at:  http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 | 
76 | iou         = _mask.iou
77 | merge       = _mask.merge
78 | frPyObjects = _mask.frPyObjects
79 | 
80 | def encode(bimask):
81 |     if len(bimask.shape) == 3:
82 |         return _mask.encode(bimask)
83 |     elif len(bimask.shape) == 2:
84 |         h, w = bimask.shape
85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
86 | 
87 | def decode(rleObjs):
88 |     if type(rleObjs) == list:
89 |         return _mask.decode(rleObjs)
90 |     else:
91 |         return _mask.decode([rleObjs])[:,:,0]
92 | 
93 | def area(rleObjs):
94 |     if type(rleObjs) == list:
95 |         return _mask.area(rleObjs)
96 |     else:
97 |         return _mask.area([rleObjs])[0]
98 | 
99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]
104 | 
-------------------------------------------------------------------------------- /rcnn/pycocotools/maskApi.c: --------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox.      version 2.0
3 | * Data, paper, and tutorials available at:  http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include <math.h>
9 | #include <stdlib.h>
10 | 
11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
12 | uint umax( uint a, uint b ) { return (a>b) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /rcnn/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. 
*/ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /rcnn/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /rcnn/sample_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | 4 | config = edict() 5 | 6 | # network related params 7 | config.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 8 | config.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 9 | config.PIXEL_SCALE = 1.0 10 | config.IMAGE_STRIDE = 0 11 | 12 | # dataset related params 13 | config.NUM_CLASSES = 2 14 | config.PRE_SCALES = [(1200, 1600)] # first is scale (the shorter side); second is max size 15 | config.SCALES = [(640, 640)] # first is scale (the shorter side); second is max size 16 | #config.SCALES = [(800, 800)] # first is scale (the shorter side); second is max size 17 | config.ORIGIN_SCALE = False 18 | 19 | _ratio = (1.,) 20 | 21 | RAC_SSH = { 22 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 23 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 24 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 25 | } 26 | 27 | _ratio = (1.,1.5) 28 | RAC_SSH2 = { 29 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 30 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 31 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 32 | } 33 | 34 | _ratio = (1.,1.5) 35 | RAC_SSH3 = { 36 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 37 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 38 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 39 | '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 40 | } 41 | 42 | RAC_RETINA = {} 43 | _ratios = (1.0,) 44 | _ass = 2.0**(1.0/3) 45 | _basescale = 1.0 46 | for _stride in [4, 8, 16, 32, 64]: 47 | key = str(_stride) 48 | value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999} 49 | scales = [] 50 | for _ in range(3): 51 | scales.append(_basescale) 52 | _basescale *= _ass 53 | value['SCALES'] = tuple(scales) 54 | RAC_RETINA[key] = value 55 | 56 | 57 | config.RPN_ANCHOR_CFG = RAC_SSH #default 58 | 59 | config.NET_MODE = 2 60 | config.HEAD_MODULE = 'SSH' 61 | #config.HEAD_MODULE = 'RF' 62 | config.LR_MODE = 0 63 | config.LANDMARK_LR_MULT = 2.0 64 | config.HEAD_FILTER_NUM = 256 65 | config.CONTEXT_FILTER_RATIO = 1 66 | config.max_feat_channel = 9999 67 | 68 | config.USE_CROP = True 69 | config.USE_DCN = 0 
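# A quick standalone check of the RAC_RETINA construction above. Note that
# _basescale is carried across strides rather than reset, so the five levels
# share a single geometric series with ratio 2**(1.0/3), three scales per
# stride (with BASE_SIZE 16, the absolute anchor side is 16 * scale):
#   stride  4: (1.0,   1.26,  1.59)
#   stride  8: (2.0,   2.52,  3.17)
#   stride 16: (4.0,   5.04,  6.35)
#   stride 32: (8.0,  10.08, 12.70)
#   stride 64: (16.0, 20.16, 25.40)
_chk = [(2.0 ** (1.0 / 3)) ** i for i in range(13)]
assert abs(RAC_RETINA['4']['SCALES'][0] - _chk[0]) < 1e-6     # 1.0
assert abs(RAC_RETINA['8']['SCALES'][0] - _chk[3]) < 1e-6     # 2.0
assert abs(RAC_RETINA['64']['SCALES'][0] - _chk[12]) < 1e-6   # 2.0**4 == 16.0
del _chk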
70 | config.FACE_LANDMARK = True 71 | config.USE_OCCLUSION = False 72 | config.USE_BLUR = False 73 | config.MORE_SMALL_BOX = True 74 | 75 | config.LAYER_FIX = False 76 | 77 | config.HEAD_BOX = False 78 | config.DENSE_ANCHOR = False 79 | config.USE_MAXOUT = 0 80 | config.SHARE_WEIGHT_BBOX = False 81 | config.SHARE_WEIGHT_LANDMARK = False 82 | 83 | config.RANDOM_FEAT_STRIDE = False 84 | config.NUM_CPU = 4 85 | config.MIXUP = 0.0 86 | config.USE_3D = False 87 | 88 | #config.BBOX_MASK_THRESH = 0 89 | config.COLOR_MODE = 2 90 | config.COLOR_JITTERING = 0.125 91 | #config.COLOR_JITTERING = 0 92 | #config.COLOR_JITTERING = 0.2 93 | 94 | 95 | config.TRAIN = edict() 96 | 97 | config.TRAIN.IMAGE_ALIGN = 0 98 | config.TRAIN.MIN_BOX_SIZE = 0 99 | config.BBOX_MASK_THRESH = config.TRAIN.MIN_BOX_SIZE 100 | # R-CNN and RPN 101 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 102 | config.TRAIN.BATCH_IMAGES = 8 103 | # e2e changes behavior of anchor loader and metric 104 | config.TRAIN.END2END = True 105 | # group images with similar aspect ratio 106 | config.TRAIN.ASPECT_GROUPING = False 107 | 108 | # RPN anchor loader 109 | # rpn anchors batch size 110 | config.TRAIN.RPN_ENABLE_OHEM = 2 111 | config.TRAIN.RPN_BATCH_SIZE = 256 112 | # rpn anchors sampling params 113 | config.TRAIN.RPN_FG_FRACTION = 0.25 114 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5 115 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 116 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 117 | config.TRAIN.RPN_FORCE_POSITIVE = False 118 | # rpn bounding box regression params 119 | #config.TRAIN.RPN_BBOX_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 120 | #config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 121 | #config.TRAIN.RPN_LANDMARK_WEIGHTS = (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0) 122 | #config.TRAIN.RPN_INVALID_LANDMARK_WEIGHTS = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) 123 | 124 | # used for end2end training 125 | # RPN proposal 126 | #config.TRAIN.CXX_PROPOSAL = True 127 | #config.TRAIN.RPN_NMS_THRESH = 0.7 128 | #config.TRAIN.RPN_PRE_NMS_TOP_N = 12000 129 | #config.TRAIN.RPN_POST_NMS_TOP_N = 2000 130 | #config.TRAIN.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE 131 | #config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True 132 | #config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0) 133 | #config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2) 134 | 135 | config.TEST = edict() 136 | 137 | # R-CNN testing 138 | # use rpn to generate proposal 139 | config.TEST.HAS_RPN = False 140 | # size of images for each device 141 | config.TEST.BATCH_IMAGES = 1 142 | 143 | # RPN proposal 144 | config.TEST.CXX_PROPOSAL = True 145 | config.TEST.RPN_NMS_THRESH = 0.3 146 | config.TEST.RPN_PRE_NMS_TOP_N = 1000 147 | config.TEST.RPN_POST_NMS_TOP_N = 3000 148 | #config.TEST.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE 149 | #config.TEST.RPN_MIN_SIZE = [0,0,0] 150 | 151 | # RCNN nms 152 | config.TEST.NMS = 0.3 153 | 154 | config.TEST.SCORE_THRESH = 0.05 155 | config.TEST.IOU_THRESH = 0.5 156 | 157 | 158 | # network settings 159 | network = edict() 160 | 161 | network.ssh = edict() 162 | 163 | network.mnet = edict() 164 | #network.mnet.pretrained = 'model/mnasnet' 165 | #network.mnet.pretrained = 'model/mobilenetv2_0_5' 166 | #network.mnet.pretrained = 'model/mobilenet_0_5' 167 | #network.mnet.MULTIPLIER = 0.5 168 | #network.mnet.pretrained = 'model/mobilenet_0_25' 169 | #network.mnet.pretrained_epoch = 0 170 | #network.mnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485]) 171 | #network.mnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229]) 172 | #network.mnet.PIXEL_SCALE = 255.0 173 | 
network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 174 | network.mnet.BATCH_IMAGES = 16 175 | network.mnet.HEAD_FILTER_NUM = 64 176 | network.mnet.CONTEXT_FILTER_RATIO = 1 177 | 178 | network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 179 | network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 180 | network.mnet.PIXEL_SCALE = 1.0 181 | #network.mnet.pretrained = 'model/mobilenetfd_0_25' #78 182 | #network.mnet.pretrained = 'model/mobilenetfd2' #75 183 | network.mnet.pretrained = 'model/mobilenet025fd0' #78 184 | #network.mnet.pretrained = 'model/mobilenet025fd1' #75 185 | #network.mnet.pretrained = 'model/mobilenet025fd2' # 186 | network.mnet.pretrained_epoch = 0 187 | network.mnet.max_feat_channel = 8888 188 | network.mnet.COLOR_MODE = 1 189 | network.mnet.USE_CROP = True 190 | network.mnet.RPN_ANCHOR_CFG = RAC_SSH 191 | network.mnet.LAYER_FIX = True 192 | network.mnet.LANDMARK_LR_MULT = 2.5 193 | 194 | 195 | network.resnet = edict() 196 | #network.resnet.pretrained = 'model/ResNet50_v1d' 197 | #network.resnet.pretrained = 'model/resnet-50' 198 | network.resnet.pretrained = 'model/resnet-152' 199 | #network.resnet.pretrained = 'model/senet154' 200 | #network.resnet.pretrained = 'model/densenet161' 201 | network.resnet.pretrained_epoch = 0 202 | #network.mnet.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 203 | #network.mnet.PIXEL_STDS = np.array([57.375, 57.12, 58.393]) 204 | #network.resnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485]) 205 | #network.resnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229]) 206 | #network.resnet.PIXEL_SCALE = 255.0 207 | network.resnet.lr_step = '1,2,3,4,5,55,68,80' 208 | network.resnet.lr = 0.001 209 | network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 210 | network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 211 | network.resnet.PIXEL_SCALE = 1.0 212 | network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 213 | network.resnet.BATCH_IMAGES = 8 214 | network.resnet.HEAD_FILTER_NUM = 256 215 | network.resnet.CONTEXT_FILTER_RATIO = 1 216 | network.resnet.USE_DCN = 2 217 | network.resnet.RPN_BATCH_SIZE = 256 218 | network.resnet.RPN_ANCHOR_CFG = RAC_RETINA 219 | 220 | network.resnet.USE_DCN = 0 221 | network.resnet.pretrained = 'model/resnet-50' 222 | network.resnet.RPN_ANCHOR_CFG = RAC_SSH 223 | 224 | 225 | # dataset settings 226 | dataset = edict() 227 | 228 | dataset.widerface = edict() 229 | dataset.widerface.dataset = 'widerface' 230 | dataset.widerface.image_set = 'train' 231 | dataset.widerface.test_image_set = 'val' 232 | dataset.widerface.root_path = 'data' 233 | dataset.widerface.dataset_path = 'data/widerface' 234 | dataset.widerface.NUM_CLASSES = 2 235 | 236 | dataset.retinaface = edict() 237 | dataset.retinaface.dataset = 'retinaface' 238 | dataset.retinaface.image_set = 'train' 239 | dataset.retinaface.test_image_set = 'val' 240 | dataset.retinaface.root_path = 'data' 241 | dataset.retinaface.dataset_path = 'data/retinaface' 242 | dataset.retinaface.NUM_CLASSES = 2 243 | 244 | # default settings 245 | default = edict() 246 | 247 | config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling'] 248 | #config.FIXED_PARAMS = ['^.*upsampling'] 249 | #config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3'] 250 | #config.FIXED_PARAMS = ['^conv0', '^stage1', 'gamma', 'beta'] #for resnet 251 | 252 | # default network 253 | default.network = 'resnet' 254 | default.pretrained = 'model/resnet-152' 255 | #default.network = 'resnetssh' 256 | default.pretrained_epoch = 0 257 | # default dataset 258 | default.dataset = 'retinaface' 259 | 
default.image_set = 'train'
260 | default.test_image_set = 'val'
261 | default.root_path = 'data'
262 | default.dataset_path = 'data/retinaface'
263 | # default training
264 | default.frequent = 20
265 | default.kvstore = 'device'
266 | # default e2e
267 | default.prefix = 'model/retinaface'
268 | default.end_epoch = 10000
269 | default.lr_step = '55,68,80'
270 | default.lr = 0.01
271 | 
272 | def generate_config(_network, _dataset):
273 |     for k, v in network[_network].items():
274 |         if k in config:
275 |             config[k] = v
276 |         elif k in default:
277 |             default[k] = v
278 |         if k in config.TRAIN:
279 |             config.TRAIN[k] = v
280 |     for k, v in dataset[_dataset].items():
281 |         if k in config:
282 |             config[k] = v
283 |         elif k in default:
284 |             default[k] = v
285 |         if k in config.TRAIN:
286 |             config.TRAIN[k] = v
287 |     config.network = _network
288 |     config.dataset = _dataset
289 |     config.RPN_FEAT_STRIDE = []
290 |     num_anchors = []
291 |     for k in config.RPN_ANCHOR_CFG:
292 |         config.RPN_FEAT_STRIDE.append( int(k) )
293 |         _num_anchors = len(config.RPN_ANCHOR_CFG[k]['SCALES'])*len(config.RPN_ANCHOR_CFG[k]['RATIOS'])
294 |         if config.DENSE_ANCHOR:
295 |             _num_anchors *= 2
296 |         config.RPN_ANCHOR_CFG[k]['NUM_ANCHORS'] = _num_anchors
297 |         num_anchors.append(_num_anchors)
298 |     config.RPN_FEAT_STRIDE = sorted(config.RPN_FEAT_STRIDE, reverse=True)
299 |     for j in range(1, len(num_anchors)):
300 |         assert num_anchors[0] == num_anchors[j]
301 |     config.NUM_ANCHORS = num_anchors[0]
302 | 
303 | 
-------------------------------------------------------------------------------- /rcnn/symbol/__init__.py: --------------------------------------------------------------------------------
1 | from .symbol_ssh import *
2 | from .symbol_mnet import *
3 | from .symbol_resnet import *
4 | 
-------------------------------------------------------------------------------- /rcnn/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/tools/__init__.py
-------------------------------------------------------------------------------- /rcnn/tools/demo_single_image.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from ..config import config, default, generate_config
3 | from ..symbol import *
4 | from ..utils.load_model import load_param
5 | from ..core.module import MutableModule
6 | from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes
7 | from rcnn.processing.nms import py_nms_wrapper
8 | import mxnet as mx
9 | #from rcnn.processing.nms import processing_nms_wrapper
10 | bbox_pred = nonlinear_pred
11 | 
12 | import numpy as np
13 | import cv2
14 | import matplotlib.pyplot as plt
15 | 
16 | def demo_maskrcnn(network, ctx, prefix, epoch, img_path,
17 |                   vis=True, has_rpn=True, thresh=0.001):
18 | 
19 |     assert has_rpn, "Only has_rpn==True is supported."
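    # A usage sketch for generate_config from rcnn/sample_config.py above,
    # assuming the repository root is on PYTHONPATH; the expected values are
    # read off the 'mnet' overrides together with the RAC_SSH anchor config:
    #
    #   from rcnn.sample_config import config, default, generate_config
    #   generate_config('mnet', 'retinaface')
    #   config.RPN_FEAT_STRIDE  # -> [32, 16, 8], strides sorted largest first
    #   config.NUM_ANCHORS      # -> 2, i.e. len(SCALES) * len(RATIOS) = 2 * 1 (DENSE_ANCHOR off)
    #   default.pretrained      # -> 'model/mobilenet025fd0'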
20 | #sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) 21 | sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES) 22 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 23 | for k,v in arg_params.iteritems(): 24 | print(k, v.shape) 25 | 26 | max_image_shape = (1,3,1024,1024) 27 | max_data_shapes = [("data",max_image_shape),("im_info",(1,3))] 28 | mod = MutableModule(symbol = sym, data_names = ["data","im_info"], label_names= None, 29 | max_data_shapes = max_data_shapes, 30 | context=ctx) 31 | mod.bind(data_shapes = max_data_shapes, label_shapes = None, for_training=False) 32 | mod.init_params(arg_params=arg_params, aux_params=aux_params) 33 | 34 | class OneDataBatch(): 35 | def __init__(self,img): 36 | im_info = mx.nd.array([[img.shape[0],img.shape[1],1.0]]) 37 | img = np.transpose(img,(2,0,1)) 38 | img = img[np.newaxis,(2,1,0)] 39 | self.data = [mx.nd.array(img),im_info] 40 | self.label = None 41 | self.provide_label = None 42 | self.provide_data = [("data",(1,3,img.shape[2],img.shape[3])),("im_info",(1,3))] 43 | 44 | img_ori = cv2.imread(img_path) 45 | batch = OneDataBatch(img_ori) 46 | mod.forward(batch, False) 47 | results = mod.get_outputs() 48 | output = dict(zip(mod.output_names, results)) 49 | rois = output['rois_output'].asnumpy()[:, 1:] 50 | 51 | 52 | scores = output['cls_prob_reshape_output'].asnumpy()[0] 53 | bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0] 54 | mask_output = output['mask_prob_output'].asnumpy() 55 | 56 | pred_boxes = bbox_pred(rois, bbox_deltas) 57 | pred_boxes = clip_boxes(pred_boxes, [img_ori.shape[0],img_ori.shape[1]]) 58 | 59 | nms = py_nms_wrapper(config.TEST.NMS) 60 | #nms = processing_nms_wrapper(config.TEST.NMS, 0.7) 61 | boxes= pred_boxes 62 | 63 | CLASSES = ('__background__', 'text') 64 | 65 | all_boxes = [[[] for _ in xrange(1)] 66 | for _ in xrange(len(CLASSES))] 67 | all_masks = [[[] for _ in xrange(1)] 68 | for _ in xrange(len(CLASSES))] 69 | label = np.argmax(scores, axis=1) 70 | label = label[:, np.newaxis] 71 | 72 | for cls in CLASSES: 73 | cls_ind = CLASSES.index(cls) 74 | cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] 75 | cls_masks = mask_output[:, cls_ind, :, :] 76 | cls_scores = scores[:, cls_ind, np.newaxis] 77 | #print cls_scores.shape, label.shape 78 | keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0] 79 | cls_masks = cls_masks[keep, :, :] 80 | dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :] 81 | keep = nms(dets) 82 | #print dets.shape, cls_masks.shape 83 | all_boxes[cls_ind] = dets[keep, :] 84 | all_masks[cls_ind] = cls_masks[keep, :, :] 85 | 86 | boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))] 87 | masks_this_image = [[]] + [all_masks[j] for j in range(1, len(CLASSES))] 88 | 89 | 90 | import copy 91 | import random 92 | class_names = CLASSES 93 | color_white = (255, 255, 255) 94 | scale = 1.0 95 | im = copy.copy(img_ori) 96 | 97 | for j, name in enumerate(class_names): 98 | if name == '__background__': 99 | continue 100 | color = (random.randint(0, 256), random.randint(0, 256), random.randint(0, 256)) # generate a random color 101 | dets = boxes_this_image[j] 102 | masks = masks_this_image[j] 103 | for i in range(len(dets)): 104 | bbox = dets[i, :4] * scale 105 | if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[1] or bbox[2] == bbox[3] : 106 | continue 107 | score = dets[i, -1] 108 | bbox = map(int, bbox) 109 | 
cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=color, thickness=2) 110 | cv2.putText(im, '%s %.3f' % (class_names[j], score), (bbox[0], bbox[1] + 10), 111 | color=color_white, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5) 112 | mask = masks[i, :, :] 113 | mask = cv2.resize(mask, (bbox[2] - bbox[0], (bbox[3] - bbox[1])), interpolation=cv2.INTER_LINEAR) 114 | mask[mask > 0.5] = 1 115 | mask[mask <= 0.5] = 0 116 | mask_color = random.randint(0, 255) 117 | c = random.randint(0, 2) 118 | target = im[bbox[1]: bbox[3], bbox[0]: bbox[2], c] + mask_color * mask 119 | target[target >= 255] = 255 120 | im[bbox[1]: bbox[3], bbox[0]: bbox[2], c] = target 121 | ##im = im[:,:,(2,1,0)] 122 | ##plt.imshow(im) 123 | cv2.imwrite("figures/test_result.jpg",im) 124 | #if vis: 125 | # plt.show() 126 | # else: 127 | # plt.savefig("figures/test_result.jpg") 128 | def parse_args(): 129 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 130 | # general 131 | parser.add_argument('--network', help='network name', default=default.network, type=str) 132 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 133 | args, rest = parser.parse_known_args() 134 | generate_config(args.network, args.dataset) 135 | # testing 136 | parser.add_argument('--prefix', help='model to test with', default=default.rcnn_prefix, type=str) 137 | parser.add_argument('--epoch', help='model to test with', default=default.rcnn_epoch, type=int) 138 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 139 | # rcnn 140 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 141 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 142 | parser.add_argument('--image_name', help='image file path',type=str) 143 | 144 | 145 | args = parser.parse_args() 146 | 147 | return args 148 | 149 | 150 | def main(): 151 | args = parse_args() 152 | ctx = mx.gpu(args.gpu) 153 | print args 154 | demo_maskrcnn(network = args.network, 155 | ctx = ctx, 156 | prefix = args.prefix, 157 | epoch = args.epoch, 158 | img_path = args.image_name, 159 | vis= args.vis, 160 | has_rpn = True, 161 | thresh = args.thresh) 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /rcnn/tools/reeval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import os 7 | import mxnet as mx 8 | 9 | from ..logger import logger 10 | from ..config import config, default, generate_config 11 | from ..dataset import * 12 | 13 | 14 | def reeval(args): 15 | # load imdb 16 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 17 | 18 | # load detection results 19 | cache_file = os.path.join(imdb.cache_path, imdb.name, 'detections.pkl') 20 | with open(cache_file) as f: 21 | detections = pickle.load(f) 22 | 23 | # eval 24 | imdb.evaluate_detections(detections) 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='imdb test') 29 | # general 30 | parser.add_argument('--network', help='network name', default=default.network, type=str) 31 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 32 | args, rest = parser.parse_known_args() 33 | generate_config(args.network, args.dataset) 34 | parser.add_argument('--image_set', 
help='image_set name', default=default.image_set, type=str) 35 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 36 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 37 | # other 38 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | logger.info('Called with argument: %s' % args) 46 | reeval(args) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /rcnn/tools/test_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, pred_eval 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rcnn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, has_rpn, proposal, thresh): 17 | # set config 18 | if has_rpn: 19 | config.TEST.HAS_RPN = True 20 | 21 | # print config 22 | logger.info(pprint.pformat(config)) 23 | 24 | # load symbol and testing data 25 | if has_rpn: 26 | sym = eval('get_' + network + '_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | else: 30 | sym = eval('get_' + network + '_rcnn_test')(num_classes=config.NUM_CLASSES) 31 | imdb = eval(dataset)(image_set, root_path, dataset_path) 32 | gt_roidb = imdb.gt_roidb() 33 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb) 34 | 35 | # get test data iter 36 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) 37 | 38 | # load model 39 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 40 | 41 | # infer shape 42 | data_shape_dict = dict(test_data.provide_data) 43 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 44 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 45 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 46 | 47 | # check parameters 48 | for k in sym.list_arguments(): 49 | if k in data_shape_dict or 'label' in k: 50 | continue 51 | assert k in arg_params, k + ' not initialized' 52 | assert arg_params[k].shape == arg_shape_dict[k], \ 53 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 54 | for k in sym.list_auxiliary_states(): 55 | assert k in aux_params, k + ' not initialized' 56 | assert aux_params[k].shape == aux_shape_dict[k], \ 57 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 58 | 59 | # decide maximum shape 60 | data_names = [k[0] for k in test_data.provide_data] 61 | label_names = None 62 | max_data_shape = [('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 63 | if not has_rpn: 64 | max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 65 | 66 | # create predictor 67 | predictor = Predictor(sym, data_names, label_names, 68 | context=ctx, max_data_shapes=max_data_shape, 69 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 70 | arg_params=arg_params, aux_params=aux_params) 71 | 72 | # start detection 73 | pred_eval(predictor, test_data, imdb, vis=vis, thresh=thresh) 74 | 75 | 76 | def parse_args(): 77 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 78 | # general 79 | parser.add_argument('--network', help='network name', default=default.network, type=str) 80 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 81 | args, rest = parser.parse_known_args() 82 | generate_config(args.network, args.dataset) 83 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 84 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 85 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 86 | # testing 87 | parser.add_argument('--prefix', help='model to test with', default=default.rcnn_prefix, type=str) 88 | parser.add_argument('--epoch', help='model to test with', default=default.rcnn_epoch, type=int) 89 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 90 | # rcnn 91 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 92 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 93 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 94 | parser.add_argument('--has_rpn', help='generate proposals on the fly', action='store_true') 95 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', type=str) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def main(): 101 | args = parse_args() 102 | logger.info('Called with argument: %s' % args) 103 | ctx = 
mx.gpu(args.gpu) 104 | test_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 105 | ctx, args.prefix, args.epoch, 106 | args.vis, args.shuffle, args.has_rpn, args.proposal, args.thresh) 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /rcnn/tools/test_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, generate_proposals, test_proposals 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rpn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, thresh, test_output=False): 17 | # rpn generate proposal config 18 | config.TEST.HAS_RPN = True 19 | 20 | # print config 21 | logger.info(pprint.pformat(config)) 22 | 23 | # load symbol 24 | sym = eval('get_' + network + '_rpn_test')() 25 | 26 | # load dataset and prepare imdb for training 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=True, withlabel=True) 30 | 31 | # load model 32 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx) 33 | 34 | # infer shape 35 | data_shape_dict = dict(test_data.provide_data) 36 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 37 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 38 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 39 | 40 | # check parameters 41 | for k in sym.list_arguments(): 42 | if k in data_shape_dict or 'label' in k: 43 | continue 44 | assert k in arg_params, k + ' not initialized' 45 | assert arg_params[k].shape == arg_shape_dict[k], \ 46 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 47 | for k in sym.list_auxiliary_states(): 48 | assert k in aux_params, k + ' not initialized' 49 | assert aux_params[k].shape == aux_shape_dict[k], \ 50 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 51 | 52 | # decide maximum shape 53 | data_names = [k[0] for k in test_data.provide_data] 54 | label_names = None if test_data.provide_label is None else [k[0] for k in test_data.provide_label] 55 | max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 56 | 57 | # create predictor 58 | predictor = Predictor(sym, data_names, label_names, 59 | context=ctx, max_data_shapes=max_data_shape, 60 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 61 | arg_params=arg_params, aux_params=aux_params) 62 | 63 | # start testing 64 | if not test_output: 65 | imdb_boxes = generate_proposals(predictor, test_data, imdb, vis=vis, thresh=thresh) 66 | imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 67 | else: 68 | test_proposals(predictor, test_data, imdb, roidb, vis=vis) 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Test a Region Proposal Network') 73 | # general 74 | parser.add_argument('--network', help='network name', default=default.network, type=str) 75 | 
parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 76 | args, rest = parser.parse_known_args() 77 | generate_config(args.network, args.dataset) 78 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 79 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 80 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 81 | # testing 82 | parser.add_argument('--prefix', help='model to test with', default=default.rpn_prefix, type=str) 83 | parser.add_argument('--epoch', help='model to test with', default=default.rpn_epoch, type=int) 84 | # rpn 85 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 86 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 87 | parser.add_argument('--thresh', help='rpn proposal threshold', default=0, type=float) 88 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 89 | args = parser.parse_args() 90 | return args 91 | 92 | 93 | def main(): 94 | args = parse_args() 95 | logger.info('Called with argument: %s' % args) 96 | ctx = mx.gpu(args.gpu) 97 | test_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 98 | ctx, args.prefix, args.epoch, 99 | args.vis, args.shuffle, args.thresh) 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /rcnn/tools/train_maskrcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pprint 4 | import mxnet as mx 5 | import numpy as np 6 | import os.path as osp 7 | import cPickle as pkl 8 | 9 | from ..config import config, default, generate_config 10 | from ..symbol import * 11 | from ..core import callback, metric 12 | from ..core.loader import MaskROIIter 13 | from ..core.module import MutableModule 14 | from ..processing.bbox_regression import add_bbox_regression_targets, add_mask_targets 15 | from ..processing.assign_levels import add_assign_targets 16 | from ..utils.load_data import load_proposal_roidb, merge_roidb #, filter_roidb 17 | from ..utils.load_model import load_param 18 | 19 | def train_maskrcnn(network, dataset, image_set, root_path, dataset_path, 20 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 21 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 22 | train_shared, lr, lr_step, proposal, maskrcnn_stage=None): 23 | # set up logger 24 | logging.basicConfig() 25 | logger = logging.getLogger() 26 | logger.setLevel(logging.INFO) 27 | 28 | # load symbol 29 | config.TRAIN.BATCH_IMAGES = 1 30 | config.TRAIN.BATCH_ROIS = 256 31 | sym = eval('get_' + network + '_maskrcnn')(num_classes=config.NUM_CLASSES) 32 | 33 | # setup multi-gpu 34 | batch_size = len(ctx) 35 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 36 | 37 | # print config 38 | pprint.pprint(config) 39 | 40 | USE_CACHE = True 41 | 42 | if USE_CACHE: 43 | roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask.pkl' 44 | mean_file = root_path + '/cache/' + dataset + '_roidb_mean.pkl' 45 | std_file = root_path + '/cache/' + dataset + '_roidb_std.pkl' 46 | if maskrcnn_stage is not None: 47 | roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask_' + maskrcnn_stage + '.pkl' 48 | mean_file = root_path + '/cache/' + dataset + '_roidb_mean_' + 
maskrcnn_stage + '.pkl' 49 | std_file = root_path + '/cache/' + dataset + '_roidb_std_' + maskrcnn_stage + '.pkl' 50 | 51 | if USE_CACHE and osp.exists(roidb_file) and osp.exists(mean_file) and osp.exists(std_file): 52 | print 'Load ' + roidb_file 53 | with open(roidb_file, 'r') as f: 54 | roidb = pkl.load(f) 55 | print 'Load ' + mean_file 56 | with open(mean_file, 'r') as f: 57 | means = pkl.load(f) 58 | print 'Load ' + std_file 59 | with open(std_file, 'r') as f: 60 | stds = pkl.load(f) 61 | else: 62 | # load dataset and prepare imdb for training 63 | image_sets = [iset for iset in image_set.split('+')] 64 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 65 | proposal=proposal, append_gt=True, flip=not no_flip) 66 | for image_set in image_sets] 67 | roidb = merge_roidb(roidbs) 68 | 69 | def filter_roidb(roidb): 70 | """ remove roidb entries without usable rois """ 71 | 72 | def is_valid(entry): 73 | """ valid images have at least 1 fg or bg roi """ 74 | overlaps = entry['max_overlaps'] 75 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 76 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 77 | valid = len(fg_inds) > 0 and len(bg_inds) > 0 78 | return valid 79 | 80 | num = len(roidb) 81 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 82 | num_after = len(filtered_roidb) 83 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 84 | 85 | return filtered_roidb 86 | 87 | roidb = filter_roidb(roidb) 88 | means, stds = add_bbox_regression_targets(roidb) 89 | add_assign_targets(roidb) 90 | add_mask_targets(roidb) 91 | if USE_CACHE: 92 | for file, obj in zip([roidb_file, mean_file, std_file], [roidb, means, stds]): 93 | with open(file, 'w') as f: 94 | pkl.dump(obj, f, -1) 95 | 96 | # load training data 97 | train_data = MaskROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 98 | ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 99 | 100 | # infer max shape 101 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 102 | max_label_shape = [] 103 | for s in config.RCNN_FEAT_STRIDE: 104 | max_data_shape.append(('rois_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, 5))) 105 | max_label_shape.append(('label_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS))) 106 | max_label_shape.append(('bbox_target_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS*config.NUM_CLASSES*4))) 107 | max_label_shape.append(('bbox_weight_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS*config.NUM_CLASSES*4))) 108 | max_label_shape.append(('mask_target_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES, 28, 28))) 109 | max_label_shape.append(('mask_weight_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES, 1, 1))) 110 | # infer shape 111 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 112 | 113 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 114 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 115 | out_shape_dict = zip(sym.list_outputs(), out_shape) 116 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 117 | print 'output shape' 118 | pprint.pprint(out_shape_dict) 119 | 120 | # load and initialize params 121 | if resume: 122 | arg_params, aux_params = load_param(prefix, begin_epoch, 
convert=True) 123 | else: 124 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 125 | init_bbox_pred = mx.init.Normal(sigma=0.001) 126 | init_internal = mx.init.Normal(sigma=0.01) 127 | init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2) 128 | for k in sym.list_arguments(): 129 | if k in data_shape_dict: 130 | continue 131 | if k not in arg_params: 132 | print 'init', k 133 | arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k]) 134 | init_internal(k, arg_params[k]) 135 | if k in ['rcnn_fc_bbox_weight', 'bbox_pred_weight']: 136 | init_bbox_pred(k, arg_params[k]) 137 | if k.endswith('bias'): 138 | arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k]) 139 | if 'ctx_red_weight' in k: 140 | ctx_shape = np.array(arg_shape_dict[k]) 141 | ctx_shape[1] /= 2 142 | arg_params[k][:] = np.concatenate((np.eye(ctx_shape[1]).reshape(ctx_shape), np.zeros(ctx_shape)), axis=1) 143 | 144 | for k in sym.list_auxiliary_states(): 145 | if k not in aux_params: 146 | print 'init', k 147 | aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k]) 148 | init(k, aux_params[k]) 149 | 150 | # check parameter shapes 151 | for k in sym.list_arguments(): 152 | if k in data_shape_dict: 153 | continue 154 | assert k in arg_params, k + ' not initialized' 155 | assert arg_params[k].shape == arg_shape_dict[k], \ 156 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 157 | for k in sym.list_auxiliary_states(): 158 | assert k in aux_params, k + ' not initialized' 159 | assert aux_params[k].shape == aux_shape_dict[k], \ 160 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 161 | 162 | # prepare training 163 | # create solver 164 | data_names = [k[0] for k in train_data.provide_data] 165 | label_names = [k[0] for k in train_data.provide_label] 166 | if train_shared: 167 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 168 | else: 169 | fixed_param_prefix = config.FIXED_PARAMS 170 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 171 | logger=logger, context=ctx, work_load_list=work_load_list, 172 | max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, 173 | fixed_param_prefix=fixed_param_prefix) 174 | 175 | # decide training params 176 | # metric 177 | eval_metric = metric.RCNNAccMetric() 178 | cls_metric = metric.RCNNLogLossMetric() 179 | bbox_metric = metric.RCNNL1LossMetric() 180 | mask_acc_metric = metric.MaskAccMetric() 181 | mask_log_metric = metric.MaskLogLossMetric() 182 | eval_metrics = mx.metric.CompositeEvalMetric() 183 | for child_metric in [eval_metric, cls_metric, bbox_metric, mask_acc_metric, mask_log_metric]: 184 | eval_metrics.add(child_metric) 185 | # callback 186 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent) 187 | epoch_end_callback = callback.do_checkpoint(prefix, means, stds) 188 | # decide learning rate 189 | base_lr = lr 190 | lr_factor = 0.1 191 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 192 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 193 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 194 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 195 | print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters 196 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 197 | # optimizer 198 | optimizer_params = {'momentum': 0.9, 199 | 'wd': 
0.0001, 200 | 'learning_rate': lr, 201 | 'lr_scheduler': lr_scheduler, 202 | 'rescale_grad': (1.0 / batch_size), 203 | 'clip_gradient': 5} 204 | 205 | # train 206 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 207 | batch_end_callback=batch_end_callback, kvstore=kvstore, 208 | optimizer='sgd', optimizer_params=optimizer_params, 209 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 210 | 211 | -------------------------------------------------------------------------------- /rcnn/tools/train_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import ROIIter 10 | from ..core.module import MutableModule 11 | from ..processing.bbox_regression import add_bbox_regression_targets 12 | from ..utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 13 | from ..utils.load_model import load_param 14 | 15 | 16 | def train_rcnn(network, dataset, image_set, root_path, dataset_path, 17 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 18 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 19 | train_shared, lr, lr_step, proposal): 20 | # set up config 21 | config.TRAIN.BATCH_IMAGES = 2 22 | config.TRAIN.BATCH_ROIS = 128 23 | if proposal == 'ss': 24 | config.TRAIN.BG_THRESH_LO = 0.1 # reproduce Fast R-CNN 25 | 26 | # load symbol 27 | sym = eval('get_' + network + '_rcnn')(num_classes=config.NUM_CLASSES) 28 | 29 | # setup multi-gpu 30 | batch_size = len(ctx) 31 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 32 | 33 | # print config 34 | logger.info(pprint.pformat(config)) 35 | 36 | # load dataset and prepare imdb for training 37 | image_sets = [iset for iset in image_set.split('+')] 38 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 39 | proposal=proposal, append_gt=True, flip=not no_flip) 40 | for image_set in image_sets] 41 | roidb = merge_roidb(roidbs) 42 | roidb = filter_roidb(roidb) 43 | means, stds = add_bbox_regression_targets(roidb) 44 | 45 | # load training data 46 | train_data = ROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 47 | ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 48 | 49 | # infer max shape 50 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 51 | logger.info('providing maximum shape %s' % max_data_shape) 52 | 53 | # infer shape 54 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 55 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 56 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 57 | out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) 58 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 59 | logger.info('output shape %s' % pprint.pformat(out_shape_dict)) 60 | 61 | # load and initialize params 62 | if resume: 63 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 64 | else: 65 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 66 | arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight']) 67 | arg_params['cls_score_bias'] = 
mx.nd.zeros(shape=arg_shape_dict['cls_score_bias']) 68 | arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight']) 69 | arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias']) 70 | 71 | # check parameter shapes 72 | for k in sym.list_arguments(): 73 | if k in data_shape_dict: 74 | continue 75 | assert k in arg_params, k + ' not initialized' 76 | assert arg_params[k].shape == arg_shape_dict[k], \ 77 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 78 | for k in sym.list_auxiliary_states(): 79 | assert k in aux_params, k + ' not initialized' 80 | assert aux_params[k].shape == aux_shape_dict[k], \ 81 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 82 | 83 | # prepare training 84 | # create solver 85 | data_names = [k[0] for k in train_data.provide_data] 86 | label_names = [k[0] for k in train_data.provide_label] 87 | if train_shared: 88 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 89 | else: 90 | fixed_param_prefix = config.FIXED_PARAMS 91 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 92 | logger=logger, context=ctx, work_load_list=work_load_list, 93 | max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix) 94 | 95 | # decide training params 96 | # metric 97 | eval_metric = metric.RCNNAccMetric() 98 | cls_metric = metric.RCNNLogLossMetric() 99 | bbox_metric = metric.RCNNL1LossMetric() 100 | eval_metrics = mx.metric.CompositeEvalMetric() 101 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 102 | eval_metrics.add(child_metric) 103 | # callback 104 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent, auto_reset=False) 105 | epoch_end_callback = callback.do_checkpoint(prefix, means, stds) 106 | # decide learning rate 107 | base_lr = lr 108 | lr_factor = 0.1 109 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 110 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 111 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 112 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 113 | logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) 114 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 115 | # optimizer 116 | optimizer_params = {'momentum': 0.9, 117 | 'wd': 0.0005, 118 | 'learning_rate': lr, 119 | 'lr_scheduler': lr_scheduler, 120 | 'rescale_grad': (1.0 / batch_size), 121 | 'clip_gradient': 5} 122 | 123 | # train 124 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 125 | batch_end_callback=batch_end_callback, kvstore=kvstore, 126 | optimizer='sgd', optimizer_params=optimizer_params, 127 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 128 | 129 | 130 | def parse_args(): 131 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN Network') 132 | # general 133 | parser.add_argument('--network', help='network name', default=default.network, type=str) 134 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 135 | args, rest = parser.parse_known_args() 136 | generate_config(args.network, args.dataset) 137 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 138 | parser.add_argument('--root_path', 
help='output data folder', default=default.root_path, type=str) 139 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 140 | # training 141 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 142 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 143 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 144 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 145 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 146 | parser.add_argument('--resume', help='continue training', action='store_true') 147 | # rcnn 148 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 149 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 150 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 151 | parser.add_argument('--prefix', help='new model prefix', default=default.rcnn_prefix, type=str) 152 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 153 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rcnn_epoch, type=int) 154 | parser.add_argument('--lr', help='base learning rate', default=default.rcnn_lr, type=float) 155 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rcnn_lr_step, type=str) 156 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 157 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', type=str) 158 | args = parser.parse_args() 159 | return args 160 | 161 | 162 | def main(): 163 | args = parse_args() 164 | logger.info('Called with argument: %s' % args) 165 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 166 | train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 167 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 168 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 169 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step, proposal=args.proposal) 170 | 171 | if __name__ == '__main__': 172 | main() 173 | -------------------------------------------------------------------------------- /rcnn/tools/train_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pprint 4 | import mxnet as mx 5 | 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import AnchorLoaderFPN 10 | from ..core.module import MutableModule 11 | from ..utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 12 | from ..utils.load_model import load_param 13 | 14 | 15 | def train_rpn(network, dataset, image_set, root_path, dataset_path, 16 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 17 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 18 | train_shared, lr, lr_step): 19 | # set up logger 20 | logging.basicConfig() 21 | logger = logging.getLogger() 22 | logger.setLevel(logging.INFO) 23 | 24 | # setup config 25 | assert 
config.TRAIN.BATCH_IMAGES==1 26 | 27 | # load symbol 28 | sym = eval('get_' + network + '_rpn')() 29 | feat_sym = [] 30 | for stride in config.RPN_FEAT_STRIDE: 31 | feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride]) 32 | 33 | 34 | # setup multi-gpu 35 | batch_size = len(ctx) 36 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 37 | 38 | # print config 39 | pprint.pprint(config) 40 | 41 | # load dataset and prepare imdb for training 42 | image_sets = [iset for iset in image_set.split('+')] 43 | roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, 44 | flip=not no_flip) 45 | for image_set in image_sets] 46 | roidb = merge_roidb(roidbs) 47 | roidb = filter_roidb(roidb) 48 | 49 | # load training data 50 | #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 51 | # ctx=ctx, work_load_list=work_load_list, 52 | # feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES, 53 | # anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, 54 | # allowed_border=9999) 55 | train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 56 | ctx=ctx, work_load_list=work_load_list) 57 | 58 | # infer max shape 59 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 60 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 61 | print 'providing maximum shape', max_data_shape, max_label_shape 62 | 63 | # infer shape 64 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 65 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 66 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 67 | out_shape_dict = zip(sym.list_outputs(), out_shape) 68 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 69 | print 'output shape' 70 | pprint.pprint(out_shape_dict) 71 | 72 | # load and initialize params 73 | if resume: 74 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 75 | else: 76 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 77 | init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2) 78 | init_internal = mx.init.Normal(sigma=0.01) 79 | for k in sym.list_arguments(): 80 | if k in data_shape_dict: 81 | continue 82 | if k not in arg_params: 83 | print 'init', k 84 | arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k]) 85 | if not k.endswith('bias'): 86 | init_internal(k, arg_params[k]) 87 | 88 | for k in sym.list_auxiliary_states(): 89 | if k not in aux_params: 90 | print 'init', k 91 | aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k]) 92 | init(k, aux_params[k]) 93 | 94 | # check parameter shapes 95 | for k in sym.list_arguments(): 96 | if k in data_shape_dict: 97 | continue 98 | assert k in arg_params, k + ' not initialized' 99 | assert arg_params[k].shape == arg_shape_dict[k], \ 100 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 101 | for k in sym.list_auxiliary_states(): 102 | assert k in aux_params, k + ' not initialized' 103 | assert aux_params[k].shape == aux_shape_dict[k], \ 104 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 105 | 106 | # create solver 107 | data_names = [k[0] for k in train_data.provide_data] 108 | label_names = [k[0] for k in train_data.provide_label] 109 | if 
train_shared: 110 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 111 | else: 112 | fixed_param_prefix = config.FIXED_PARAMS 113 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 114 | logger=logger, context=ctx, work_load_list=work_load_list, 115 | max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, 116 | fixed_param_prefix=fixed_param_prefix) 117 | 118 | # decide training params 119 | # metric 120 | eval_metric = metric.RPNAccMetric() 121 | cls_metric = metric.RPNLogLossMetric() 122 | bbox_metric = metric.RPNL1LossMetric() 123 | eval_metrics = mx.metric.CompositeEvalMetric() 124 | for child_metric in [eval_metric,cls_metric,bbox_metric]: 125 | eval_metrics.add(child_metric) 126 | # callback 127 | batch_end_callback = [] 128 | batch_end_callback.append(mx.callback.Speedometer(train_data.batch_size, frequent=frequent)) 129 | epoch_end_callback = mx.callback.do_checkpoint(prefix) 130 | # decide learning rate 131 | base_lr = lr 132 | lr_factor = 0.1 133 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 134 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 135 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 136 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 137 | print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters 138 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 139 | # optimizer 140 | optimizer_params = {'momentum': 0.9, 141 | 'wd': 0.0001, 142 | 'learning_rate': lr, 143 | 'lr_scheduler': lr_scheduler, 144 | 'rescale_grad': (1.0 / batch_size), 145 | 'clip_gradient': 5} 146 | 147 | # train 148 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 149 | batch_end_callback=batch_end_callback, kvstore=kvstore, 150 | optimizer='sgd', optimizer_params=optimizer_params, 151 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 152 | 153 | 154 | def parse_args(): 155 | parser = argparse.ArgumentParser(description='Train a Region Proposal Network') 156 | # general 157 | parser.add_argument('--network', help='network name', default=default.network, type=str) 158 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 159 | args, rest = parser.parse_known_args() 160 | generate_config(args.network, args.dataset) 161 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 162 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 163 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 164 | # training 165 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 166 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 167 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 168 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 169 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 170 | parser.add_argument('--resume', help='continue training', action='store_true') 171 | # rpn 172 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 173 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 
174 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 175 | parser.add_argument('--prefix', help='new model prefix', default=default.rpn_prefix, type=str) 176 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 177 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rpn_epoch, type=int) 178 | parser.add_argument('--lr', help='base learning rate', default=default.rpn_lr, type=float) 179 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rpn_lr_step, type=str) 180 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 181 | args = parser.parse_args() 182 | return args 183 | 184 | 185 | def main(): 186 | args = parse_args() 187 | print 'Called with argument:', args 188 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 189 | train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 190 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 191 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 192 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step) 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /rcnn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/utils/__init__.py -------------------------------------------------------------------------------- /rcnn/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | from .load_model import load_checkpoint 2 | from .save_model import save_checkpoint 3 | 4 | 5 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 6 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 7 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 8 | arg_names = args1.keys() + args2.keys() 9 | aux_names = auxs1.keys() + auxs2.keys() 10 | args = dict() 11 | for arg in arg_names: 12 | if arg in args1: 13 | args[arg] = args1[arg] 14 | else: 15 | args[arg] = args2[arg] 16 | auxs = dict() 17 | for aux in aux_names: 18 | if aux in auxs1: 19 | auxs[aux] = auxs1[aux] 20 | else: 21 | auxs[aux] = auxs2[aux] 22 | save_checkpoint(prefix_out, epoch_out, args, auxs) 23 | -------------------------------------------------------------------------------- /rcnn/utils/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..logger import logger 3 | from ..config import config 4 | from ..dataset import * 5 | 6 | 7 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, 8 | flip=False): 9 | """ load ground truth roidb """ 10 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path) 11 | roidb = imdb.gt_roidb() 12 | print('roidb size', len(roidb)) 13 | if flip: 14 | roidb = imdb.append_flipped_images(roidb) 15 | print('flipped roidb size', len(roidb)) 16 | return roidb 17 | 18 | 19 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, 20 | proposal='rpn', append_gt=True, flip=False): 21 | """ load proposal roidb (append_gt when training) """ 22 | imdb = eval(dataset_name)(image_set_name, root_path, 
dataset_path) 23 | gt_roidb = imdb.gt_roidb() 24 | roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb, append_gt) 25 | if flip: 26 | roidb = imdb.append_flipped_images(roidb) 27 | return roidb 28 | 29 | 30 | def merge_roidb(roidbs): 31 | """ roidb are list, concat them together """ 32 | roidb = roidbs[0] 33 | for r in roidbs[1:]: 34 | roidb.extend(r) 35 | return roidb 36 | 37 | 38 | def filter_roidb(roidb): 39 | """ remove roidb entries without usable rois """ 40 | 41 | def is_valid(entry): 42 | """ valid images have at least 1 fg or bg roi """ 43 | overlaps = entry['max_overlaps'] 44 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 45 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 46 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 47 | #valid = len(fg_inds) > 0 48 | return valid 49 | 50 | num = len(roidb) 51 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 52 | num_after = len(filtered_roidb) 53 | logger.info('load data: filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) 54 | 55 | return filtered_roidb 56 | -------------------------------------------------------------------------------- /rcnn/utils/load_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def load_checkpoint(prefix, epoch): 5 | """ 6 | Load model checkpoint from file. 7 | :param prefix: Prefix of model name. 8 | :param epoch: Epoch number of model we would like to load. 9 | :return: (arg_params, aux_params) 10 | arg_params : dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's weights. 12 | aux_params : dict of str to NDArray 13 | Model parameter, dict of name to NDArray of net's auxiliary states. 14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | arg_params = {} 17 | aux_params = {} 18 | for k, v in save_dict.items(): 19 | tp, name = k.split(':', 1) 20 | if tp == 'arg': 21 | arg_params[name] = v 22 | if tp == 'aux': 23 | aux_params[name] = v 24 | return arg_params, aux_params 25 | 26 | 27 | def convert_context(params, ctx): 28 | """ 29 | :param params: dict of str to NDArray 30 | :param ctx: the context to convert to 31 | :return: dict of str of NDArray with context ctx 32 | """ 33 | new_params = dict() 34 | for k, v in params.items(): 35 | new_params[k] = v.as_in_context(ctx) 36 | return new_params 37 | 38 | 39 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 40 | """ 41 | wrapper for load checkpoint 42 | :param prefix: Prefix of model name. 43 | :param epoch: Epoch number of model we would like to load. 44 | :param convert: reference model should be converted to GPU NDArray first 45 | :param ctx: if convert then ctx must be designated. 
46 | :param process: model should drop any test 47 | :return: (arg_params, aux_params) 48 | """ 49 | arg_params, aux_params = load_checkpoint(prefix, epoch) 50 | if convert: 51 | if ctx is None: 52 | ctx = mx.cpu() 53 | arg_params = convert_context(arg_params, ctx) 54 | aux_params = convert_context(aux_params, ctx) 55 | if process: 56 | tests = [k for k in arg_params.keys() if '_test' in k] 57 | for test in tests: 58 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 59 | return arg_params, aux_params 60 | -------------------------------------------------------------------------------- /rcnn/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters. 14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | import numpy as np 4 | import datetime 5 | import os 6 | import glob 7 | from retinaface import RetinaFace 8 | 9 | detector = RetinaFace(gpu=0) 10 | 11 | img_path = 'data/retinaface/val/images' 12 | 13 | dir = os.listdir(img_path) 14 | for im in dir: 15 | img = cv2.imread(os.path.join(img_path, im)) 16 | 17 | faces, landmarks = detector.detect(img, scales_index=1, do_flip=True) 18 | 19 | if faces is not None: 20 | print('find', faces.shape[0], 'faces') 21 | for i in range(faces.shape[0]): 22 | box = faces[i].astype(np.int) 23 | color = (0, 0, 255) 24 | cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2) 25 | title = "%.2f" % (faces[i][4]) 26 | p3 = (max(box[0], 15), max(box[1], 15)) 27 | cv2.putText(img, title, p3, cv2.FONT_ITALIC, 0.6, (0, 255, 0), 1) 28 | if landmarks is not None: 29 | landmark5 = landmarks[i].astype(np.int) 30 | for l in range(landmark5.shape[0]): 31 | color = (0, 0, 255) 32 | if l == 0 or l == 3: 33 | color = (0, 255, 0) 34 | cv2.circle(img, (landmark5[l][0], landmark5[l][1]), 1, color, 2) 35 | cv2.imwrite(im, img) 36 | cv2.imshow('0', img) 37 | cv2.waitKey(1) 38 | -------------------------------------------------------------------------------- /test_widerface.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import sys 5 | import os 6 | import time 7 | import numpy as np 8 | import mxnet as mx 9 | from mxnet import ndarray as nd 10 | import cv2 11 | from rcnn.logger import logger 12 | #from rcnn.config import config, default, generate_config 13 | #from rcnn.tools.test_rcnn import test_rcnn 14 | #from rcnn.tools.test_rpn import test_rpn 15 | from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes, landmark_pred 16 | from rcnn.processing.generate_anchor import 
generate_anchors_fpn, anchors_plane 17 | from rcnn.processing.nms import gpu_nms_wrapper 18 | from rcnn.processing.bbox_transform import bbox_overlaps 19 | from rcnn.dataset import retinaface 20 | from retinaface import RetinaFace 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(description='Test widerface by retinaface detector') 25 | # general 26 | parser.add_argument('--network', help='network name', default='net3', type=str) 27 | parser.add_argument('--dataset', help='dataset name', default='retinaface', type=str) 28 | parser.add_argument('--image-set', help='image_set name', default='val', type=str) 29 | parser.add_argument('--root-path', help='output data folder', default='./data', type=str) 30 | parser.add_argument('--dataset-path', help='dataset path', default='./data/retinaface', type=str) 31 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 32 | # testing 33 | parser.add_argument('--prefix', help='model to test with', default='', type=str) 34 | parser.add_argument('--epoch', help='model to test with', default=0, type=int) 35 | parser.add_argument('--output', help='output folder', default='./wout', type=str) 36 | parser.add_argument('--nocrop', help='', action='store_true') 37 | parser.add_argument('--thresh', help='valid detection threshold', default=0.02, type=float) 38 | parser.add_argument('--mode', help='test mode, 0 for fast, 1 for accurate', default=1, type=int) 39 | #parser.add_argument('--pyramid', help='enable pyramid test', action='store_true') 40 | #parser.add_argument('--bbox-vote', help='', action='store_true') 41 | parser.add_argument('--part', help='', default=0, type=int) 42 | parser.add_argument('--parts', help='', default=1, type=int) 43 | args = parser.parse_args() 44 | return args 45 | 46 | detector = None 47 | args = None 48 | imgid = -1 49 | 50 | def get_boxes(roi, pyramid): 51 | global imgid 52 | im = cv2.imread(roi['image']) 53 | do_flip = False 54 | if not pyramid: 55 | target_size = 1200 56 | max_size = 1600 57 | #do_flip = True 58 | target_size = 1504 59 | max_size = 2000 60 | target_size = 1600 61 | max_size = 2150 62 | im_shape = im.shape 63 | im_size_min = np.min(im_shape[0:2]) 64 | im_size_max = np.max(im_shape[0:2]) 65 | im_scale = float(target_size) / float(im_size_min) 66 | # prevent bigger axis from being more than max_size: 67 | if np.round(im_scale * im_size_max) > max_size: 68 | im_scale = float(max_size) / float(im_size_max) 69 | scales = [im_scale] 70 | else: 71 | do_flip = True 72 | #TEST_SCALES = [500, 800, 1200, 1600] 73 | TEST_SCALES = [500, 800, 1100, 1400, 1700] 74 | target_size = 800 75 | max_size = 1200 76 | im_shape = im.shape 77 | im_size_min = np.min(im_shape[0:2]) 78 | im_size_max = np.max(im_shape[0:2]) 79 | im_scale = float(target_size) / float(im_size_min) 80 | # prevent bigger axis from being more than max_size: 81 | if np.round(im_scale * im_size_max) > max_size: 82 | im_scale = float(max_size) / float(im_size_max) 83 | scales = [float(scale)/target_size*im_scale for scale in TEST_SCALES] 84 | boxes, landmarks = detector.detect(im, threshold=args.thresh, scales = scales, do_flip=do_flip) 85 | #print(boxes.shape, landmarks.shape) 86 | if imgid>=0 and imgid<100: 87 | font = cv2.FONT_HERSHEY_SIMPLEX 88 | for i in xrange(boxes.shape[0]): 89 | box = boxes[i] 90 | ibox = box[0:4].copy().astype(np.int) 91 | cv2.rectangle(im, (ibox[0], ibox[1]), (ibox[2], ibox[3]), (255, 0, 0), 2) 92 | #print('box', ibox) 93 | #if len(ibox)>5: 94 | # for l in xrange(5): 95 | # pp = 
(ibox[5+l*2], ibox[6+l*2]) 96 | # cv2.circle(im, (pp[0], pp[1]), 1, (0, 0, 255), 1) 97 | blur = box[5] 98 | k = "%.3f"%blur 99 | cv2.putText(im,k,(ibox[0]+2,ibox[1]+14), font, 0.6, (0,255,0), 2) 100 | #landmarks = box[6:21].reshape( (5,3) ) 101 | if landmarks is not None: 102 | for l in xrange(5): 103 | color = (0,255,0) 104 | landmark = landmarks[i][l] 105 | pp = (int(landmark[0]), int(landmark[1])) 106 | if landmark[2]-0.5<0.0: 107 | color = (0,0,255) 108 | cv2.circle(im, (pp[0], pp[1]), 1, color, 2) 109 | filename = './testimages/%d.jpg'%imgid 110 | cv2.imwrite(filename, im) 111 | print(filename, 'wrote') 112 | imgid+=1 113 | 114 | return boxes 115 | 116 | 117 | def test(args): 118 | print('test with', args) 119 | global detector 120 | output_folder = args.output 121 | if not os.path.exists(output_folder): 122 | os.mkdir(output_folder) 123 | detector = RetinaFace(args.prefix, args.epoch, args.gpu, network=args.network, nocrop=args.nocrop, vote=args.bbox_vote) 124 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 125 | roidb = imdb.gt_roidb() 126 | gt_overlaps = np.zeros(0) 127 | overall = [0.0, 0.0] 128 | gt_max = np.array( (0.0, 0.0) ) 129 | num_pos = 0 130 | print('roidb size', len(roidb)) 131 | 132 | for i in xrange(len(roidb)): 133 | if i%args.parts!=args.part: 134 | continue 135 | #if i%10==0: 136 | # print('processing', i, file=sys.stderr) 137 | roi = roidb[i] 138 | boxes = get_boxes(roi, args.pyramid) 139 | if 'boxes' in roi: 140 | gt_boxes = roi['boxes'].copy() 141 | gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) 142 | num_pos += gt_boxes.shape[0] 143 | 144 | overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) 145 | #print(im_info, gt_boxes.shape, boxes.shape, overlaps.shape, file=sys.stderr) 146 | 147 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 148 | 149 | if boxes.shape[0]>0: 150 | _gt_overlaps = overlaps.max(axis=0) 151 | #print('max_overlaps', _gt_overlaps, file=sys.stderr) 152 | for j in range(len(_gt_overlaps)): 153 | if _gt_overlaps[j]>0.5: 154 | continue 155 | #print(j, 'failed', gt_boxes[j], 'max_overlap:', _gt_overlaps[j], file=sys.stderr) 156 | 157 | # append recorded IoU coverage level 158 | found = (_gt_overlaps > 0.5).sum() 159 | recall = found / float(gt_boxes.shape[0]) 160 | #print('recall', _recall, gt_boxes.shape[0], boxes.shape[0], gt_areas, 'num:', i, file=sys.stderr) 161 | overall[0]+=found 162 | overall[1]+=gt_boxes.shape[0] 163 | #gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 164 | #_recall = (gt_overlaps >= threshold).sum() / float(num_pos) 165 | recall_all = float(overall[0])/overall[1] 166 | #print('recall_all', _recall, file=sys.stderr) 167 | print('[%d]'%i, 'recall', recall, (gt_boxes.shape[0], boxes.shape[0]), 'all:', recall_all, file=sys.stderr) 168 | else: 169 | print('[%d]'%i, 'detect %d faces'%boxes.shape[0]) 170 | 171 | 172 | _vec = roidb[i]['image'].split('/') 173 | out_dir = os.path.join(output_folder, _vec[-2]) 174 | if not os.path.exists(out_dir): 175 | os.mkdir(out_dir) 176 | out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt')) 177 | with open(out_file, 'w') as f: 178 | name = '/'.join(roidb[i]['image'].split('/')[-2:]) 179 | f.write("%s\n"%(name)) 180 | f.write("%d\n"%(boxes.shape[0])) 181 | for b in range(boxes.shape[0]): 182 | box = boxes[b] 183 | f.write("%d %d %d %d %g \n"%(box[0], box[1], box[2]-box[0], box[3]-box[1], box[4])) 184 | 185 | def main(): 186 | global args 187 | args = parse_args() 188 | args.pyramid = False 
189 | args.bbox_vote = False 190 | if args.mode==1: 191 | args.pyramid = True 192 | args.bbox_vote = True 193 | logger.info('Called with argument: %s' % args) 194 | test(args) 195 | 196 | if __name__ == '__main__': 197 | main() 198 | 199 | -------------------------------------------------------------------------------- /train_model.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | source /etc/profile 3 | export CUDA_VISIBLE_DEVICES='0' 4 | nohup python -u train.py --network resnet > log.log 2>&1 & --------------------------------------------------------------------------------
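Note on the learning-rate schedule shared by train_maskrcnn.py, train_rcnn.py and train_rpn.py: the decay epochs from --lr_step are shifted by begin_epoch, decays that already passed are folded into the starting rate, and the remaining epochs are converted to iteration counts for MXNet's MultiFactorScheduler. A minimal standalone sketch of that logic, assuming num_images = len(roidb) and batch_size = len(ctx) as in the scripts; the wrapper name make_lr_scheduler is mine, not part of the repository:

import mxnet as mx

def make_lr_scheduler(base_lr, lr_step, begin_epoch, num_images, batch_size, lr_factor=0.1):
    # decay points given in epochs, e.g. lr_step = '7,10'
    lr_epoch = [int(e) for e in lr_step.split(',')]
    # keep only the decay points still ahead of the resume epoch
    lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]
    # fold decays that already happened into the starting rate
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    # one epoch is num_images / batch_size parameter updates
    lr_iters = [int(e * num_images / batch_size) for e in lr_epoch_diff]
    return lr, mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

For example, resuming with begin_epoch=8 and lr_step='7,10' starts at base_lr * 0.1 (the epoch-7 decay already happened) and schedules the one remaining decay two epochs into the resumed run.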
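The checkpoint helpers in rcnn/utils serialize everything into a single '%s-%04d.params' file whose keys carry an 'arg:' or 'aux:' prefix, so load_checkpoint/load_param and save_checkpoint are exact inverses. A small round-trip sketch; the 'model/rcnn' prefix and epoch 8 are hypothetical values for illustration:

from rcnn.utils.load_model import load_param
from rcnn.utils.save_model import save_checkpoint

# reads model/rcnn-0008.params, splitting keys on the 'arg:'/'aux:' prefixes;
# convert=True copies every array to mx.cpu() (or to a supplied ctx)
arg_params, aux_params = load_param('model/rcnn', 8, convert=True)
# writes model/rcnn-copy-0008.params with the same prefixed-key layout
save_checkpoint('model/rcnn-copy', 8, arg_params, aux_params)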
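In test_widerface.py, get_boxes chooses its base scale by fitting the short image side to target_size while capping the long side at max_size; pyramid mode then spreads that base scale across TEST_SCALES. The core computation, extracted into a standalone function for clarity (the name compute_im_scale is mine):

import numpy as np

def compute_im_scale(im_shape, target_size, max_size):
    # scale so the short side reaches target_size ...
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # ... unless that would push the long side past max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

For a 720x1280 frame with target_size=1600 and max_size=2150 this returns 2150/1280, about 1.68, so the long side lands exactly on max_size instead of overshooting it.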
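test_widerface.py writes one text file per image in the WIDER FACE submission format: the relative image name, the detection count, then one 'x y w h score' line per box. A minimal reader for those files; read_widerface_dets is a hypothetical helper, not part of the repository:

def read_widerface_dets(path):
    # parse: name line, count line, then n lines of 'x y w h score'
    with open(path) as f:
        name = f.readline().strip()
        n = int(f.readline())
        dets = [[float(v) for v in f.readline().split()] for _ in range(n)]
    return name, dets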