├── .gitattributes ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── images ├── 0000.png └── 00001673.jpg ├── rcnn ├── PY_OP │ ├── __init__.py │ └── rpn_fpn_ohem3.py ├── __init__.py ├── config.py ├── core │ ├── __init__.py │ ├── callback.py │ ├── loader.py │ ├── metric.py │ ├── module.py │ ├── module_bak.py │ └── tester.py ├── cython │ ├── .gitignore │ ├── __init__.py │ ├── anchors.pyx │ ├── bbox.pyx │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── setup.py ├── dataset │ ├── __init__.py │ ├── ds_utils.py │ ├── imdb.py │ └── retinaface.py ├── io │ ├── __init__.py │ ├── image.py │ ├── rcnn.py │ └── rpn.py ├── logger.py ├── processing │ ├── __init__.py │ ├── assign_levels.py │ ├── bbox_regression.py │ ├── bbox_transform.py │ ├── generate_anchor.py │ └── nms.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── mask.py │ ├── maskApi.c │ ├── maskApi.h │ └── setup.py ├── sample_config.py ├── symbol │ ├── __init__.py │ ├── pyramidbox.py │ ├── symbol_common.py │ ├── symbol_common.py.bak │ ├── symbol_mnet.py │ ├── symbol_mnet.py.bak │ ├── symbol_resnet.py │ └── symbol_ssh.py ├── tools │ ├── __init__.py │ ├── demo_images.py │ ├── demo_single_image.py │ ├── reeval.py │ ├── test_rcnn.py │ ├── test_rpn.py │ ├── train_maskrcnn.py │ ├── train_rcnn.py │ └── train_rpn.py └── utils │ ├── __init__.py │ ├── combine_model.py │ ├── load_data.py │ ├── load_model.py │ └── save_model.py ├── retinaface.py ├── test.py ├── test_widerface.py ├── train.py └── train_model.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 89 | # install all needed dependencies. 90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	cd rcnn/cython/; python setup.py build_ext --inplace; rm -rf build; cd ../../
3 | 	cd rcnn/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../
4 | clean:
5 | 	cd rcnn/cython/; rm *.so *.c *.cpp; cd ../../
6 | 	cd rcnn/pycocotools/; rm *.so; cd ../../
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RetinaDetection Object Detector
2 | 
3 | ## Introduction
4 | 
5 | RetinaDetector is a detection method modified from RetinaFace. The original work is a practical single-stage [SOTA](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) face detector, initially described in an [arXiv technical report](https://arxiv.org/abs/1905.00641).
6 | 
7 | ## Data
8 | 
9 | 0. Organise the dataset directory as follows:
10 | 
11 | ```Shell
12 |   data/retinaface/
13 |     train/
14 |       images/
15 |       label.txt
16 |     val/
17 |       images/
18 |       label.txt
19 |     test/
20 |       images/
21 |       label.txt
22 | ```
23 | 
24 | ## Install
25 | 
26 | 1. Install MXNet with GPU support.
27 | 2. Install the Deformable Convolution V2 operator from [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets) if you use the DCN based backbone.
28 | 3. Type ``make`` to build cxx tools.
29 | 
30 | ## Training
31 | 
32 | Please check ``train.py`` for training.
33 | 
34 | 1. Copy ``rcnn/sample_config.py`` to ``rcnn/config.py``
35 | 
36 | For better training results, the following parameters can be tuned to your data:
37 | 
38 | ```python
39 | config.TRAIN.MIN_BOX_SIZE = 10   # minimum bbox size
40 | config.FACE_LANDMARK = False     # use landmarks
41 | config.USE_BLUR = False
42 | config.BBOX_MASK_THRESH = 0
43 | config.COLOR_MODE = 2            # color augmentation
44 | config.COLOR_JITTERING = 0.125
45 | ```
46 | 
47 | Invalid faces are filtered out as follows:
48 | ```python
49 | if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or (y2 - y1) < config.TRAIN.MIN_BOX_SIZE:
50 |     continue
51 | if self._split.startswith('train'):
52 |     blur[ix] = values[19]
53 |     if blur[ix] < 0.25:
54 |         continue
55 | if config.BBOX_MASK_THRESH > 0:
56 |     if (x2 - x1) < config.BBOX_MASK_THRESH or (y2 - y1) < config.BBOX_MASK_THRESH:
57 |         boxes_mask.append(np.array([x1, y1, x2, y2], np.float))
58 |         continue
59 |     if self._split.startswith('train'):
60 |         if blur[ix] < 0.35:
61 |             boxes_mask.append(np.array([x1, y1, x2, y2], np.float))
62 |             continue
63 | ```
64 | 
65 | 2. Download pretrained models and put them into ``model/``.
66 | 
67 | 	ImageNet ResNet50 ([baidu cloud](https://pan.baidu.com/s/1WAkU9ZA_j-OmzO-sdk9whA) and [dropbox](https://www.dropbox.com/s/48b850vmnaaasfl/imagenet-resnet-50.zip?dl=0)).
68 | 
69 | 	ImageNet ResNet152 ([baidu cloud](https://pan.baidu.com/s/1nzQ6CzmdKFzg8bM8ChZFQg) and [dropbox](https://www.dropbox.com/s/8ypcra4nqvm32v6/imagenet-resnet-152.zip?dl=0)).
70 | 
71 | 3. Start training with ``sh train_model.sh``.
72 | Before training, you can check the ``resnet`` network configuration (e.g. pretrained model path, anchor setting, learning rate policy, etc.) in ``rcnn/config.py``.
73 | 
74 | ## Testing
75 | 
76 | Please check ``test.py`` for testing.
77 | 
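For a quick check on a single image, the ``RetinaFace`` wrapper in ``retinaface.py`` can also be called directly. The snippet below is a minimal sketch that assumes the upstream RetinaFace interface; the model prefix, epoch, GPU id and network mode are illustrative placeholders:

```python
import cv2
from retinaface import RetinaFace

# prefix, epoch, GPU id and network mode are placeholders --
# point them at your own trained checkpoint
detector = RetinaFace('./model/retina', 0, 0, 'net3')

img = cv2.imread('images/00001673.jpg')
# faces is an (N, 5) array of [x1, y1, x2, y2, score]; landmarks are
# only returned when the model was trained with config.FACE_LANDMARK
faces, landmarks = detector.detect(img, threshold=0.8, scales=[1.0], do_flip=False)
print(faces)
```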
78 | ## Result
79 | 
80 | 1. Defect detection
81 | 
82 | ![MASK1](https://github.com/bleakie/RetinaDetection/blob/master/images/00001673.jpg)
83 | 
84 | 2. Face detection + face alignment
85 | 
86 | ![MASK1](https://github.com/bleakie/RetinaDetection/blob/master/images/0000.png)
87 | 
88 | ## Models
89 | 
90 | Face detection model with fewer false detections than the original. Faces at large angles, or with a blur score above 0.6, are ignored automatically, which makes the model better suited to face recognition applications: click [here](http://www.multcloud.com/share/5079e926-283b-4833-a216-b3de42eea0fe).
91 | 
92 | ## ToDo
93 | 
94 | Since the defect detection data is confidential, the defect detection models will not be released for now.
95 | 
96 | ## References
97 | 
98 | ```
99 | @article{deng2019retinaface,
100 |   title={RetinaFace: Single-stage Dense Face Localisation in the Wild},
101 |   author={Deng, Jiankang and Guo, Jia and Zhou, Yuxiang and Yu, Jinke and Kotsia, Irene and Zafeiriou, Stefanos},
102 |   journal={arXiv preprint arXiv:1905.00641},
103 |   year={2019}
104 | }
105 | ```
106 | 
107 | 
--------------------------------------------------------------------------------
/images/0000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/images/0000.png
--------------------------------------------------------------------------------
/images/00001673.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/images/00001673.jpg
--------------------------------------------------------------------------------
/rcnn/PY_OP/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/PY_OP/__init__.py
--------------------------------------------------------------------------------
/rcnn/PY_OP/rpn_fpn_ohem3.py:
--------------------------------------------------------------------------------
1 | 2 | from __future__ import print_function 3 | import sys 4 | import mxnet as mx 5 | import numpy as np 6 | from distutils.util import strtobool 7 | from ..config import config, generate_config 8 | 9 | 10 | STAT = {0:0} 11 | STEP = 28800 12 | 13 | class RPNFPNOHEM3Operator(mx.operator.CustomOp): 14 | def __init__(self, stride=0, network='', dataset='', prefix=''): 15 | super(RPNFPNOHEM3Operator, self).__init__() 16 | self.stride = int(stride) 17 | self.prefix = prefix 18 | generate_config(network, dataset) 19 | self.mode = 0 20 | if self.prefix!='face': 21 | self.mode = 0 22 | if network=='pbox': 23 | self.mode = 2 24 | global STAT 25 | for k in config.RPN_FEAT_STRIDE: 26 | STAT[k] = [0,0,0] 27 | 28 | def forward(self, is_train, req, in_data, out_data, aux): 29 | global STAT 30 | 31 | cls_score = in_data[0].asnumpy() #BS, 2, ANCHORS 32 | labels_raw = in_data[1].asnumpy() # BS, ANCHORS 33 | 34 | A = config.NUM_ANCHORS 35 | anchor_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1],1), dtype=np.float32 ) 36 | valid_count = np.zeros( (labels_raw.shape[0],1), dtype=np.float32 ) 37 | #print('anchor_weight', anchor_weight.shape) 38 | 39 | #assert labels.shape[0]==1 40 | #assert cls_score.shape[0]==1 41 | #assert bbox_weight.shape[0]==1 42 | #print('shape', cls_score.shape, labels.shape, file=sys.stderr) 43 | #print('bbox_weight 0', bbox_weight.shape, file=sys.stderr) 44 | #bbox_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1], 4), dtype=np.float32) 45 | _stat = [0,0,0] 46 | for ibatch in range(labels_raw.shape[0]): 47 | _anchor_weight = np.zeros( (labels_raw.shape[1],1), dtype=np.float32) 48 | labels = labels_raw[ibatch] 49 | fg_score = cls_score[ibatch,1,:] - cls_score[ibatch,0,:] 50 | 51 | 52 | 53 | fg_inds = np.where(labels>0)[0] 54 | num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE) 55 | origin_num_fg = len(fg_inds) 56 | #print(len(fg_inds), num_fg, file=sys.stderr) 57 | if len(fg_inds) > num_fg: 58 | if self.mode>=1: 59 | 
disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 60 | labels[disable_inds] = -1 61 | else: 62 | pos_ohem_scores = fg_score[fg_inds] 63 | order_pos_ohem_scores = pos_ohem_scores.ravel().argsort() 64 | sampled_inds = fg_inds[order_pos_ohem_scores[:num_fg]] 65 | labels[fg_inds] = -1 66 | labels[sampled_inds] = 1 67 | 68 | n_fg = np.sum(labels>0) 69 | fg_inds = np.where(labels>0)[0] 70 | num_bg = config.TRAIN.RPN_BATCH_SIZE - n_fg 71 | #num_bg = max(10, num_fg*int(1.0/config.TRAIN.RPN_FG_FRACTION-1)) 72 | #if self.mode==2: 73 | # num_bg = num_fg*int(1.0/config.TRAIN.RPN_FG_FRACTION-1) 74 | 75 | bg_inds = np.where(labels == 0)[0] 76 | origin_num_bg = len(bg_inds) 77 | if num_bg==0: 78 | labels[bg_inds] = -1 79 | elif len(bg_inds) > num_bg: 80 | # sort ohem scores 81 | 82 | if self.mode>=1: 83 | disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 84 | labels[disable_inds] = -1 85 | else: 86 | neg_ohem_scores = fg_score[bg_inds] 87 | order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1] 88 | sampled_inds = bg_inds[order_neg_ohem_scores[:num_bg]] 89 | #print('sampled_inds_bg', sampled_inds, file=sys.stderr) 90 | labels[bg_inds] = -1 91 | labels[sampled_inds] = 0 92 | 93 | if n_fg>0: 94 | order0_labels = labels.reshape( (1, A, -1) ).transpose( (0, 2, 1) ).reshape( (-1,) ) 95 | bbox_fg_inds = np.where(order0_labels>0)[0] 96 | #print('bbox_fg_inds, order0 ', bbox_fg_inds, file=sys.stderr) 97 | _anchor_weight[bbox_fg_inds,:] = 1.0 98 | anchor_weight[ibatch] = _anchor_weight 99 | valid_count[ibatch][0] = n_fg 100 | 101 | if self.prefix=='face': 102 | #print('fg-bg', self.stride, n_fg, num_bg) 103 | STAT[0]+=1 104 | STAT[self.stride][0] += config.TRAIN.RPN_BATCH_SIZE 105 | STAT[self.stride][1] += n_fg 106 | STAT[self.stride][2] += np.sum(fg_score[fg_inds]>=0) 107 | #_stat[0] += config.TRAIN.RPN_BATCH_SIZE 108 | #_stat[1] += n_fg 109 | #_stat[2] += np.sum(fg_score[fg_inds]>=0) 110 | #print('stride num_fg', self.stride, n_fg, file=sys.stderr) 111 | #ACC[self.stride] += np.sum(fg_score[fg_inds]>=0) 112 | #x = float(labels_raw.shape[0]*len(config.RPN_FEAT_STRIDE)) 113 | x = 1.0 114 | if STAT[0]%STEP==0: 115 | _str = ['STAT'] 116 | STAT[0] = 0 117 | for k in config.RPN_FEAT_STRIDE: 118 | acc = float(STAT[k][2])/STAT[k][1] 119 | acc0 = float(STAT[k][1])/STAT[k][0] 120 | #_str.append("%d: all-fg(%d, %d, %.4f), fg-fgcorrect(%d, %d, %.4f)"%(k,STAT[k][0], STAT[k][1], acc0, STAT[k][1], STAT[k][2], acc)) 121 | _str.append("%d: (%d, %d, %.4f)"%(k, STAT[k][1], STAT[k][2], acc)) 122 | STAT[k] = [0,0,0] 123 | _str = ' | '.join(_str) 124 | print(_str, file=sys.stderr) 125 | #if self.stride==4 and num_fg>0: 126 | # print('_stat_', self.stride, num_fg, num_bg, file=sys.stderr) 127 | 128 | #labels_ohem = mx.nd.array(labels_raw) 129 | #anchor_weight = mx.nd.array(anchor_weight) 130 | #print('valid_count', self.stride, np.sum(valid_count)) 131 | #print('_stat', _stat, valid_count) 132 | 133 | for ind, val in enumerate([labels_raw, anchor_weight, valid_count]): 134 | val = mx.nd.array(val) 135 | self.assign(out_data[ind], req[ind], val) 136 | 137 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 138 | for i in range(len(in_grad)): 139 | self.assign(in_grad[i], req[i], 0) 140 | 141 | 142 | @mx.operator.register('rpn_fpn_ohem3') 143 | class RPNFPNOHEM3Prop(mx.operator.CustomOpProp): 144 | def __init__(self, stride=0, network='', dataset='', prefix=''): 145 | super(RPNFPNOHEM3Prop, self).__init__(need_top_grad=False) 146 | 
self.stride = stride 147 | self.network=network 148 | self.dataset=dataset 149 | self.prefix = prefix 150 | 151 | def list_arguments(self): 152 | return ['cls_score', 'labels'] 153 | 154 | def list_outputs(self): 155 | return ['labels_ohem', 'anchor_weight', 'valid_count'] 156 | 157 | def infer_shape(self, in_shape): 158 | labels_shape = in_shape[1] 159 | #print('in_rpn_ohem', in_shape[0], in_shape[1], in_shape[2], file=sys.stderr) 160 | anchor_weight_shape = [labels_shape[0], labels_shape[1], 1] 161 | #print('in_rpn_ohem', labels_shape, anchor_weight_shape) 162 | 163 | return in_shape, \ 164 | [labels_shape, anchor_weight_shape, [labels_shape[0], 1]] 165 | 166 | def create_operator(self, ctx, shapes, dtypes): 167 | return RPNFPNOHEM3Operator(self.stride, self.network, self.dataset, self.prefix) 168 | 169 | def declare_backward_dependency(self, out_grad, in_data, out_data): 170 | return [] 171 | 172 | 173 | -------------------------------------------------------------------------------- /rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/__init__.py -------------------------------------------------------------------------------- /rcnn/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | 4 | config = edict() 5 | 6 | # network related params 7 | config.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 8 | config.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 9 | config.PIXEL_SCALE = 1.0 10 | config.IMAGE_STRIDE = 0 11 | 12 | # dataset related params 13 | config.NUM_CLASSES = 2 14 | config.PRE_SCALES = [(1200, 1600)] # first is scale (the shorter side); second is max size 15 | config.SCALES = [(640, 640)] # first is scale (the shorter side); second is max size 16 | #config.SCALES = [(800, 800)] # first is scale (the shorter side); second is max size 17 | config.ORIGIN_SCALE = False 18 | 19 | _ratio = (1.,) 20 | 21 | RAC_SSH = { 22 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 23 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 24 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 25 | } 26 | 27 | _ratio = (1.,1.5) 28 | RAC_SSH2 = { 29 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 30 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 31 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 32 | } 33 | 34 | _ratio = (1.,1.5) 35 | RAC_SSH3 = { 36 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 37 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 38 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 39 | '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 40 | } 41 | 42 | RAC_RETINA = {} 43 | _ratios = (1.0,) 44 | _ass = 2.0**(1.0/3) 45 | _basescale = 1.0 46 | for _stride in [4, 8, 16, 32, 64]: 47 | key = str(_stride) 48 | value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999} 49 | scales = [] 50 | for _ in range(3): 51 | scales.append(_basescale) 52 | _basescale *= _ass 53 | value['SCALES'] = tuple(scales) 54 | RAC_RETINA[key] = value 55 | 56 | 57 | config.RPN_ANCHOR_CFG = 
RAC_SSH #default 58 | 59 | config.NET_MODE = 2 60 | # config.HEAD_MODULE = 'SSH' 61 | config.HEAD_MODULE = 'RF' 62 | config.LR_MODE = 0 63 | config.LANDMARK_LR_MULT = 2.0 64 | config.HEAD_FILTER_NUM = 256 65 | config.CONTEXT_FILTER_RATIO = 1 66 | config.max_feat_channel = 9999 67 | 68 | config.USE_CROP = True 69 | config.USE_DCN = 0 70 | config.FACE_LANDMARK = False 71 | config.USE_OCCLUSION = False 72 | config.USE_BLUR = False 73 | config.MORE_SMALL_BOX = True 74 | 75 | config.LAYER_FIX = False 76 | 77 | config.HEAD_BOX = False 78 | config.DENSE_ANCHOR = False 79 | config.USE_MAXOUT = 0 80 | config.SHARE_WEIGHT_BBOX = False 81 | config.SHARE_WEIGHT_LANDMARK = False 82 | 83 | config.RANDOM_FEAT_STRIDE = False 84 | config.NUM_CPU = 4 85 | config.MIXUP = 0.0 86 | config.USE_3D = False 87 | 88 | config.BBOX_MASK_THRESH = 0 89 | config.COLOR_MODE = 2 90 | config.COLOR_JITTERING = 0.125 91 | 92 | 93 | config.TRAIN = edict() 94 | 95 | config.TRAIN.IMAGE_ALIGN = 0 96 | config.TRAIN.MIN_BOX_SIZE = 5 97 | # R-CNN and RPN 98 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 99 | config.TRAIN.BATCH_IMAGES = 8 100 | # e2e changes behavior of anchor loader and metric 101 | config.TRAIN.END2END = True 102 | # group images with similar aspect ratio 103 | config.TRAIN.ASPECT_GROUPING = False 104 | 105 | # RPN anchor loader 106 | # rpn anchors batch size 107 | config.TRAIN.RPN_ENABLE_OHEM = 2 108 | config.TRAIN.RPN_BATCH_SIZE = 256 109 | # rpn anchors sampling params 110 | config.TRAIN.RPN_FG_FRACTION = 0.25 111 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5 112 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 113 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 114 | config.TRAIN.RPN_FORCE_POSITIVE = False 115 | # rpn bounding box regression params 116 | 117 | config.TEST = edict() 118 | 119 | # R-CNN testing 120 | # use rpn to generate proposal 121 | config.TEST.HAS_RPN = False 122 | # size of images for each device 123 | config.TEST.BATCH_IMAGES = 1 124 | 125 | # RPN proposal 126 | config.TEST.CXX_PROPOSAL = True 127 | config.TEST.RPN_NMS_THRESH = 0.3 128 | config.TEST.RPN_PRE_NMS_TOP_N = 1000 129 | config.TEST.RPN_POST_NMS_TOP_N = 3000 130 | # RCNN nms 131 | config.TEST.NMS = 0.3 132 | 133 | config.TEST.PYRAMID_SCALES = [0.5, 1.0, 1.5] 134 | config.TEST.SCORE_THRESH = 0.5 135 | config.TEST.IOU_THRESH = 0.5 136 | 137 | 138 | # network settings 139 | network = edict() 140 | 141 | # network.ssh = edict() 142 | 143 | network.mnet = edict() 144 | network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 145 | network.mnet.BATCH_IMAGES = 16 146 | network.mnet.HEAD_FILTER_NUM = 64 147 | network.mnet.CONTEXT_FILTER_RATIO = 1 148 | 149 | network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 150 | network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 151 | network.mnet.PIXEL_SCALE = 1.0 152 | network.mnet.pretrained = 'model/mobilenet025fd0' #78 153 | network.mnet.pretrained_epoch = 0 154 | network.mnet.max_feat_channel = 8888 155 | network.mnet.COLOR_MODE = 1 156 | network.mnet.USE_CROP = True 157 | network.mnet.RPN_ANCHOR_CFG = RAC_SSH 158 | network.mnet.LAYER_FIX = True 159 | network.mnet.LANDMARK_LR_MULT = 2.5 160 | 161 | 162 | network.resnet = edict() 163 | network.resnet.pretrained_epoch = 0 164 | network.resnet.lr_step = '1,2,3,4,5,55,68,80' 165 | network.resnet.lr = 0.004 166 | network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 167 | network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 168 | network.resnet.PIXEL_SCALE = 1.0 169 | network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 170 | 
network.resnet.BATCH_IMAGES = 8 171 | network.resnet.HEAD_FILTER_NUM = 256 172 | network.resnet.CONTEXT_FILTER_RATIO = 1 173 | network.resnet.USE_DCN = 2 174 | network.resnet.RPN_BATCH_SIZE = 256 175 | network.resnet.RPN_ANCHOR_CFG = RAC_RETINA 176 | 177 | network.resnet.USE_DCN = 0 178 | network.resnet.pretrained = 'model/pretrain-model/imagenet-resnet-50/resnet-50' 179 | network.resnet.RPN_ANCHOR_CFG = RAC_SSH 180 | 181 | 182 | # dataset settings 183 | dataset = edict() 184 | 185 | dataset.retinaface = edict() 186 | dataset.retinaface.dataset = 'retinaface' 187 | dataset.retinaface.image_set = 'train' 188 | dataset.retinaface.test_image_set = 'val' 189 | dataset.retinaface.root_path = 'data' 190 | dataset.retinaface.dataset_path = 'data/retinaface' 191 | dataset.retinaface.NUM_CLASSES = 2 192 | 193 | # default settings 194 | default = edict() 195 | 196 | config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling'] 197 | 198 | # default network 199 | default.network = 'resnet' 200 | default.pretrained = 'model/imagenet-resnet-50' 201 | default.pretrained_epoch = 0 202 | # default dataset 203 | default.dataset = 'retinaface' 204 | default.image_set = 'train' 205 | default.test_image_set = 'val' 206 | default.root_path = 'data' 207 | default.dataset_path = 'data/retinaface' 208 | # default training 209 | default.frequent = 20 210 | default.kvstore = 'device' 211 | # default e2e 212 | default.prefix = 'model/defect' 213 | default.end_epoch = 10000 214 | default.lr_step = '55,68,80' 215 | default.lr = 0.01 216 | 217 | def generate_config(_network, _dataset): 218 | for k, v in network[_network].items(): 219 | if k in config: 220 | config[k] = v 221 | elif k in default: 222 | default[k] = v 223 | if k in config.TRAIN: 224 | config.TRAIN[k] = v 225 | for k, v in dataset[_dataset].items(): 226 | if k in config: 227 | config[k] = v 228 | elif k in default: 229 | default[k] = v 230 | if k in config.TRAIN: 231 | config.TRAIN[k] = v 232 | config.network = _network 233 | config.dataset = _dataset 234 | config.RPN_FEAT_STRIDE = [] 235 | num_anchors = [] 236 | for k in config.RPN_ANCHOR_CFG: 237 | config.RPN_FEAT_STRIDE.append( int(k) ) 238 | _num_anchors = len(config.RPN_ANCHOR_CFG[k]['SCALES'])*len(config.RPN_ANCHOR_CFG[k]['RATIOS']) 239 | if config.DENSE_ANCHOR: 240 | _num_anchors *= 2 241 | config.RPN_ANCHOR_CFG[k]['NUM_ANCHORS'] = _num_anchors 242 | num_anchors.append(_num_anchors) 243 | config.RPN_FEAT_STRIDE = sorted(config.RPN_FEAT_STRIDE, reverse=True) 244 | for j in range(1,len(num_anchors)): 245 | assert num_anchors[0]==num_anchors[j] 246 | config.NUM_ANCHORS = num_anchors[0] 247 | 248 | -------------------------------------------------------------------------------- /rcnn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/core/__init__.py -------------------------------------------------------------------------------- /rcnn/core/callback.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def do_checkpoint(prefix, means, stds): 5 | def _callback(iter_no, sym, arg, aux): 6 | if 'bbox_pred_weight' in arg: 7 | arg['bbox_pred_weight_test'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T 8 | arg['bbox_pred_bias_test'] = arg['bbox_pred_bias'] * mx.nd.array(stds) + mx.nd.array(means) 9 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 10 | if 
'bbox_pred_weight' in arg: 11 | arg.pop('bbox_pred_weight_test') 12 | arg.pop('bbox_pred_bias_test') 13 | return _callback 14 | -------------------------------------------------------------------------------- /rcnn/core/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | from rcnn.config import config 7 | 8 | 9 | def get_rpn_names(): 10 | pred = ['rpn_cls_prob', 'rpn_bbox_loss', 'rpn_label', 'rpn_bbox_weight'] 11 | label = ['rpn_label', 'rpn_bbox_target', 'rpn_bbox_weight'] 12 | return pred, label 13 | 14 | 15 | 16 | class RPNAccMetric(mx.metric.EvalMetric): 17 | def __init__(self, pred_idx=-1, label_idx=-1,name='RPNAcc'): 18 | super(RPNAccMetric, self).__init__(name) 19 | self.pred, self.label = get_rpn_names() 20 | #self.name = 'RPNAcc' 21 | self.name = [name, name+'_BG', name+'_FG'] 22 | self.pred_idx = pred_idx 23 | self.label_idx = label_idx 24 | self.STAT = [0, 0, 0] 25 | 26 | def reset(self): 27 | """Clear the internal statistics to initial state.""" 28 | if isinstance(self.name, str): 29 | self.num_inst = 0 30 | self.sum_metric = 0.0 31 | else: 32 | #print('reset to ',len(self.name), self.name, file=sys.stderr) 33 | self.num_inst = [0] * len(self.name) 34 | self.sum_metric = [0.0] * len(self.name) 35 | 36 | 37 | def get(self): 38 | if isinstance(self.name, str): 39 | if self.num_inst == 0: 40 | return (self.name, float('nan')) 41 | else: 42 | return (self.name, self.sum_metric / self.num_inst) 43 | else: 44 | names = ['%s'%(self.name[i]) for i in range(len(self.name))] 45 | values = [x / y if y != 0 else float('nan') \ 46 | for x, y in zip(self.sum_metric, self.num_inst)] 47 | return (names, values) 48 | 49 | def update(self, labels, preds): 50 | if self.pred_idx>=0 and self.label_idx>=0: 51 | pred = preds[self.pred_idx] 52 | label = preds[self.label_idx] 53 | else: 54 | pred = preds[self.pred.index('rpn_cls_prob')] 55 | label = labels[self.label.index('rpn_label')] 56 | #label = preds[self.pred.index('rpn_label')] 57 | 58 | num_images = pred.shape[0] 59 | #print(pred.shape, label.shape, file=sys.stderr) 60 | # pred (b, c, p) or (b, c, h, w) 61 | pred_label = mx.ndarray.argmax_channel(pred).asnumpy().astype('int32') 62 | #pred_label = pred_label.reshape((pred_label.shape[0], -1)) 63 | pred_label = pred_label.reshape(-1,) 64 | # label (b, p) 65 | label = label.asnumpy().astype('int32').reshape(-1,) 66 | #print(pred_label.shape, label.shape) 67 | 68 | # filter with keep_inds 69 | keep_inds = np.where(label != -1)[0] 70 | #print('in_metric', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 71 | #print(keep_inds, file=sys.stderr) 72 | _pred_label = pred_label[keep_inds] 73 | _label = label[keep_inds] 74 | #print('in_metric2', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 75 | if isinstance(self.name, str): 76 | self.sum_metric += np.sum(_pred_label.flat == _label.flat) 77 | self.num_inst += len(_pred_label.flat) 78 | else: 79 | self.sum_metric[0] += np.sum(_pred_label.flat == _label.flat) 80 | self.num_inst[0] += len(_pred_label.flat) 81 | 82 | keep_inds = np.where(label == 0)[0] 83 | _pred_label = pred_label[keep_inds] 84 | _label = label[keep_inds] 85 | self.sum_metric[1] += np.sum(_pred_label.flat == _label.flat) 86 | self.num_inst[1] += len(_pred_label.flat) 87 | 88 | keep_inds = np.where(label == 1)[0] 89 | _pred_label = pred_label[keep_inds] 90 | _label = label[keep_inds] 91 | a = 
np.sum(_pred_label.flat == _label.flat) 92 | b = len(_pred_label.flat) 93 | self.sum_metric[2] += a 94 | self.num_inst[2] += b 95 | 96 | #self.STAT[0]+=a 97 | #self.STAT[1]+=b 98 | #self.STAT[2]+=num_images 99 | #if self.STAT[2]%400==0: 100 | # print('FG_ACC', self.pred_idx, self.STAT[2], self.STAT[0], self.STAT[1], float(self.STAT[0])/self.STAT[1], file=sys.stderr) 101 | # self.STAT = [0,0,0] 102 | 103 | 104 | class RPNLogLossMetric(mx.metric.EvalMetric): 105 | def __init__(self, pred_idx=-1, label_idx=-1): 106 | super(RPNLogLossMetric, self).__init__('RPNLogLoss') 107 | self.pred, self.label = get_rpn_names() 108 | self.pred_idx = pred_idx 109 | self.label_idx = label_idx 110 | 111 | def update(self, labels, preds): 112 | if self.pred_idx>=0 and self.label_idx>=0: 113 | pred = preds[self.pred_idx] 114 | label = preds[self.label_idx] 115 | else: 116 | pred = preds[self.pred.index('rpn_cls_prob')] 117 | label = labels[self.label.index('rpn_label')] 118 | #label = preds[self.pred.index('rpn_label')] 119 | 120 | # label (b, p) 121 | label = label.asnumpy().astype('int32').reshape((-1)) 122 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 123 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 124 | pred = pred.reshape((label.shape[0], -1)) 125 | 126 | # filter with keep_inds 127 | keep_inds = np.where(label != -1)[0] 128 | label = label[keep_inds] 129 | cls = pred[keep_inds, label] 130 | #print('in_metric log', label.shape, cls.shape, file=sys.stderr) 131 | 132 | cls += 1e-14 133 | cls_loss = -1 * np.log(cls) 134 | cls_loss = np.sum(cls_loss) 135 | self.sum_metric += cls_loss 136 | self.num_inst += label.shape[0] 137 | 138 | 139 | class RPNL1LossMetric(mx.metric.EvalMetric): 140 | def __init__(self, loss_idx=-1, weight_idx=-1, name='RPNL1Loss'): 141 | super(RPNL1LossMetric, self).__init__(name) 142 | self.pred, self.label = get_rpn_names() 143 | self.loss_idx = loss_idx 144 | self.weight_idx = weight_idx 145 | self.name = name 146 | 147 | def update(self, labels, preds): 148 | if self.loss_idx>=0 and self.weight_idx>=0: 149 | bbox_loss = preds[self.loss_idx].asnumpy() 150 | bbox_weight = preds[self.weight_idx].asnumpy() 151 | else: 152 | bbox_loss = preds[self.pred.index('rpn_bbox_loss')].asnumpy() 153 | bbox_weight = labels[self.label.index('rpn_bbox_weight')].asnumpy() 154 | #bbox_weight = preds[self.pred.index('rpn_bbox_weight')].asnumpy() 155 | 156 | #print('in_metric', self.name, bbox_weight.shape, bbox_loss.shape) 157 | 158 | # calculate num_inst (average on those fg anchors) 159 | num_inst = np.sum(bbox_weight > 0) / (bbox_weight.shape[1]/config.NUM_ANCHORS) 160 | #print('in_metric log', bbox_loss.shape, num_inst, file=sys.stderr) 161 | 162 | self.sum_metric += np.sum(bbox_loss) 163 | self.num_inst += num_inst 164 | 165 | 166 | -------------------------------------------------------------------------------- /rcnn/core/module.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implements the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module bound with maximum shape. 
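Example (a sketch only; the shapes follow this repo's default config of
BATCH_IMAGES=8 and SCALES=[(640, 640)]):

    mod = MutableModule(sym, data_names=('data',), label_names=('label',),
                        context=[mx.gpu(0)],
                        max_data_shapes=[('data', (8, 3, 640, 640))])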
4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 
100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and 
self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/core/module_bak.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implement the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module binded with maximum shape. 4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return 
self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None, grad_req='write'): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if 
self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /rcnn/cython/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/cython/__init__.py -------------------------------------------------------------------------------- /rcnn/cython/anchors.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float32 6 | ctypedef np.float32_t DTYPE_t 7 | 8 | def anchors_cython(int height, int width, int stride, np.ndarray[DTYPE_t, ndim=2] base_anchors): 9 | """ 10 | Parameters 11 | ---------- 12 | height: height of plane 13 | width: width of plane 14 | stride: stride of the original image 15 | base_anchors: (A, 4) a base set of anchors 16 | Returns 17 | ------- 18 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 19 | """ 20 | cdef unsigned int A = base_anchors.shape[0] 21 | cdef np.ndarray[DTYPE_t, ndim=4] 
all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) 22 | cdef unsigned int iw, ih 23 | cdef unsigned int k 24 | cdef unsigned int sh 25 | cdef unsigned int sw 26 | for iw in range(width): 27 | sw = iw * stride 28 | for ih in range(height): 29 | sh = ih * stride 30 | for k in range(A): 31 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 32 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 33 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 34 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 35 | return all_anchors -------------------------------------------------------------------------------- /rcnn/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps_cython( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /rcnn/cython/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, 
ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /rcnn/cython/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { 
\ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
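// (added note) CUDA_CHECK only logs failures via cudaGetErrorString instead
// of aborting, so an error in the device switch below surfaces on stdout
// rather than stopping the NMS call.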
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /rcnn/cython/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 
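Example (illustrative): with CUDAHOME=/usr/local/cuda this returns
{'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc',
'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.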
35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.items(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | 59 | 60 | # Test if cuda could be foun 61 | try: 62 | CUDA = locate_cuda() 63 | except EnvironmentError: 64 | CUDA = None 65 | 66 | 67 | # Obtain the numpy include directory. This logic works across numpy versions. 68 | try: 69 | numpy_include = np.get_include() 70 | except AttributeError: 71 | numpy_include = np.get_numpy_include() 72 | 73 | 74 | def customize_compiler_for_nvcc(self): 75 | """inject deep into distutils to customize how the dispatch 76 | to gcc/nvcc works. 77 | 78 | If you subclass UnixCCompiler, it's not trivial to get your subclass 79 | injected in, and still have the right customizations (i.e. 80 | distutils.sysconfig.customize_compiler) run on it. So instead of going 81 | the OO route, I have this. Note, it's kindof like a wierd functional 82 | subclassing going on.""" 83 | 84 | # tell the compiler it can processes .cu 85 | self.src_extensions.append('.cu') 86 | 87 | # save references to the default compiler_so and _comple methods 88 | default_compiler_so = self.compiler_so 89 | super = self._compile 90 | 91 | # now redefine the _compile method. This gets executed for each 92 | # object but distutils doesn't have the ability to change compilers 93 | # based on source extension: we add it. 
94 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 95 | if os.path.splitext(src)[1] == '.cu': 96 | # use cuda for .cu files 97 | self.set_executable('compiler_so', CUDA['nvcc']) 98 | # use only a subset of the extra_postargs, which are 1-1 translated 99 | # from the extra_compile_args in the Extension class 100 | postargs = extra_postargs['nvcc'] 101 | else: 102 | postargs = extra_postargs['gcc'] 103 | 104 | super(obj, src, ext, cc_args, postargs, pp_opts) 105 | # reset the default compiler_so, which we might have changed for cuda 106 | self.compiler_so = default_compiler_so 107 | 108 | # inject our redefined _compile method into the class 109 | self._compile = _compile 110 | 111 | 112 | # run the customize_compiler 113 | class custom_build_ext(build_ext): 114 | def build_extensions(self): 115 | customize_compiler_for_nvcc(self.compiler) 116 | build_ext.build_extensions(self) 117 | 118 | 119 | ext_modules = [ 120 | Extension( 121 | "bbox", 122 | ["bbox.pyx"], 123 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | include_dirs=[numpy_include] 125 | ), 126 | Extension( 127 | "anchors", 128 | ["anchors.pyx"], 129 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 130 | include_dirs=[numpy_include] 131 | ), 132 | Extension( 133 | "cpu_nms", 134 | ["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 136 | include_dirs=[numpy_include] 137 | ), 138 | ] 139 | 140 | if CUDA is not None: 141 | ext_modules.append( 142 | Extension('gpu_nms', 143 | ['nms_kernel.cu', 'gpu_nms.pyx'], 144 | library_dirs=[CUDA['lib64']], 145 | libraries=['cudart'], 146 | language='c++', 147 | runtime_library_dirs=[CUDA['lib64']], 148 | # this syntax is specific to this build system 149 | # we're only going to use certain compiler args with nvcc and not with 150 | # gcc; the implementation of this trick is in customize_compiler_for_nvcc() above 151 | extra_compile_args={'gcc': ["-Wno-unused-function"], 152 | 'nvcc': ['-arch=sm_35', 153 | '--ptxas-options=-v', 154 | '-c', 155 | '--compiler-options', 156 | "'-fPIC'"]}, 157 | include_dirs=[numpy_include, CUDA['include']] 158 | ) 159 | ) 160 | else: 161 | print('Skipping GPU_NMS') 162 | 163 | 164 | setup( 165 | name='frcnn_cython', 166 | ext_modules=ext_modules, 167 | # inject our custom trigger 168 | cmdclass={'build_ext': custom_build_ext}, 169 | ) 170 | -------------------------------------------------------------------------------- /rcnn/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import IMDB 2 | from .retinaface import retinaface 3 | -------------------------------------------------------------------------------- /rcnn/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v).astype(np.int) 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h >= min_size))[0] 16 | return keep 17 | -------------------------------------------------------------------------------- /rcnn/dataset/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ 
import print_function 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import cv2 7 | import os 8 | import numpy as np 9 | import json 10 | from PIL import Image 11 | 12 | from ..logger import logger 13 | from .imdb import IMDB 14 | from .ds_utils import unique_boxes, filter_small_boxes 15 | from ..config import config 16 | 17 | class retinaface(IMDB): 18 | def __init__(self, image_set, root_path, data_path): 19 | super(retinaface, self).__init__('retinaface', image_set, root_path, data_path) 20 | #assert image_set=='train' 21 | 22 | split = image_set 23 | self._split = image_set 24 | self._image_set = image_set 25 | 26 | 27 | self.root_path = root_path 28 | self.data_path = data_path 29 | 30 | 31 | self._dataset_path = self.data_path 32 | self._imgs_path = os.path.join(self._dataset_path, image_set, 'images') 33 | self._fp_bbox_map = {} 34 | label_file = os.path.join(self._dataset_path, image_set, 'label.txt')#os.path.join(self._dataset_path, image_set, 'label.txt') 35 | name = None 36 | for line in open(label_file, 'r'): 37 | line = line.strip() 38 | if line.startswith('#'): 39 | name = line[1:].strip() 40 | self._fp_bbox_map[name] = [] 41 | continue 42 | assert name is not None 43 | assert name in self._fp_bbox_map 44 | self._fp_bbox_map[name].append(line) 45 | print('origin image size', len(self._fp_bbox_map)) 46 | 47 | #self.num_images = len(self._image_paths) 48 | #self._image_index = range(len(self._image_paths)) 49 | self.classes = ['bg', 'face'] 50 | self.num_classes = len(self.classes) 51 | 52 | 53 | def gt_roidb(self): 54 | cache_file = os.path.join(self.cache_path, '{}_{}_gt_roidb.pkl'.format(self.name, self._split)) 55 | if os.path.exists(cache_file): 56 | with open(cache_file, 'rb') as fid: 57 | roidb = pickle.load(fid) 58 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 59 | self.num_images = len(roidb) 60 | return roidb 61 | 62 | roidb = [] 63 | max_num_boxes = 0 64 | nonattr_box_num = 0 65 | landmark_num = 0 66 | 67 | for fp in self._fp_bbox_map: 68 | if self._split=='test': 69 | image_path = os.path.join(self._imgs_path, fp) 70 | roi = {'image': image_path} 71 | roidb.append(roi) 72 | continue 73 | boxes = np.zeros([len(self._fp_bbox_map[fp]), 4], np.float) 74 | landmarks = np.zeros([len(self._fp_bbox_map[fp]), 5, 3], np.float) 75 | blur = np.zeros((len(self._fp_bbox_map[fp]),), np.float) 76 | boxes_mask = [] 77 | 78 | gt_classes = np.ones([len(self._fp_bbox_map[fp])], np.int32) 79 | overlaps = np.zeros([len(self._fp_bbox_map[fp]), 2], np.float) 80 | 81 | ix = 0 82 | 83 | for aline in self._fp_bbox_map[fp]: 84 | imsize = Image.open(os.path.join(self._imgs_path, fp)).size 85 | values = [float(x) for x in aline.strip().split()] 86 | bbox = [values[0], values[1], values[0]+values[2], values[1]+values[3]] 87 | 88 | x1 = bbox[0] 89 | y1 = bbox[1] 90 | x2 = min(imsize[0], bbox[2]) 91 | y2 = min(imsize[1], bbox[3]) 92 | if x1>=x2 or y1>=y2: 93 | continue 94 | 95 | if config.BBOX_MASK_THRESH>0: 96 | if (x2 - x1) < config.BBOX_MASK_THRESH or y2 - y1 < config.BBOX_MASK_THRESH: 97 | boxes_mask.append(np.array([x1, y1, x2, y2], np.float)) 98 | continue 99 | if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or y2 - y1 < config.TRAIN.MIN_BOX_SIZE: 100 | continue 101 | 102 | boxes[ix, :] = np.array([x1, y1, x2, y2], np.float) 103 | if self._split=='train': 104 | landmark = np.array( values[4:19], dtype=np.float32 ).reshape((5,3)) 105 | for li in range(5): 106 | #print(landmark) 107 | if landmark[li][0]==-1. 
and landmark[li][1]==-1.: #missing landmark 108 | assert landmark[li][2]==-1 109 | else: 110 | assert landmark[li][2]>=0 111 | if li==0: 112 | landmark_num+=1 113 | if landmark[li][2]==0.0:#visible 114 | landmark[li][2] = 1.0 115 | else: 116 | landmark[li][2] = 0.0 117 | 118 | landmarks[ix] = landmark 119 | 120 | blur[ix] = values[19] 121 | #print(aline, blur[ix]) 122 | if blur[ix]<0.0: 123 | blur[ix] = 0.3 124 | nonattr_box_num+=1 125 | 126 | cls = int(1) 127 | gt_classes[ix] = cls 128 | overlaps[ix, cls] = 1.0 129 | ix += 1 130 | max_num_boxes = max(max_num_boxes, ix) 131 | #overlaps = scipy.sparse.csr_matrix(overlaps) 132 | if self._split=='train' and ix==0: 133 | continue 134 | boxes = boxes[:ix,:] 135 | landmarks = landmarks[:ix,:,:] 136 | blur = blur[:ix] 137 | gt_classes = gt_classes[:ix] 138 | overlaps = overlaps[:ix,:] 139 | image_path = os.path.join(self._imgs_path, fp) 140 | with open(image_path, 'rb') as fin: 141 | stream = fin.read() 142 | stream = np.fromstring(stream, dtype=np.uint8) 143 | 144 | roi = { 145 | 'image': image_path, 146 | 'stream': stream, 147 | 'height': imsize[1], 148 | 'width': imsize[0], 149 | 'boxes': boxes, 150 | 'landmarks': landmarks, 151 | 'blur': blur, 152 | 'gt_classes': gt_classes, 153 | 'gt_overlaps': overlaps, 154 | 'max_classes': overlaps.argmax(axis=1), 155 | 'max_overlaps': overlaps.max(axis=1), 156 | 'flipped': False, 157 | } 158 | if len(boxes_mask)>0: 159 | boxes_mask = np.array(boxes_mask) 160 | roi['boxes_mask'] = boxes_mask 161 | roidb.append(roi) 162 | for roi in roidb: 163 | roi['max_num_boxes'] = max_num_boxes 164 | self.num_images = len(roidb) 165 | print('roidb size', len(roidb)) 166 | print('non attr box num', nonattr_box_num) 167 | print('landmark num', landmark_num) 168 | with open(cache_file, 'wb') as fid: 169 | pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) 170 | print('wrote gt roidb to {}'.format(cache_file)) 171 | 172 | return roidb 173 | 174 | def write_detections(self, all_boxes, output_dir='./output/'): 175 | pass 176 | 177 | 178 | def evaluate_detections(self, all_boxes, output_dir='./output/',method_name='insightdetection'): 179 | pass 180 | -------------------------------------------------------------------------------- /rcnn/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/io/__init__.py -------------------------------------------------------------------------------- /rcnn/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | # set up logger 4 | logging.basicConfig() 5 | logger = logging.getLogger() 6 | logger.setLevel(logging.INFO) 7 | -------------------------------------------------------------------------------- /rcnn/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/processing/__init__.py -------------------------------------------------------------------------------- /rcnn/processing/assign_levels.py: -------------------------------------------------------------------------------- 1 | from rcnn.config import config 2 | import numpy as np 3 | 4 | 5 | def compute_assign_targets(rois, threshold): 6 | rois_area = np.sqrt((rois[:, 2] - rois[:, 0] + 1) * (rois[:, 3] - rois[:, 1] + 1)) 7 | num_rois = np.shape(rois)[0] 8 | assign_levels = 
np.zeros(num_rois, dtype=np.uint8) 9 | for i, stride in enumerate(config.RCNN_FEAT_STRIDE): 10 | thd = threshold[i] 11 | idx = np.logical_and(thd[1] <= rois_area, rois_area < thd[0]) 12 | assign_levels[idx] = stride 13 | 14 | assert 0 not in assign_levels, "All rois should be assigned to specific levels." 15 | return assign_levels 16 | 17 | 18 | def add_assign_targets(roidb): 19 | """ 20 | given roidb, add ['assign_level'] 21 | :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb 22 | """ 23 | print('add assign targets') 24 | assert len(roidb) > 0 25 | assert 'boxes' in roidb[0] 26 | 27 | area_threshold = [[np.inf, 448], 28 | [448, 224], 29 | [224, 112], 30 | [112, 0]] 31 | 32 | assert len(config.RCNN_FEAT_STRIDE) == len(area_threshold) 33 | 34 | num_images = len(roidb) 35 | for im_i in range(num_images): 36 | rois = roidb[im_i]['boxes'] 37 | roidb[im_i]['assign_levels'] = compute_assign_targets(rois, area_threshold) 38 | -------------------------------------------------------------------------------- /rcnn/processing/bbox_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file has functions about generating bounding box regression targets 3 | """ 4 | 5 | from ..pycocotools.mask import encode 6 | import numpy as np 7 | 8 | from ..logger import logger 9 | from .bbox_transform import bbox_overlaps, bbox_transform 10 | from rcnn.config import config 11 | import math 12 | import cv2 13 | import PIL.Image as Image 14 | import threading 15 | import queue as Queue  # named 'Queue' in Python 2 16 | 17 | 18 | def compute_bbox_regression_targets(rois, overlaps, labels): 19 | """ 20 | given rois, overlaps, gt labels, compute bounding box regression targets 21 | :param rois: roidb[i]['boxes'] k * 4 22 | :param overlaps: roidb[i]['max_overlaps'] k * 1 23 | :param labels: roidb[i]['max_classes'] k * 1 24 | :return: targets[i][class, dx, dy, dw, dh] k * 5 25 | """ 26 | # Ensure ROIs are floats 27 | rois = rois.astype(np.float, copy=False) 28 | 29 | # Sanity check 30 | if len(rois) != len(overlaps): 31 | logger.warning('bbox regression: len(rois) != len(overlaps)') 32 | 33 | # Indices of ground-truth ROIs 34 | gt_inds = np.where(overlaps == 1)[0] 35 | if len(gt_inds) == 0: 36 | logger.warning('bbox regression: len(gt_inds) == 0') 37 | 38 | # Indices of examples for which we try to make predictions 39 | ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] 40 | 41 | # Get IoU overlap between each ex ROI and gt ROI 42 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 43 | 44 | # Find which gt ROI each ex ROI has max overlap with: 45 | # this will be the ex ROI's gt target 46 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 47 | gt_rois = rois[gt_inds[gt_assignment], :] 48 | ex_rois = rois[ex_inds, :] 49 | 50 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 51 | targets[ex_inds, 0] = labels[ex_inds] 52 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 53 | return targets 54 | 55 | 56 | def add_bbox_regression_targets(roidb): 57 | """ 58 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 59 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb 60 | :return: means, std variances of targets 61 | """ 62 | logger.info('bbox regression: add bounding box regression targets') 63 | assert len(roidb) > 0 64 | assert 'max_classes' in roidb[0] 65 | 66 | num_images = len(roidb) 67 | num_classes = roidb[0]['gt_overlaps'].shape[1] 68 | for im_i in range(num_images): 69 | rois = roidb[im_i]['boxes'] 70 | max_overlaps = roidb[im_i]['max_overlaps'] 71 | max_classes = roidb[im_i]['max_classes'] 72 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes) 73 | 74 | if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 75 | # use fixed / precomputed means and stds instead of empirical values 76 | means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1)) 77 | stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1)) 78 | else: 79 | # compute mean, std values 80 | class_counts = np.zeros((num_classes, 1)) + 1e-14 81 | sums = np.zeros((num_classes, 4)) 82 | squared_sums = np.zeros((num_classes, 4)) 83 | for im_i in range(num_images): 84 | targets = roidb[im_i]['bbox_targets'] 85 | for cls in range(1, num_classes): 86 | cls_indexes = np.where(targets[:, 0] == cls)[0] 87 | if cls_indexes.size > 0: 88 | class_counts[cls] += cls_indexes.size 89 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 90 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 91 | 92 | means = sums / class_counts 93 | # var(x) = E(x^2) - E(x)^2 94 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 95 | 96 | # normalized targets 97 | for im_i in range(num_images): 98 | targets = roidb[im_i]['bbox_targets'] 99 | for cls in range(1, num_classes): 100 | cls_indexes = np.where(targets[:, 0] == cls)[0] 101 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 102 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 103 | 104 | return means.ravel(), stds.ravel() 105 | 106 | 107 | def expand_bbox_regression_targets(bbox_targets_data, num_classes): 108 | """ 109 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 110 | :param bbox_targets_data: [k * 5] 111 | :param num_classes: number of classes 112 | :return: bbox target processed [k * 4 num_classes] 113 | bbox_weights ! only foreground boxes have bbox regression computation! 
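Example (illustrative): with num_classes = 2, an input row [1, dx, dy, dw, dh]
yields bbox_targets columns 4:8 = [dx, dy, dw, dh] (all other columns stay 0)
and bbox_weights columns 4:8 = config.TRAIN.BBOX_WEIGHTS.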
114 | """ 115 | classes = bbox_targets_data[:, 0] 116 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 117 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 118 | indexes = np.where(classes > 0)[0] 119 | for index in indexes: 120 | cls = classes[index] 121 | start = int(4 * cls) 122 | end = start + 4 123 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 124 | bbox_weights[index, start:end] = config.TRAIN.BBOX_WEIGHTS 125 | return bbox_targets, bbox_weights 126 | 127 | 128 | def compute_mask_and_label(ex_rois, ex_labels, seg, flipped): 129 | # assert os.path.exists(seg_gt), 'Path does not exist: {}'.format(seg_gt) 130 | # im = Image.open(seg_gt) 131 | # pixel = list(im.getdata()) 132 | # pixel = np.array(pixel).reshape([im.size[1], im.size[0]]) 133 | im = Image.open(seg) 134 | pixel = list(im.getdata()) 135 | ins_seg = np.array(pixel).reshape([im.size[1], im.size[0]]) 136 | if flipped: 137 | ins_seg = ins_seg[:, ::-1] 138 | rois = ex_rois 139 | n_rois = ex_rois.shape[0] 140 | label = ex_labels 141 | class_id = config.CLASS_ID 142 | mask_target = np.zeros((n_rois, 28, 28), dtype=np.int8) 143 | mask_label = np.zeros((n_rois), dtype=np.int8) 144 | for n in range(n_rois): 145 | target = ins_seg[int(rois[n, 1]): int(rois[n, 3]), int(rois[n, 0]): int(rois[n, 2])] 146 | ids = np.unique(target) 147 | ins_id = 0 148 | max_count = 0 149 | for id in ids: 150 | if math.floor(id / 1000) == class_id[int(label[int(n)])]: 151 | px = np.where(ins_seg == int(id)) 152 | x_min = np.min(px[1]) 153 | y_min = np.min(px[0]) 154 | x_max = np.max(px[1]) 155 | y_max = np.max(px[0]) 156 | x1 = max(rois[n, 0], x_min) 157 | y1 = max(rois[n, 1], y_min) 158 | x2 = min(rois[n, 2], x_max) 159 | y2 = min(rois[n, 3], y_max) 160 | iou = (x2 - x1) * (y2 - y1) 161 | iou = iou / ((rois[n, 2] - rois[n, 0]) * (rois[n, 3] - rois[n, 1]) 162 | + (x_max - x_min) * (y_max - y_min) - iou) 163 | if iou > max_count: 164 | ins_id = id 165 | max_count = iou 166 | 167 | if max_count == 0: 168 | continue 169 | # print max_count 170 | mask = np.zeros(target.shape) 171 | idx = np.where(target == ins_id) 172 | mask[idx] = 1 173 | mask = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_NEAREST) 174 | 175 | mask_target[n] = mask 176 | mask_label[n] = label[int(n)] 177 | return mask_target, mask_label 178 | 179 | 180 | def compute_bbox_mask_targets_and_label(rois, overlaps, labels, seg, flipped): 181 | """ 182 | given rois, overlaps, gt labels, seg, compute bounding box mask targets 183 | :param rois: roidb[i]['boxes'] k * 4 184 | :param overlaps: roidb[i]['max_overlaps'] k * 1 185 | :param labels: roidb[i]['max_classes'] k * 1 186 | :return: targets[i][class, dx, dy, dw, dh] k * 5 187 | """ 188 | # Ensure ROIs are floats 189 | rois = rois.astype(np.float, copy=False) 190 | 191 | # Sanity check 192 | if len(rois) != len(overlaps): 193 | print 'bbox regression: this should not happen' 194 | 195 | # Indices of ground-truth ROIs 196 | gt_inds = np.where(overlaps == 1)[0] 197 | if len(gt_inds) == 0: 198 | print 'something wrong : zero ground truth rois' 199 | # Indices of examples for which we try to make predictions 200 | ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] 201 | 202 | # Get IoU overlap between each ex ROI and gt ROI 203 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 204 | 205 | 206 | # Find which gt ROI each ex ROI has max overlap with: 207 | # this will be the ex ROI's gt target 208 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 209 | 
gt_rois = rois[gt_inds[gt_assignment], :] 210 | ex_rois = rois[ex_inds, :] 211 | 212 | mask_targets, mask_label = compute_mask_and_label(ex_rois, labels[ex_inds], seg, flipped) 213 | return mask_targets, mask_label, ex_inds 214 | 215 | def add_mask_targets(roidb): 216 | """ 217 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 218 | :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb 219 | :return: means, std variances of targets 220 | """ 221 | print 'add bounding box mask targets' 222 | assert len(roidb) > 0 223 | assert 'max_classes' in roidb[0] 224 | 225 | num_images = len(roidb) 226 | 227 | # Multi threads processing 228 | im_quene = Queue.Queue(maxsize=0) 229 | for im_i in range(num_images): 230 | im_quene.put(im_i) 231 | 232 | def process(): 233 | while not im_quene.empty(): 234 | im_i = im_quene.get() 235 | print "-----process img {}".format(im_i) 236 | rois = roidb[im_i]['boxes'] 237 | max_overlaps = roidb[im_i]['max_overlaps'] 238 | max_classes = roidb[im_i]['max_classes'] 239 | ins_seg = roidb[im_i]['ins_seg'] 240 | flipped = roidb[im_i]['flipped'] 241 | roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \ 242 | compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg, flipped) 243 | threads = [threading.Thread(target=process, args=()) for i in xrange(10)] 244 | for t in threads: t.start() 245 | for t in threads: t.join() 246 | # Single thread 247 | # for im_i in range(num_images): 248 | # print "-----processing img {}".format(im_i) 249 | # rois = roidb[im_i]['boxes'] 250 | # max_overlaps = roidb[im_i]['max_overlaps'] 251 | # max_classes = roidb[im_i]['max_classes'] 252 | # ins_seg = roidb[im_i]['ins_seg'] 253 | # # roidb[im_i]['mask_targets'] = compute_bbox_mask_targets(rois, max_overlaps, max_classes, ins_seg) 254 | # roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \ 255 | # compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg) 256 | -------------------------------------------------------------------------------- /rcnn/processing/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..cython.bbox import bbox_overlaps_cython 3 | #from rcnn.config import config 4 | 5 | 6 | def bbox_overlaps(boxes, query_boxes): 7 | return bbox_overlaps_cython(boxes, query_boxes) 8 | 9 | 10 | def bbox_overlaps_py(boxes, query_boxes): 11 | """ 12 | determine overlaps between boxes and query_boxes 13 | :param boxes: n * 4 bounding boxes 14 | :param query_boxes: k * 4 bounding boxes 15 | :return: overlaps: n * k overlaps 16 | """ 17 | n_ = boxes.shape[0] 18 | k_ = query_boxes.shape[0] 19 | overlaps = np.zeros((n_, k_), dtype=np.float) 20 | for k in range(k_): 21 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 22 | for n in range(n_): 23 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 24 | if iw > 0: 25 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 26 | if ih > 0: 27 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 28 | all_area = float(box_area + query_box_area - iw * ih) 29 | overlaps[n, k] = iw * ih / all_area 30 | return overlaps 31 | 32 | 33 | def clip_boxes(boxes, im_shape): 34 | """ 35 | Clip boxes to image boundaries. 
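Example (illustrative): with im_shape = (480, 640), a box [630., -5., 650., 100.]
is clipped to [630., 0., 639., 100.].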
36 | :param boxes: [N, 4* num_classes] 37 | :param im_shape: tuple of 2 38 | :return: [N, 4* num_classes] 39 | """ 40 | # x1 >= 0 41 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 42 | # y1 >= 0 43 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 44 | # x2 < im_shape[1] 45 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 46 | # y2 < im_shape[0] 47 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 48 | return boxes 49 | 50 | 51 | def nonlinear_transform(ex_rois, gt_rois): 52 | """ 53 | compute bounding box regression targets from ex_rois to gt_rois 54 | :param ex_rois: [N, 4] 55 | :param gt_rois: [N, 4] 56 | :return: [N, 4] 57 | """ 58 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 59 | 60 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 61 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 62 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 63 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 64 | 65 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 66 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 67 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 68 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 69 | 70 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 71 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 72 | targets_dw = np.log(gt_widths / ex_widths) 73 | targets_dh = np.log(gt_heights / ex_heights) 74 | 75 | if gt_rois.shape[1]<=4: 76 | targets = np.vstack( 77 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 78 | return targets 79 | else: 80 | targets = [targets_dx, targets_dy, targets_dw, targets_dh] 81 | #if config.USE_BLUR: 82 | # for i in range(4, gt_rois.shape[1]): 83 | # t = gt_rois[:,i] 84 | # targets.append(t) 85 | targets = np.vstack(targets).transpose() 86 | return targets 87 | 88 | def landmark_transform(ex_rois, gt_rois): 89 | 90 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 91 | 92 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 93 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 94 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 95 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 96 | 97 | 98 | targets = [] 99 | for i in range(gt_rois.shape[1]): 100 | for j in range(gt_rois.shape[2]): 101 | #if not config.USE_OCCLUSION and j==2: 102 | # continue 103 | if j==2: 104 | continue 105 | if j==0: #w 106 | target = (gt_rois[:,i,j] - ex_ctr_x) / (ex_widths + 1e-14) 107 | elif j==1: #h 108 | target = (gt_rois[:,i,j] - ex_ctr_y) / (ex_heights + 1e-14) 109 | else: #visibile 110 | target = gt_rois[:,i,j] 111 | targets.append(target) 112 | 113 | 114 | targets = np.vstack(targets).transpose() 115 | return targets 116 | 117 | 118 | def nonlinear_pred(boxes, box_deltas): 119 | """ 120 | Transform the set of class-agnostic boxes into class-specific boxes 121 | by applying the predicted offsets (box_deltas) 122 | :param boxes: !important [N 4] 123 | :param box_deltas: [N, 4 * num_classes] 124 | :return: [N 4 * num_classes] 125 | """ 126 | if boxes.shape[0] == 0: 127 | return np.zeros((0, box_deltas.shape[1])) 128 | 129 | boxes = boxes.astype(np.float, copy=False) 130 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 131 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 132 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 133 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 134 | 135 | dx = box_deltas[:, 0::4] 136 | dy = box_deltas[:, 1::4] 137 | dw = box_deltas[:, 2::4] 
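# (added note) box_deltas stores [dx, dy, dw, dh] per class side by side,
# so the strided slices 0::4 .. 3::4 gather one component across all classes;
# dx, dy are offsets in units of box width/height, while dw, dh are
# log-scale factors, inverted via exp() below.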
138 | dh = box_deltas[:, 3::4] 139 | 140 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 141 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 142 | pred_w = np.exp(dw) * widths[:, np.newaxis] 143 | pred_h = np.exp(dh) * heights[:, np.newaxis] 144 | 145 | pred_boxes = np.zeros(box_deltas.shape) 146 | # x1 147 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 148 | # y1 149 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 150 | # x2 151 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 152 | # y2 153 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 154 | 155 | return pred_boxes 156 | 157 | def landmark_pred(boxes, landmark_deltas): 158 | if boxes.shape[0] == 0: 159 | return np.zeros((0, landmark_deltas.shape[1])) 160 | boxes = boxes.astype(np.float, copy=False) 161 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 162 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 163 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 164 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 165 | preds = [] 166 | for i in range(landmark_deltas.shape[1]): 167 | if i%2==0: 168 | pred = (landmark_deltas[:,i]*widths + ctr_x) 169 | else: 170 | pred = (landmark_deltas[:,i]*heights + ctr_y) 171 | preds.append(pred) 172 | preds = np.vstack(preds).transpose() 173 | return preds 174 | 175 | def iou_transform(ex_rois, gt_rois): 176 | """ return bbox targets, IoU loss uses gt_rois as gt """ 177 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 178 | return gt_rois 179 | 180 | 181 | def iou_pred(boxes, box_deltas): 182 | """ 183 | Transform the set of class-agnostic boxes into class-specific boxes 184 | by applying the predicted offsets (box_deltas) 185 | :param boxes: !important [N 4] 186 | :param box_deltas: [N, 4 * num_classes] 187 | :return: [N 4 * num_classes] 188 | """ 189 | if boxes.shape[0] == 0: 190 | return np.zeros((0, box_deltas.shape[1])) 191 | 192 | boxes = boxes.astype(np.float, copy=False) 193 | x1 = boxes[:, 0] 194 | y1 = boxes[:, 1] 195 | x2 = boxes[:, 2] 196 | y2 = boxes[:, 3] 197 | 198 | dx1 = box_deltas[:, 0::4] 199 | dy1 = box_deltas[:, 1::4] 200 | dx2 = box_deltas[:, 2::4] 201 | dy2 = box_deltas[:, 3::4] 202 | 203 | pred_boxes = np.zeros(box_deltas.shape) 204 | # x1 205 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 206 | # y1 207 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 208 | # x2 209 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 210 | # y2 211 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 212 | 213 | return pred_boxes 214 | 215 | 216 | # define bbox_transform and bbox_pred 217 | bbox_transform = nonlinear_transform 218 | bbox_pred = nonlinear_pred 219 | -------------------------------------------------------------------------------- /rcnn/processing/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | from __future__ import print_function 5 | import sys 6 | from builtins import range 7 | import numpy as np 8 | from ..cython.anchors import anchors_cython 9 | #from ..config import config 10 | 11 | 12 | def anchors_plane(feat_h, feat_w, stride, base_anchor): 13 | return anchors_cython(feat_h, feat_w, stride, base_anchor) 14 | 15 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 16 | scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): 17 | """ 18 | Generate anchor (reference) windows by enumerating aspect ratios X 19 | scales wrt a reference (0, 0, 15, 15) window. 
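Example (illustrative): the defaults (base_size=16, ratios=[0.5, 1, 2],
scales=2**np.arange(3, 6)) produce a (9, 4) array; the ratio-1, scale-8
anchor is [-56., -56., 71., 71.], a 128x128 window centred on (7.5, 7.5).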
20 | """ 21 | 22 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 23 | ratio_anchors = _ratio_enum(base_anchor, ratios) 24 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 25 | for i in range(ratio_anchors.shape[0])]) 26 | if dense_anchor: 27 | assert stride%2==0 28 | anchors2 = anchors.copy() 29 | anchors2[:,:] += int(stride/2) 30 | anchors = np.vstack( (anchors, anchors2) ) 31 | #print('GA',base_anchor.shape, ratio_anchors.shape, anchors.shape) 32 | return anchors 33 | 34 | #def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8): 35 | # """ 36 | # Generate anchor (reference) windows by enumerating aspect ratios X 37 | # scales wrt a reference (0, 0, 15, 15) window. 38 | # """ 39 | # anchors = [] 40 | # _ratios = ratios.reshape( (len(base_size), -1) ) 41 | # _scales = scales.reshape( (len(base_size), -1) ) 42 | # for i,bs in enumerate(base_size): 43 | # __ratios = _ratios[i] 44 | # __scales = _scales[i] 45 | # #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 46 | # r = generate_anchors(bs, __ratios, __scales) 47 | # #print('anchors_fpn', r.shape, file=sys.stderr) 48 | # anchors.append(r) 49 | # return anchors 50 | 51 | def generate_anchors_fpn(dense_anchor=False, cfg = None): 52 | #assert(False) 53 | """ 54 | Generate anchor (reference) windows by enumerating aspect ratios X 55 | scales wrt a reference (0, 0, 15, 15) window. 56 | """ 57 | if cfg is None: 58 | from ..config import config 59 | cfg = config.RPN_ANCHOR_CFG 60 | RPN_FEAT_STRIDE = [] 61 | for k in cfg: 62 | RPN_FEAT_STRIDE.append( int(k) ) 63 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 64 | anchors = [] 65 | for k in RPN_FEAT_STRIDE: 66 | v = cfg[str(k)] 67 | bs = v['BASE_SIZE'] 68 | __ratios = np.array(v['RATIOS']) 69 | __scales = np.array(v['SCALES']) 70 | stride = int(k) 71 | #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 72 | r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) 73 | #print('anchors_fpn', r.shape, file=sys.stderr) 74 | anchors.append(r) 75 | 76 | return anchors 77 | 78 | def _whctrs(anchor): 79 | """ 80 | Return width, height, x center, and y center for an anchor (window). 81 | """ 82 | 83 | w = anchor[2] - anchor[0] + 1 84 | h = anchor[3] - anchor[1] + 1 85 | x_ctr = anchor[0] + 0.5 * (w - 1) 86 | y_ctr = anchor[1] + 0.5 * (h - 1) 87 | return w, h, x_ctr, y_ctr 88 | 89 | 90 | def _mkanchors(ws, hs, x_ctr, y_ctr): 91 | """ 92 | Given a vector of widths (ws) and heights (hs) around a center 93 | (x_ctr, y_ctr), output a set of anchors (windows). 94 | """ 95 | 96 | ws = ws[:, np.newaxis] 97 | hs = hs[:, np.newaxis] 98 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 99 | y_ctr - 0.5 * (hs - 1), 100 | x_ctr + 0.5 * (ws - 1), 101 | y_ctr + 0.5 * (hs - 1))) 102 | return anchors 103 | 104 | 105 | def _ratio_enum(anchor, ratios): 106 | """ 107 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 108 | """ 109 | 110 | w, h, x_ctr, y_ctr = _whctrs(anchor) 111 | size = w * h 112 | size_ratios = size / ratios 113 | ws = np.round(np.sqrt(size_ratios)) 114 | hs = np.round(ws * ratios) 115 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 116 | return anchors 117 | 118 | 119 | def _scale_enum(anchor, scales): 120 | """ 121 | Enumerate a set of anchors for each scale wrt an anchor. 
122 | """ 123 | 124 | w, h, x_ctr, y_ctr = _whctrs(anchor) 125 | ws = w * scales 126 | hs = h * scales 127 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 128 | return anchors 129 | -------------------------------------------------------------------------------- /rcnn/processing/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..cython.cpu_nms import cpu_nms 3 | try: 4 | from ..cython.gpu_nms import gpu_nms 5 | except ImportError: 6 | gpu_nms = None 7 | 8 | 9 | def py_nms_wrapper(thresh): 10 | def _nms(dets): 11 | return nms(dets, thresh) 12 | return _nms 13 | 14 | 15 | def cpu_nms_wrapper(thresh): 16 | def _nms(dets): 17 | return cpu_nms(dets, thresh) 18 | return _nms 19 | 20 | 21 | def gpu_nms_wrapper(thresh, device_id): 22 | def _nms(dets): 23 | return gpu_nms(dets, thresh, device_id) 24 | if gpu_nms is not None: 25 | return _nms 26 | else: 27 | return cpu_nms_wrapper(thresh) 28 | 29 | 30 | def nms(dets, thresh): 31 | """ 32 | greedily select boxes with high confidence and overlap with current maximum <= thresh 33 | rule out overlap >= thresh 34 | :param dets: [[x1, y1, x2, y2 score]] 35 | :param thresh: retain overlap < thresh 36 | :return: indexes to keep 37 | """ 38 | x1 = dets[:, 0] 39 | y1 = dets[:, 1] 40 | x2 = dets[:, 2] 41 | y2 = dets[:, 3] 42 | scores = dets[:, 4] 43 | 44 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 45 | order = scores.argsort()[::-1] 46 | 47 | keep = [] 48 | while order.size > 0: 49 | i = order[0] 50 | keep.append(i) 51 | xx1 = np.maximum(x1[i], x1[order[1:]]) 52 | yy1 = np.maximum(y1[i], y1[order[1:]]) 53 | xx2 = np.minimum(x2[i], x2[order[1:]]) 54 | yy2 = np.minimum(y2[i], y2[order[1:]]) 55 | 56 | w = np.maximum(0.0, xx2 - xx1 + 1) 57 | h = np.maximum(0.0, yy2 - yy1 + 1) 58 | inter = w * h 59 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | -------------------------------------------------------------------------------- /rcnn/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/336d2a27c91e3c0663d2dcf0b13574674d30f88e 2 | -------------------------------------------------------------------------------- /rcnn/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /rcnn/pycocotools/_mask.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c 2 | # distutils: sources = maskApi.c 3 | 4 | #************************************************************************** 5 | # Microsoft COCO Toolbox. version 2.0 6 | # Data, paper, and tutorials available at: http://mscoco.org/ 7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 8 | # Licensed under the Simplified BSD License [see coco/license.txt] 9 | #************************************************************************** 10 | 11 | __author__ = 'tsungyi' 12 | 13 | import sys 14 | PYTHON_VERSION = sys.version_info[0] 15 | 16 | # import both Python-level and C-level symbols of Numpy 17 | # the API uses Numpy to interface C and Python 18 | import numpy as np 19 | cimport numpy as np 20 | from libc.stdlib cimport malloc, free 21 | 22 | # intialized Numpy. must do. 
23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rleToBbox( const RLE *R, BB bb, siz n ) 49 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 50 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 51 | char* rleToString( const RLE *R ) 52 | void rleFrString( RLE *R, char *s, siz h, siz w ) 53 | 54 | # python class to wrap RLE array in C 55 | # the class handles the memory allocation and deallocation 56 | cdef class RLEs: 57 | cdef RLE *_R 58 | cdef siz _n 59 | 60 | def __cinit__(self, siz n =0): 61 | rlesInit(&self._R, n) 62 | self._n = n 63 | 64 | # free the RLE array here 65 | def __dealloc__(self): 66 | if self._R is not NULL: 67 | for i in range(self._n): 68 | free(self._R[i].cnts) 69 | free(self._R) 70 | def __getattr__(self, key): 71 | if key == 'n': 72 | return self._n 73 | raise AttributeError(key) 74 | 75 | # python class to wrap Mask array in C 76 | # the class handles the memory allocation and deallocation 77 | cdef class Masks: 78 | cdef byte *_mask 79 | cdef siz _h 80 | cdef siz _w 81 | cdef siz _n 82 | 83 | def __cinit__(self, h, w, n): 84 | self._mask = malloc(h*w*n* sizeof(byte)) 85 | self._h = h 86 | self._w = w 87 | self._n = n 88 | # def __dealloc__(self): 89 | # the memory management of _mask has been passed to np.ndarray 90 | # it doesn't need to be freed here 91 | 92 | # called when passing into np.array() and return an np.ndarray in column-major order 93 | def __array__(self): 94 | cdef np.npy_intp shape[1] 95 | shape[0] = self._h*self._w*self._n 96 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 97 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 98 | # The _mask allocated by Masks is now handled by ndarray 99 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 100 | return ndarray 101 | 102 | # internal conversion from Python RLEs object to compressed RLE format 103 | def _toString(RLEs Rs): 104 | cdef siz n = Rs.n 105 | cdef bytes py_string 106 | cdef char* c_string 107 | objs = [] 108 | for i in range(n): 109 | c_string = rleToString( &Rs._R[i] ) 110 | py_string = c_string 111 | objs.append({ 112 | 'size': [Rs._R[i].h, Rs._R[i].w], 113 | 'counts': py_string 114 | }) 115 | free(c_string) 116 | return objs 117 | 118 | # internal conversion from compressed RLE format to Python RLEs object 119 | def _frString(rleObjs): 120 | cdef siz n = len(rleObjs) 121 | Rs = RLEs(n) 122 | cdef bytes py_string 123 | cdef char* c_string 124 | for i, obj in enumerate(rleObjs): 125 | if PYTHON_VERSION == 2: 126 | py_string = 
str(obj['counts']).encode('utf8')
127 |         elif PYTHON_VERSION == 3:
128 |             py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts']
129 |         else:
130 |             raise Exception('Python version must be 2 or 3')
131 |         c_string = py_string
132 |         rleFrString( <RLE*> &Rs._R[i], <char*> c_string, obj['size'][0], obj['size'][1] )
133 |     return Rs
134 | 
135 | # encode mask to RLEs objects
136 | # list of RLE string can be generated by RLEs member function
137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
138 |     h, w, n = mask.shape[0], mask.shape[1], mask.shape[2]
139 |     cdef RLEs Rs = RLEs(n)
140 |     rleEncode(Rs._R, <byte*> mask.data, h, w, n)
141 |     objs = _toString(Rs)
142 |     return objs
143 | 
144 | # decode mask from compressed list of RLE string or RLEs object
145 | def decode(rleObjs):
146 |     cdef RLEs Rs = _frString(rleObjs)
147 |     h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n
148 |     masks = Masks(h, w, n)
149 |     rleDecode(<RLE*> Rs._R, masks._mask, n);
150 |     return np.array(masks)
151 | 
152 | def merge(rleObjs, intersect=0):
153 |     cdef RLEs Rs = _frString(rleObjs)
154 |     cdef RLEs R = RLEs(1)
155 |     rleMerge(<RLE*> Rs._R, <RLE*> R._R, <siz> Rs._n, intersect)
156 |     obj = _toString(R)[0]
157 |     return obj
158 | 
159 | def area(rleObjs):
160 |     cdef RLEs Rs = _frString(rleObjs)
161 |     cdef uint* _a = <uint*> malloc(Rs._n * sizeof(uint))
162 |     rleArea(Rs._R, Rs._n, _a)
163 |     cdef np.npy_intp shape[1]
164 |     shape[0] = <np.npy_intp> Rs._n
165 |     a = np.array((Rs._n, ), dtype=np.uint8)
166 |     a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
167 |     PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
168 |     return a
169 | 
170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox).
171 | def iou( dt, gt, pyiscrowd ):
172 |     def _preproc(objs):
173 |         if len(objs) == 0:
174 |             return objs
175 |         if type(objs) == np.ndarray:
176 |             if len(objs.shape) == 1:
177 |                 objs = objs.reshape((objs.shape[0], 1))
178 |             # check if it's Nx4 bbox
179 |             if not len(objs.shape) == 2 or not objs.shape[1] == 4:
180 |                 raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
181 |             objs = objs.astype(np.double)
182 |         elif type(objs) == list:
183 |             # check if list is in box format and convert it to np.ndarray
184 |             isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
185 |             isrle = np.all(np.array([type(obj) == dict for obj in objs]))
186 |             if isbox:
187 |                 objs = np.array(objs, dtype=np.double)
188 |                 if len(objs.shape) == 1:
189 |                     objs = objs.reshape((1,objs.shape[0]))
190 |             elif isrle:
191 |                 objs = _frString(objs)
192 |             else:
193 |                 raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
194 |         else:
195 |             raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 196 | return objs 197 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 198 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 199 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 200 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 201 | def _len(obj): 202 | cdef siz N = 0 203 | if type(obj) == RLEs: 204 | N = obj.n 205 | elif len(obj)==0: 206 | pass 207 | elif type(obj) == np.ndarray: 208 | N = obj.shape[0] 209 | return N 210 | # convert iscrowd to numpy array 211 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 212 | # simple type checking 213 | cdef siz m, n 214 | dt = _preproc(dt) 215 | gt = _preproc(gt) 216 | m = _len(dt) 217 | n = _len(gt) 218 | if m == 0 or n == 0: 219 | return [] 220 | if not type(dt) == type(gt): 221 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 222 | 223 | # define local variables 224 | cdef double* _iou = 0 225 | cdef np.npy_intp shape[1] 226 | # check type and assign iou function 227 | if type(dt) == RLEs: 228 | _iouFun = _rleIou 229 | elif type(dt) == np.ndarray: 230 | _iouFun = _bbIou 231 | else: 232 | raise Exception('input data type not allowed.') 233 | _iou = malloc(m*n* sizeof(double)) 234 | iou = np.zeros((m*n, ), dtype=np.double) 235 | shape[0] = m*n 236 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 237 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 238 | _iouFun(dt, gt, iscrowd, m, n, iou) 239 | return iou.reshape((m,n), order='F') 240 | 241 | def toBbox( rleObjs ): 242 | cdef RLEs Rs = _frString(rleObjs) 243 | cdef siz n = Rs.n 244 | cdef BB _bb = malloc(4*n* sizeof(double)) 245 | rleToBbox( Rs._R, _bb, n ) 246 | cdef np.npy_intp shape[1] 247 | shape[0] = 4*n 248 | bb = np.array((1,4*n), dtype=np.double) 249 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 250 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 251 | return bb 252 | 253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 254 | cdef siz n = bb.shape[0] 255 | Rs = RLEs(n) 256 | rleFrBbox( Rs._R, bb.data, h, w, n ) 257 | objs = _toString(Rs) 258 | return objs 259 | 260 | def frPoly( poly, siz h, siz w ): 261 | cdef np.ndarray[np.double_t, ndim=1] np_poly 262 | n = len(poly) 263 | Rs = RLEs(n) 264 | for i, p in enumerate(poly): 265 | np_poly = np.array(p, dtype=np.double, order='F') 266 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 267 | objs = _toString(Rs) 268 | return objs 269 | 270 | def frUncompressedRLE(ucRles, siz h, siz w): 271 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 272 | cdef RLE R 273 | cdef uint *data 274 | n = len(ucRles) 275 | objs = [] 276 | for i in range(n): 277 | Rs = RLEs(1) 278 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 279 | # time for malloc can be saved here but it's fine 280 | data = malloc(len(cnts)* sizeof(uint)) 281 | for j in range(len(cnts)): 282 | data[j] = cnts[j] 283 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 284 | Rs._R[0] = R 285 | objs.append(_toString(Rs)[0]) 286 | return objs 287 | 288 | def frPyObjects(pyobj, h, w): 289 | # encode rle from a list of python objects 290 | if type(pyobj) == np.ndarray: 291 | objs = frBbox(pyobj, h, w) 292 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 293 | objs = frBbox(pyobj, h, w) 294 | elif type(pyobj) == list and len(pyobj[0]) > 4: 295 | objs = frPoly(pyobj, h, w) 296 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 297 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 298 | objs = frUncompressedRLE(pyobj, h, w) 299 | # encode rle from single python object 300 | elif type(pyobj) == list and len(pyobj) == 4: 301 | objs = frBbox([pyobj], h, w)[0] 302 | elif type(pyobj) == list and len(pyobj) > 4: 303 | objs = frPoly([pyobj], h, w)[0] 304 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 305 | objs = frUncompressedRLE([pyobj], h, w)[0] 306 | else: 307 | raise Exception('input type is not supported.') 308 | return objs 309 | -------------------------------------------------------------------------------- /rcnn/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from rcnn.pycocotools import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
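# A toy pure-Python rendering of the RLE counts described above. This mirrors
# only the stated alternating-run semantics (counts start with the number of
# zeros); the real codec is the compiled C below, which additionally applies
# LEB128-style compression to the counts.
def _rle_counts(m):
    # run lengths of a 0/1 vector, always starting with the count of zeros
    counts, prev, run = [], 0, 0
    for v in m:
        if v == prev:
            run += 1
        else:
            counts.append(run)
            prev, run = v, 1
    counts.append(run)
    return counts

def _rle_expand(counts):
    # inverse: rebuild the 0/1 vector from the alternating run lengths
    out, v = [], 0
    for c in counts:
        out += [v] * c
        v = 1 - v
    return out

assert _rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]   # example from the text
assert _rle_counts([1, 1, 1, 1, 1, 1, 0]) == [0, 6, 1]
assert _rle_expand([2, 3, 1, 1]) == [0, 0, 1, 1, 1, 0, 1]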
38 | #
39 | # Usage:
40 | #  Rs     = encode( masks )
41 | #  masks  = decode( Rs )
42 | #  R      = merge( Rs, intersect=false )
43 | #  o      = iou( dt, gt, iscrowd )
44 | #  a      = area( Rs )
45 | #  bbs    = toBbox( Rs )
46 | #  Rs     = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | #  Rs      - [dict] Run-length encoding of binary masks
50 | #  R       - dict Run-length encoding of binary mask
51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | #  dt,gt   - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criterion. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | #  gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criterion for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox.      version 2.0
72 | # Data, paper, and tutorials available at:  http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 | 
76 | iou         = _mask.iou
77 | merge       = _mask.merge
78 | frPyObjects = _mask.frPyObjects
79 | 
80 | def encode(bimask):
81 |     if len(bimask.shape) == 3:
82 |         return _mask.encode(bimask)
83 |     elif len(bimask.shape) == 2:
84 |         h, w = bimask.shape
85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
86 | 
87 | def decode(rleObjs):
88 |     if type(rleObjs) == list:
89 |         return _mask.decode(rleObjs)
90 |     else:
91 |         return _mask.decode([rleObjs])[:,:,0]
92 | 
93 | def area(rleObjs):
94 |     if type(rleObjs) == list:
95 |         return _mask.area(rleObjs)
96 |     else:
97 |         return _mask.area([rleObjs])[0]
98 | 
99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]
104 | 
-------------------------------------------------------------------------------- /rcnn/pycocotools/maskApi.c: --------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox.      version 2.0
3 | * Data, paper, and tutorials available at:  http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include <math.h>
9 | #include <stdlib.h>
10 | 
11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
12 | uint umax( uint a, uint b ) { return (a>b) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /rcnn/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. 
*/ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /rcnn/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /rcnn/sample_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | 4 | config = edict() 5 | 6 | # network related params 7 | config.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 8 | config.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 9 | config.PIXEL_SCALE = 1.0 10 | config.IMAGE_STRIDE = 0 11 | 12 | # dataset related params 13 | config.NUM_CLASSES = 2 14 | config.PRE_SCALES = [(1200, 1600)] # first is scale (the shorter side); second is max size 15 | config.SCALES = [(640, 640)] # first is scale (the shorter side); second is max size 16 | #config.SCALES = [(800, 800)] # first is scale (the shorter side); second is max size 17 | config.ORIGIN_SCALE = False 18 | 19 | _ratio = (1.,) 20 | 21 | RAC_SSH = { 22 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 23 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 24 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 25 | } 26 | 27 | _ratio = (1.,1.5) 28 | RAC_SSH2 = { 29 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 30 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 31 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 32 | } 33 | 34 | _ratio = (1.,1.5) 35 | RAC_SSH3 = { 36 | '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 37 | '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 38 | '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 39 | '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 40 | } 41 | 42 | RAC_RETINA = {} 43 | _ratios = (1.0,) 44 | _ass = 2.0**(1.0/3) 45 | _basescale = 1.0 46 | for _stride in [4, 8, 16, 32, 64]: 47 | key = str(_stride) 48 | value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999} 49 | scales = [] 50 | for _ in range(3): 51 | scales.append(_basescale) 52 | _basescale *= _ass 53 | value['SCALES'] = tuple(scales) 54 | RAC_RETINA[key] = value 55 | 56 | 57 | config.RPN_ANCHOR_CFG = RAC_SSH #default 58 | 59 | config.NET_MODE = 2 60 | config.HEAD_MODULE = 'SSH' 61 | #config.HEAD_MODULE = 'RF' 62 | config.LR_MODE = 0 63 | config.LANDMARK_LR_MULT = 2.0 64 | config.HEAD_FILTER_NUM = 256 65 | config.CONTEXT_FILTER_RATIO = 1 66 | config.max_feat_channel = 9999 67 | 68 | config.USE_CROP = True 69 | config.USE_DCN = 0 
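# A quick standalone check of the RAC_RETINA construction above. Note that
# _basescale is carried across strides rather than reset, so the five levels
# share a single geometric series with ratio 2**(1.0/3), three scales per
# stride (with BASE_SIZE 16, the absolute anchor side is 16 * scale):
#   stride  4: (1.0,   1.26,  1.59)
#   stride  8: (2.0,   2.52,  3.17)
#   stride 16: (4.0,   5.04,  6.35)
#   stride 32: (8.0,  10.08, 12.70)
#   stride 64: (16.0, 20.16, 25.40)
_chk = [(2.0 ** (1.0 / 3)) ** i for i in range(13)]
assert abs(RAC_RETINA['4']['SCALES'][0] - _chk[0]) < 1e-6     # 1.0
assert abs(RAC_RETINA['8']['SCALES'][0] - _chk[3]) < 1e-6     # 2.0
assert abs(RAC_RETINA['64']['SCALES'][0] - _chk[12]) < 1e-6   # 2.0**4 == 16.0
del _chk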
70 | config.FACE_LANDMARK = True 71 | config.USE_OCCLUSION = False 72 | config.USE_BLUR = False 73 | config.MORE_SMALL_BOX = True 74 | 75 | config.LAYER_FIX = False 76 | 77 | config.HEAD_BOX = False 78 | config.DENSE_ANCHOR = False 79 | config.USE_MAXOUT = 0 80 | config.SHARE_WEIGHT_BBOX = False 81 | config.SHARE_WEIGHT_LANDMARK = False 82 | 83 | config.RANDOM_FEAT_STRIDE = False 84 | config.NUM_CPU = 4 85 | config.MIXUP = 0.0 86 | config.USE_3D = False 87 | 88 | #config.BBOX_MASK_THRESH = 0 89 | config.COLOR_MODE = 2 90 | config.COLOR_JITTERING = 0.125 91 | #config.COLOR_JITTERING = 0 92 | #config.COLOR_JITTERING = 0.2 93 | 94 | 95 | config.TRAIN = edict() 96 | 97 | config.TRAIN.IMAGE_ALIGN = 0 98 | config.TRAIN.MIN_BOX_SIZE = 0 99 | config.BBOX_MASK_THRESH = config.TRAIN.MIN_BOX_SIZE 100 | # R-CNN and RPN 101 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 102 | config.TRAIN.BATCH_IMAGES = 8 103 | # e2e changes behavior of anchor loader and metric 104 | config.TRAIN.END2END = True 105 | # group images with similar aspect ratio 106 | config.TRAIN.ASPECT_GROUPING = False 107 | 108 | # RPN anchor loader 109 | # rpn anchors batch size 110 | config.TRAIN.RPN_ENABLE_OHEM = 2 111 | config.TRAIN.RPN_BATCH_SIZE = 256 112 | # rpn anchors sampling params 113 | config.TRAIN.RPN_FG_FRACTION = 0.25 114 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5 115 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 116 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 117 | config.TRAIN.RPN_FORCE_POSITIVE = False 118 | # rpn bounding box regression params 119 | #config.TRAIN.RPN_BBOX_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 120 | #config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 121 | #config.TRAIN.RPN_LANDMARK_WEIGHTS = (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0) 122 | #config.TRAIN.RPN_INVALID_LANDMARK_WEIGHTS = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) 123 | 124 | # used for end2end training 125 | # RPN proposal 126 | #config.TRAIN.CXX_PROPOSAL = True 127 | #config.TRAIN.RPN_NMS_THRESH = 0.7 128 | #config.TRAIN.RPN_PRE_NMS_TOP_N = 12000 129 | #config.TRAIN.RPN_POST_NMS_TOP_N = 2000 130 | #config.TRAIN.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE 131 | #config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True 132 | #config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0) 133 | #config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2) 134 | 135 | config.TEST = edict() 136 | 137 | # R-CNN testing 138 | # use rpn to generate proposal 139 | config.TEST.HAS_RPN = False 140 | # size of images for each device 141 | config.TEST.BATCH_IMAGES = 1 142 | 143 | # RPN proposal 144 | config.TEST.CXX_PROPOSAL = True 145 | config.TEST.RPN_NMS_THRESH = 0.3 146 | config.TEST.RPN_PRE_NMS_TOP_N = 1000 147 | config.TEST.RPN_POST_NMS_TOP_N = 3000 148 | #config.TEST.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE 149 | #config.TEST.RPN_MIN_SIZE = [0,0,0] 150 | 151 | # RCNN nms 152 | config.TEST.NMS = 0.3 153 | 154 | config.TEST.SCORE_THRESH = 0.05 155 | config.TEST.IOU_THRESH = 0.5 156 | 157 | 158 | # network settings 159 | network = edict() 160 | 161 | network.ssh = edict() 162 | 163 | network.mnet = edict() 164 | #network.mnet.pretrained = 'model/mnasnet' 165 | #network.mnet.pretrained = 'model/mobilenetv2_0_5' 166 | #network.mnet.pretrained = 'model/mobilenet_0_5' 167 | #network.mnet.MULTIPLIER = 0.5 168 | #network.mnet.pretrained = 'model/mobilenet_0_25' 169 | #network.mnet.pretrained_epoch = 0 170 | #network.mnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485]) 171 | #network.mnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229]) 172 | #network.mnet.PIXEL_SCALE = 255.0 173 | 
network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 174 | network.mnet.BATCH_IMAGES = 16 175 | network.mnet.HEAD_FILTER_NUM = 64 176 | network.mnet.CONTEXT_FILTER_RATIO = 1 177 | 178 | network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 179 | network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 180 | network.mnet.PIXEL_SCALE = 1.0 181 | #network.mnet.pretrained = 'model/mobilenetfd_0_25' #78 182 | #network.mnet.pretrained = 'model/mobilenetfd2' #75 183 | network.mnet.pretrained = 'model/mobilenet025fd0' #78 184 | #network.mnet.pretrained = 'model/mobilenet025fd1' #75 185 | #network.mnet.pretrained = 'model/mobilenet025fd2' # 186 | network.mnet.pretrained_epoch = 0 187 | network.mnet.max_feat_channel = 8888 188 | network.mnet.COLOR_MODE = 1 189 | network.mnet.USE_CROP = True 190 | network.mnet.RPN_ANCHOR_CFG = RAC_SSH 191 | network.mnet.LAYER_FIX = True 192 | network.mnet.LANDMARK_LR_MULT = 2.5 193 | 194 | 195 | network.resnet = edict() 196 | #network.resnet.pretrained = 'model/ResNet50_v1d' 197 | #network.resnet.pretrained = 'model/resnet-50' 198 | network.resnet.pretrained = 'model/resnet-152' 199 | #network.resnet.pretrained = 'model/senet154' 200 | #network.resnet.pretrained = 'model/densenet161' 201 | network.resnet.pretrained_epoch = 0 202 | #network.mnet.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 203 | #network.mnet.PIXEL_STDS = np.array([57.375, 57.12, 58.393]) 204 | #network.resnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485]) 205 | #network.resnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229]) 206 | #network.resnet.PIXEL_SCALE = 255.0 207 | network.resnet.lr_step = '1,2,3,4,5,55,68,80' 208 | network.resnet.lr = 0.001 209 | network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0]) 210 | network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 211 | network.resnet.PIXEL_SCALE = 1.0 212 | network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling'] 213 | network.resnet.BATCH_IMAGES = 8 214 | network.resnet.HEAD_FILTER_NUM = 256 215 | network.resnet.CONTEXT_FILTER_RATIO = 1 216 | network.resnet.USE_DCN = 2 217 | network.resnet.RPN_BATCH_SIZE = 256 218 | network.resnet.RPN_ANCHOR_CFG = RAC_RETINA 219 | 220 | network.resnet.USE_DCN = 0 221 | network.resnet.pretrained = 'model/resnet-50' 222 | network.resnet.RPN_ANCHOR_CFG = RAC_SSH 223 | 224 | 225 | # dataset settings 226 | dataset = edict() 227 | 228 | dataset.widerface = edict() 229 | dataset.widerface.dataset = 'widerface' 230 | dataset.widerface.image_set = 'train' 231 | dataset.widerface.test_image_set = 'val' 232 | dataset.widerface.root_path = 'data' 233 | dataset.widerface.dataset_path = 'data/widerface' 234 | dataset.widerface.NUM_CLASSES = 2 235 | 236 | dataset.retinaface = edict() 237 | dataset.retinaface.dataset = 'retinaface' 238 | dataset.retinaface.image_set = 'train' 239 | dataset.retinaface.test_image_set = 'val' 240 | dataset.retinaface.root_path = 'data' 241 | dataset.retinaface.dataset_path = 'data/retinaface' 242 | dataset.retinaface.NUM_CLASSES = 2 243 | 244 | # default settings 245 | default = edict() 246 | 247 | config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling'] 248 | #config.FIXED_PARAMS = ['^.*upsampling'] 249 | #config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3'] 250 | #config.FIXED_PARAMS = ['^conv0', '^stage1', 'gamma', 'beta'] #for resnet 251 | 252 | # default network 253 | default.network = 'resnet' 254 | default.pretrained = 'model/resnet-152' 255 | #default.network = 'resnetssh' 256 | default.pretrained_epoch = 0 257 | # default dataset 258 | default.dataset = 'retinaface' 259 | 
default.image_set = 'train'
260 | default.test_image_set = 'val'
261 | default.root_path = 'data'
262 | default.dataset_path = 'data/retinaface'
263 | # default training
264 | default.frequent = 20
265 | default.kvstore = 'device'
266 | # default e2e
267 | default.prefix = 'model/retinaface'
268 | default.end_epoch = 10000
269 | default.lr_step = '55,68,80'
270 | default.lr = 0.01
271 | 
272 | def generate_config(_network, _dataset):
273 |     for k, v in network[_network].items():
274 |         if k in config:
275 |             config[k] = v
276 |         elif k in default:
277 |             default[k] = v
278 |         if k in config.TRAIN:
279 |             config.TRAIN[k] = v
280 |     for k, v in dataset[_dataset].items():
281 |         if k in config:
282 |             config[k] = v
283 |         elif k in default:
284 |             default[k] = v
285 |         if k in config.TRAIN:
286 |             config.TRAIN[k] = v
287 |     config.network = _network
288 |     config.dataset = _dataset
289 |     config.RPN_FEAT_STRIDE = []
290 |     num_anchors = []
291 |     for k in config.RPN_ANCHOR_CFG:
292 |         config.RPN_FEAT_STRIDE.append( int(k) )
293 |         _num_anchors = len(config.RPN_ANCHOR_CFG[k]['SCALES'])*len(config.RPN_ANCHOR_CFG[k]['RATIOS'])
294 |         if config.DENSE_ANCHOR:
295 |             _num_anchors *= 2
296 |         config.RPN_ANCHOR_CFG[k]['NUM_ANCHORS'] = _num_anchors
297 |         num_anchors.append(_num_anchors)
298 |     config.RPN_FEAT_STRIDE = sorted(config.RPN_FEAT_STRIDE, reverse=True)
299 |     for j in range(1, len(num_anchors)):
300 |         assert num_anchors[0] == num_anchors[j]
301 |     config.NUM_ANCHORS = num_anchors[0]
302 | 
303 | 
-------------------------------------------------------------------------------- /rcnn/symbol/__init__.py: --------------------------------------------------------------------------------
1 | from .symbol_ssh import *
2 | from .symbol_mnet import *
3 | from .symbol_resnet import *
4 | 
-------------------------------------------------------------------------------- /rcnn/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/tools/__init__.py
-------------------------------------------------------------------------------- /rcnn/tools/demo_single_image.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from ..config import config, default, generate_config
3 | from ..symbol import *
4 | from ..utils.load_model import load_param
5 | from ..core.module import MutableModule
6 | from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes
7 | from rcnn.processing.nms import py_nms_wrapper
8 | import mxnet as mx
9 | #from rcnn.processing.nms import processing_nms_wrapper
10 | bbox_pred = nonlinear_pred
11 | 
12 | import numpy as np
13 | import cv2
14 | import matplotlib.pyplot as plt
15 | 
16 | def demo_maskrcnn(network, ctx, prefix, epoch, img_path,
17 |                   vis=True, has_rpn=True, thresh=0.001):
18 | 
19 |     assert has_rpn, "Only has_rpn==True is supported."
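    # A usage sketch for generate_config from rcnn/sample_config.py above,
    # assuming the repository root is on PYTHONPATH; the expected values are
    # read off the 'mnet' overrides together with the RAC_SSH anchor config:
    #
    #   from rcnn.sample_config import config, default, generate_config
    #   generate_config('mnet', 'retinaface')
    #   config.RPN_FEAT_STRIDE  # -> [32, 16, 8], strides sorted largest first
    #   config.NUM_ANCHORS      # -> 2, i.e. len(SCALES) * len(RATIOS) = 2 * 1 (DENSE_ANCHOR off)
    #   default.pretrained      # -> 'model/mobilenet025fd0'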
20 | #sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) 21 | sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES) 22 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 23 | for k,v in arg_params.iteritems(): 24 | print(k, v.shape) 25 | 26 | max_image_shape = (1,3,1024,1024) 27 | max_data_shapes = [("data",max_image_shape),("im_info",(1,3))] 28 | mod = MutableModule(symbol = sym, data_names = ["data","im_info"], label_names= None, 29 | max_data_shapes = max_data_shapes, 30 | context=ctx) 31 | mod.bind(data_shapes = max_data_shapes, label_shapes = None, for_training=False) 32 | mod.init_params(arg_params=arg_params, aux_params=aux_params) 33 | 34 | class OneDataBatch(): 35 | def __init__(self,img): 36 | im_info = mx.nd.array([[img.shape[0],img.shape[1],1.0]]) 37 | img = np.transpose(img,(2,0,1)) 38 | img = img[np.newaxis,(2,1,0)] 39 | self.data = [mx.nd.array(img),im_info] 40 | self.label = None 41 | self.provide_label = None 42 | self.provide_data = [("data",(1,3,img.shape[2],img.shape[3])),("im_info",(1,3))] 43 | 44 | img_ori = cv2.imread(img_path) 45 | batch = OneDataBatch(img_ori) 46 | mod.forward(batch, False) 47 | results = mod.get_outputs() 48 | output = dict(zip(mod.output_names, results)) 49 | rois = output['rois_output'].asnumpy()[:, 1:] 50 | 51 | 52 | scores = output['cls_prob_reshape_output'].asnumpy()[0] 53 | bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0] 54 | mask_output = output['mask_prob_output'].asnumpy() 55 | 56 | pred_boxes = bbox_pred(rois, bbox_deltas) 57 | pred_boxes = clip_boxes(pred_boxes, [img_ori.shape[0],img_ori.shape[1]]) 58 | 59 | nms = py_nms_wrapper(config.TEST.NMS) 60 | #nms = processing_nms_wrapper(config.TEST.NMS, 0.7) 61 | boxes= pred_boxes 62 | 63 | CLASSES = ('__background__', 'text') 64 | 65 | all_boxes = [[[] for _ in xrange(1)] 66 | for _ in xrange(len(CLASSES))] 67 | all_masks = [[[] for _ in xrange(1)] 68 | for _ in xrange(len(CLASSES))] 69 | label = np.argmax(scores, axis=1) 70 | label = label[:, np.newaxis] 71 | 72 | for cls in CLASSES: 73 | cls_ind = CLASSES.index(cls) 74 | cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] 75 | cls_masks = mask_output[:, cls_ind, :, :] 76 | cls_scores = scores[:, cls_ind, np.newaxis] 77 | #print cls_scores.shape, label.shape 78 | keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0] 79 | cls_masks = cls_masks[keep, :, :] 80 | dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :] 81 | keep = nms(dets) 82 | #print dets.shape, cls_masks.shape 83 | all_boxes[cls_ind] = dets[keep, :] 84 | all_masks[cls_ind] = cls_masks[keep, :, :] 85 | 86 | boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))] 87 | masks_this_image = [[]] + [all_masks[j] for j in range(1, len(CLASSES))] 88 | 89 | 90 | import copy 91 | import random 92 | class_names = CLASSES 93 | color_white = (255, 255, 255) 94 | scale = 1.0 95 | im = copy.copy(img_ori) 96 | 97 | for j, name in enumerate(class_names): 98 | if name == '__background__': 99 | continue 100 | color = (random.randint(0, 256), random.randint(0, 256), random.randint(0, 256)) # generate a random color 101 | dets = boxes_this_image[j] 102 | masks = masks_this_image[j] 103 | for i in range(len(dets)): 104 | bbox = dets[i, :4] * scale 105 | if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[1] or bbox[2] == bbox[3] : 106 | continue 107 | score = dets[i, -1] 108 | bbox = map(int, bbox) 109 | 
cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=color, thickness=2) 110 | cv2.putText(im, '%s %.3f' % (class_names[j], score), (bbox[0], bbox[1] + 10), 111 | color=color_white, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5) 112 | mask = masks[i, :, :] 113 | mask = cv2.resize(mask, (bbox[2] - bbox[0], (bbox[3] - bbox[1])), interpolation=cv2.INTER_LINEAR) 114 | mask[mask > 0.5] = 1 115 | mask[mask <= 0.5] = 0 116 | mask_color = random.randint(0, 255) 117 | c = random.randint(0, 2) 118 | target = im[bbox[1]: bbox[3], bbox[0]: bbox[2], c] + mask_color * mask 119 | target[target >= 255] = 255 120 | im[bbox[1]: bbox[3], bbox[0]: bbox[2], c] = target 121 | ##im = im[:,:,(2,1,0)] 122 | ##plt.imshow(im) 123 | cv2.imwrite("figures/test_result.jpg",im) 124 | #if vis: 125 | # plt.show() 126 | # else: 127 | # plt.savefig("figures/test_result.jpg") 128 | def parse_args(): 129 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 130 | # general 131 | parser.add_argument('--network', help='network name', default=default.network, type=str) 132 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 133 | args, rest = parser.parse_known_args() 134 | generate_config(args.network, args.dataset) 135 | # testing 136 | parser.add_argument('--prefix', help='model to test with', default=default.rcnn_prefix, type=str) 137 | parser.add_argument('--epoch', help='model to test with', default=default.rcnn_epoch, type=int) 138 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 139 | # rcnn 140 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 141 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 142 | parser.add_argument('--image_name', help='image file path',type=str) 143 | 144 | 145 | args = parser.parse_args() 146 | 147 | return args 148 | 149 | 150 | def main(): 151 | args = parse_args() 152 | ctx = mx.gpu(args.gpu) 153 | print args 154 | demo_maskrcnn(network = args.network, 155 | ctx = ctx, 156 | prefix = args.prefix, 157 | epoch = args.epoch, 158 | img_path = args.image_name, 159 | vis= args.vis, 160 | has_rpn = True, 161 | thresh = args.thresh) 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /rcnn/tools/reeval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import os 7 | import mxnet as mx 8 | 9 | from ..logger import logger 10 | from ..config import config, default, generate_config 11 | from ..dataset import * 12 | 13 | 14 | def reeval(args): 15 | # load imdb 16 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 17 | 18 | # load detection results 19 | cache_file = os.path.join(imdb.cache_path, imdb.name, 'detections.pkl') 20 | with open(cache_file) as f: 21 | detections = pickle.load(f) 22 | 23 | # eval 24 | imdb.evaluate_detections(detections) 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='imdb test') 29 | # general 30 | parser.add_argument('--network', help='network name', default=default.network, type=str) 31 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 32 | args, rest = parser.parse_known_args() 33 | generate_config(args.network, args.dataset) 34 | parser.add_argument('--image_set', 
help='image_set name', default=default.image_set, type=str) 35 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 36 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 37 | # other 38 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | logger.info('Called with argument: %s' % args) 46 | reeval(args) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /rcnn/tools/test_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, pred_eval 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rcnn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, has_rpn, proposal, thresh): 17 | # set config 18 | if has_rpn: 19 | config.TEST.HAS_RPN = True 20 | 21 | # print config 22 | logger.info(pprint.pformat(config)) 23 | 24 | # load symbol and testing data 25 | if has_rpn: 26 | sym = eval('get_' + network + '_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | else: 30 | sym = eval('get_' + network + '_rcnn_test')(num_classes=config.NUM_CLASSES) 31 | imdb = eval(dataset)(image_set, root_path, dataset_path) 32 | gt_roidb = imdb.gt_roidb() 33 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb) 34 | 35 | # get test data iter 36 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) 37 | 38 | # load model 39 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 40 | 41 | # infer shape 42 | data_shape_dict = dict(test_data.provide_data) 43 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 44 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 45 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 46 | 47 | # check parameters 48 | for k in sym.list_arguments(): 49 | if k in data_shape_dict or 'label' in k: 50 | continue 51 | assert k in arg_params, k + ' not initialized' 52 | assert arg_params[k].shape == arg_shape_dict[k], \ 53 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 54 | for k in sym.list_auxiliary_states(): 55 | assert k in aux_params, k + ' not initialized' 56 | assert aux_params[k].shape == aux_shape_dict[k], \ 57 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 58 | 59 | # decide maximum shape 60 | data_names = [k[0] for k in test_data.provide_data] 61 | label_names = None 62 | max_data_shape = [('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 63 | if not has_rpn: 64 | max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 65 | 66 | # create predictor 67 | predictor = Predictor(sym, data_names, label_names, 68 | context=ctx, max_data_shapes=max_data_shape, 69 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 70 | arg_params=arg_params, aux_params=aux_params) 71 | 72 | # start detection 73 | pred_eval(predictor, test_data, imdb, vis=vis, thresh=thresh) 74 | 75 | 76 | def parse_args(): 77 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 78 | # general 79 | parser.add_argument('--network', help='network name', default=default.network, type=str) 80 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 81 | args, rest = parser.parse_known_args() 82 | generate_config(args.network, args.dataset) 83 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 84 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 85 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 86 | # testing 87 | parser.add_argument('--prefix', help='model to test with', default=default.rcnn_prefix, type=str) 88 | parser.add_argument('--epoch', help='model to test with', default=default.rcnn_epoch, type=int) 89 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 90 | # rcnn 91 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 92 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 93 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 94 | parser.add_argument('--has_rpn', help='generate proposals on the fly', action='store_true') 95 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', type=str) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def main(): 101 | args = parse_args() 102 | logger.info('Called with argument: %s' % args) 103 | ctx = 
mx.gpu(args.gpu) 104 | test_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 105 | ctx, args.prefix, args.epoch, 106 | args.vis, args.shuffle, args.has_rpn, args.proposal, args.thresh) 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /rcnn/tools/test_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, generate_proposals, test_proposals 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rpn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, thresh, test_output=False): 17 | # rpn generate proposal config 18 | config.TEST.HAS_RPN = True 19 | 20 | # print config 21 | logger.info(pprint.pformat(config)) 22 | 23 | # load symbol 24 | sym = eval('get_' + network + '_rpn_test')() 25 | 26 | # load dataset and prepare imdb for training 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=True, withlabel=True) 30 | 31 | # load model 32 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx) 33 | 34 | # infer shape 35 | data_shape_dict = dict(test_data.provide_data) 36 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 37 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 38 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 39 | 40 | # check parameters 41 | for k in sym.list_arguments(): 42 | if k in data_shape_dict or 'label' in k: 43 | continue 44 | assert k in arg_params, k + ' not initialized' 45 | assert arg_params[k].shape == arg_shape_dict[k], \ 46 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 47 | for k in sym.list_auxiliary_states(): 48 | assert k in aux_params, k + ' not initialized' 49 | assert aux_params[k].shape == aux_shape_dict[k], \ 50 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 51 | 52 | # decide maximum shape 53 | data_names = [k[0] for k in test_data.provide_data] 54 | label_names = None if test_data.provide_label is None else [k[0] for k in test_data.provide_label] 55 | max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 56 | 57 | # create predictor 58 | predictor = Predictor(sym, data_names, label_names, 59 | context=ctx, max_data_shapes=max_data_shape, 60 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 61 | arg_params=arg_params, aux_params=aux_params) 62 | 63 | # start testing 64 | if not test_output: 65 | imdb_boxes = generate_proposals(predictor, test_data, imdb, vis=vis, thresh=thresh) 66 | imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 67 | else: 68 | test_proposals(predictor, test_data, imdb, roidb, vis=vis) 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Test a Region Proposal Network') 73 | # general 74 | parser.add_argument('--network', help='network name', default=default.network, type=str) 75 | 
parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 76 | args, rest = parser.parse_known_args() 77 | generate_config(args.network, args.dataset) 78 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 79 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 80 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 81 | # testing 82 | parser.add_argument('--prefix', help='model to test with', default=default.rpn_prefix, type=str) 83 | parser.add_argument('--epoch', help='model to test with', default=default.rpn_epoch, type=int) 84 | # rpn 85 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 86 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 87 | parser.add_argument('--thresh', help='rpn proposal threshold', default=0, type=float) 88 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 89 | args = parser.parse_args() 90 | return args 91 | 92 | 93 | def main(): 94 | args = parse_args() 95 | logger.info('Called with argument: %s' % args) 96 | ctx = mx.gpu(args.gpu) 97 | test_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 98 | ctx, args.prefix, args.epoch, 99 | args.vis, args.shuffle, args.thresh) 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /rcnn/tools/train_maskrcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pprint 4 | import mxnet as mx 5 | import numpy as np 6 | import os.path as osp 7 | import cPickle as pkl 8 | 9 | from ..config import config, default, generate_config 10 | from ..symbol import * 11 | from ..core import callback, metric 12 | from ..core.loader import MaskROIIter 13 | from ..core.module import MutableModule 14 | from ..processing.bbox_regression import add_bbox_regression_targets, add_mask_targets 15 | from ..processing.assign_levels import add_assign_targets 16 | from ..utils.load_data import load_proposal_roidb, merge_roidb #, filter_roidb 17 | from ..utils.load_model import load_param 18 | 19 | def train_maskrcnn(network, dataset, image_set, root_path, dataset_path, 20 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 21 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 22 | train_shared, lr, lr_step, proposal, maskrcnn_stage=None): 23 | # set up logger 24 | logging.basicConfig() 25 | logger = logging.getLogger() 26 | logger.setLevel(logging.INFO) 27 | 28 | # load symbol 29 | config.TRAIN.BATCH_IMAGES = 1 30 | config.TRAIN.BATCH_ROIS = 256 31 | sym = eval('get_' + network + '_maskrcnn')(num_classes=config.NUM_CLASSES) 32 | 33 | # setup multi-gpu 34 | batch_size = len(ctx) 35 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 36 | 37 | # print config 38 | pprint.pprint(config) 39 | 40 | USE_CACHE = True 41 | 42 | if USE_CACHE: 43 | roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask.pkl' 44 | mean_file = root_path + '/cache/' + dataset + '_roidb_mean.pkl' 45 | std_file = root_path + '/cache/' + dataset + '_roidb_std.pkl' 46 | if maskrcnn_stage is not None: 47 | roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask_' + maskrcnn_stage + '.pkl' 48 | mean_file = root_path + '/cache/' + dataset + '_roidb_mean_' + 
maskrcnn_stage + '.pkl' 49 | std_file = root_path + '/cache/' + dataset + '_roidb_std_' + maskrcnn_stage + '.pkl' 50 | 51 | if USE_CACHE and osp.exists(roidb_file) and osp.exists(mean_file) and osp.exists(std_file): 52 | print 'Load ' + roidb_file 53 | with open(roidb_file, 'r') as f: 54 | roidb = pkl.load(f) 55 | print 'Load ' + mean_file 56 | with open(mean_file, 'r') as f: 57 | means = pkl.load(f) 58 | print 'Load ' + std_file 59 | with open(std_file, 'r') as f: 60 | stds = pkl.load(f) 61 | else: 62 | # load dataset and prepare imdb for training 63 | image_sets = [iset for iset in image_set.split('+')] 64 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 65 | proposal=proposal, append_gt=True, flip=not no_flip) 66 | for image_set in image_sets] 67 | roidb = merge_roidb(roidbs) 68 | 69 | def filter_roidb(roidb): 70 | """ remove roidb entries without usable rois """ 71 | 72 | def is_valid(entry): 73 | """ valid images have at least 1 fg or bg roi """ 74 | overlaps = entry['max_overlaps'] 75 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 76 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 77 | valid = len(fg_inds) > 0 and len(bg_inds) > 0 78 | return valid 79 | 80 | num = len(roidb) 81 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 82 | num_after = len(filtered_roidb) 83 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 84 | 85 | return filtered_roidb 86 | 87 | roidb = filter_roidb(roidb) 88 | means, stds = add_bbox_regression_targets(roidb) 89 | add_assign_targets(roidb) 90 | add_mask_targets(roidb) 91 | if USE_CACHE: 92 | for file, obj in zip([roidb_file, mean_file, std_file], [roidb, means, stds]): 93 | with open(file, 'w') as f: 94 | pkl.dump(obj, f, -1) 95 | 96 | # load training data 97 | train_data = MaskROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 98 | ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 99 | 100 | # infer max shape 101 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 102 | max_label_shape = [] 103 | for s in config.RCNN_FEAT_STRIDE: 104 | max_data_shape.append(('rois_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, 5))) 105 | max_label_shape.append(('label_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS))) 106 | max_label_shape.append(('bbox_target_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS*config.NUM_CLASSES*4))) 107 | max_label_shape.append(('bbox_weight_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS*config.NUM_CLASSES*4))) 108 | max_label_shape.append(('mask_target_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES, 28, 28))) 109 | max_label_shape.append(('mask_weight_stride%s' % s, (input_batch_size, config.TRAIN.BATCH_ROIS, config.NUM_CLASSES, 1, 1))) 110 | # infer shape 111 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 112 | 113 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 114 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 115 | out_shape_dict = zip(sym.list_outputs(), out_shape) 116 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 117 | print 'output shape' 118 | pprint.pprint(out_shape_dict) 119 | 120 | # load and initialize params 121 | if resume: 122 | arg_params, aux_params = load_param(prefix, begin_epoch, 
convert=True) 123 | else: 124 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 125 | init_bbox_pred = mx.init.Normal(sigma=0.001) 126 | init_internal = mx.init.Normal(sigma=0.01) 127 | init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2) 128 | for k in sym.list_arguments(): 129 | if k in data_shape_dict: 130 | continue 131 | if k not in arg_params: 132 | print 'init', k 133 | arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k]) 134 | init_internal(k, arg_params[k]) 135 | if k in ['rcnn_fc_bbox_weight', 'bbox_pred_weight']: 136 | init_bbox_pred(k, arg_params[k]) 137 | if k.endswith('bias'): 138 | arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k]) 139 | if 'ctx_red_weight' in k: 140 | ctx_shape = np.array(arg_shape_dict[k]) 141 | ctx_shape[1] /= 2 142 | arg_params[k][:] = np.concatenate((np.eye(ctx_shape[1]).reshape(ctx_shape), np.zeros(ctx_shape)), axis=1) 143 | 144 | for k in sym.list_auxiliary_states(): 145 | if k not in aux_params: 146 | print 'init', k 147 | aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k]) 148 | init(k, aux_params[k]) 149 | 150 | # check parameter shapes 151 | for k in sym.list_arguments(): 152 | if k in data_shape_dict: 153 | continue 154 | assert k in arg_params, k + ' not initialized' 155 | assert arg_params[k].shape == arg_shape_dict[k], \ 156 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 157 | for k in sym.list_auxiliary_states(): 158 | assert k in aux_params, k + ' not initialized' 159 | assert aux_params[k].shape == aux_shape_dict[k], \ 160 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 161 | 162 | # prepare training 163 | # create solver 164 | data_names = [k[0] for k in train_data.provide_data] 165 | label_names = [k[0] for k in train_data.provide_label] 166 | if train_shared: 167 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 168 | else: 169 | fixed_param_prefix = config.FIXED_PARAMS 170 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 171 | logger=logger, context=ctx, work_load_list=work_load_list, 172 | max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, 173 | fixed_param_prefix=fixed_param_prefix) 174 | 175 | # decide training params 176 | # metric 177 | eval_metric = metric.RCNNAccMetric() 178 | cls_metric = metric.RCNNLogLossMetric() 179 | bbox_metric = metric.RCNNL1LossMetric() 180 | mask_acc_metric = metric.MaskAccMetric() 181 | mask_log_metric = metric.MaskLogLossMetric() 182 | eval_metrics = mx.metric.CompositeEvalMetric() 183 | for child_metric in [eval_metric, cls_metric, bbox_metric, mask_acc_metric, mask_log_metric]: 184 | eval_metrics.add(child_metric) 185 | # callback 186 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent) 187 | epoch_end_callback = callback.do_checkpoint(prefix, means, stds) 188 | # decide learning rate 189 | base_lr = lr 190 | lr_factor = 0.1 191 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 192 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 193 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 194 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 195 | print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters 196 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 197 | # optimizer 198 | optimizer_params = {'momentum': 0.9, 199 | 'wd': 
0.0001, 200 | 'learning_rate': lr, 201 | 'lr_scheduler': lr_scheduler, 202 | 'rescale_grad': (1.0 / batch_size), 203 | 'clip_gradient': 5} 204 | 205 | # train 206 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 207 | batch_end_callback=batch_end_callback, kvstore=kvstore, 208 | optimizer='sgd', optimizer_params=optimizer_params, 209 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 210 | 211 | -------------------------------------------------------------------------------- /rcnn/tools/train_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import ROIIter 10 | from ..core.module import MutableModule 11 | from ..processing.bbox_regression import add_bbox_regression_targets 12 | from ..utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 13 | from ..utils.load_model import load_param 14 | 15 | 16 | def train_rcnn(network, dataset, image_set, root_path, dataset_path, 17 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 18 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 19 | train_shared, lr, lr_step, proposal): 20 | # set up config 21 | config.TRAIN.BATCH_IMAGES = 2 22 | config.TRAIN.BATCH_ROIS = 128 23 | if proposal == 'ss': 24 | config.TRAIN.BG_THRESH_LO = 0.1 # reproduce Fast R-CNN 25 | 26 | # load symbol 27 | sym = eval('get_' + network + '_rcnn')(num_classes=config.NUM_CLASSES) 28 | 29 | # setup multi-gpu 30 | batch_size = len(ctx) 31 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 32 | 33 | # print config 34 | logger.info(pprint.pformat(config)) 35 | 36 | # load dataset and prepare imdb for training 37 | image_sets = [iset for iset in image_set.split('+')] 38 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 39 | proposal=proposal, append_gt=True, flip=not no_flip) 40 | for image_set in image_sets] 41 | roidb = merge_roidb(roidbs) 42 | roidb = filter_roidb(roidb) 43 | means, stds = add_bbox_regression_targets(roidb) 44 | 45 | # load training data 46 | train_data = ROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 47 | ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 48 | 49 | # infer max shape 50 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 51 | logger.info('providing maximum shape %s' % max_data_shape) 52 | 53 | # infer shape 54 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 55 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 56 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 57 | out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) 58 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 59 | logger.info('output shape %s' % pprint.pformat(out_shape_dict)) 60 | 61 | # load and initialize params 62 | if resume: 63 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 64 | else: 65 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 66 | arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight']) 67 | arg_params['cls_score_bias'] = 
mx.nd.zeros(shape=arg_shape_dict['cls_score_bias']) 68 | arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight']) 69 | arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias']) 70 | 71 | # check parameter shapes 72 | for k in sym.list_arguments(): 73 | if k in data_shape_dict: 74 | continue 75 | assert k in arg_params, k + ' not initialized' 76 | assert arg_params[k].shape == arg_shape_dict[k], \ 77 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 78 | for k in sym.list_auxiliary_states(): 79 | assert k in aux_params, k + ' not initialized' 80 | assert aux_params[k].shape == aux_shape_dict[k], \ 81 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 82 | 83 | # prepare training 84 | # create solver 85 | data_names = [k[0] for k in train_data.provide_data] 86 | label_names = [k[0] for k in train_data.provide_label] 87 | if train_shared: 88 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 89 | else: 90 | fixed_param_prefix = config.FIXED_PARAMS 91 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 92 | logger=logger, context=ctx, work_load_list=work_load_list, 93 | max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix) 94 | 95 | # decide training params 96 | # metric 97 | eval_metric = metric.RCNNAccMetric() 98 | cls_metric = metric.RCNNLogLossMetric() 99 | bbox_metric = metric.RCNNL1LossMetric() 100 | eval_metrics = mx.metric.CompositeEvalMetric() 101 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 102 | eval_metrics.add(child_metric) 103 | # callback 104 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent, auto_reset=False) 105 | epoch_end_callback = callback.do_checkpoint(prefix, means, stds) 106 | # decide learning rate 107 | base_lr = lr 108 | lr_factor = 0.1 109 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 110 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 111 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 112 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 113 | logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) 114 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 115 | # optimizer 116 | optimizer_params = {'momentum': 0.9, 117 | 'wd': 0.0005, 118 | 'learning_rate': lr, 119 | 'lr_scheduler': lr_scheduler, 120 | 'rescale_grad': (1.0 / batch_size), 121 | 'clip_gradient': 5} 122 | 123 | # train 124 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 125 | batch_end_callback=batch_end_callback, kvstore=kvstore, 126 | optimizer='sgd', optimizer_params=optimizer_params, 127 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 128 | 129 | 130 | def parse_args(): 131 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN Network') 132 | # general 133 | parser.add_argument('--network', help='network name', default=default.network, type=str) 134 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 135 | args, rest = parser.parse_known_args() 136 | generate_config(args.network, args.dataset) 137 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 138 | parser.add_argument('--root_path', 
help='output data folder', default=default.root_path, type=str) 139 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 140 | # training 141 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 142 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 143 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 144 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 145 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 146 | parser.add_argument('--resume', help='continue training', action='store_true') 147 | # rcnn 148 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 149 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 150 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 151 | parser.add_argument('--prefix', help='new model prefix', default=default.rcnn_prefix, type=str) 152 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 153 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rcnn_epoch, type=int) 154 | parser.add_argument('--lr', help='base learning rate', default=default.rcnn_lr, type=float) 155 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rcnn_lr_step, type=str) 156 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 157 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', type=str) 158 | args = parser.parse_args() 159 | return args 160 | 161 | 162 | def main(): 163 | args = parse_args() 164 | logger.info('Called with argument: %s' % args) 165 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 166 | train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 167 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 168 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 169 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step, proposal=args.proposal) 170 | 171 | if __name__ == '__main__': 172 | main() 173 | -------------------------------------------------------------------------------- /rcnn/tools/train_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pprint 4 | import mxnet as mx 5 | 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import AnchorLoaderFPN 10 | from ..core.module import MutableModule 11 | from ..utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 12 | from ..utils.load_model import load_param 13 | 14 | 15 | def train_rpn(network, dataset, image_set, root_path, dataset_path, 16 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 17 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 18 | train_shared, lr, lr_step): 19 | # set up logger 20 | logging.basicConfig() 21 | logger = logging.getLogger() 22 | logger.setLevel(logging.INFO) 23 | 24 | # setup config 25 | assert 
config.TRAIN.BATCH_IMAGES==1 26 | 27 | # load symbol 28 | sym = eval('get_' + network + '_rpn')() 29 | feat_sym = [] 30 | for stride in config.RPN_FEAT_STRIDE: 31 | feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride]) 32 | 33 | 34 | # setup multi-gpu 35 | batch_size = len(ctx) 36 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 37 | 38 | # print config 39 | pprint.pprint(config) 40 | 41 | # load dataset and prepare imdb for training 42 | image_sets = [iset for iset in image_set.split('+')] 43 | roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, 44 | flip=not no_flip) 45 | for image_set in image_sets] 46 | roidb = merge_roidb(roidbs) 47 | roidb = filter_roidb(roidb) 48 | 49 | # load training data 50 | #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 51 | # ctx=ctx, work_load_list=work_load_list, 52 | # feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES, 53 | # anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, 54 | # allowed_border=9999) 55 | train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 56 | ctx=ctx, work_load_list=work_load_list) 57 | 58 | # infer max shape 59 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 60 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 61 | print 'providing maximum shape', max_data_shape, max_label_shape 62 | 63 | # infer shape 64 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 65 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 66 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 67 | out_shape_dict = zip(sym.list_outputs(), out_shape) 68 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 69 | print 'output shape' 70 | pprint.pprint(out_shape_dict) 71 | 72 | # load and initialize params 73 | if resume: 74 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 75 | else: 76 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 77 | init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2) 78 | init_internal = mx.init.Normal(sigma=0.01) 79 | for k in sym.list_arguments(): 80 | if k in data_shape_dict: 81 | continue 82 | if k not in arg_params: 83 | print 'init', k 84 | arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k]) 85 | if not k.endswith('bias'): 86 | init_internal(k, arg_params[k]) 87 | 88 | for k in sym.list_auxiliary_states(): 89 | if k not in aux_params: 90 | print 'init', k 91 | aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k]) 92 | init(k, aux_params[k]) 93 | 94 | # check parameter shapes 95 | for k in sym.list_arguments(): 96 | if k in data_shape_dict: 97 | continue 98 | assert k in arg_params, k + ' not initialized' 99 | assert arg_params[k].shape == arg_shape_dict[k], \ 100 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 101 | for k in sym.list_auxiliary_states(): 102 | assert k in aux_params, k + ' not initialized' 103 | assert aux_params[k].shape == aux_shape_dict[k], \ 104 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 105 | 106 | # create solver 107 | data_names = [k[0] for k in train_data.provide_data] 108 | label_names = [k[0] for k in train_data.provide_label] 109 | if 
train_shared: 110 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 111 | else: 112 | fixed_param_prefix = config.FIXED_PARAMS 113 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 114 | logger=logger, context=ctx, work_load_list=work_load_list, 115 | max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, 116 | fixed_param_prefix=fixed_param_prefix) 117 | 118 | # decide training params 119 | # metric 120 | eval_metric = metric.RPNAccMetric() 121 | cls_metric = metric.RPNLogLossMetric() 122 | bbox_metric = metric.RPNL1LossMetric() 123 | eval_metrics = mx.metric.CompositeEvalMetric() 124 | for child_metric in [eval_metric,cls_metric,bbox_metric]: 125 | eval_metrics.add(child_metric) 126 | # callback 127 | batch_end_callback = [] 128 | batch_end_callback.append(mx.callback.Speedometer(train_data.batch_size, frequent=frequent)) 129 | epoch_end_callback = mx.callback.do_checkpoint(prefix) 130 | # decide learning rate 131 | base_lr = lr 132 | lr_factor = 0.1 133 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 134 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 135 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 136 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 137 | print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters 138 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 139 | # optimizer 140 | optimizer_params = {'momentum': 0.9, 141 | 'wd': 0.0001, 142 | 'learning_rate': lr, 143 | 'lr_scheduler': lr_scheduler, 144 | 'rescale_grad': (1.0 / batch_size), 145 | 'clip_gradient': 5} 146 | 147 | # train 148 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 149 | batch_end_callback=batch_end_callback, kvstore=kvstore, 150 | optimizer='sgd', optimizer_params=optimizer_params, 151 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 152 | 153 | 154 | def parse_args(): 155 | parser = argparse.ArgumentParser(description='Train a Region Proposal Network') 156 | # general 157 | parser.add_argument('--network', help='network name', default=default.network, type=str) 158 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 159 | args, rest = parser.parse_known_args() 160 | generate_config(args.network, args.dataset) 161 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 162 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 163 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 164 | # training 165 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 166 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 167 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 168 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 169 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 170 | parser.add_argument('--resume', help='continue training', action='store_true') 171 | # rpn 172 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 173 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 
174 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 175 | parser.add_argument('--prefix', help='new model prefix', default=default.rpn_prefix, type=str) 176 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 177 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rpn_epoch, type=int) 178 | parser.add_argument('--lr', help='base learning rate', default=default.rpn_lr, type=float) 179 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rpn_lr_step, type=str) 180 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 181 | args = parser.parse_args() 182 | return args 183 | 184 | 185 | def main(): 186 | args = parse_args() 187 | print 'Called with argument:', args 188 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 189 | train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 190 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 191 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 192 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step) 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /rcnn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bleakie/RetinaDetector/ffa3592613ab4fe120b598a64ce2a5106e7c9e43/rcnn/utils/__init__.py -------------------------------------------------------------------------------- /rcnn/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | from .load_model import load_checkpoint 2 | from .save_model import save_checkpoint 3 | 4 | 5 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 6 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 7 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 8 | arg_names = args1.keys() + args2.keys() 9 | aux_names = auxs1.keys() + auxs2.keys() 10 | args = dict() 11 | for arg in arg_names: 12 | if arg in args1: 13 | args[arg] = args1[arg] 14 | else: 15 | args[arg] = args2[arg] 16 | auxs = dict() 17 | for aux in aux_names: 18 | if aux in auxs1: 19 | auxs[aux] = auxs1[aux] 20 | else: 21 | auxs[aux] = auxs2[aux] 22 | save_checkpoint(prefix_out, epoch_out, args, auxs) 23 | -------------------------------------------------------------------------------- /rcnn/utils/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..logger import logger 3 | from ..config import config 4 | from ..dataset import * 5 | 6 | 7 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, 8 | flip=False): 9 | """ load ground truth roidb """ 10 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path) 11 | roidb = imdb.gt_roidb() 12 | print('roidb size', len(roidb)) 13 | if flip: 14 | roidb = imdb.append_flipped_images(roidb) 15 | print('flipped roidb size', len(roidb)) 16 | return roidb 17 | 18 | 19 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, 20 | proposal='rpn', append_gt=True, flip=False): 21 | """ load proposal roidb (append_gt when training) """ 22 | imdb = eval(dataset_name)(image_set_name, root_path, 
dataset_path) 23 | gt_roidb = imdb.gt_roidb() 24 | roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb, append_gt) 25 | if flip: 26 | roidb = imdb.append_flipped_images(roidb) 27 | return roidb 28 | 29 | 30 | def merge_roidb(roidbs): 31 | """ roidb are list, concat them together """ 32 | roidb = roidbs[0] 33 | for r in roidbs[1:]: 34 | roidb.extend(r) 35 | return roidb 36 | 37 | 38 | def filter_roidb(roidb): 39 | """ remove roidb entries without usable rois """ 40 | 41 | def is_valid(entry): 42 | """ valid images have at least 1 fg or bg roi """ 43 | overlaps = entry['max_overlaps'] 44 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 45 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 46 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 47 | #valid = len(fg_inds) > 0 48 | return valid 49 | 50 | num = len(roidb) 51 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 52 | num_after = len(filtered_roidb) 53 | logger.info('load data: filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) 54 | 55 | return filtered_roidb 56 | -------------------------------------------------------------------------------- /rcnn/utils/load_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def load_checkpoint(prefix, epoch): 5 | """ 6 | Load model checkpoint from file. 7 | :param prefix: Prefix of model name. 8 | :param epoch: Epoch number of model we would like to load. 9 | :return: (arg_params, aux_params) 10 | arg_params : dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's weights. 12 | aux_params : dict of str to NDArray 13 | Model parameter, dict of name to NDArray of net's auxiliary states. 14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | arg_params = {} 17 | aux_params = {} 18 | for k, v in save_dict.items(): 19 | tp, name = k.split(':', 1) 20 | if tp == 'arg': 21 | arg_params[name] = v 22 | if tp == 'aux': 23 | aux_params[name] = v 24 | return arg_params, aux_params 25 | 26 | 27 | def convert_context(params, ctx): 28 | """ 29 | :param params: dict of str to NDArray 30 | :param ctx: the context to convert to 31 | :return: dict of str of NDArray with context ctx 32 | """ 33 | new_params = dict() 34 | for k, v in params.items(): 35 | new_params[k] = v.as_in_context(ctx) 36 | return new_params 37 | 38 | 39 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 40 | """ 41 | wrapper for load checkpoint 42 | :param prefix: Prefix of model name. 43 | :param epoch: Epoch number of model we would like to load. 44 | :param convert: reference model should be converted to GPU NDArray first 45 | :param ctx: if convert then ctx must be designated. 
46 | :param process: model should drop any test 47 | :return: (arg_params, aux_params) 48 | """ 49 | arg_params, aux_params = load_checkpoint(prefix, epoch) 50 | if convert: 51 | if ctx is None: 52 | ctx = mx.cpu() 53 | arg_params = convert_context(arg_params, ctx) 54 | aux_params = convert_context(aux_params, ctx) 55 | if process: 56 | tests = [k for k in arg_params.keys() if '_test' in k] 57 | for test in tests: 58 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 59 | return arg_params, aux_params 60 | -------------------------------------------------------------------------------- /rcnn/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters. 14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | import numpy as np 4 | import datetime 5 | import os 6 | import glob 7 | from retinaface import RetinaFace 8 | 9 | detector = RetinaFace(gpu=0) 10 | 11 | img_path = 'data/retinaface/val/images' 12 | 13 | dir = os.listdir(img_path) 14 | for im in dir: 15 | img = cv2.imread(os.path.join(img_path, im)) 16 | 17 | faces, landmarks = detector.detect(img, scales_index=1, do_flip=True) 18 | 19 | if faces is not None: 20 | print('find', faces.shape[0], 'faces') 21 | for i in range(faces.shape[0]): 22 | box = faces[i].astype(np.int) 23 | color = (0, 0, 255) 24 | cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2) 25 | title = "%.2f" % (faces[i][4]) 26 | p3 = (max(box[0], 15), max(box[1], 15)) 27 | cv2.putText(img, title, p3, cv2.FONT_ITALIC, 0.6, (0, 255, 0), 1) 28 | if landmarks is not None: 29 | landmark5 = landmarks[i].astype(np.int) 30 | for l in range(landmark5.shape[0]): 31 | color = (0, 0, 255) 32 | if l == 0 or l == 3: 33 | color = (0, 255, 0) 34 | cv2.circle(img, (landmark5[l][0], landmark5[l][1]), 1, color, 2) 35 | cv2.imwrite(im, img) 36 | cv2.imshow('0', img) 37 | cv2.waitKey(1) 38 | -------------------------------------------------------------------------------- /test_widerface.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import sys 5 | import os 6 | import time 7 | import numpy as np 8 | import mxnet as mx 9 | from mxnet import ndarray as nd 10 | import cv2 11 | from rcnn.logger import logger 12 | #from rcnn.config import config, default, generate_config 13 | #from rcnn.tools.test_rcnn import test_rcnn 14 | #from rcnn.tools.test_rpn import test_rpn 15 | from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes, landmark_pred 16 | from rcnn.processing.generate_anchor import 
generate_anchors_fpn, anchors_plane 17 | from rcnn.processing.nms import gpu_nms_wrapper 18 | from rcnn.processing.bbox_transform import bbox_overlaps 19 | from rcnn.dataset import retinaface 20 | from retinaface import RetinaFace 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(description='Test widerface by retinaface detector') 25 | # general 26 | parser.add_argument('--network', help='network name', default='net3', type=str) 27 | parser.add_argument('--dataset', help='dataset name', default='retinaface', type=str) 28 | parser.add_argument('--image-set', help='image_set name', default='val', type=str) 29 | parser.add_argument('--root-path', help='output data folder', default='./data', type=str) 30 | parser.add_argument('--dataset-path', help='dataset path', default='./data/retinaface', type=str) 31 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 32 | # testing 33 | parser.add_argument('--prefix', help='model to test with', default='', type=str) 34 | parser.add_argument('--epoch', help='model to test with', default=0, type=int) 35 | parser.add_argument('--output', help='output folder', default='./wout', type=str) 36 | parser.add_argument('--nocrop', help='', action='store_true') 37 | parser.add_argument('--thresh', help='valid detection threshold', default=0.02, type=float) 38 | parser.add_argument('--mode', help='test mode, 0 for fast, 1 for accurate', default=1, type=int) 39 | #parser.add_argument('--pyramid', help='enable pyramid test', action='store_true') 40 | #parser.add_argument('--bbox-vote', help='', action='store_true') 41 | parser.add_argument('--part', help='', default=0, type=int) 42 | parser.add_argument('--parts', help='', default=1, type=int) 43 | args = parser.parse_args() 44 | return args 45 | 46 | detector = None 47 | args = None 48 | imgid = -1 49 | 50 | def get_boxes(roi, pyramid): 51 | global imgid 52 | im = cv2.imread(roi['image']) 53 | do_flip = False 54 | if not pyramid: 55 | target_size = 1200 56 | max_size = 1600 57 | #do_flip = True 58 | target_size = 1504 59 | max_size = 2000 60 | target_size = 1600 61 | max_size = 2150 62 | im_shape = im.shape 63 | im_size_min = np.min(im_shape[0:2]) 64 | im_size_max = np.max(im_shape[0:2]) 65 | im_scale = float(target_size) / float(im_size_min) 66 | # prevent bigger axis from being more than max_size: 67 | if np.round(im_scale * im_size_max) > max_size: 68 | im_scale = float(max_size) / float(im_size_max) 69 | scales = [im_scale] 70 | else: 71 | do_flip = True 72 | #TEST_SCALES = [500, 800, 1200, 1600] 73 | TEST_SCALES = [500, 800, 1100, 1400, 1700] 74 | target_size = 800 75 | max_size = 1200 76 | im_shape = im.shape 77 | im_size_min = np.min(im_shape[0:2]) 78 | im_size_max = np.max(im_shape[0:2]) 79 | im_scale = float(target_size) / float(im_size_min) 80 | # prevent bigger axis from being more than max_size: 81 | if np.round(im_scale * im_size_max) > max_size: 82 | im_scale = float(max_size) / float(im_size_max) 83 | scales = [float(scale)/target_size*im_scale for scale in TEST_SCALES] 84 | boxes, landmarks = detector.detect(im, threshold=args.thresh, scales = scales, do_flip=do_flip) 85 | #print(boxes.shape, landmarks.shape) 86 | if imgid>=0 and imgid<100: 87 | font = cv2.FONT_HERSHEY_SIMPLEX 88 | for i in xrange(boxes.shape[0]): 89 | box = boxes[i] 90 | ibox = box[0:4].copy().astype(np.int) 91 | cv2.rectangle(im, (ibox[0], ibox[1]), (ibox[2], ibox[3]), (255, 0, 0), 2) 92 | #print('box', ibox) 93 | #if len(ibox)>5: 94 | # for l in xrange(5): 95 | # pp = 
(ibox[5+l*2], ibox[6+l*2]) 96 | # cv2.circle(im, (pp[0], pp[1]), 1, (0, 0, 255), 1) 97 | blur = box[5] 98 | k = "%.3f"%blur 99 | cv2.putText(im,k,(ibox[0]+2,ibox[1]+14), font, 0.6, (0,255,0), 2) 100 | #landmarks = box[6:21].reshape( (5,3) ) 101 | if landmarks is not None: 102 | for l in xrange(5): 103 | color = (0,255,0) 104 | landmark = landmarks[i][l] 105 | pp = (int(landmark[0]), int(landmark[1])) 106 | if landmark[2]-0.5<0.0: 107 | color = (0,0,255) 108 | cv2.circle(im, (pp[0], pp[1]), 1, color, 2) 109 | filename = './testimages/%d.jpg'%imgid 110 | cv2.imwrite(filename, im) 111 | print(filename, 'wrote') 112 | imgid+=1 113 | 114 | return boxes 115 | 116 | 117 | def test(args): 118 | print('test with', args) 119 | global detector 120 | output_folder = args.output 121 | if not os.path.exists(output_folder): 122 | os.mkdir(output_folder) 123 | detector = RetinaFace(args.prefix, args.epoch, args.gpu, network=args.network, nocrop=args.nocrop, vote=args.bbox_vote) 124 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 125 | roidb = imdb.gt_roidb() 126 | gt_overlaps = np.zeros(0) 127 | overall = [0.0, 0.0] 128 | gt_max = np.array( (0.0, 0.0) ) 129 | num_pos = 0 130 | print('roidb size', len(roidb)) 131 | 132 | for i in xrange(len(roidb)): 133 | if i%args.parts!=args.part: 134 | continue 135 | #if i%10==0: 136 | # print('processing', i, file=sys.stderr) 137 | roi = roidb[i] 138 | boxes = get_boxes(roi, args.pyramid) 139 | if 'boxes' in roi: 140 | gt_boxes = roi['boxes'].copy() 141 | gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) 142 | num_pos += gt_boxes.shape[0] 143 | 144 | overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) 145 | #print(im_info, gt_boxes.shape, boxes.shape, overlaps.shape, file=sys.stderr) 146 | 147 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 148 | 149 | if boxes.shape[0]>0: 150 | _gt_overlaps = overlaps.max(axis=0) 151 | #print('max_overlaps', _gt_overlaps, file=sys.stderr) 152 | for j in range(len(_gt_overlaps)): 153 | if _gt_overlaps[j]>0.5: 154 | continue 155 | #print(j, 'failed', gt_boxes[j], 'max_overlap:', _gt_overlaps[j], file=sys.stderr) 156 | 157 | # append recorded IoU coverage level 158 | found = (_gt_overlaps > 0.5).sum() 159 | recall = found / float(gt_boxes.shape[0]) 160 | #print('recall', _recall, gt_boxes.shape[0], boxes.shape[0], gt_areas, 'num:', i, file=sys.stderr) 161 | overall[0]+=found 162 | overall[1]+=gt_boxes.shape[0] 163 | #gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 164 | #_recall = (gt_overlaps >= threshold).sum() / float(num_pos) 165 | recall_all = float(overall[0])/overall[1] 166 | #print('recall_all', _recall, file=sys.stderr) 167 | print('[%d]'%i, 'recall', recall, (gt_boxes.shape[0], boxes.shape[0]), 'all:', recall_all, file=sys.stderr) 168 | else: 169 | print('[%d]'%i, 'detect %d faces'%boxes.shape[0]) 170 | 171 | 172 | _vec = roidb[i]['image'].split('/') 173 | out_dir = os.path.join(output_folder, _vec[-2]) 174 | if not os.path.exists(out_dir): 175 | os.mkdir(out_dir) 176 | out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt')) 177 | with open(out_file, 'w') as f: 178 | name = '/'.join(roidb[i]['image'].split('/')[-2:]) 179 | f.write("%s\n"%(name)) 180 | f.write("%d\n"%(boxes.shape[0])) 181 | for b in range(boxes.shape[0]): 182 | box = boxes[b] 183 | f.write("%d %d %d %d %g \n"%(box[0], box[1], box[2]-box[0], box[3]-box[1], box[4])) 184 | 185 | def main(): 186 | global args 187 | args = parse_args() 188 | args.pyramid = False 
189 | args.bbox_vote = False 190 | if args.mode==1: 191 | args.pyramid = True 192 | args.bbox_vote = True 193 | logger.info('Called with argument: %s' % args) 194 | test(args) 195 | 196 | if __name__ == '__main__': 197 | main() 198 | 199 | -------------------------------------------------------------------------------- /train_model.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | source /etc/profile 3 | export CUDA_VISIBLE_DEVICES='0' 4 | nohup python -u train.py --network resnet > log.log 2>&1 & --------------------------------------------------------------------------------
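Note on the learning-rate schedule shared by train_maskrcnn.py, train_rcnn.py and train_rpn.py: the decay epochs from --lr_step are shifted by begin_epoch, decays that already passed are folded into the starting rate, and the remaining epochs are converted to iteration counts for MXNet's MultiFactorScheduler. A minimal standalone sketch of that logic, assuming num_images = len(roidb) and batch_size = len(ctx) as in the scripts; the wrapper name make_lr_scheduler is mine, not part of the repository:

import mxnet as mx

def make_lr_scheduler(base_lr, lr_step, begin_epoch, num_images, batch_size, lr_factor=0.1):
    # decay points given in epochs, e.g. lr_step = '7,10'
    lr_epoch = [int(e) for e in lr_step.split(',')]
    # keep only the decay points still ahead of the resume epoch
    lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]
    # fold decays that already happened into the starting rate
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    # one epoch is num_images / batch_size parameter updates
    lr_iters = [int(e * num_images / batch_size) for e in lr_epoch_diff]
    return lr, mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

For example, resuming with begin_epoch=8 and lr_step='7,10' starts at base_lr * 0.1 (the epoch-7 decay already happened) and schedules the one remaining decay two epochs into the resumed run.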
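The checkpoint helpers in rcnn/utils serialize everything into a single '%s-%04d.params' file whose keys carry an 'arg:' or 'aux:' prefix, so load_checkpoint/load_param and save_checkpoint are exact inverses. A small round-trip sketch; the 'model/rcnn' prefix and epoch 8 are hypothetical values for illustration:

from rcnn.utils.load_model import load_param
from rcnn.utils.save_model import save_checkpoint

# reads model/rcnn-0008.params, splitting keys on the 'arg:'/'aux:' prefixes;
# convert=True copies every array to mx.cpu() (or to a supplied ctx)
arg_params, aux_params = load_param('model/rcnn', 8, convert=True)
# writes model/rcnn-copy-0008.params with the same prefixed-key layout
save_checkpoint('model/rcnn-copy', 8, arg_params, aux_params)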
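In test_widerface.py, get_boxes chooses its base scale by fitting the short image side to target_size while capping the long side at max_size; pyramid mode then spreads that base scale across TEST_SCALES. The core computation, extracted into a standalone function for clarity (the name compute_im_scale is mine):

import numpy as np

def compute_im_scale(im_shape, target_size, max_size):
    # scale so the short side reaches target_size ...
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # ... unless that would push the long side past max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

For a 720x1280 frame with target_size=1600 and max_size=2150 this returns 2150/1280, about 1.68, so the long side lands exactly on max_size instead of overshooting it.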
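test_widerface.py writes one text file per image in the WIDER FACE submission format: the relative image name, the detection count, then one 'x y w h score' line per box. A minimal reader for those files; read_widerface_dets is a hypothetical helper, not part of the repository:

def read_widerface_dets(path):
    # parse: name line, count line, then n lines of 'x y w h score'
    with open(path) as f:
        name = f.readline().strip()
        n = int(f.readline())
        dets = [[float(v) for v in f.readline().split()] for _ in range(n)]
    return name, dets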