├── .gitignore ├── LICENSE ├── README.md ├── data └── README.md ├── download_and_convert_data.py ├── libs ├── Makefile ├── __init__.py ├── boxes │ ├── .gitignore │ ├── __init__.py │ ├── anchor.py │ ├── bbox.pyx │ ├── bbox_transform.py │ ├── blob.py │ ├── cython_anchor.py │ ├── cython_anchor.pyx │ ├── cython_bbox.py │ ├── cython_bbox_transform.py │ ├── cython_bbox_transform.pyx │ ├── cython_nms.py │ ├── gprof2dot.py │ ├── nms.py │ ├── nms.pyx │ ├── nms_wrapper.py │ ├── profile │ ├── profile.png │ ├── roi.py │ └── timer.py ├── configs │ ├── __init__.py │ └── config_v1.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── dataset_factory.py │ ├── download_and_convert_coco.py │ └── pycocotools │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ │ ├── mask.py │ │ └── setup.py ├── layers │ ├── __init__.py │ ├── anchor.py │ ├── assign.py │ ├── crop.py │ ├── mask.py │ ├── roi.py │ ├── sample.py │ └── wrapper.py ├── logs │ ├── __init__.py │ └── log.py ├── make.sh ├── memory_util.py ├── nets │ ├── __init__.py │ ├── nets_factory.py │ ├── pyramid_network.py │ ├── resnet_utils.py │ ├── resnet_v1.py │ └── train_utils.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── preprocessings │ ├── __init__.py │ ├── coco_v1.py │ └── utils.py ├── setup.py └── visualization │ ├── __init__.py │ ├── pil_utils.py │ └── summary_utils.py ├── train ├── __init__.py ├── train.py └── train_utils.py └── unit_test ├── __init__.py ├── data_test.py ├── layer_test.py ├── preprocessing_test.py └── resnet50_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | .idea 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | data/coco 93 | data/pretrained_models 94 | tags 95 | output 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mask RCNN 2 | Mask RCNN in TensorFlow 3 | 4 | This repo attempts to reproduce this amazing work by Kaiming He et al. 
:
5 | [Mask R-CNN](https://arxiv.org/abs/1703.06870)
6 | 
7 | ## Requirements
8 | 
9 | - [Tensorflow (>= 1.0.0)](https://www.tensorflow.org/install/install_linux)
10 | - [Numpy](https://github.com/numpy/numpy/blob/master/INSTALL.rst.txt)
11 | - [COCO dataset](http://mscoco.org/dataset/#download)
12 | - [Resnet50](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)
13 | 
14 | ## How-to
15 | 1. Go to `./libs/datasets/pycocotools` and run `make`.
16 | 2. Download the [COCO](http://mscoco.org/dataset/#download) dataset, place it in `./data`, then run `python download_and_convert_data.py` to build the tf-records. This takes a while.
17 | 3. Download the pretrained resnet50 model with `wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz`, extract it, and place it in `./data/pretrained_models/`.
18 | 4. Go to `./libs` and run `make`.
19 | 5. Run `python train/train.py` to start training (the same commands are collected in the Quick start sketch at the end of this README).
20 | 6. There are certainly some bugs; please report them back, and let's solve them together.
21 | 
22 | ## TODO:
23 | - [x] ROIAlign
24 | - [x] COCO Data Provider
25 | - [x] Resnet50
26 | - [x] Feature Pyramid Network
27 | - [x] Anchor and ROI layer
28 | - [x] Mask layer
29 | - [x] Speed up anchor layer with cython
30 | - [x] Combining all modules together
31 | - [x] Testing and debugging (in progress)
32 | - [ ] Training / evaluation on COCO
33 | - [ ] Add image summary to show some results
34 | - [ ] Converting ResneXt
35 | - [ ] Training >2 images
36 | 
37 | ## Call for contributions
38 | - Anything that helps this repo, including **discussion**, **testing**, **promotion** and of course **your awesome code**.
39 | 
40 | ## Acknowledgment
41 | This repo borrows tons of code from
42 | - [TFFRCNN](https://github.com/CharlesShang/TFFRCNN)
43 | - [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn)
44 | - [faster_rcnn](https://github.com/ShaoqingRen/faster_rcnn)
45 | - [tf-models](https://github.com/tensorflow/models)
46 | 
47 | ## License
48 | See [LICENSE](https://github.com/CharlesShang/FastMaskRCNN/blob/master/LICENSE) for details.
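49 | 
50 | ## Quick start
51 | 
52 | The How-to steps above, collected into a single shell sketch for convenience (hedged: it assumes the default paths used in this README, and that COCO is already unzipped under `./data/coco`):
53 | 
54 | ```bash
55 | cd libs/datasets/pycocotools && make && cd ../../..
56 | python download_and_convert_data.py
57 | wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz
58 | mkdir -p data/pretrained_models
59 | tar -xzf resnet_v1_50_2016_08_28.tar.gz -C data/pretrained_models/
60 | cd libs && make && cd ..
61 | python train/train.py
62 | ```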
63 | 
-------------------------------------------------------------------------------- /data/README.md: --------------------------------------------------------------------------------
1 | Place and unzip your COCO dataset in this dir, like so:
2 | 
3 | ```buildoutcfg
4 | ./data
5 |   ./coco
6 |     ./annotations
7 |     ./train2014
8 |     ./val2014
9 | ```
10 | 
-------------------------------------------------------------------------------- /download_and_convert_data.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | 
7 | import os
8 | import tensorflow as tf
9 | 
10 | from libs.datasets import download_and_convert_coco
11 | from libs.configs import config_v1
12 | 
13 | FLAGS = tf.app.flags.FLAGS
14 | 
15 | # tf.app.flags.DEFINE_string(
16 | #     'dataset_name', 'coco',
17 | #     'The name of the dataset to convert, one of "coco", "cifar10", "flowers", "mnist".')
18 | 
19 | # tf.app.flags.DEFINE_string(
20 | #     'dataset_dir', 'data/coco',
21 | #     'The directory where the output TFRecords and temporary files are saved.')
22 | 
23 | 
24 | def main(_):
25 |   if not os.path.isdir('./output/mask_rcnn'):
26 |     os.makedirs('./output/mask_rcnn')
27 |   if not FLAGS.dataset_name:
28 |     raise ValueError('You must supply the dataset name with --dataset_name')
29 |   if not FLAGS.dataset_dir:
30 |     raise ValueError('You must supply the dataset directory with --dataset_dir')
31 | 
32 |   if FLAGS.dataset_name == 'coco':
33 |     download_and_convert_coco.run(FLAGS.dataset_dir, FLAGS.dataset_split_name)
34 |   else:
35 |     raise ValueError(
36 |         'dataset_name [%s] was not recognized.' % FLAGS.dataset_name)
37 | 
38 | if __name__ == '__main__':
39 |   tf.app.run()
40 | 
-------------------------------------------------------------------------------- /libs/Makefile: --------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | 	sh make.sh
-------------------------------------------------------------------------------- /libs/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/__init__.py
-------------------------------------------------------------------------------- /libs/boxes/.gitignore: --------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 
-------------------------------------------------------------------------------- /libs/boxes/__init__.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import cython_nms
8 | from . import cython_bbox
9 | from . import nms
10 | from . import timer
11 | from .anchor import anchors
12 | from .anchor import anchors_plane
13 | from .roi import roi_cropping
14 | from . import cython_anchor
15 | from . import cython_bbox_transform
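16 | 
17 | # Editor's hedged usage sketch (not part of the original file). With the
18 | # extensions built by `make` in ./libs, the anchor helpers re-exported here
19 | # can be used directly, e.g. to enumerate anchors over a 50x63 feature map
20 | # at stride 16 (cf. the timing demo at the bottom of anchor.py):
21 | #   from libs import boxes
22 | #   all_anchors = boxes.anchors_plane(50, 63, stride=16)  # shape (50, 63, 15, 4)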
-------------------------------------------------------------------------------- /libs/boxes/anchor.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import numpy as np
6 | from libs.boxes import cython_anchor
7 | 
8 | def anchors(scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16):
9 |     """Get a set of anchors at one position."""
10 |     return generate_anchors(base_size=base, scales=np.asarray(scales, np.int32), ratios=ratios)
11 | 
12 | def anchors_plane(height, width, stride=1.0,
13 |                   scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16):
14 |     """Get a complete set of anchors in a spatial plane.
15 |     height, width are the plane dimensions;
16 |     stride is the scale ratio of the original image to this plane.
17 |     """
18 |     # TODO: implement in C, or pre-compute them, or set to a fixed input-shape
19 |     # enum all anchors in a plane
20 |     # scales = kwargs.setdefault('scales', [2, 4, 8, 16, 32])
21 |     # ratios = kwargs.setdefault('ratios', [0.5, 1, 2.0])
22 |     # base = kwargs.setdefault('base', 16)
23 |     anc = anchors(scales, ratios, base)
24 |     all_anchors = cython_anchor.anchors_plane(height, width, stride, anc)
25 |     return all_anchors
26 | 
27 | # Written by Ross Girshick and Sean Bell
28 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
29 |                      scales=2 ** np.arange(3, 6)):
30 |     """
31 |     Generate anchor (reference) windows by enumerating aspect ratios X
32 |     scales wrt a reference (0, 0, 15, 15) window.
33 |     """
34 | 
35 |     base_anchor = np.array([1, 1, base_size, base_size]) - 1
36 |     ratio_anchors = _ratio_enum(base_anchor, ratios)
37 |     anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
38 |                          for i in range(ratio_anchors.shape[0])])
39 |     return anchors
40 | 
41 | def _whctrs(anchor):
42 |     """
43 |     Return width, height, x center, and y center for an anchor (window).
44 |     """
45 | 
46 |     w = anchor[2] - anchor[0] + 1
47 |     h = anchor[3] - anchor[1] + 1
48 |     x_ctr = anchor[0] + 0.5 * (w - 1)
49 |     y_ctr = anchor[1] + 0.5 * (h - 1)
50 |     return w, h, x_ctr, y_ctr
51 | 
52 | 
53 | def _mkanchors(ws, hs, x_ctr, y_ctr):
54 |     """
55 |     Given a vector of widths (ws) and heights (hs) around a center
56 |     (x_ctr, y_ctr), output a set of anchors (windows).
57 |     """
58 | 
59 |     ws = ws[:, np.newaxis]
60 |     hs = hs[:, np.newaxis]
61 |     anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
62 |                          y_ctr - 0.5 * (hs - 1),
63 |                          x_ctr + 0.5 * (ws - 1),
64 |                          y_ctr + 0.5 * (hs - 1)))
65 |     return anchors
66 | 
67 | 
68 | def _ratio_enum(anchor, ratios):
69 |     """
70 |     Enumerate a set of anchors for each aspect ratio wrt an anchor.
71 |     """
72 | 
73 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
74 |     size = w * h
75 |     size_ratios = size / ratios
76 |     ws = np.round(np.sqrt(size_ratios))
77 |     hs = np.round(ws * ratios)
78 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
79 |     return anchors
80 | 
81 | 
82 | def _scale_enum(anchor, scales):
83 |     """
84 |     Enumerate a set of anchors for each scale wrt an anchor.
85 |     """
86 | 
87 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
88 |     ws = w * scales
89 |     hs = h * scales
90 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
91 |     return anchors
92 | 
93 | def _unmap(data, count, inds, fill=0):
94 |     """ Unmap a subset of items (data) back to the original set of items (of
95 |     size count) """
96 |     if len(data.shape) == 1:
97 |         ret = np.empty((count,), dtype=np.float32)
98 |         ret.fill(fill)
99 |         ret[inds] = data
100 |     else:
101 |         ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
102 |         ret.fill(fill)
103 |         ret[inds, :] = data
104 |     return ret
105 | 
106 | if __name__ == '__main__':
107 |     import time
108 | 
109 |     t = time.time()
110 |     a = anchors()
111 |     num_anchors = 0
112 | 
113 |     # all_anchors = anchors_plane(200, 250, stride=4, border=0)
114 |     # num_anchors += all_anchors.shape[0]
115 |     for i in range(10):
116 |         ancs = anchors()
117 |         all_anchors = cython_anchor.anchors_plane(200, 250, 4, ancs)
118 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
119 |         all_anchors = cython_anchor.anchors_plane(100, 125, 8, ancs)
120 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
121 |         all_anchors = cython_anchor.anchors_plane(50, 63, 16, ancs)
122 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
123 |         all_anchors = cython_anchor.anchors_plane(25, 32, 32, ancs)
124 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
125 |     print('average time: %f' % ((time.time() - t) / 10))
126 |     print('anchors: %d' % (num_anchors / 10))
127 |     print(a.shape, '\n', a)
128 |     print(all_anchors.shape)
129 |     # from IPython import embed
130 |     # embed()
131 | 
-------------------------------------------------------------------------------- /libs/boxes/bbox.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 | 
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 | 
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 | 
15 | def bbox_overlaps(
16 |         np.ndarray[DTYPE_t, ndim=2] boxes,
17 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 |     """
19 |     Parameters
20 |     ----------
21 |     boxes: (N, 4) ndarray of float
22 |     query_boxes: (K, 4) ndarray of float
23 |     Returns
24 |     -------
25 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 |     """
27 |     cdef unsigned int N = boxes.shape[0]
28 |     cdef unsigned int K = query_boxes.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 |     cdef DTYPE_t iw, ih, box_area
31 |     cdef DTYPE_t ua
32 |     cdef unsigned int k, n
33 |     for k in range(K):
34 |         box_area = (
35 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 |         )
38 |         for n in range(N):
39 |             iw = (
40 |                 min(boxes[n, 2], query_boxes[k, 2]) -
41 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
42 |             )
43 |             if iw > 0:
44 |                 ih = (
45 |                     min(boxes[n, 3], query_boxes[k, 3]) -
46 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
47 |                 )
48 |                 if ih > 0:
49 |                     ua = float(
50 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
51 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
52 |                         box_area - iw * ih
53 |                     )
54 |                     overlaps[n, k] = iw * ih / ua
55 |     return overlaps
56 | 
57 | def bbox_intersections(
58 |         np.ndarray[DTYPE_t,
ndim=2] boxes,
59 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
60 |     """
61 |     For each query box compute the intersection ratio covered by boxes
62 |     ----------
63 |     Parameters
64 |     ----------
65 |     boxes: (N, 4) ndarray of float
66 |     query_boxes: (K, 4) ndarray of float
67 |     Returns
68 |     -------
69 |     intersec: (N, K) ndarray of intersections between boxes and query_boxes
70 |     """
71 |     cdef unsigned int N = boxes.shape[0]
72 |     cdef unsigned int K = query_boxes.shape[0]
73 |     cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
74 |     cdef DTYPE_t iw, ih, box_area
75 |     cdef DTYPE_t ua
76 |     cdef unsigned int k, n
77 |     for k in range(K):
78 |         box_area = (
79 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
80 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
81 |         )
82 |         for n in range(N):
83 |             iw = (
84 |                 min(boxes[n, 2], query_boxes[k, 2]) -
85 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
86 |             )
87 |             if iw > 0:
88 |                 ih = (
89 |                     min(boxes[n, 3], query_boxes[k, 3]) -
90 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
91 |                 )
92 |                 if ih > 0:
93 |                     intersec[n, k] = iw * ih / box_area
94 |     return intersec
-------------------------------------------------------------------------------- /libs/boxes/bbox_transform.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import warnings
10 | 
11 | def bbox_transform(ex_rois, gt_rois):
12 |     """
13 |     computes the distance from ground-truth boxes to the given boxes, normed by their size
14 |     :param ex_rois: n * 4 numpy array, given boxes
15 |     :param gt_rois: n * 4 numpy array, ground-truth boxes
16 |     :return: deltas: n * 4 numpy array, regression targets (dx, dy, dw, dh)
17 |     """
18 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
19 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
20 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
21 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
22 | 
23 |     # assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
24 |     #     'Invalid boxes found: {} {}'. \
25 |     #     format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :])
26 | 
27 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
28 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
29 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
30 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
31 | 
32 |     # warnings.catch_warnings()
33 |     # warnings.filterwarnings('error')
34 |     targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths
35 |     targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights
36 |     targets_dw = 5.0 * np.log(gt_widths / ex_widths)
37 |     targets_dh = 5.0 * np.log(gt_heights / ex_heights)
38 | 
39 |     targets = np.vstack(
40 |         (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
41 |     return targets
42 | 
43 | def bbox_transform_inv(boxes, deltas):
44 |     if boxes.shape[0] == 0:
45 |         return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
46 | 
47 |     boxes = boxes.astype(deltas.dtype, copy=False)
48 | 
49 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
50 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
51 |     ctr_x = boxes[:, 0] + 0.5 * widths
52 |     ctr_y = boxes[:, 1] + 0.5 * heights
53 | 
54 |     dx = deltas[:, 0::4] * 0.1
55 |     dy = deltas[:, 1::4] * 0.1
56 |     dw = deltas[:, 2::4] * 0.2
57 |     dh = deltas[:, 3::4] * 0.2
58 | 
59 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
60 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
61 |     # equivalent to np.exp(dw) * widths; written as exp(dw + log(w)) here
62 |     pred_w = np.exp(dw + np.log(widths[:, np.newaxis]))
63 |     pred_h = np.exp(dh + np.log(heights[:, np.newaxis]))
64 | 
65 |     pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
66 |     # x1
67 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
68 |     # y1
69 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
70 |     # x2
71 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
72 |     # y2
73 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1
74 | 
75 |     return pred_boxes
76 | 
77 | def clip_boxes(boxes, im_shape):
78 |     """
79 |     Clip boxes to image boundaries.
80 |     """
81 | 
82 |     # x1 >= 0
83 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
84 |     # y1 >= 0
85 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
86 |     # x2 < im_shape[1]
87 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
88 |     # y2 < im_shape[0]
89 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
90 |     return boxes
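91 | 
92 | if __name__ == '__main__':
93 |     # Editor's worked example (hedged addition, not in the original file).
94 |     # The 10x/5x scaling applied in bbox_transform matches the 0.1/0.2
95 |     # factors in bbox_transform_inv, so encoding then decoding recovers
96 |     # the ground-truth box exactly:
97 |     ex = np.array([[0., 0., 15., 15.]])
98 |     gt = np.array([[2., 2., 17., 21.]])
99 |     deltas = bbox_transform(ex, gt)        # [[1.5625, 2.8125, 0., 1.1157]]
100 |     print(bbox_transform_inv(ex, deltas))  # [[ 2.  2. 17. 21.]]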
-------------------------------------------------------------------------------- /libs/boxes/blob.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Blob helper functions."""
9 | 
10 | import numpy as np
11 | import cv2
12 | from ..fast_rcnn.config import cfg  # NOTE: no fast_rcnn package ships with this repo; cfg is only needed by the RANDOM_DOWNSAMPLE branch below
13 | 
14 | def im_list_to_blob(ims):
15 |     """Convert a list of images into a network input.
16 | 
17 |     Assumes images are already prepared (means subtracted, BGR order, ...).
18 |     """
19 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
20 |     num_images = len(ims)
21 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
22 |                     dtype=np.float32)
23 |     for i in range(num_images):
24 |         im = ims[i]
25 |         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
26 | 
27 |     return blob
28 | 
29 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
30 |     """Mean subtract and scale an image for use in a blob."""
31 |     im = im.astype(np.float32, copy=False)
32 |     im -= pixel_means
33 |     im_shape = im.shape
34 |     im_size_min = np.min(im_shape[0:2])
35 |     im_size_max = np.max(im_shape[0:2])
36 |     im_scale = float(target_size) / float(im_size_min)
37 |     # Prevent the biggest axis from being more than MAX_SIZE
38 |     if np.round(im_scale * im_size_max) > max_size:
39 |         im_scale = float(max_size) / float(im_size_max)
40 |     if cfg.TRAIN.RANDOM_DOWNSAMPLE:
41 |         r = 0.6 + np.random.rand() * 0.4
42 |         im_scale *= r
43 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
44 |                     interpolation=cv2.INTER_LINEAR)
45 | 
46 |     return im, im_scale
47 | 
-------------------------------------------------------------------------------- /libs/boxes/cython_anchor.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import sys, pkg_resources, imp
6 | 
7 | def __bootstrap__():
8 |     global __bootstrap__, __loader__, __file__
9 |     __file__ = pkg_resources.resource_filename(__name__, 'cython_anchor.so')
10 |     __loader__ = None
11 |     del __bootstrap__, __loader__
12 |     imp.load_dynamic(__name__, __file__)
13 | 
14 | __bootstrap__()
-------------------------------------------------------------------------------- /libs/boxes/cython_anchor.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by CharlesShang@github
5 | # --------------------------------------------------------
6 | 
7 | cimport cython
8 | import numpy as np
9 | cimport numpy as np
10 | 
11 | DTYPE = np.float
12 | ctypedef np.float_t DTYPE_t
13 | 
14 | def anchors_plane(
15 |         int height, int width, int stride,
16 |         np.ndarray[DTYPE_t, ndim=2] anchors_base):
17 |     """
18 |     Parameters
19 |     ----------
20 |     height: height of plane
21 |     width: width of plane
22 |     stride: stride w.r.t. the original image
23 |     anchors_base: (A, 4) a base set of anchors
24 |     Returns
25 |     -------
26 |     all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane
27 |     """
28 |     cdef unsigned int A = anchors_base.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE)
30 |     cdef unsigned int iw, ih
31 |     cdef unsigned int k
32 |     cdef unsigned int A4
33 |     cdef unsigned int sh
34 |     cdef unsigned int sw
35 |     A4 = A * 4
36 |     for iw in range(width):
37 |         sw = iw * stride
38 |         for ih in range(height):
39 |             sh = ih * stride
40 |             for k in range(A):
41 |                 all_anchors[ih, iw, k, 0] = anchors_base[k, 0] + sw
42 |                 all_anchors[ih, iw, k, 1] = anchors_base[k, 1] + sh
43 |                 all_anchors[ih, iw, k, 2] = anchors_base[k, 2] + sw
44 |                 all_anchors[ih, iw, k, 3] = anchors_base[k, 3] + sh
45 |     return all_anchors
-------------------------------------------------------------------------------- /libs/boxes/cython_bbox.py:
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox_transform.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by CharlesShang@github 5 | # -------------------------------------------------------- 6 | 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | DTYPE = np.float 12 | ctypedef np.float_t DTYPE_t 13 | # ctypedef float DTYPE_t 14 | 15 | #def bbox_transform( 16 | # np.ndarray[DTYPE_t, ndim=2] ex_rois, 17 | # np.ndarray[DTYPE_t, ndim=2] gt_rois): 18 | def bbox_transform( 19 | np.ndarray[DTYPE_t, ndim=2] ex_rois, 20 | np.ndarray[DTYPE_t, ndim=2] gt_rois): 21 | """ 22 | Parameters 23 | ---------- 24 | ex_rois: n * 4 numpy array, given boxes 25 | gt_rois: n * 4 numpy array, ground-truth boxes 26 | Returns 27 | ------- 28 | targets: (n, 4) ndarray 29 | """ 30 | cdef unsigned int R = ex_rois.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] targets = np.zeros((R, 4), dtype=DTYPE) 32 | cdef unsigned int i 33 | cdef DTYPE_t gt_w 34 | cdef DTYPE_t gt_h 35 | cdef DTYPE_t gt_cx 36 | cdef DTYPE_t gt_cy 37 | cdef DTYPE_t ex_w 38 | cdef DTYPE_t ex_h 39 | cdef DTYPE_t ex_cx 40 | cdef DTYPE_t ex_cy 41 | for i in range(R): 42 | gt_w = gt_rois[i, 2] - gt_rois[i, 0] + 1.0 43 | gt_h = gt_rois[i, 3] - gt_rois[i, 1] + 1.0 44 | ex_w = ex_rois[i, 2] - ex_rois[i, 0] + 1.0 45 | ex_h = ex_rois[i, 3] - ex_rois[i, 1] + 1.0 46 | gt_cx = gt_rois[i, 0] + gt_w * 0.5 47 | gt_cy = gt_rois[i, 1] + gt_h * 0.5 48 | ex_cx = ex_rois[i, 0] + ex_w * 0.5 49 | ex_cy = ex_rois[i, 1] + ex_h * 0.5 50 | targets[i, 0] = (gt_cx - ex_cx) / ex_w 51 | targets[i, 1] = (gt_cy - ex_cy) / ex_h 52 | targets[i, 2] = np.log(gt_w / ex_w) 53 | targets[i, 3] = np.log(gt_h / ex_h) 54 | return targets 55 | 56 | cdef inline DTYPE_t my_max(DTYPE_t a, DTYPE_t b): return a if a >= b else b 57 | cdef inline DTYPE_t my_min(DTYPE_t a, DTYPE_t b): return a if a <= b else b 58 | 59 | def bbox_transform_inv( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] deltas): 62 | """ 63 | Parameters 64 | ---------- 65 | boxes: n * 4 numpy array, given boxes 66 | deltas: (n, kx4) numpy array, 67 | Returns 68 | ------- 69 | pred_boxes: (n, kx4) 
ndarray 70 | """ 71 | cdef unsigned int R = boxes.shape[0] 72 | cdef unsigned int k4 = deltas.shape[1] 73 | cdef unsigned int k 74 | k = k4 / 4 75 | cdef np.ndarray[DTYPE_t, ndim=2] pred_boxes = np.zeros((R, k4), dtype=DTYPE) 76 | if R == 0: 77 | return pred_boxes 78 | 79 | cdef unsigned int i 80 | cdef unsigned int j 81 | cdef unsigned int j4 82 | cdef DTYPE_t w 83 | cdef DTYPE_t h 84 | cdef DTYPE_t cx 85 | cdef DTYPE_t cy 86 | cdef DTYPE_t px 87 | cdef DTYPE_t py 88 | cdef DTYPE_t pw 89 | cdef DTYPE_t ph 90 | for i in range(R): 91 | w = boxes[i, 2] - boxes[i, 0] + 1.0 92 | h = boxes[i, 3] - boxes[i, 1] + 1.0 93 | cx = boxes[i, 0] + w * 0.5 94 | cy = boxes[i, 1] + h * 0.5 95 | for j in range(k): 96 | j4 = j * 4 97 | px = deltas[i, j4 ] * w + cx 98 | py = deltas[i, j4 + 1] * h + cy 99 | pw = np.exp(deltas[i, j4 + 2]) * w 100 | ph = np.exp(deltas[i, j4 + 3]) * h 101 | pred_boxes[i, j4 ] = px - 0.5 * pw 102 | pred_boxes[i, j4 + 1] = py - 0.5 * ph 103 | pred_boxes[i, j4 + 2] = px + 0.5 * pw 104 | pred_boxes[i, j4 + 3] = py + 0.5 * ph 105 | return pred_boxes 106 | 107 | def clip_boxes( 108 | np.ndarray[DTYPE_t, ndim=2] boxes, 109 | np.ndarray[DTYPE_t, ndim=1] im_shape): 110 | """ 111 | Parameters 112 | ---------- 113 | boxes: (n ,kx4) numpy array, given boxes 114 | im_shape:(2,) numpy array, (image_height, image_width) 115 | Returns 116 | ------- 117 | clipped: (n, kx4) ndarray 118 | """ 119 | cdef unsigned int R = boxes.shape[0] 120 | cdef unsigned int k4 = boxes.shape[1] 121 | cdef unsigned int k = k4 / 4 122 | cdef np.ndarray[DTYPE_t, ndim=2] clipped = np.zeros((R, k4), dtype=DTYPE) 123 | cdef unsigned int i 124 | cdef unsigned int j 125 | cdef unsigned int j4 126 | for i in range(R): 127 | for j in range(k): 128 | j4 = j * 4 129 | clipped[i, j4 ] = my_max(my_min(boxes[i, j4 ], im_shape[1]-1), 0) 130 | clipped[i, j4 + 1] = my_max(my_min(boxes[i, j4 + 1], im_shape[0]-1), 0) 131 | clipped[i, j4 + 2] = my_max(my_min(boxes[i, j4 + 2], im_shape[1]-1), 0) 132 | clipped[i, j4 + 3] = my_max(my_min(boxes[i, j4 + 3], im_shape[0]-1), 0) 133 | return clipped -------------------------------------------------------------------------------- /libs/boxes/cython_nms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_nms.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = 
np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /libs/boxes/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box 
i's (the box currently under consideration)
89 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
90 |     # variables for computing overlap with box j (lower scoring box)
91 |     cdef np.float32_t xx1, yy1, xx2, yy2
92 |     cdef np.float32_t w, h
93 |     cdef np.float32_t inter, ovr
94 | 
95 |     keep = []
96 |     for _i in range(ndets):
97 |         i = order[_i]
98 |         if suppressed[i] == 1:
99 |             continue
100 |         keep.append(i)
101 |         ix1 = x1[i]
102 |         iy1 = y1[i]
103 |         ix2 = x2[i]
104 |         iy2 = y2[i]
105 |         iarea = areas[i]
106 |         for _j in range(_i + 1, ndets):
107 |             j = order[_j]
108 |             if suppressed[j] == 1:
109 |                 continue
110 |             xx1 = max(ix1, x1[j])
111 |             yy1 = max(iy1, y1[j])
112 |             xx2 = min(ix2, x2[j])
113 |             yy2 = min(iy2, y2[j])
114 |             w = max(0.0, xx2 - xx1 + 1)
115 |             h = max(0.0, yy2 - yy1 + 1)
116 |             inter = w * h
117 |             ovr = inter / (iarea + areas[j] - inter)
118 |             ovr1 = inter / iarea
119 |             ovr2 = inter / areas[j]
120 |             if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 |                 suppressed[j] = 1
122 | 
123 |     return keep
124 | 
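125 | # Editor's hedged usage sketch (not part of the original file). Assuming
126 | # libs/setup.py compiles this module into the cython_nms.so loaded by
127 | # cython_nms.py:
128 | #   >>> import numpy as np
129 | #   >>> from libs.boxes import cython_nms
130 | #   >>> dets = np.array([[0, 0, 10, 10, 0.9],
131 | #   ...                  [1, 1, 11, 11, 0.8],
132 | #   ...                  [50, 50, 60, 60, 0.7]], dtype=np.float32)
133 | #   >>> cython_nms.nms(dets, 0.5)   # IoU(box 0, box 1) ~ 0.70 > 0.5
134 | #   [0, 2]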
-------------------------------------------------------------------------------- /libs/boxes/nms_wrapper.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import libs.configs.config_v1 as cfg
10 | import libs.nms.gpu_nms as gpu_nms
11 | import libs.nms.cpu_nms as cpu_nms
12 | 
13 | def nms(dets, thresh, force_cpu=False):
14 |     """Dispatch to either CPU or GPU NMS implementations."""
15 |     if dets.shape[0] == 0:
16 |         return []
17 |     if force_cpu:
18 |         return cpu_nms.cpu_nms(dets, thresh)
19 |     return gpu_nms.gpu_nms(dets, thresh, device_id=0)
20 | 
21 | def nms_wrapper(scores, boxes, threshold=0.7, class_sets=None):
22 |     """
23 |     post-process the results of im_detect
24 |     :param boxes: N * (K * 4) numpy
25 |     :param scores: N * K numpy
26 |     :param class_sets: e.g. CLASSES = ('__background__', 'person', 'bike', 'motorbike', 'car', 'bus')
27 |     :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]}
28 |     """
29 |     num_class = scores.shape[1] if class_sets is None else len(class_sets)
30 |     assert num_class * 4 == boxes.shape[1], \
31 |         "Detection scores and boxes don't match: %d vs %d" % (num_class * 4, boxes.shape[1])
32 |     class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets
33 | 
34 |     res = []
35 |     for ind, cls in enumerate(class_sets[1:]):
36 |         ind += 1  # skip background
37 |         cls_boxes = boxes[:, 4 * ind: 4 * (ind + 1)]
38 |         cls_scores = scores[:, ind]
39 |         dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
40 |         keep = nms(dets, thresh=0.3)
41 |         dets = dets[keep, :]
42 |         dets = dets[np.where(dets[:, 4] > threshold)]
43 |         r = {}
44 |         if dets.shape[0] > 0:
45 |             r['class'], r['dets'] = cls, dets
46 |         else:
47 |             r['class'], r['dets'] = cls, None
48 |         res.append(r)
49 |     return res
50 | 
51 | if __name__ == '__main__':
52 | 
53 |     score = np.random.rand(10, 21)
54 |     boxes = np.random.randint(0, 100, (10, 21, 2))
55 |     s = np.random.randint(0, 100, (10, 21, 2))
56 |     s = boxes + s
57 |     boxes = np.concatenate((boxes, s), axis=2)
58 |     boxes = np.reshape(boxes, [boxes.shape[0], -1])
59 |     # score = np.reshape(score, [score.shape[0], -1])
60 |     res = nms_wrapper(score, boxes)
61 |     print(res)
-------------------------------------------------------------------------------- /libs/boxes/profile: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/boxes/profile
-------------------------------------------------------------------------------- /libs/boxes/profile.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/boxes/profile.png
-------------------------------------------------------------------------------- /libs/boxes/roi.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import functools
5 | 
6 | import numpy as np
7 | import tensorflow as tf
8 | import tensorflow.contrib.slim as slim
9 | 
10 | def roi_align(feat, boxes):
11 |     """Given features and boxes, this function crops features."""
12 |     return
13 | 
14 | def roi_cropping(feat, boxes, clses, anchors, spatial_scale=1.0/16):
15 |     """This function computes final rpn boxes
16 |     and crops areas from the incoming features.
17 |     """
18 |     return
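19 | 
20 | # Editor's hedged sketch (not the repo's implementation; the actual ROI
21 | # cropping layer used in training lives in libs/layers/crop.py). One common
22 | # way to realize roi_align with stock TF ops is tf.image.crop_and_resize:
23 | # take boxes as (x1, y1, x2, y2) in feature-map coordinates, normalize them
24 | # to [0, 1] as (y1/(H-1), x1/(W-1), y2/(H-1), x2/(W-1)), and call
25 | #   tf.image.crop_and_resize(feat, normalized_boxes, box_inds, [7, 7])
26 | # where box_inds maps each box to its image index in the batch.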
-------------------------------------------------------------------------------- /libs/boxes/timer.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 
-------------------------------------------------------------------------------- /libs/configs/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/configs/__init__.py
-------------------------------------------------------------------------------- /libs/configs/config_v1.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import tensorflow as tf
6 | 
7 | ##########################
8 | # restore
9 | ##########################
10 | tf.app.flags.DEFINE_string(
11 |     'train_dir', './output/mask_rcnn/',
12 |     'Directory where checkpoints and event logs are written to.')
13 | 
14 | tf.app.flags.DEFINE_string(
15 |     'pretrained_model', './data/pretrained_models/resnet_v1_50.ckpt',
16 |     'Path to the pretrained model')
17 | 
18 | ##########################
19 | # network
20 | ##########################
21 | tf.app.flags.DEFINE_string(
22 |     'network', 'resnet50',
23 |     'name of the backbone network')
24 | 
25 | ##########################
26 | # dataset
27 | ##########################
28 | tf.app.flags.DEFINE_bool(
29 |     'update_bn', False,
30 |     'Whether or not to update the batch normalization layers')
31 | 
32 | tf.app.flags.DEFINE_integer(
33 |     'num_readers', 4,
34 |     'The number of parallel readers that read data from the dataset.')
35 | 
36 | tf.app.flags.DEFINE_string(
37 |     'dataset_name', 'coco',
38 |     'The name of the dataset to load.')
39 | 
40 | tf.app.flags.DEFINE_string(
41 |     'dataset_split_name', 'train2014',
42 |     'The name of the train/test/val split.')
43 | 
44 | tf.app.flags.DEFINE_string(
45 |     'dataset_dir', 'data/coco/',
46 |     'The directory where the dataset files are stored.')
47 | 
48 | tf.app.flags.DEFINE_integer(
49 |     'im_batch', 1,
50 |     'number of images in a mini-batch')
51 | 
52 | 
53 | tf.app.flags.DEFINE_integer(
54 |     'num_preprocessing_threads', 4,
55 |     'The number of threads used to create the batches.')
56 | 
57 | tf.app.flags.DEFINE_integer(
58 |     'log_every_n_steps', 10,
59 |     'The frequency with which logs are printed.')
60 | 
61 | tf.app.flags.DEFINE_integer(
62 |     'save_summaries_secs', 60,
63 |     'The frequency with which summaries are saved, in seconds.')
64 | 
65 | tf.app.flags.DEFINE_integer(
66 |     'save_interval_secs', 7200,
67 |     'The frequency with which the model is saved, in seconds.')
68 | 
69 | tf.app.flags.DEFINE_integer(
70 |     'max_iters', 2500000,
71 |     'max iterations')
72 | 
73 | ######################
74 | # Optimization Flags #
75 | ######################
76 | 
77 | tf.app.flags.DEFINE_float(
78 |     'weight_decay', 0.00005, 'The weight decay on the model weights.')
79 | 
80 | tf.app.flags.DEFINE_string(
81 |     'optimizer', 'momentum',
82 |     'The name of the optimizer, one of "adadelta", "adagrad", "adam", '
83 |     '"ftrl", "momentum", "sgd" or "rmsprop".')
84 | 
85 | tf.app.flags.DEFINE_float(
86 |     'adadelta_rho', 0.95,
87 |     'The decay rate for adadelta.')
88 | 89 | tf.app.flags.DEFINE_float( 90 | 'adagrad_initial_accumulator_value', 0.1, 91 | 'Starting value for the AdaGrad accumulators.') 92 | 93 | tf.app.flags.DEFINE_float( 94 | 'adam_beta1', 0.9, 95 | 'The exponential decay rate for the 1st moment estimates.') 96 | 97 | tf.app.flags.DEFINE_float( 98 | 'adam_beta2', 0.999, 99 | 'The exponential decay rate for the 2nd moment estimates.') 100 | 101 | tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.') 102 | 103 | tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5, 104 | 'The learning rate power.') 105 | 106 | tf.app.flags.DEFINE_float( 107 | 'ftrl_initial_accumulator_value', 0.1, 108 | 'Starting value for the FTRL accumulators.') 109 | 110 | tf.app.flags.DEFINE_float( 111 | 'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.') 112 | 113 | tf.app.flags.DEFINE_float( 114 | 'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.') 115 | 116 | tf.app.flags.DEFINE_float( 117 | 'momentum', 0.99, 118 | 'The momentum for the MomentumOptimizer and RMSPropOptimizer.') 119 | 120 | tf.app.flags.DEFINE_float('rmsprop_momentum', 0.99, 'Momentum.') 121 | 122 | tf.app.flags.DEFINE_float('rmsprop_decay', 0.99, 'Decay term for RMSProp.') 123 | 124 | ####################### 125 | # Learning Rate Flags # 126 | ####################### 127 | 128 | tf.app.flags.DEFINE_string( 129 | 'learning_rate_decay_type', 'exponential', 130 | 'Specifies how the learning rate is decayed. One of "fixed", "exponential",' 131 | ' or "polynomial"') 132 | 133 | tf.app.flags.DEFINE_float('learning_rate', 0.002, 134 | 'Initial learning rate.') 135 | 136 | tf.app.flags.DEFINE_float( 137 | 'end_learning_rate', 0.00001, 138 | 'The minimal end learning rate used by a polynomial decay learning rate.') 139 | 140 | tf.app.flags.DEFINE_float( 141 | 'label_smoothing', 0.0, 'The amount of label smoothing.') 142 | 143 | tf.app.flags.DEFINE_float( 144 | 'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.') 145 | 146 | tf.app.flags.DEFINE_float( 147 | 'num_epochs_per_decay', 2.0, 148 | 'Number of epochs after which learning rate decays.') 149 | 150 | tf.app.flags.DEFINE_bool( 151 | 'sync_replicas', False, 152 | 'Whether or not to synchronize the replicas during training.') 153 | 154 | tf.app.flags.DEFINE_integer( 155 | 'replicas_to_aggregate', 1, 156 | 'The Number of gradients to collect before updating params.') 157 | 158 | tf.app.flags.DEFINE_float( 159 | 'moving_average_decay', None, 160 | 'The decay to use for the moving average.' 161 | 'If left as None, then moving averages are not used.') 162 | 163 | ####################### 164 | # Dataset Flags # 165 | ####################### 166 | 167 | 168 | tf.app.flags.DEFINE_string( 169 | 'model_name', 'resnet50', 170 | 'The name of the architecture to train.') 171 | 172 | tf.app.flags.DEFINE_string( 173 | 'preprocessing_name', 'coco', 174 | 'The name of the preprocessing to use. 
If left ' 175 | 'as `None`, then the model_name flag is used.') 176 | 177 | tf.app.flags.DEFINE_integer( 178 | 'batch_size', 1, 179 | 'The number of samples in each batch.') 180 | 181 | tf.app.flags.DEFINE_integer( 182 | 'train_image_size', None, 'Train image size') 183 | 184 | tf.app.flags.DEFINE_integer('max_number_of_steps', None, 185 | 'The maximum number of training steps.') 186 | 187 | tf.app.flags.DEFINE_string( 188 | 'classes', None, 189 | 'The classes to classify.') 190 | 191 | tf.app.flags.DEFINE_integer( 192 | 'image_min_size', 640, 193 | 'Resize images so that the shorter edge equals image_min_size') 194 | 195 | ##################### 196 | # Fine-Tuning Flags # 197 | ##################### 198 | 199 | tf.app.flags.DEFINE_string( 200 | 'checkpoint_path', None, 201 | 'The path to a checkpoint from which to fine-tune.') 202 | 203 | tf.app.flags.DEFINE_string( 204 | 'checkpoint_exclude_scopes', None, 205 | 'Comma-separated list of scopes of variables to exclude when restoring ' 206 | 'from a checkpoint.') 207 | 208 | tf.app.flags.DEFINE_string( 209 | 'checkpoint_include_scopes', None, 210 | 'Comma-separated list of scopes of variables to include when restoring ' 211 | 'from a checkpoint.') 212 | 213 | tf.app.flags.DEFINE_string( 214 | 'trainable_scopes', None, 215 | 'Comma-separated list of scopes to filter the set of variables to train. ' 216 | 'By default, None trains all the variables.') 217 | 218 | tf.app.flags.DEFINE_boolean( 219 | 'ignore_missing_vars', False, 220 | 'When restoring a checkpoint, ignore variables that are missing from it.') 221 | 222 | tf.app.flags.DEFINE_boolean( 223 | 'restore_previous_if_exists', True, 224 | 'Whether to restore from a previous checkpoint in train_dir, if one exists.') 225 | 226 | ####################### 227 | # BOX Flags # 228 | ####################### 229 | tf.app.flags.DEFINE_float( 230 | 'rpn_bg_threshold', 0.3, 231 | 'Only anchors whose intersection with groundtruth is less than rpn_bg_threshold are considered to be bg') 232 | 233 | tf.app.flags.DEFINE_float( 234 | 'rpn_fg_threshold', 0.7, 235 | 'Only anchors whose intersection with groundtruth is larger than rpn_fg_threshold are considered to be fg') 236 | 237 | tf.app.flags.DEFINE_float( 238 | 'fg_threshold', 0.7, 239 | 'Only regions whose intersection is larger than fg_threshold are considered to be fg') 240 | 241 | tf.app.flags.DEFINE_float( 242 | 'bg_threshold', 0.3, 243 | 'Only regions whose intersection is less than bg_threshold are considered to be bg') 244 | 245 | tf.app.flags.DEFINE_integer( 246 | 'rois_per_image', 256, 247 | 'Number of rois that should be sampled to train this network') 248 | 249 | tf.app.flags.DEFINE_float( 250 | 'fg_roi_fraction', 0.25, 251 | 'Fraction of the sampled rois that should be foreground') 252 | 253 | tf.app.flags.DEFINE_float( 254 | 'fg_rpn_fraction', 0.25, 255 | 'Fraction of the sampled rpn anchors that should be foreground') 256 | 257 | tf.app.flags.DEFINE_integer( 258 | 'rpn_batch_size', 500, 259 | 'Number of rpn anchors that should be sampled to train this network') 260 | 261 | tf.app.flags.DEFINE_integer( 262 | 'allow_border', 10, 263 | 'How many pixels an anchor may extend beyond the image border') 264 | 265 | ################################## 266 | # NMS # 267 | ################################## 268 | 269 | tf.app.flags.DEFINE_integer( 270 | 'pre_nms_top_n', 12000, 271 | 'Number of rpn anchors that should be kept before nms') 272 | 273 | tf.app.flags.DEFINE_integer( 274 | 'post_nms_top_n', 2000, 275 | 'Number of rpn anchors that should be kept after nms') 276 | 277 | tf.app.flags.DEFINE_float( 278 | 'rpn_nms_threshold', 
0.7, 279 | 'NMS threshold') 280 | 281 | ################################## 282 | # Mask # 283 | ################################## 284 | 285 | tf.app.flags.DEFINE_boolean( 286 | 'mask_allow_bg', True, 287 | 'Whether to allow background masks in the masking stage') 288 | 289 | tf.app.flags.DEFINE_float( 290 | 'mask_threshold', 0.50, 291 | 'Minimum overlap with groundtruth for a roi to be treated as a positive mask') 292 | tf.app.flags.DEFINE_integer( 293 | 'masks_per_image', 64, 294 | 'Number of masks that should be sampled per image to train this network') 295 | 296 | tf.app.flags.DEFINE_float( 297 | 'min_size', 2, 298 | 'Minimum size (in pixels) of an object') 299 | 300 | FLAGS = tf.app.flags.FLAGS 301 | -------------------------------------------------------------------------------- /libs/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/datasets/__init__.py -------------------------------------------------------------------------------- /libs/datasets/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import tensorflow as tf 7 | 8 | import tensorflow.contrib.slim as slim 9 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 10 | 11 | _FILE_PATTERN = 'coco_%s_*.tfrecord' 12 | 13 | SPLITS_TO_SIZES = {'train2014': 82783, 'val2014': 40504} 14 | 15 | _NUM_CLASSES = 81 16 | 17 | _ITEMS_TO_DESCRIPTIONS = { 18 | 'image': 'A color image of varying size.', 19 | 'label': 'An annotation image of varying size. (pixel-level masks)', 20 | 'gt_masks': 'masks of instances in this image. (instance-level masks), of shape (N, image_height, image_width)', 21 | 'gt_boxes': 'bounding boxes and classes of instances in this image, of shape (N, 5), each entry is (x1, y1, x2, y2, class)', 22 | } 23 | 24 | 25 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 26 | if split_name not in SPLITS_TO_SIZES: 27 | raise ValueError('split name %s was not recognized.' % split_name) 28 | 29 | if not file_pattern: 30 | file_pattern = _FILE_PATTERN 31 | file_pattern = os.path.join(dataset_dir, 'records', file_pattern % split_name) 32 | 33 | # Allowing None in the signature so that dataset_factory can use the default. 
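# --- illustrative aside (not part of coco.py): consuming get_split() ---------
# get_split() returns a slim Dataset whose decoder exposes the item names
# defined in items_to_handlers further down. A minimal sketch using the stock
# TF 1.x slim DatasetDataProvider; note that this repo's actual training input
# path goes through coco.read() (ZLIB-compressed records, defined later in
# this file) and dataset_factory.get_dataset(), so this is only for
# orientation:

import tensorflow as tf
import tensorflow.contrib.slim as slim
from libs.datasets import coco

dataset = coco.get_split('train2014', 'data/coco/')
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, num_readers=4, shuffle=True)
# Item names match the items_to_handlers keys defined in get_split().
image, gt_boxes, gt_masks = provider.get(['image', 'gt_boxes', 'gt_masks'])

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, boxes = sess.run([image, gt_boxes])   # one decoded example
    coord.request_stop()
    coord.join(threads)
# ------------------------------------------------------------------------------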
34 | if reader is None: 35 | reader = tf.TFRecordReader 36 | 37 | keys_to_features = { 38 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 39 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 40 | 'label/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 41 | 'label/format': tf.FixedLenFeature((), tf.string, default_value='png'), 42 | 'image/height': tf.FixedLenFeature((), tf.int64), 43 | 'image/width': tf.FixedLenFeature((), tf.int64), 44 | 45 | 'label/num_instances': tf.FixedLenFeature((), tf.int64), 46 | 'label/gt_boxes': tf.FixedLenFeature((), tf.string), 47 | 'label/gt_masks': tf.FixedLenFeature((), tf.string), 48 | } 49 | 50 | def _masks_decoder(keys_to_tensors): 51 | masks = tf.decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8) 52 | width = tf.cast(keys_to_tensors['image/width'], tf.int32) 53 | height = tf.cast(keys_to_tensors['image/height'], tf.int32) 54 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 55 | mask_shape = tf.stack([instances, height, width]) 56 | return tf.reshape(masks, mask_shape) 57 | 58 | def _gt_boxes_decoder(keys_to_tensors): 59 | bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32) 60 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 61 | bboxes_shape = tf.stack([instances, 5]) 62 | return tf.reshape(bboxes, bboxes_shape) 63 | 64 | def _width_decoder(keys_to_tensors): 65 | width = keys_to_tensors['image/width'] 66 | return tf.cast(width, tf.int32) 67 | 68 | def _height_decoder(keys_to_tensors): 69 | height = keys_to_tensors['image/height'] 70 | return tf.cast(height, tf.int32) 71 | 72 | items_to_handlers = { 73 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 74 | 'label': slim.tfexample_decoder.Image('label/encoded', 'label/format', channels=1), 75 | 'gt_masks': slim.tfexample_decoder.ItemHandlerCallback( 76 | ['label/gt_masks', 'label/num_instances', 'image/width', 'image/height'], _masks_decoder), 77 | 'gt_boxes': slim.tfexample_decoder.ItemHandlerCallback(['label/gt_boxes', 'label/num_instances'], _gt_boxes_decoder), 78 | 'width': slim.tfexample_decoder.ItemHandlerCallback(['image/width'], _width_decoder), 79 | 'height': slim.tfexample_decoder.ItemHandlerCallback(['image/height'], _height_decoder), 80 | } 81 | 82 | decoder = slim.tfexample_decoder.TFExampleDecoder( 83 | keys_to_features, items_to_handlers) 84 | 85 | return slim.dataset.Dataset( 86 | data_sources=file_pattern, 87 | reader=reader, 88 | decoder=decoder, 89 | num_samples=SPLITS_TO_SIZES[split_name], 90 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 91 | num_classes=_NUM_CLASSES) 92 | 93 | def read(tfrecords_filename): 94 | 95 | if not isinstance(tfrecords_filename, list): 96 | tfrecords_filename = [tfrecords_filename] 97 | filename_queue = tf.train.string_input_producer( 98 | tfrecords_filename, num_epochs=100) 99 | 100 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 101 | reader = tf.TFRecordReader(options=options) 102 | _, serialized_example = reader.read(filename_queue) 103 | features = tf.parse_single_example( 104 | serialized_example, 105 | features={ 106 | 'image/img_id': tf.FixedLenFeature([], tf.int64), 107 | 'image/encoded': tf.FixedLenFeature([], tf.string), 108 | 'image/height': tf.FixedLenFeature([], tf.int64), 109 | 'image/width': tf.FixedLenFeature([], tf.int64), 110 | 'label/num_instances': tf.FixedLenFeature([], tf.int64), 111 | 'label/gt_masks': tf.FixedLenFeature([], tf.string), 112 | 'label/gt_boxes': 
tf.FixedLenFeature([], tf.string), 113 | 'label/encoded': tf.FixedLenFeature([], tf.string), 114 | }) 115 | # image = tf.image.decode_jpeg(features['image/encoded'], channels=3) 116 | img_id = tf.cast(features['image/img_id'], tf.int32) 117 | ih = tf.cast(features['image/height'], tf.int32) 118 | iw = tf.cast(features['image/width'], tf.int32) 119 | num_instances = tf.cast(features['label/num_instances'], tf.int32) 120 | image = tf.decode_raw(features['image/encoded'], tf.uint8) 121 | imsize = tf.size(image) 122 | image = tf.cond(tf.equal(imsize, ih * iw), \ 123 | lambda: tf.image.grayscale_to_rgb(tf.reshape(image, (ih, iw, 1))), \ 124 | lambda: tf.reshape(image, (ih, iw, 3))) 125 | 126 | gt_boxes = tf.decode_raw(features['label/gt_boxes'], tf.float32) 127 | gt_boxes = tf.reshape(gt_boxes, [num_instances, 5]) 128 | gt_masks = tf.decode_raw(features['label/gt_masks'], tf.uint8) 129 | gt_masks = tf.cast(gt_masks, tf.int32) 130 | gt_masks = tf.reshape(gt_masks, [num_instances, ih, iw]) 131 | 132 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 133 | 134 | -------------------------------------------------------------------------------- /libs/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from libs.visualization.summary_utils import visualize_input 7 | import glob 8 | from libs.datasets import coco 9 | 10 | import libs.preprocessings.coco_v1 as coco_preprocess 11 | 12 | def get_dataset(dataset_name, split_name, dataset_dir, 13 | im_batch=1, is_training=False, file_pattern=None, reader=None): 14 | """Return a preprocessed image with its groundtruth boxes and masks, read from tfrecords.""" 15 | if file_pattern is None: 16 | file_pattern = dataset_name + '_' + split_name + '*.tfrecord' 17 | 18 | tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern) 19 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords) 20 | 21 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training) 22 | #visualize_input(gt_boxes, image, tf.expand_dims(gt_masks, axis=3)) 23 | 24 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 25 | 26 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /libs/datasets/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include <stdlib.h> 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void 
*JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include <stdint.h> 5 | #include <stddef.h> 6 | #include <assert.h> 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include <math.h> 9 | #include <stdlib.h> 10 | 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; } 12 | uint umax( uint a, uint b ) { return (a>b) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import libs.datasets.pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. 
Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /libs/datasets/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['./common/maskApi.c', '_mask.pyx'], 13 | include_dirs = [np.get_include(), './common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': 'pycocotools'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) -------------------------------------------------------------------------------- /libs/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Written by CharlesShang@github 4 | # -------------------------------------------------------- 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | from .wrapper import anchor_decoder 10 | from .wrapper import anchor_encoder 11 | from .wrapper import roi_decoder 12 | from .wrapper import roi_encoder 13 | from .wrapper import mask_decoder 14 | from .wrapper import mask_encoder 15 | from .wrapper import sample_wrapper as sample_rpn_outputs 16 | from .wrapper import sample_with_gt_wrapper as sample_rpn_outputs_with_gt 17 | from .wrapper import gen_all_anchors 18 | from .wrapper import assign_boxes 19 | from .crop import crop as ROIAlign 20 | from .crop import crop_ as ROIAlign_ 21 | -------------------------------------------------------------------------------- /libs/layers/anchor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | 7 | import libs.boxes.cython_bbox as cython_bbox 8 | import libs.configs.config_v1 as cfg 9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 10 | from libs.boxes.anchor import anchors_plane 11 | from libs.logs.log import LOG 12 | # FLAGS = tf.app.flags.FLAGS 13 | 14 | _DEBUG = False 15 | 16 | def encode(gt_boxes, all_anchors, height, width, stride): 17 | """Matching 
and encoding groundtruth boxes into learning targets 18 | Sampling 19 | 20 | Parameters 21 | --------- 22 | gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class] 23 | all_anchors: an array of shape (h, w, A, 4), 24 | width: width of feature 25 | height: height of feature 26 | stride: downscale factor w.r.t the input size, e.g., one of [4, 8, 16, 32] 27 | Returns 28 | -------- 29 | labels: (N,) array in {-1, 0, 1}: -1 ignore, 0 background, 1 foreground 30 | bbox_targets: N x 4 regression targets 31 | bbox_inside_weights: N x 4, non-zero only for anchors labelled as foreground. 32 | """ 33 | # TODO: speed up this module 34 | # if all_anchors is None: 35 | # all_anchors = anchors_plane(height, width, stride=stride) 36 | 37 | # # anchors, inds_inside, total_anchors 38 | # border = cfg.FLAGS.allow_border 39 | # all_anchors = all_anchors.reshape((-1, 4)) 40 | # inds_inside = np.where( 41 | # (all_anchors[:, 0] >= -border) & 42 | # (all_anchors[:, 1] >= -border) & 43 | # (all_anchors[:, 2] < (width * stride) + border) & 44 | # (all_anchors[:, 3] < (height * stride) + border))[0] 45 | # anchors = all_anchors[inds_inside, :] 46 | all_anchors = all_anchors.reshape([-1, 4]) 47 | anchors = all_anchors 48 | total_anchors = all_anchors.shape[0] 49 | 50 | # labels = np.zeros((anchors.shape[0], ), dtype=np.float32) 51 | labels = np.empty((anchors.shape[0], ), dtype=np.float32) 52 | labels.fill(-1) 53 | 54 | if gt_boxes.size > 0: 55 | overlaps = cython_bbox.bbox_overlaps( 56 | np.ascontiguousarray(anchors, dtype=np.float), 57 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 58 | 59 | # if _DEBUG: 60 | # print ('gt_boxes shape: ', gt_boxes.shape) 61 | # print ('anchors shape: ', anchors.shape) 62 | # print ('overlaps shape: ', overlaps.shape) 63 | 64 | gt_assignment = overlaps.argmax(axis=1) # (A) 65 | max_overlaps = overlaps[np.arange(total_anchors), gt_assignment] 66 | gt_argmax_overlaps = overlaps.argmax(axis=0) # G 67 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 68 | np.arange(overlaps.shape[1])] 69 | 70 | labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0 71 | 72 | if True: 73 | # this is sensitive to boxes with little overlap, no need! 
74 | # gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 75 | 76 | # fg label: for each gt, hard-assign anchor with highest overlap despite its overlaps 77 | labels[gt_argmax_overlaps] = 1 78 | 79 | # exclude examples with little overlaps 80 | # added later 81 | # excludes = np.where(gt_max_overlaps < cfg.FLAGS.bg_threshold)[0] 82 | # labels[gt_argmax_overlaps[excludes]] = -1 83 | 84 | if _DEBUG: 85 | min_ov = np.min(gt_max_overlaps) 86 | max_ov = np.max(gt_max_overlaps) 87 | mean_ov = np.mean(gt_max_overlaps) 88 | if min_ov < cfg.FLAGS.bg_threshold: 89 | LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)' 90 | % (min_ov, mean_ov, max_ov, stride, height, width)) 91 | worst = gt_boxes[np.argmin(gt_max_overlaps)] 92 | anc = anchors[gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :] 93 | LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)' 94 | % (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4], 95 | anc[0], anc[1], anc[2], anc[3])) 96 | 97 | 98 | # fg label: above threshold IOU 99 | labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1 100 | # print (np.min(labels), np.max(labels)) 101 | 102 | # subsample positive labels if there are too many 103 | num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size) 104 | fg_inds = np.where(labels == 1)[0] 105 | if len(fg_inds) > num_fg: 106 | disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 107 | labels[disable_inds] = -1 108 | else: 109 | # if there is no gt 110 | labels[:] = 0 111 | 112 | # TODO: mild hard negative mining 113 | # subsample negative labels if there are too many 114 | num_fg = np.sum(labels == 1) 115 | num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8) 116 | bg_inds = np.where(labels == 0)[0] 117 | if len(bg_inds) > num_bg: 118 | disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 119 | labels[disable_inds] = -1 120 | 121 | bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32) 122 | if gt_boxes.size > 0: 123 | bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :]) 124 | bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32) 125 | bbox_inside_weights[labels == 1, :] = 0.1 126 | 127 | # # mapping to whole outputs 128 | # labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 129 | # bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 130 | # bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 131 | 132 | labels = labels.reshape((1, height, width, -1)) 133 | bbox_targets = bbox_targets.reshape((1, height, width, -1)) 134 | bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1)) 135 | 136 | return labels, bbox_targets, bbox_inside_weights 137 | 138 | def decode(boxes, scores, all_anchors, ih, iw): 139 | """Decode outputs into boxes 140 | Parameters 141 | --------- 142 | boxes: an array of shape (1, h, w, Ax4) 143 | scores: an array of shape (1, h, w, Ax2), 144 | all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2] 145 | 146 | Returns 147 | -------- 148 | final_boxes: of shape (R x 4) 149 | classes: of shape (R) in {0,1,2,3... 
K-1} 150 | scores: of shape (R) in [0 ~ 1] 151 | """ 152 | # h, w = boxes.shape[1], boxes.shape[2] 153 | # if all_anchors is None: 154 | # stride = 2 ** int(round(np.log2((iw + 0.0) / w))) 155 | # all_anchors = anchors_plane(h, w, stride=stride) 156 | all_anchors = all_anchors.reshape((-1, 4)) 157 | boxes = boxes.reshape((-1, 4)) 158 | scores = scores.reshape((-1, 2)) 159 | assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \ 160 | 'Anchor layer shape error %d vs %d vs %d' % (scores.shape[0], boxes.shape[0], all_anchors.shape[0]) 161 | boxes = bbox_transform_inv(all_anchors, boxes) 162 | classes = np.argmax(scores, axis=1) 163 | scores = scores[:, 1] 164 | final_boxes = boxes 165 | final_boxes = clip_boxes(final_boxes, (ih, iw)) 166 | classes = classes.astype(np.int32) 167 | return final_boxes, classes, scores 168 | 169 | def sample(boxes, scores, ih, iw, is_training): 170 | """ 171 | Sample the anchor layer outputs for the next stage (mask or roi prediction) 172 | 173 | Params 174 | ---------- 175 | boxes: of shape (? ,4) 176 | scores: foreground prob 177 | ih: image height 178 | iw: image width 179 | is_training: True when sampling for training, False for testing 180 | 181 | Returns 182 | ---------- 183 | rois: of shape (N, 4) 184 | scores: of shape (N, 1) 185 | batch_ids: 186 | """ 187 | return 188 | 189 | 190 | def _unmap(data, count, inds, fill=0): 191 | """ Unmap a subset of items (data) back to the original set of items (of 192 | size count) """ 193 | if len(data.shape) == 1: 194 | ret = np.empty((count,), dtype=np.float32) 195 | ret.fill(fill) 196 | ret[inds] = data 197 | else: 198 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 199 | ret.fill(fill) 200 | ret[inds, :] = data 201 | return ret 202 | 203 | def _compute_targets(ex_rois, gt_rois): 204 | """Compute bounding-box regression targets for an image.""" 205 | 206 | assert ex_rois.shape[0] == gt_rois.shape[0] 207 | assert ex_rois.shape[1] == 4 208 | assert gt_rois.shape[1] == 5 209 | 210 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 211 | 212 | if __name__ == '__main__': 213 | 214 | import time 215 | t = time.time() 216 | 217 | for i in range(10): 218 | cfg.FLAGS.fg_threshold = 0.1 219 | classes = np.random.randint(0, 3, (50, 1)) 220 | boxes = np.random.randint(10, 50, (50, 2)) 221 | s = np.random.randint(20, 50, (50, 2)) 222 | s = boxes + s 223 | boxes = np.concatenate((boxes, s), axis=1) 224 | gt_boxes = np.hstack((boxes, classes)) 225 | # gt_boxes = boxes 226 | rois = np.random.randint(10, 50, (20, 2)) 227 | s = np.random.randint(0, 20, (20, 2)) 228 | s = rois + s 229 | rois = np.concatenate((rois, s), axis=1) 230 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=200, width=300, stride=4) 231 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=100, width=150, stride=8) 232 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=50, width=75, stride=16) 233 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=25, width=37, stride=32) 234 | # anchors, _, _ = anchors_plane(200, 300, stride=4, border=0) 235 | 236 | print('average time: %f' % ((time.time() - t)/10.0)) 237 | -------------------------------------------------------------------------------- /libs/layers/assign.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | from __future__ import absolute_import 4 | from 
__future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | import libs.boxes.cython_bbox as cython_bbox 10 | import libs.configs.config_v1 as cfg 11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 12 | from libs.boxes.anchor import anchors_plane 13 | from libs.logs.log import LOG 14 | # FLAGS = tf.app.flags.FLAGS 15 | 16 | _DEBUG = False 17 | 18 | def assign_boxes(gt_boxes, min_k=2, max_k=5): 19 | """assigning boxes to layers in a pyramid according to its area 20 | Params 21 | ----- 22 | gt_boxes: of shape (N, 5), each entry is [x1, y1, x2, y2, cls] 23 | strides: the stride of each layer, like [4, 8, 16, 32] 24 | 25 | Returns 26 | ----- 27 | layer_ids: of shape (N,), each entry is a id indicating the assigned layer id 28 | """ 29 | k0 = 4 30 | if gt_boxes.size > 0: 31 | layer_ids = np.zeros((gt_boxes.shape[0], ), dtype=np.int32) 32 | ws = gt_boxes[:, 2] - gt_boxes[:, 0] 33 | hs = gt_boxes[:, 3] - gt_boxes[:, 1] 34 | areas = ws * hs 35 | k = np.floor(k0 + np.log2(np.sqrt(areas) / 224)) 36 | inds = np.where(k < min_k)[0] 37 | k[inds] = min_k 38 | inds = np.where(k > max_k)[0] 39 | k[inds] = max_k 40 | if _DEBUG: 41 | print ("### boxes and layer ids") 42 | print (np.hstack((gt_boxes[:, 0:4], k[:, np.newaxis]))) 43 | return k.astype(np.int32) 44 | 45 | else: 46 | return np.asarray([], dtype=np.int32) 47 | -------------------------------------------------------------------------------- /libs/layers/crop.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | def crop(images, boxes, batch_inds, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'): 8 | """Cropping areas of features into fixed size 9 | Params: 10 | -------- 11 | images: a 4-d Tensor of shape (N, H, W, C) 12 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2] 13 | batch_inds: 14 | 15 | Returns: 16 | -------- 17 | A Tensor of shape (N, pooled_height, pooled_width, C) 18 | """ 19 | with tf.name_scope(scope): 20 | # 21 | boxes = boxes / (stride + 0.0) 22 | boxes = tf.reshape(boxes, [-1, 4]) 23 | 24 | # normalize the boxes and swap x y dimensions 25 | shape = tf.shape(images) 26 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y) 27 | xs = boxes[:, 0] 28 | ys = boxes[:, 1] 29 | xs = xs / tf.cast(shape[2], tf.float32) 30 | ys = ys / tf.cast(shape[1], tf.float32) 31 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1) 32 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2) 33 | 34 | # if batch_inds is False: 35 | # num_boxes = tf.shape(boxes)[0] 36 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds') 37 | # batch_inds = boxes[:, 0] * 0 38 | # batch_inds = tf.cast(batch_inds, tf.int32) 39 | 40 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds]) 41 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds]) 42 | with tf.control_dependencies([assert_op, images, batch_inds]): 43 | return tf.image.crop_and_resize(images, boxes, batch_inds, 44 | [pooled_height, pooled_width], 45 | method='bilinear', 46 | name='Crop') 47 | 48 | def crop_(images, boxes, batch_inds, ih, iw, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'): 49 | """Cropping areas of features into fixed size 50 | Params: 51 | -------- 52 | images: 
a 4-d Tensor of shape (N, H, W, C) 53 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2] 54 | batch_inds: 55 | 56 | Returns: 57 | -------- 58 | A Tensor of shape (N, pooled_height, pooled_width, C) 59 | """ 60 | with tf.name_scope(scope): 61 | # 62 | boxes = boxes / (stride + 0.0) 63 | boxes = tf.reshape(boxes, [-1, 4]) 64 | 65 | # normalize the boxes and swap x y dimensions 66 | shape = tf.shape(images) 67 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y) 68 | xs = boxes[:, 0] 69 | ys = boxes[:, 1] 70 | xs = xs / tf.cast(shape[2], tf.float32) 71 | ys = ys / tf.cast(shape[1], tf.float32) 72 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1) 73 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2) 74 | 75 | # if batch_inds is False: 76 | # num_boxes = tf.shape(boxes)[0] 77 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds') 78 | # batch_inds = boxes[:, 0] * 0 79 | # batch_inds = tf.cast(batch_inds, tf.int32) 80 | 81 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds]) 82 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds]) 83 | with tf.control_dependencies([assert_op, images, batch_inds]): 84 | return [tf.image.crop_and_resize(images, boxes, batch_inds, 85 | [pooled_height, pooled_width], 86 | method='bilinear', 87 | name='Crop')] + [boxes] 88 | 89 | -------------------------------------------------------------------------------- /libs/layers/mask.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import cv2 8 | import libs.boxes.cython_bbox as cython_bbox 9 | import libs.configs.config_v1 as cfg 10 | from libs.logs.log import LOG 11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 12 | 13 | _DEBUG = False 14 | def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width): 15 | """Encode masks groundtruth into learnable targets 16 | Sample some examples 17 | 18 | Params 19 | ------ 20 | gt_masks: image_height x image_width {0, 1} matrix, of shape (G, imh, imw) 21 | gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class] 22 | rois: the bounding boxes of shape (N, 4), 23 | ## scores: scores of shape (N, 1) 24 | num_classes: K 25 | mask_height, mask_width: height and width of output masks 26 | 27 | Returns 28 | ------- 29 | # rois: boxes sampled for cropping masks, of shape (M, 4) 30 | labels: class-ids of shape (M, 1) 31 | mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) in {0, 1} values 32 | mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1} indicating which mask is sampled 33 | """ 34 | total_masks = rois.shape[0] 35 | if gt_boxes.size > 0: 36 | # B x G 37 | overlaps = cython_bbox.bbox_overlaps( 38 | np.ascontiguousarray(rois[:, 0:4], dtype=np.float), 39 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 40 | gt_assignment = overlaps.argmax(axis=1) # shape is N 41 | max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment] # N 42 | # note: this will assign every rois with a positive label 43 | # labels = gt_boxes[gt_assignment, 4] # N 44 | labels = np.zeros((total_masks, ), np.float32) 45 | labels[:] = -1 46 | 47 | # sample positive rois whose intersection is more than 0.5 48 | keep_inds = 
np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0] 49 | num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image)) 50 | if keep_inds.size > 0 and num_masks < keep_inds.size: 51 | keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False) 52 | LOG('Masks: %d of %d rois are considered positive masks. Number of masks %d'\ 53 | %(num_masks, rois.shape[0], gt_masks.shape[0])) 54 | 55 | labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1] 56 | 57 | # rois = rois[inds] 58 | # labels = labels[inds].astype(np.int32) 59 | # gt_assignment = gt_assignment[inds] 60 | 61 | # ignore rois with overlaps between fg_threshold and bg_threshold 62 | # masks are only defined on positive rois 63 | ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0] 64 | labels[ignore_inds] = -1 65 | 66 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32) 67 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32) 68 | rois[rois < 0] = 0 69 | 70 | # TODO: speed bottleneck? 71 | for i in keep_inds: 72 | roi = rois[i, :4] 73 | cropped = gt_masks[gt_assignment[i], int(roi[1]):int(roi[3])+1, int(roi[0]):int(roi[2])+1] 74 | cropped = cv2.resize(cropped, (mask_width, mask_height), interpolation=cv2.INTER_NEAREST) 75 | 76 | mask_targets[i, :, :, int(labels[i])] = cropped 77 | mask_inside_weights[i, :, :, int(labels[i])] = 1 78 | else: 79 | # there is no gt 80 | labels = np.zeros((total_masks, ), np.float32) 81 | labels[:] = -1 82 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32) 83 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32) 84 | return labels, mask_targets, mask_inside_weights 85 | 86 | def decode(mask_targets, rois, classes, ih, iw): 87 | """Decode outputs into final masks 88 | Params 89 | ------ 90 | mask_targets: of shape (N, h, w, K) 91 | rois: of shape (N, 4) [x1, y1, x2, y2] 92 | classes: of shape (N, 1) the class-id of each roi 93 | ih: image height 94 | iw: image width 95 | 96 | Returns 97 | ------ 98 | M: a painted image with all masks, of shape (height, width), in [0, K] 99 | """ 100 | Mask = np.zeros((ih, iw), dtype=np.float32) 101 | assert rois.shape[0] == mask_targets.shape[0], \ 102 | '%d rois vs %d masks' %(rois.shape[0], mask_targets.shape[0]) 103 | num = rois.shape[0] 104 | rois = clip_boxes(rois, (ih, iw)) 105 | for i in np.arange(num): 106 | k = classes[i] 107 | mask = mask_targets[i, :, :, k] 108 | h, w = rois[i, 3] - rois[i, 1] + 1, rois[i, 2] - rois[i, 0] + 1 109 | x, y = rois[i, 0], rois[i, 1] 110 | mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST) 111 | mask *= k 112 | 113 | # paint 114 | Mask[y:y+h, x:x+w] = mask 115 | 116 | return Mask 117 | 118 | 119 | 120 | if __name__ == '__main__': 121 | 122 | import time 123 | import matplotlib.pyplot as plt 124 | 125 | t = time.time() 126 | 127 | for i in range(10): 128 | cfg.FLAGS.mask_threshold = 0.2 129 | N = 50 130 | W, H = 200, 200 131 | M = 50 132 | 133 | gt_masks = np.zeros((2, H, W), dtype=np.int32) 134 | gt_masks[0, 50:150, 50:150] = 1 135 | gt_masks[1, 100:150, 50:150] = 1 136 | gt_boxes = np.asarray( 137 | [ 138 | [20, 20, 100, 100, 1], 139 | [100, 100, 180, 180, 2] 140 | ]) 141 | rois = gt_boxes[:, :4] 142 | print (rois) 143 | labels, mask_targets, mask_inside_weights = encode(gt_masks, gt_boxes, rois, 3, 7, 7) 144 | print (rois) 145 | Mask = decode(mask_targets, rois, labels.astype(np.int32), H, W) 146 | if True: 147 
| plt.figure(1) 148 | plt.imshow(Mask) 149 | plt.show() 150 | time.sleep(2) 151 | print(labels) 152 | print('average time: %f' % ((time.time() - t) / 10.0)) 153 | 154 | -------------------------------------------------------------------------------- /libs/layers/roi.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | 7 | import libs.boxes.cython_bbox as cython_bbox 8 | import libs.configs.config_v1 as cfg 9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 10 | from libs.logs.log import LOG 11 | 12 | # FLAGS = tf.app.flags.FLAGS 13 | 14 | _DEBUG = False 15 | 16 | def encode(gt_boxes, rois, num_classes): 17 | """Matching and Encoding groundtruth boxes (gt_boxes) into learning targets to boxes 18 | Sampling 19 | Parameters 20 | --------- 21 | gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class] 22 | rois an array of shape (R x 4), [x1, y1, x2, y2] 23 | num_classes: scalar, number of classes 24 | 25 | Returns 26 | -------- 27 | labels: Nx1 array in [0, num_classes) 28 | bbox_targets: of shape (N, Kx4) regression targets 29 | bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned. 30 | """ 31 | 32 | all_rois = rois 33 | num_rois = rois.shape[0] 34 | if gt_boxes.size > 0: 35 | # R x G matrix 36 | overlaps = cython_bbox.bbox_overlaps( 37 | np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float), 38 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 39 | gt_assignment = overlaps.argmax(axis=1) # R 40 | # max_overlaps = overlaps.max(axis=1) # R 41 | max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment] 42 | # note: this will assign every rois with a positive label 43 | # labels = gt_boxes[gt_assignment, 4] 44 | labels = np.zeros([num_rois], dtype=np.float32) 45 | labels[:] = -1 46 | 47 | # if _DEBUG: 48 | # print ('gt_assignment') 49 | # print (gt_assignment) 50 | 51 | # sample rois as to 1:3 52 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0] 53 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction)) 54 | if fg_inds.size > 0 and fg_rois < fg_inds.size: 55 | fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False) 56 | labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4] 57 | 58 | # TODO: sampling strategy 59 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0] 60 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64) 61 | if bg_inds.size > 0 and bg_rois < bg_inds.size: 62 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False) 63 | labels[bg_inds] = 0 64 | 65 | # ignore rois with overlaps between fg_threshold and bg_threshold 66 | ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &\ 67 | (max_overlaps < cfg.FLAGS.fg_threshold)))[0] 68 | labels[ignore_inds] = -1 69 | 70 | keep_inds = np.append(fg_inds, bg_inds) 71 | if _DEBUG: 72 | print ('keep_inds') 73 | print (keep_inds) 74 | print ('fg_inds') 75 | print (fg_inds) 76 | print ('bg_inds') 77 | print (bg_inds) 78 | print ('bg_rois:', bg_rois) 79 | print ('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold) 80 | # print (max_overlaps) 81 | 82 | LOG('ROIEncoder: %d positive rois, %d negative rois' % (len(fg_inds), len(bg_inds))) 83 | 84 | bbox_targets, bbox_inside_weights = _compute_targets( 85 | rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4], 
labels[keep_inds], num_classes)
86 | bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
87 | bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
88 |
89 | else:
90 | # there is no gt
91 | labels = np.zeros((num_rois, ), np.float32)
92 | bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
93 | bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
94 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
95 | if bg_rois < num_rois:
96 | bg_inds = np.arange(num_rois)
97 | ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
98 | labels[ignore_inds] = -1
99 |
100 | return labels, bbox_targets, bbox_inside_weights
101 |
102 | def decode(boxes, scores, rois, ih, iw):
103 | """Decode prediction targets into boxes, keeping only the box of highest probability for each roi
104 | Parameters
105 | ---------
106 | boxes: an array of shape (R, Kx4), [x1, y1, x2, y2] repeated K times per row
107 | scores: an array of shape (R, K),
108 | rois: an array of shape (R, 4), [x1, y1, x2, y2]
109 |
110 | Returns
111 | --------
112 | final_boxes: of shape (R x 4)
113 | classes: of shape (R) in {0,1,2,3... K-1}
114 | scores: of shape (R) in [0, 1]
115 | """
116 | boxes = bbox_transform_inv(rois, deltas=boxes)
117 | classes = np.argmax(scores, axis=1)
118 | classes = classes.astype(np.int32)
119 | scores = np.max(scores, axis=1)
120 | final_boxes = np.zeros((boxes.shape[0], 4), dtype=np.float32)
121 | for i in np.arange(0, boxes.shape[0]):
122 | ind = classes[i]*4
123 | final_boxes[i, 0:4] = boxes[i, ind:ind+4]
124 | final_boxes = clip_boxes(final_boxes, (ih, iw))
125 | return final_boxes, classes, scores
126 |
127 | def _compute_targets(ex_rois, gt_rois, labels, num_classes):
128 | """Compute bounding-box regression targets for an image.
129 | This function expands those targets into the 4-of-4*K representation used
130 | by the network (i.e. only one class has non-zero targets).
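    Example (illustrative, not from the original docstring): with K = 3 classes
    and a roi labeled class 2, the four targets (dx, dy, dw, dh) produced by
    bbox_transform are written into columns 8:12 of the 4*K-wide row, and
    bbox_inside_weights is set to 1 on exactly those four columns.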
131 |
132 | Returns:
133 | bbox_target (ndarray): N x 4K blob of regression targets
134 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
135 | """
136 |
137 | assert ex_rois.shape[0] == gt_rois.shape[0]
138 | assert ex_rois.shape[1] == 4
139 | assert gt_rois.shape[1] == 4
140 |
141 | targets = bbox_transform(ex_rois, gt_rois)
142 |
143 | clss = labels
144 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
145 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
146 | inds = np.where(clss > 0)[0]
147 | for ind in inds:
148 | cls = int(clss[ind])
149 | start = 4 * cls
150 | end = start + 4
151 | bbox_targets[ind, start:end] = targets[ind, 0:4]
152 | bbox_inside_weights[ind, start:end] = 1
153 | return bbox_targets, bbox_inside_weights
154 |
155 | def _unmap(data, count, inds, fill=0):
156 | """ Unmap a subset of items (data) back to the original set of items (of
157 | size count) """
158 | if len(data.shape) == 1:
159 | ret = np.empty((count,), dtype=np.float32)
160 | ret.fill(fill)
161 | ret[inds] = data
162 | else:
163 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
164 | ret.fill(fill)
165 | ret[inds, :] = data
166 | return ret
167 |
168 | if __name__ == '__main__':
169 | cfg.FLAGS.fg_threshold = 0.1
170 | classes = np.random.randint(0, 3, (10, 1))
171 | boxes = np.random.randint(10, 50, (10, 2))
172 | s = np.random.randint(10, 20, (10, 2))
173 | s = boxes + s
174 | boxes = np.concatenate((boxes, s), axis=1)
175 | gt_boxes = np.hstack((boxes, classes))
176 | noise = np.random.randint(-3, 3, (10, 4))
177 | rois = gt_boxes[:, :4] + noise
178 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, rois, num_classes=3)
179 | print (labels)
180 | print (bbox_inside_weights)
181 |
182 | ls = np.zeros((labels.shape[0], 3))
183 | for i in range(labels.shape[0]):
184 | ls[i, int(labels[i])] = 1
185 | final_boxes, classes, scores = decode(bbox_targets, ls, rois, 100, 100)
186 | print('gt_boxes:\n', gt_boxes)
187 | print ('final boxes:\n', np.hstack((final_boxes, np.expand_dims(classes, axis=1))).astype(np.int32))
188 | # print (final_boxes.astype(np.int32))
189 |
--------------------------------------------------------------------------------
/libs/layers/sample.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 | import libs.configs.config_v1 as cfg
9 | import libs.boxes.nms_wrapper as nms_wrapper
10 | import libs.boxes.cython_bbox as cython_bbox
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 | from libs.logs.log import LOG
13 |
14 | _DEBUG = False
15 |
16 | def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
17 | """Sample boxes according to scores and some learning strategies
18 | assuming the first class is background
19 | Params:
20 | boxes: of shape (..., Ax4), each entry is [x1, y1, x2, y2], the last axis has k*4 dims
21 | scores: of shape (..., A), probs of fg, in [0, 1]
22 | """
23 | min_size = cfg.FLAGS.min_size
24 | rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
25 | pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
26 | post_nms_top_n = cfg.FLAGS.post_nms_top_n
27 |
28 | # training: 12000, 2000
29 | # testing: 6000, 400
30 | if not is_training:
31 | pre_nms_top_n = int(pre_nms_top_n / 2)
32 | post_nms_top_n = int(post_nms_top_n / 5)
33 |
34
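# Worked example (illustrative, assuming the defaults named in the comment
# above): with pre_nms_top_n = 12000 and post_nms_top_n = 2000 configured for
# training, inference keeps 12000 / 2 = 6000 proposals before NMS and
# 2000 / 5 = 400 after NMS, matching the "testing: 6000, 400" note.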
| boxes = boxes.reshape((-1, 4))
35 | scores = scores.reshape((-1, 1))
36 | assert scores.shape[0] == boxes.shape[0], "scores and boxes don't match"
37 |
38 | # filter backgrounds
39 | # Hope this will filter out most of the background anchors, since an argsort is too slow.
40 | if only_positive:
41 | keeps = np.where(scores > 0.5)[0]
42 | boxes = boxes[keeps, :]
43 | scores = scores[keeps]
44 |
45 | # filter minimum size
46 | keeps = _filter_boxes(boxes, min_size=min_size)
47 | boxes = boxes[keeps, :]
48 | scores = scores[keeps]
49 |
50 | # filter with scores
51 | order = scores.ravel().argsort()[::-1]
52 | if pre_nms_top_n > 0:
53 | order = order[:pre_nms_top_n]
54 | boxes = boxes[order, :]
55 | scores = scores[order]
56 |
57 | # filter with nms
58 | det = np.hstack((boxes, scores)).astype(np.float32)
59 | keeps = nms_wrapper.nms(det, rpn_nms_threshold)
60 |
61 | if post_nms_top_n > 0:
62 | keeps = keeps[:post_nms_top_n]
63 | boxes = boxes[keeps, :]
64 | scores = scores[keeps]
65 | batch_inds = np.zeros([boxes.shape[0]], dtype=np.int32)
66 |
67 | # # random sample boxes
68 | ## try early sample later
69 | # fg_inds = np.where(scores > 0.5)[0]
70 | # num_fgs = min(fg_inds.size, int(rois_per_image * fg_roi_fraction))
71 |
72 | if _DEBUG:
73 | LOG('SAMPLE: %d rois have been chosen' % len(scores))
74 | LOG('SAMPLE: a positive box: %d %d %d %d %.4f' % (boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3], scores[0]))
75 | LOG('SAMPLE: a negative box: %d %d %d %d %.4f' % (boxes[-1, 0], boxes[-1, 1], boxes[-1, 2], boxes[-1, 3], scores[-1]))
76 | hs = boxes[:, 3] - boxes[:, 1]
77 | ws = boxes[:, 2] - boxes[:, 0]
78 | assert min(np.min(hs), np.min(ws)) > 0, 'invalid boxes'
79 |
80 | return boxes, scores.astype(np.float32), batch_inds
81 |
82 | def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
83 | """sample boxes for refined output"""
84 | boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)
85 |
86 | if gt_boxes.size > 0:
87 | overlaps = cython_bbox.bbox_overlaps(
88 | np.ascontiguousarray(boxes[:, 0:4], dtype=np.float),
89 | np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float))
90 | gt_assignment = overlaps.argmax(axis=1) # B
91 | max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment] # B
92 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
93 | if _DEBUG and np.argmax(overlaps[fg_inds], axis=1).size < gt_boxes.size/5.0:
94 | print("gt_size")
95 | print(gt_boxes)
96 | gt_widths = (gt_boxes[:, 2] - gt_boxes[:, 0])
97 | gt_heights = (gt_boxes[:, 3] - gt_boxes[:, 1])
98 | gt_dim = np.vstack((gt_widths, gt_heights))
99 | print(np.transpose(gt_dim))
100 | #print(gt_widths)
101 | #print(gt_heights)
102 |
103 | print('SAMPLE: %d after overlaps by %s' % (len(fg_inds), cfg.FLAGS.fg_threshold))
104 | print("detected object no.")
105 | print(np.argmax(overlaps[fg_inds], axis=1))
106 | print("total object")
107 | print(gt_boxes.size/5.0)
108 |
109 | mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
110 | if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
111 | mask_fg_inds = np.random.choice(mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)
112 |
113 | if True:
114 | gt_argmax_overlaps = overlaps.argmax(axis=0) # G
115 | fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)
116 |
117 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
118 | if fg_inds.size > 0 and fg_rois < fg_inds.size:
119 | fg_inds = np.random.choice(fg_inds, size=fg_rois,
replace=False)
120 |
121 | # TODO: sampling strategy
122 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
123 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)  # 64
124 | if bg_inds.size > 0 and bg_rois < bg_inds.size:
125 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
126 |
127 | keep_inds = np.append(fg_inds, bg_inds)
128 | #print(gt_boxes[np.argmax(overlaps[fg_inds],axis=1),4])
129 | else:
130 | bg_inds = np.arange(boxes.shape[0])
131 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1-cfg.FLAGS.fg_roi_fraction)), 8)  # 64
132 | if bg_rois < bg_inds.size:
133 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
134 |
135 | keep_inds = bg_inds
136 | mask_fg_inds = np.arange(0)
137 |
138 | return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds],\
139 | boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
140 |
141 | def _jitter_boxes(boxes, jitter=0.1):
142 | """ jitter the boxes before appending them into rois
143 | """
144 | jittered_boxes = boxes.copy()
145 | ws = jittered_boxes[:, 2] - jittered_boxes[:, 0] + 1.0
146 | hs = jittered_boxes[:, 3] - jittered_boxes[:, 1] + 1.0
147 | width_offset = (np.random.rand(jittered_boxes.shape[0]) - 0.5) * jitter * ws
148 | height_offset = (np.random.rand(jittered_boxes.shape[0]) - 0.5) * jitter * hs
149 | jittered_boxes[:, 0] += width_offset
150 | jittered_boxes[:, 2] += width_offset
151 | jittered_boxes[:, 1] += height_offset
152 | jittered_boxes[:, 3] += height_offset
153 |
154 | return jittered_boxes
155 |
156 | def _filter_boxes(boxes, min_size):
157 | """Remove all boxes with any side smaller than min_size."""
158 | ws = boxes[:, 2] - boxes[:, 0] + 1
159 | hs = boxes[:, 3] - boxes[:, 1] + 1
160 | keep = np.where((ws >= min_size) & (hs >= min_size))[0]
161 | return keep
162 |
163 | def _apply_nms(boxes, scores, threshold = 0.5):
164 | """After this only positive boxes are left
165 | Applying this class-wise
166 | """
167 | num_class = scores.shape[1]
168 | assert boxes.shape[0] == scores.shape[0], \
169 | 'Shape mismatch {} vs {}'.format(boxes.shape, scores.shape)
170 |
171 | final_boxes = []
172 | final_scores = []
173 | for cls in np.arange(1, num_class):
174 | cls_boxes = boxes[:, 4*cls: 4*cls+4]
175 | cls_scores = scores[:, cls]
176 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
177 | keep = nms_wrapper.nms(dets, thresh=0.3)
178 | dets = dets[keep, :]
179 | dets = dets[np.where(dets[:, 4] > threshold)]
180 | final_boxes.append(dets[:, :4])
181 | final_scores.append(dets[:, 4])
182 |
183 | final_boxes = np.vstack(final_boxes)
184 | final_scores = np.hstack(final_scores)
185 |
186 | return final_boxes, final_scores
187 |
188 | if __name__ == '__main__':
189 | import time
190 | t = time.time()
191 |
192 | for i in range(10):
193 | N = 200000
194 | boxes = np.random.randint(0, 50, (N, 2))
195 | s = np.random.randint(10, 40, (N, 2))
196 | s = boxes + s
197 | boxes = np.hstack((boxes, s))
198 |
199 | scores = np.random.rand(N, 1)
200 | # scores_ = 1 - np.random.rand(N, 1)
201 | # scores = np.hstack((scores, scores_))
202 |
203 | boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, only_positive=False)
204 |
205 | print ('average time %f' % ((time.time() - t) / 10))
206 |
--------------------------------------------------------------------------------
/libs/layers/wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | #
Written by CharlesShang@github 4 | # -------------------------------------------------------- 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | from . import anchor 11 | from . import roi 12 | from . import mask 13 | from . import sample 14 | from . import assign 15 | from libs.boxes.anchor import anchors_plane 16 | 17 | def anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder'): 18 | 19 | with tf.name_scope(scope) as sc: 20 | labels, bbox_targets, bbox_inside_weights = \ 21 | tf.py_func(anchor.encode, 22 | [gt_boxes, all_anchors, height, width, stride], 23 | [tf.float32, tf.float32, tf.float32]) 24 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 25 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 26 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 27 | labels = tf.reshape(labels, (1, height, width, -1)) 28 | bbox_targets = tf.reshape(bbox_targets, (1, height, width, -1)) 29 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (1, height, width, -1)) 30 | 31 | return labels, bbox_targets, bbox_inside_weights 32 | 33 | 34 | def anchor_decoder(boxes, scores, all_anchors, ih, iw, scope='AnchorDecoder'): 35 | 36 | with tf.name_scope(scope) as sc: 37 | final_boxes, classes, scores = \ 38 | tf.py_func(anchor.decode, 39 | [boxes, scores, all_anchors, ih, iw], 40 | [tf.float32, tf.int32, tf.float32]) 41 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 42 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 43 | scores = tf.convert_to_tensor(scores, name='scores') 44 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 45 | classes = tf.reshape(classes, (-1, )) 46 | scores = tf.reshape(scores, (-1, )) 47 | 48 | return final_boxes, classes, scores 49 | 50 | 51 | def roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder'): 52 | 53 | with tf.name_scope(scope) as sc: 54 | labels, bbox_targets, bbox_inside_weights = \ 55 | tf.py_func(roi.encode, 56 | [gt_boxes, rois, num_classes], 57 | [tf.float32, tf.float32, tf.float32]) 58 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 59 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 60 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 61 | labels = tf.reshape(labels, (-1, )) 62 | bbox_targets = tf.reshape(bbox_targets, (-1, num_classes * 4)) 63 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (-1, num_classes * 4)) 64 | 65 | return labels, bbox_targets, bbox_inside_weights 66 | 67 | 68 | def roi_decoder(boxes, scores, rois, ih, iw, scope='ROIDecoder'): 69 | 70 | with tf.name_scope(scope) as sc: 71 | final_boxes, classes, scores = \ 72 | tf.py_func(roi.decode, 73 | [boxes, scores, rois, ih, iw], 74 | [tf.float32, tf.int32, tf.float32]) 75 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 76 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 77 | scores = tf.convert_to_tensor(scores, name='scores') 78 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 79 | 80 | return final_boxes, classes, scores 81 | 82 | def mask_encoder(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width, scope='MaskEncoder'): 83 | 84 | with tf.name_scope(scope) as sc: 85 | labels, mask_targets, mask_inside_weights = \ 86 | tf.py_func(mask.encode, 87 | [gt_masks, gt_boxes, 
rois, num_classes, mask_height, mask_width], 88 | [tf.float32, tf.int32, tf.float32]) 89 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='classes') 90 | mask_targets = tf.convert_to_tensor(mask_targets, name='mask_targets') 91 | mask_inside_weights = tf.convert_to_tensor(mask_inside_weights, name='mask_inside_weights') 92 | labels = tf.reshape(labels, (-1,)) 93 | mask_targets = tf.reshape(mask_targets, (-1, mask_height, mask_width, num_classes)) 94 | mask_inside_weights = tf.reshape(mask_inside_weights, (-1, mask_height, mask_width, num_classes)) 95 | 96 | return labels, mask_targets, mask_inside_weights 97 | 98 | def mask_decoder(mask_targets, rois, classes, ih, iw, scope='MaskDecoder'): 99 | 100 | with tf.name_scope(scope) as sc: 101 | Mask = \ 102 | tf.py_func(mask.decode, 103 | [mask_targets, rois, classes, ih, iw,], 104 | [tf.float32]) 105 | Mask = tf.convert_to_tensor(Mask, name='MaskImage') 106 | Mask = tf.reshape(Mask, (ih, iw)) 107 | 108 | return Mask 109 | 110 | 111 | def sample_wrapper(boxes, scores, is_training=True, scope='SampleBoxes'): 112 | 113 | with tf.name_scope(scope) as sc: 114 | boxes, scores, batch_inds = \ 115 | tf.py_func(sample.sample_rpn_outputs, 116 | [boxes, scores, is_training], 117 | [tf.float32, tf.float32, tf.int32]) 118 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 119 | scores = tf.convert_to_tensor(scores, name='Scores') 120 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 121 | boxes = tf.reshape(boxes, (-1, 4)) 122 | batch_inds = tf.reshape(batch_inds, [-1]) 123 | 124 | return boxes, scores, batch_inds 125 | 126 | def sample_with_gt_wrapper(boxes, scores, gt_boxes, is_training=True, scope='SampleBoxesWithGT'): 127 | 128 | with tf.name_scope(scope) as sc: 129 | boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds = \ 130 | tf.py_func(sample.sample_rpn_outputs_wrt_gt_boxes, 131 | [boxes, scores, gt_boxes, is_training], 132 | [tf.float32, tf.float32, tf.int32, tf.float32, tf.float32, tf.int32]) 133 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 134 | scores = tf.convert_to_tensor(scores, name='Scores') 135 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 136 | 137 | mask_boxes = tf.convert_to_tensor(mask_boxes, name='MaskBoxes') 138 | mask_scores = tf.convert_to_tensor(mask_scores, name='MaskScores') 139 | mask_batch_inds = tf.convert_to_tensor(mask_batch_inds, name='MaskBatchInds') 140 | 141 | return boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds 142 | 143 | def gen_all_anchors(height, width, stride, scales, scope='GenAnchors'): 144 | 145 | with tf.name_scope(scope) as sc: 146 | all_anchors = \ 147 | tf.py_func(anchors_plane, 148 | [height, width, stride, scales], 149 | [tf.float64] 150 | ) 151 | all_anchors = tf.convert_to_tensor(tf.cast(all_anchors, tf.float32), name='AllAnchors') 152 | all_anchors = tf.reshape(all_anchors, (height, width, -1)) 153 | 154 | return all_anchors 155 | 156 | def assign_boxes(gt_boxes, tensors, layers, scope='AssignGTBoxes'): 157 | 158 | with tf.name_scope(scope) as sc: 159 | min_k = layers[0] 160 | max_k = layers[-1] 161 | assigned_layers = \ 162 | tf.py_func(assign.assign_boxes, 163 | [ gt_boxes, min_k, max_k ], 164 | tf.int32) 165 | assigned_layers = tf.reshape(assigned_layers, [-1]) 166 | 167 | assigned_tensors = [] 168 | for t in tensors: 169 | split_tensors = [] 170 | for l in layers: 171 | tf.cast(l, tf.int32) 172 | inds = tf.where(tf.equal(assigned_layers, l)) 173 | inds = tf.reshape(inds, [-1]) 174 | 
split_tensors.append(tf.gather(t, inds))
175 | assigned_tensors.append(split_tensors)
176 |
177 | return assigned_tensors + [assigned_layers]
--------------------------------------------------------------------------------
/libs/logs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/logs/__init__.py
--------------------------------------------------------------------------------
/libs/logs/log.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import logging
6 | import libs.configs.config_v1 as cfg
7 |
8 | def LOG(mssg):
9 | logging.basicConfig(filename=cfg.FLAGS.train_dir + '/maskrcnn.log',
10 | level=logging.INFO,
11 | datefmt='%m/%d/%Y %I:%M:%S %p', format='%(asctime)s %(message)s')
12 | logging.info(mssg)
--------------------------------------------------------------------------------
/libs/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # build pycocotools
4 | cd datasets/pycocotools
5 | make
6 | cd -
7 |
--------------------------------------------------------------------------------
/libs/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/nets/__init__.py
--------------------------------------------------------------------------------
/libs/nets/nets_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import functools
5 |
6 | import tensorflow as tf
7 |
8 | from . import resnet_v1
9 | from .resnet_v1 import resnet_v1_50 as resnet50
10 | from .resnet_utils import resnet_arg_scope
11 | from .resnet_v1 import resnet_v1_101 as resnet101
12 |
13 | slim = tf.contrib.slim
14 |
15 | pyramid_maps = {
16 | 'resnet50': {'C1':'resnet_v1_50/conv1/Relu:0',
17 | 'C2':'resnet_v1_50/block1/unit_2/bottleneck_v1',
18 | 'C3':'resnet_v1_50/block2/unit_3/bottleneck_v1',
19 | 'C4':'resnet_v1_50/block3/unit_5/bottleneck_v1',
20 | 'C5':'resnet_v1_50/block4/unit_3/bottleneck_v1',
21 | },
22 | 'resnet101': {'C1': '', 'C2': '',
23 | 'C3': '', 'C4': '',
24 | 'C5': '',
25 | }
26 | }
27 |
28 | def get_network(name, image, weight_decay=0.000005, is_training=False):
29 |
30 | if name == 'resnet50':
31 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
32 | logits, end_points = resnet50(image, 1000, is_training=is_training)
33 |
34 | if name == 'resnet101':
35 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
36 | logits, end_points = resnet101(image, 1000, is_training=is_training)
37 |
38 | if name == 'resnext50':
39 | raise NotImplementedError('resnext50 is not supported yet')
40 |
41 | end_points['input'] = image
42 | return logits, end_points, pyramid_maps[name]
43 |
--------------------------------------------------------------------------------
/libs/nets/resnet_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains building blocks for various versions of Residual Networks. 16 | 17 | Residual networks (ResNets) were proposed in: 18 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 20 | 21 | More variants were introduced in: 22 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 23 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 24 | 25 | We can obtain different ResNet variants by changing the network depth, width, 26 | and form of residual unit. This module implements the infrastructure for 27 | building them. Concrete ResNet units and full ResNet networks are implemented in 28 | the accompanying resnet_v1.py and resnet_v2.py modules. 29 | 30 | Compared to https://github.com/KaimingHe/deep-residual-networks, in the current 31 | implementation we subsample the output activations in the last residual unit of 32 | each block, instead of subsampling the input activations in the first residual 33 | unit of each block. The two implementations give identical results but our 34 | implementation is more memory efficient. 35 | """ 36 | from __future__ import absolute_import 37 | from __future__ import division 38 | from __future__ import print_function 39 | 40 | import collections 41 | import tensorflow as tf 42 | 43 | # slim = tf.contrib.slim 44 | import tensorflow.contrib.slim as slim 45 | 46 | 47 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 48 | """A named tuple describing a ResNet block. 49 | 50 | Its parts are: 51 | scope: The scope of the `Block`. 52 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 53 | returns another `Tensor` with the output of the ResNet unit. 54 | args: A list of length equal to the number of units in the `Block`. The list 55 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 56 | block to serve as argument to unit_fn. 57 | """ 58 | 59 | 60 | def subsample(inputs, factor, scope=None): 61 | """Subsamples the input along the spatial dimensions. 62 | 63 | Args: 64 | inputs: A `Tensor` of size [batch, height_in, width_in, channels]. 65 | factor: The subsampling factor. 66 | scope: Optional variable_scope. 67 | 68 | Returns: 69 | output: A `Tensor` of size [batch, height_out, width_out, channels] with the 70 | input, either intact (if factor == 1) or subsampled (if factor > 1). 71 | """ 72 | if factor == 1: 73 | return inputs 74 | else: 75 | return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) 76 | 77 | 78 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): 79 | """Strided 2-D convolution with 'SAME' padding. 80 | 81 | When stride > 1, then we do explicit zero-padding, followed by conv2d with 82 | 'VALID' padding. 
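  Worked example (illustrative): with kernel_size=3, rate=1 and stride=2, the
  effective kernel size is 3, so pad_total = 2 and one row/column of zeros is
  added on each side. A 224x224 input is padded to 226x226, and the VALID
  convolution yields (226 - 3) // 2 + 1 = 112 outputs per dimension, i.e.
  exactly input_size / stride.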
83 | 84 | Note that 85 | 86 | net = conv2d_same(inputs, num_outputs, 3, stride=stride) 87 | 88 | is equivalent to 89 | 90 | net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME') 91 | net = subsample(net, factor=stride) 92 | 93 | whereas 94 | 95 | net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME') 96 | 97 | is different when the input's height or width is even, which is why we add the 98 | current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). 99 | 100 | Args: 101 | inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. 102 | num_outputs: An integer, the number of output filters. 103 | kernel_size: An int with the kernel_size of the filters. 104 | stride: An integer, the output stride. 105 | rate: An integer, rate for atrous convolution. 106 | scope: Scope. 107 | 108 | Returns: 109 | output: A 4-D tensor of size [batch, height_out, width_out, channels] with 110 | the convolution output. 111 | """ 112 | if stride == 1: 113 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, 114 | padding='SAME', scope=scope) 115 | else: 116 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 117 | pad_total = kernel_size_effective - 1 118 | pad_beg = pad_total // 2 119 | pad_end = pad_total - pad_beg 120 | inputs = tf.pad(inputs, 121 | [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) 122 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, 123 | rate=rate, padding='VALID', scope=scope) 124 | 125 | 126 | @slim.add_arg_scope 127 | def stack_blocks_dense(net, blocks, output_stride=None, 128 | outputs_collections=None): 129 | """Stacks ResNet `Blocks` and controls output feature density. 130 | 131 | First, this function creates scopes for the ResNet in the form of 132 | 'block_name/unit_1', 'block_name/unit_2', etc. 133 | 134 | Second, this function allows the user to explicitly control the ResNet 135 | output_stride, which is the ratio of the input to output spatial resolution. 136 | This is useful for dense prediction tasks such as semantic segmentation or 137 | object detection. 138 | 139 | Most ResNets consist of 4 ResNet blocks and subsample the activations by a 140 | factor of 2 when transitioning between consecutive ResNet blocks. This results 141 | to a nominal ResNet output_stride equal to 8. If we set the output_stride to 142 | half the nominal network stride (e.g., output_stride=4), then we compute 143 | responses twice. 144 | 145 | Control of the output feature density is implemented by atrous convolution. 146 | 147 | Args: 148 | net: A `Tensor` of size [batch, height, width, channels]. 149 | blocks: A list of length equal to the number of ResNet `Blocks`. Each 150 | element is a ResNet `Block` object describing the units in the `Block`. 151 | output_stride: If `None`, then the output will be computed at the nominal 152 | network stride. If output_stride is not `None`, it specifies the requested 153 | ratio of input to output spatial resolution, which needs to be equal to 154 | the product of unit strides from the start up to some level of the ResNet. 155 | For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, 156 | then valid values for the output_stride are 1, 2, 6, 24 or None (which 157 | is equivalent to output_stride=24). 158 | outputs_collections: Collection to add the ResNet block outputs. 159 | 160 | Returns: 161 | net: Output tensor with stride equal to the specified output_stride. 
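    For instance (an illustrative trace, not part of the original notes): with
    unit strides (1, 2, 2, 2) and output_stride=4, the first two stride-2 units
    run normally until current_stride reaches 4; each remaining stride-2 unit
    is then applied with stride=1 while the atrous rate doubles, so the
    feature resolution stays fixed at 1/4 of the input.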
162 | 163 | Raises: 164 | ValueError: If the target output_stride is not valid. 165 | """ 166 | # The current_stride variable keeps track of the effective stride of the 167 | # activations. This allows us to invoke atrous convolution whenever applying 168 | # the next residual unit would result in the activations having stride larger 169 | # than the target output_stride. 170 | current_stride = 1 171 | 172 | # The atrous convolution rate parameter. 173 | rate = 1 174 | 175 | for block in blocks: 176 | with tf.variable_scope(block.scope, 'block', [net]) as sc: 177 | for i, unit in enumerate(block.args): 178 | if output_stride is not None and current_stride > output_stride: 179 | raise ValueError('The target output_stride cannot be reached.') 180 | 181 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]): 182 | unit_depth, unit_depth_bottleneck, unit_stride = unit 183 | 184 | # If we have reached the target output_stride, then we need to employ 185 | # atrous convolution with stride=1 and multiply the atrous rate by the 186 | # current unit's stride for use in subsequent layers. 187 | if output_stride is not None and current_stride == output_stride: 188 | net = block.unit_fn(net, 189 | depth=unit_depth, 190 | depth_bottleneck=unit_depth_bottleneck, 191 | stride=1, 192 | rate=rate) 193 | rate *= unit_stride 194 | 195 | else: 196 | net = block.unit_fn(net, 197 | depth=unit_depth, 198 | depth_bottleneck=unit_depth_bottleneck, 199 | stride=unit_stride, 200 | rate=1) 201 | current_stride *= unit_stride 202 | net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) 203 | 204 | if output_stride is not None and current_stride != output_stride: 205 | raise ValueError('The target output_stride cannot be reached.') 206 | 207 | return net 208 | 209 | 210 | def resnet_arg_scope(weight_decay=0.0001, 211 | batch_norm_decay=0.997, 212 | batch_norm_epsilon=1e-5, 213 | batch_norm_scale=True): 214 | """Defines the default ResNet arg scope. 215 | 216 | TODO(gpapan): The batch-normalization related default values above are 217 | appropriate for use in conjunction with the reference ResNet models 218 | released at https://github.com/KaimingHe/deep-residual-networks. When 219 | training ResNets from scratch, they might need to be tuned. 220 | 221 | Args: 222 | weight_decay: The weight decay to use for regularizing the model. 223 | batch_norm_decay: The moving average decay when estimating layer activation 224 | statistics in batch normalization. 225 | batch_norm_epsilon: Small constant to prevent division by zero when 226 | normalizing activations by their variance in batch normalization. 227 | batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the 228 | activations in the batch normalization layer. 229 | 230 | Returns: 231 | An `arg_scope` to use for the resnet models. 232 | """ 233 | batch_norm_params = { 234 | 'decay': batch_norm_decay, 235 | 'epsilon': batch_norm_epsilon, 236 | 'scale': batch_norm_scale, 237 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 238 | } 239 | 240 | with slim.arg_scope( 241 | [slim.conv2d], 242 | weights_regularizer=slim.l2_regularizer(weight_decay), 243 | weights_initializer=slim.variance_scaling_initializer(), 244 | activation_fn=tf.nn.relu, 245 | normalizer_fn=slim.batch_norm, 246 | normalizer_params=batch_norm_params): 247 | with slim.arg_scope([slim.batch_norm], **batch_norm_params): 248 | # The following implies padding='SAME' for pool1, which makes feature 249 | # alignment easier for dense prediction tasks. 
This is also used in 250 | # https://github.com/facebook/fb.resnet.torch. However the accompanying 251 | # code of 'Deep Residual Learning for Image Recognition' uses 252 | # padding='VALID' for pool1. You can switch to that choice by setting 253 | # slim.arg_scope([slim.max_pool2d], padding='VALID'). 254 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: 255 | return arg_sc 256 | -------------------------------------------------------------------------------- /libs/nets/train_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import collections 6 | import tensorflow as tf 7 | import libs.configs.config_v1 as cfg 8 | 9 | slim = tf.contrib.slim 10 | FLAGS = tf.app.flags.FLAGS 11 | 12 | def _configure_optimizer(learning_rate): 13 | """Configures the optimizer used for training. 14 | 15 | Args: 16 | learning_rate: A scalar or `Tensor` learning rate. 17 | 18 | Returns: 19 | An instance of an optimizer. 20 | 21 | Raises: 22 | ValueError: if FLAGS.optimizer is not recognized. 23 | """ 24 | if FLAGS.optimizer == 'adadelta': 25 | optimizer = tf.train.AdadeltaOptimizer( 26 | learning_rate, 27 | rho=FLAGS.adadelta_rho, 28 | epsilon=FLAGS.opt_epsilon) 29 | elif FLAGS.optimizer == 'adagrad': 30 | optimizer = tf.train.AdagradOptimizer( 31 | learning_rate, 32 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value) 33 | elif FLAGS.optimizer == 'adam': 34 | optimizer = tf.train.AdamOptimizer( 35 | learning_rate, 36 | beta1=FLAGS.adam_beta1, 37 | beta2=FLAGS.adam_beta2, 38 | epsilon=FLAGS.opt_epsilon) 39 | elif FLAGS.optimizer == 'ftrl': 40 | optimizer = tf.train.FtrlOptimizer( 41 | learning_rate, 42 | learning_rate_power=FLAGS.ftrl_learning_rate_power, 43 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, 44 | l1_regularization_strength=FLAGS.ftrl_l1, 45 | l2_regularization_strength=FLAGS.ftrl_l2) 46 | elif FLAGS.optimizer == 'momentum': 47 | optimizer = tf.train.MomentumOptimizer( 48 | learning_rate, 49 | momentum=FLAGS.momentum, 50 | name='Momentum') 51 | elif FLAGS.optimizer == 'rmsprop': 52 | optimizer = tf.train.RMSPropOptimizer( 53 | learning_rate, 54 | decay=FLAGS.rmsprop_decay, 55 | momentum=FLAGS.rmsprop_momentum, 56 | epsilon=FLAGS.opt_epsilon) 57 | elif FLAGS.optimizer == 'sgd': 58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 59 | else: 60 | raise ValueError('Optimizer [%s] was not recognized', FLAGS.optimizer) 61 | return optimizer 62 | 63 | def _configure_learning_rate(num_samples_per_epoch, global_step): 64 | """Configures the learning rate. 65 | 66 | Args: 67 | num_samples_per_epoch: The number of samples in each epoch of training. 68 | global_step: The global_step tensor. 69 | 70 | Returns: 71 | A `Tensor` representing the learning rate. 
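  Worked example (illustrative numbers, not from the config): with
  num_samples_per_epoch=120000, FLAGS.batch_size=1 and
  FLAGS.num_epochs_per_decay=2, decay_steps = int(120000 / 1 * 2) = 240000,
  so the 'exponential' schedule (staircase=True below) multiplies the rate by
  FLAGS.learning_rate_decay_factor once every 240000 global steps.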
72 | 73 | Raises: 74 | ValueError: if 75 | """ 76 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size * 77 | FLAGS.num_epochs_per_decay) 78 | if FLAGS.sync_replicas: 79 | decay_steps /= FLAGS.replicas_to_aggregate 80 | 81 | if FLAGS.learning_rate_decay_type == 'exponential': 82 | return tf.train.exponential_decay(FLAGS.learning_rate, 83 | global_step, 84 | decay_steps, 85 | FLAGS.learning_rate_decay_factor, 86 | staircase=True, 87 | name='exponential_decay_learning_rate') 88 | elif FLAGS.learning_rate_decay_type == 'fixed': 89 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate') 90 | elif FLAGS.learning_rate_decay_type == 'polynomial': 91 | return tf.train.polynomial_decay(FLAGS.learning_rate, 92 | global_step, 93 | decay_steps, 94 | FLAGS.end_learning_rate, 95 | power=0.9, 96 | cycle=False, 97 | name='polynomial_decay_learning_rate') 98 | else: 99 | raise ValueError('learning_rate_decay_type [%s] was not recognized', 100 | FLAGS.learning_rate_decay_type) 101 | 102 | def _get_variables_to_train(): 103 | """Returns a list of variables to train. 104 | 105 | Returns: 106 | A list of variables to train by the optimizer. 107 | """ 108 | if FLAGS.trainable_scopes is None: 109 | return tf.trainable_variables() 110 | else: 111 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')] 112 | 113 | variables_to_train = [] 114 | for scope in scopes: 115 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 116 | variables_to_train.extend(variables) 117 | return variables_to_train 118 | 119 | def _get_init_fn(): 120 | """Returns a function run by the chief worker to warm-start the training. 121 | 122 | Note that the init_fn is only run when initializing the model during the very 123 | first global step. 124 | 125 | Returns: 126 | An init function run by the supervisor. 127 | """ 128 | if FLAGS.checkpoint_path is None: 129 | return None 130 | 131 | # Warn the user if a checkpoint exists in the train_dir. Then we'll be 132 | # ignoring the checkpoint anyway. 
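# Usage sketch (illustrative, assuming slim.learning.train drives training):
#
#   init_fn = _get_init_fn()
#   slim.learning.train(train_op, logdir=FLAGS.train_dir, init_fn=init_fn)
#
# The returned callable restores the checkpoint exactly once, when the model
# is initialized at the very first global step.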
133 | if tf.train.latest_checkpoint(FLAGS.train_dir): 134 | tf.logging.info( 135 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 136 | % FLAGS.train_dir) 137 | return None 138 | 139 | exclusions = [] 140 | if FLAGS.checkpoint_exclude_scopes: 141 | exclusions = [scope.strip() 142 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 143 | 144 | # TODO(sguada) variables.filter_variables() 145 | variables_to_restore = [] 146 | for var in slim.get_model_variables(): 147 | excluded = False 148 | for exclusion in exclusions: 149 | if var.op.name.startswith(exclusion): 150 | excluded = True 151 | break 152 | if not excluded: 153 | variables_to_restore.append(var) 154 | 155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 157 | else: 158 | checkpoint_path = FLAGS.checkpoint_path 159 | 160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 161 | 162 | return slim.assign_from_checkpoint_fn( 163 | checkpoint_path, 164 | variables_to_restore, 165 | ignore_missing_vars=FLAGS.ignore_missing_vars) 166 | 167 | def get_var_list_to_restore(): 168 | """Choosing which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 169 | 170 | variables_to_restore = [] 171 | if FLAGS.checkpoint_exclude_scopes is not None: 172 | exclusions = [scope.strip() 173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 174 | 175 | # build restore list 176 | for var in tf.model_variables(): 177 | excluded = False 178 | for exclusion in exclusions: 179 | if var.name.startswith(exclusion): 180 | excluded = True 181 | break 182 | if not excluded: 183 | variables_to_restore.append(var) 184 | else: 185 | variables_to_restore = tf.model_variables() 186 | 187 | variables_to_restore_final = [] 188 | if FLAGS.checkpoint_include_scopes is not None: 189 | includes = [ 190 | scope.strip() 191 | for scope in FLAGS.checkpoint_include_scopes.split(',') 192 | ] 193 | for var in variables_to_restore: 194 | included = False 195 | for include in includes: 196 | if var.name.startswith(include): 197 | included = True 198 | break 199 | if included: 200 | variables_to_restore_final.append(var) 201 | else: 202 | variables_to_restore_final = variables_to_restore 203 | 204 | return variables_to_restore_final 205 | -------------------------------------------------------------------------------- /libs/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /libs/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/nms/__init__.py -------------------------------------------------------------------------------- /libs/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b 
else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /libs/nms/nms_kernel.cu: 
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/libs/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
/libs/preprocessings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/preprocessings/__init__.py
--------------------------------------------------------------------------------
/libs/preprocessings/coco_v1.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import time 9 | import tensorflow as tf 10 | import libs.configs.config_v1 as cfg 11 | from . import utils as preprocess_utils 12 | 13 | FLAGS = tf.app.flags.FLAGS 14 | 15 | def preprocess_image(image, gt_boxes, gt_masks, is_training=False): 16 | """preprocess image for coco 17 | 1. random flipping 18 | 2. min size resizing 19 | 3. zero mean 20 | 4. ... 21 | """ 22 | if is_training: 23 | return preprocess_for_training(image, gt_boxes, gt_masks) 24 | else: 25 | return preprocess_for_test(image, gt_boxes, gt_masks) 26 | 27 | 28 | def preprocess_for_training(image, gt_boxes, gt_masks): 29 | 30 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 31 | ## random flipping 32 | coin = tf.to_float(tf.random_uniform([1]))[0] 33 | image, gt_boxes, gt_masks =\ 34 | tf.cond(tf.greater_equal(coin, 0.5), 35 | lambda: (preprocess_utils.flip_image(image), 36 | preprocess_utils.flip_gt_boxes(gt_boxes, ih, iw), 37 | preprocess_utils.flip_gt_masks(gt_masks)), 38 | lambda: (image, gt_boxes, gt_masks)) 39 | 40 | ## min size resizing 41 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) 42 | image = tf.expand_dims(image, 0) 43 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False) 44 | image = tf.squeeze(image, axis=[0]) 45 | 46 | gt_masks = tf.expand_dims(gt_masks, -1) 47 | gt_masks = tf.cast(gt_masks, tf.float32) 48 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 49 | gt_masks = tf.cast(gt_masks, tf.int32) 50 | gt_masks = tf.squeeze(gt_masks, axis=[-1]) 51 | 52 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 53 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 54 | 55 | ## random flip image 56 | # val_lr = tf.to_float(tf.random_uniform([1]))[0] 57 | # image = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_image(image), lambda: image) 58 | # gt_masks = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_masks(gt_masks), lambda: gt_masks) 59 | # gt_boxes = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_boxes(gt_boxes, new_ih, new_iw), lambda: gt_boxes) 60 | 61 | ## zero mean image 62 | image = tf.cast(image, tf.float32) 63 | image = image / 256.0 64 | image = (image - 0.5) * 2.0 65 | image = tf.expand_dims(image, axis=0) 66 | 67 | ## rgb to bgr 68 | image = tf.reverse(image, axis=[-1]) 69 | 70 | return image, gt_boxes, gt_masks 71 | 72 | def preprocess_for_test(image, gt_boxes, gt_masks): 73 | 74 | 75 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 76 | 77 | ## min size resizing 78 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) 79 | image = tf.expand_dims(image, 0) 80 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False) 81 | image = tf.squeeze(image, axis=[0]) 82 | 83 | gt_masks = tf.expand_dims(gt_masks, -1) 84 | gt_masks = tf.cast(gt_masks, tf.float32) 85 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 86 | gt_masks = tf.cast(gt_masks, tf.int32) 87 | gt_masks = tf.squeeze(gt_masks, axis=[-1]) 88 | 89 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 90 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 91 | 92 | ## zero mean image 93 | image = tf.cast(image, 
tf.float32) 94 | image = image / 256.0 95 | image = (image - 0.5) * 2.0 96 | image = tf.expand_dims(image, axis=0) 97 | 98 | ## rgb to bgr 99 | image = tf.reverse(image, axis=[-1]) 100 | 101 | return image, gt_boxes, gt_masks 102 | -------------------------------------------------------------------------------- /libs/preprocessings/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | from tensorflow.python.ops import control_flow_ops 8 | from tensorflow.contrib import slim 9 | 10 | 11 | def _crop(image, offset_height, offset_width, crop_height, crop_width): 12 | original_shape = tf.shape(image) 13 | 14 | rank_assertion = tf.Assert( 15 | tf.equal(tf.rank(image), 3), 16 | ['Rank of image must be equal to 3.']) 17 | cropped_shape = control_flow_ops.with_dependencies( 18 | [rank_assertion], 19 | tf.stack([crop_height, crop_width, original_shape[2]])) 20 | 21 | size_assertion = tf.Assert( 22 | tf.logical_and( 23 | tf.greater_equal(original_shape[0], crop_height), 24 | tf.greater_equal(original_shape[1], crop_width)), 25 | ['Crop size greater than the image size.']) 26 | 27 | offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) 28 | 29 | # Use tf.slice instead of crop_to_bounding box as it accepts tensors to 30 | # define the crop size. 31 | image = control_flow_ops.with_dependencies( 32 | [size_assertion], 33 | tf.slice(image, offsets, cropped_shape)) 34 | return tf.reshape(image, cropped_shape) 35 | 36 | 37 | def _random_crop(image_list, label_list, crop_height, crop_width): 38 | if not image_list: 39 | raise ValueError('Empty image_list.') 40 | 41 | # Compute the rank assertions. 42 | rank_assertions = [] 43 | for i in range(len(image_list)): 44 | image_rank = tf.rank(image_list[i]) 45 | rank_assert = tf.Assert( 46 | tf.equal(image_rank, 3), 47 | ['Wrong rank for tensor %s [expected] [actual]', 48 | image_list[i].name, 3, image_rank]) 49 | rank_assertions.append(rank_assert) 50 | 51 | image_shape = control_flow_ops.with_dependencies( 52 | [rank_assertions[0]], 53 | tf.shape(image_list[0])) 54 | image_height = image_shape[0] 55 | image_width = image_shape[1] 56 | crop_size_assert = tf.Assert( 57 | tf.logical_and( 58 | tf.greater_equal(image_height, crop_height), 59 | tf.greater_equal(image_width, crop_width)), 60 | ['Crop size greater than the image size.', image_height, image_width, crop_height, crop_width]) 61 | 62 | asserts = [rank_assertions[0], crop_size_assert] 63 | 64 | for i in range(1, len(image_list)): 65 | image = image_list[i] 66 | asserts.append(rank_assertions[i]) 67 | shape = control_flow_ops.with_dependencies([rank_assertions[i]], 68 | tf.shape(image)) 69 | height = shape[0] 70 | width = shape[1] 71 | 72 | height_assert = tf.Assert( 73 | tf.equal(height, image_height), 74 | ['Wrong height for tensor %s [expected][actual]', 75 | image.name, height, image_height]) 76 | width_assert = tf.Assert( 77 | tf.equal(width, image_width), 78 | ['Wrong width for tensor %s [expected][actual]', 79 | image.name, width, image_width]) 80 | asserts.extend([height_assert, width_assert]) 81 | 82 | # Create a random bounding box. 83 | # 84 | # Use tf.random_uniform and not numpy.random.rand as doing the former would 85 | # generate random numbers at graph eval time, unlike the latter which 86 | # generates random numbers at graph definition time. 
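# Note: maxval is exclusive for integer-dtype tf.random_uniform, which is why
# the bounds below add 1 — offsets then cover the full valid range
# [0, image_height - crop_height] and [0, image_width - crop_width].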
87 | max_offset_height = control_flow_ops.with_dependencies( 88 | asserts, tf.reshape(image_height - crop_height + 1, [])) 89 | max_offset_width = control_flow_ops.with_dependencies( 90 | asserts, tf.reshape(image_width - crop_width + 1, [])) 91 | offset_height = tf.random_uniform( 92 | [], maxval=max_offset_height, dtype=tf.int32) 93 | offset_width = tf.random_uniform( 94 | [], maxval=max_offset_width, dtype=tf.int32) 95 | 96 | cropped_images = [_crop(image, offset_height, offset_width, 97 | crop_height, crop_width) for image in image_list] 98 | cropped_labels = [_crop(label, offset_height, offset_width, 99 | crop_height, crop_width) for label in label_list] 100 | return cropped_images, cropped_labels 101 | 102 | 103 | def _central_crop(image_list, label_list, crop_height, crop_width): 104 | output_images = [] 105 | output_labels = [] 106 | for image, label in zip(image_list, label_list): 107 | image_height = tf.shape(image)[0] 108 | image_width = tf.shape(image)[1] 109 | 110 | offset_height = (image_height - crop_height) / 2 111 | offset_width = (image_width - crop_width) / 2 112 | 113 | output_images.append(_crop(image, offset_height, offset_width, 114 | crop_height, crop_width)) 115 | output_labels.append(_crop(label, offset_height, offset_width, 116 | crop_height, crop_width)) 117 | return output_images, output_labels 118 | 119 | 120 | def _smallest_size_at_least(height, width, smallest_side): 121 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 122 | 123 | height = tf.to_float(height) 124 | width = tf.to_float(width) 125 | smallest_side = tf.to_float(smallest_side) 126 | 127 | scale = tf.cond(tf.greater(height, width), 128 | lambda: smallest_side / width, 129 | lambda: smallest_side / height) 130 | new_height = tf.to_int32(height * scale) 131 | new_width = tf.to_int32(width * scale) 132 | return new_height, new_width 133 | 134 | def _aspect_preserving_resize(image, label, smallest_side): 135 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 136 | 137 | shape = tf.shape(image) 138 | height = shape[0] 139 | width = shape[1] 140 | new_height, new_width = _smallest_size_at_least(height, width, smallest_side) 141 | 142 | image = tf.expand_dims(image, 0) 143 | resized_image = tf.image.resize_bilinear(image, [new_height, new_width], 144 | align_corners=False) 145 | resized_image = tf.squeeze(resized_image, axis=[0]) 146 | resized_image.set_shape([None, None, 3]) 147 | 148 | label = tf.expand_dims(label, 0) 149 | resized_label = tf.image.resize_nearest_neighbor(label, [new_height, new_width], 150 | align_corners=False) 151 | resized_label = tf.squeeze(resized_label, axis=[0]) 152 | resized_label.set_shape([None, None, 1]) 153 | return resized_image, resized_label 154 | 155 | def flip_gt_boxes(gt_boxes, ih, iw): 156 | x1s, y1s, x2s, y2s, cls = \ 157 | gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2], gt_boxes[:, 3], gt_boxes[:, 4] 158 | x1s = tf.to_float(iw) - x1s 159 | x2s = tf.to_float(iw) - x2s 160 | return tf.concat(values=(x2s[:, tf.newaxis], 161 | y1s[:, tf.newaxis], 162 | x1s[:, tf.newaxis], 163 | y2s[:, tf.newaxis], 164 | cls[:, tf.newaxis]), axis=1) 165 | 166 | def flip_gt_masks(gt_masks): 167 | return tf.reverse(gt_masks, axis=[2]) 168 | 169 | def flip_image(image): 170 | return tf.reverse(image, axis=[1]) 171 | 172 | def resize_gt_boxes(gt_boxes, scale_ratio): 173 | xys, cls = \ 174 | gt_boxes[:, 0:4], gt_boxes[:, 4] 175 | xys = xys * scale_ratio 176 | return tf.concat(values=(xys, cls[:, tf.newaxis]), axis=1) 177 | 178 | 
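The geometry helpers above are easiest to sanity-check with concrete numbers. A minimal NumPy sketch (hypothetical values, mirroring the arithmetic of _smallest_size_at_least and flip_gt_boxes above; not part of the repo):

import numpy as np

# Aspect-preserving resize: scale so the smaller side becomes `smallest_side`
# (same arithmetic as _smallest_size_at_least).
height, width, smallest_side = 480.0, 640.0, 600.0
scale = smallest_side / height if height < width else smallest_side / width
new_height, new_width = int(height * scale), int(width * scale)  # -> 600, 800

# Horizontal flip of one [x1, y1, x2, y2, cls] box on a `width`-pixel image
# (same reflect-and-swap as flip_gt_boxes: new box is (iw - x2, y1, iw - x1, y2)).
box = np.array([50., 60., 120., 200., 3.])
flipped = np.array([width - box[2], box[1], width - box[0], box[3], box[4]])
# -> [520., 60., 590., 200., 3.]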
-------------------------------------------------------------------------------- /libs/setup.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 |     "Find a file in a search path"
17 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 |     for dir in path.split(os.pathsep):
19 |         binpath = pjoin(dir, name)
20 |         if os.path.exists(binpath):
21 |             return os.path.abspath(binpath)
22 |     return None
23 |
24 | def locate_cuda():
25 |     """Locate the CUDA environment on the system
26 |
27 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 |     and values giving the absolute path to each directory.
29 |
30 |     Starts by looking for the CUDAHOME env variable. If not found, everything
31 |     is based on finding 'nvcc' in the PATH.
32 |     """
33 |
34 |     # first check if the CUDAHOME env variable is in use
35 |     if 'CUDAHOME' in os.environ:
36 |         home = os.environ['CUDAHOME']
37 |         nvcc = pjoin(home, 'bin', 'nvcc')
38 |     else:
39 |         # otherwise, search the PATH for NVCC
40 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 |         if nvcc is None:
43 |             raise EnvironmentError('The nvcc binary could not be '
44 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 |         home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 |     cudaconfig = {'home':home, 'nvcc':nvcc,
48 |                   'include': pjoin(home, 'include'),
49 |                   'lib64': pjoin(home, 'lib64')}
50 |     for k, v in cudaconfig.iteritems():
51 |         if not os.path.exists(v):
52 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 |     return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 |     numpy_include = np.get_include()
60 | except AttributeError:
61 |     numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 |     """inject deep into distutils to customize how the dispatch
65 |     to gcc/nvcc works.
66 |
67 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
68 |     injected in, and still have the right customizations (i.e.
69 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
70 |     the OO route, I have this. Note, it's kind of like a weird functional
71 |     subclassing going on."""
72 |
73 |     # tell the compiler it can process .cu
74 |     self.src_extensions.append('.cu')
75 |
76 |     # save references to the default compiler_so and _compile methods
77 |     default_compiler_so = self.compiler_so
78 |     super = self._compile
79 |
80 |     # now redefine the _compile method. This gets executed for each
81 |     # object but distutils doesn't have the ability to change compilers
82 |     # based on source extension: we add it.
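# In short: sources ending in .cu are handed to nvcc with the 'nvcc' entry of
# extra_compile_args, while every other source keeps the default compiler and
# the 'gcc' entry (see the ext_modules definitions below).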
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print extra_postargs 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | ext_modules = [ 109 | Extension( 110 | "boxes.cython_bbox", 111 | ["boxes/bbox.pyx"], 112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 113 | include_dirs = [numpy_include] 114 | ), 115 | Extension( 116 | "boxes.cython_anchor", 117 | ["boxes/cython_anchor.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs = [numpy_include] 120 | ), 121 | Extension( 122 | "boxes.cython_bbox_transform", 123 | ["boxes/cython_bbox_transform.pyx"], 124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 125 | include_dirs=[numpy_include] 126 | ), 127 | Extension( 128 | "boxes.cython_nms", 129 | ["boxes/nms.pyx"], 130 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 131 | include_dirs = [numpy_include] 132 | ), 133 | Extension( 134 | "nms.cpu_nms", 135 | ["nms/cpu_nms.pyx"], 136 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 137 | include_dirs = [numpy_include] 138 | ), 139 | Extension( 140 | 'nms.gpu_nms', 141 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 142 | library_dirs=[CUDA['lib64']], 143 | libraries=['cudart'], 144 | language='c++', 145 | runtime_library_dirs=[CUDA['lib64']], 146 | # this syntax is specific to this build system 147 | # we're only going to use certain compiler args with nvcc and not with gcc 148 | # the implementation of this trick is in customize_compiler() below 149 | extra_compile_args={'gcc': ["-Wno-unused-function"], 150 | 'nvcc': ['-arch=sm_52', 151 | '--ptxas-options=-v', 152 | '-c', 153 | '--compiler-options', 154 | "'-fPIC'"]}, 155 | include_dirs = [numpy_include, CUDA['include']] 156 | ), 157 | ] 158 | 159 | setup( 160 | name='fast_rcnn', 161 | ext_modules=ext_modules, 162 | # inject our custom trigger 163 | cmdclass={'build_ext': custom_build_ext}, 164 | ) 165 | -------------------------------------------------------------------------------- /libs/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/visualization/__init__.py -------------------------------------------------------------------------------- /libs/visualization/pil_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 4 | 5 | FLAGS = tf.app.flags.FLAGS 6 | _DEBUG = False 7 | 8 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None): 9 | 
#print("image") 10 | #print(image) 11 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0) 12 | norm_image = np.uint8(image/0.1*127.0 + 127.0) 13 | #print("norm_image") 14 | #print(norm_image) 15 | source_img = Image.fromarray(norm_image) 16 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG') 17 | 18 | def draw_bbox(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None): 19 | #print(prob[:,label]) 20 | source_img = Image.fromarray(image) 21 | b, g, r = source_img.split() 22 | source_img = Image.merge("RGB", (r, g, b)) 23 | draw = ImageDraw.Draw(source_img) 24 | color = '#0000ff' 25 | if bbox is not None: 26 | for i, box in enumerate(bbox): 27 | if label is not None: 28 | if prob is not None: 29 | if (prob[i,label[i]] > 0.5) and (label[i] > 0): 30 | if gt_label is not None: 31 | text = cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]) 32 | if label[i] != gt_label[i]: 33 | color = '#ff0000'#draw.text((2+bbox[i,0], 2+bbox[i,1]), cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]), fill='#ff0000') 34 | else: 35 | color = '#0000ff' 36 | else: 37 | text = cat_id_to_cls_name(label[i]) 38 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color) 39 | if _DEBUG is True: 40 | print("plot",label[i], prob[i,label[i]]) 41 | draw.rectangle(box,fill=None,outline=color) 42 | else: 43 | if _DEBUG is True: 44 | print("skip",label[i], prob[i,label[i]]) 45 | else: 46 | text = cat_id_to_cls_name(label[i]) 47 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color) 48 | draw.rectangle(box,fill=None,outline=color) 49 | 50 | 51 | return source_img.save(FLAGS.train_dir + '/est_imgs/test_' + name + '_' + str(step) +'.jpg', 'JPEG') 52 | 53 | def cat_id_to_cls_name(catId): 54 | cls_name = np.array([ 'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 55 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 56 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 57 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 58 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 59 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 60 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 61 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 62 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 63 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 64 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 65 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 66 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 67 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']) 68 | return cls_name[catId] -------------------------------------------------------------------------------- /libs/visualization/summary_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def visualize_input(boxes, image, masks): 5 | image_sum_sample = image[:1] 6 | visualize_masks(masks, "input_image_gt_mask") 7 | visualize_bb(image, boxes, "input_image_gt_bb") 8 | visualize_input_image(image_sum_sample) 9 | 10 | 11 | def visualize_rpn_predictions(boxes, image): 12 | image_sum_sample = image[:1] 13 | visualize_bb(image_sum_sample, boxes, "rpn_pred_bb") 14 | 15 | # TODO: Present all masks in different colors 16 | def visualize_masks(masks, name): 17 | masks = tf.cast(masks, tf.float32) 
18 | tf.summary.image(name=name, tensor=masks, max_outputs=1) 19 | 20 | 21 | def visualize_bb(image, boxes, name): 22 | image_sum_sample_shape = tf.shape(image)[1:] 23 | gt_x_min = boxes[:, 0] / tf.cast(image_sum_sample_shape[1], tf.float32) 24 | gt_y_min = boxes[:, 1] / tf.cast(image_sum_sample_shape[0], tf.float32) 25 | gt_x_max = boxes[:, 2] / tf.cast(image_sum_sample_shape[1], tf.float32) 26 | gt_y_max = boxes[:, 3] / tf.cast(image_sum_sample_shape[0], tf.float32) 27 | bb = tf.stack([gt_y_min, gt_x_min, gt_y_max, gt_x_max], axis=1) 28 | tf.summary.image(name=name, 29 | tensor=tf.image.draw_bounding_boxes(image, tf.expand_dims(bb, 0), name=None), 30 | max_outputs=1) 31 | 32 | 33 | def visualize_input_image(image): 34 | tf.summary.image(name="input_image", tensor=image, max_outputs=1) 35 | 36 | 37 | def visualize_final_predictions(boxes, image, masks): 38 | visualize_masks(masks, "pred_mask") 39 | visualize_bb(image, boxes, "final_bb_pred") 40 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from . import train_utils 5 | -------------------------------------------------------------------------------- /train/train_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import collections 8 | import tensorflow as tf 9 | import libs.configs.config_v1 as cfg 10 | 11 | slim = tf.contrib.slim 12 | FLAGS = tf.app.flags.FLAGS 13 | 14 | def _configure_optimizer(learning_rate): 15 | """Configures the optimizer used for training. 16 | 17 | Args: 18 | learning_rate: A scalar or `Tensor` learning rate. 19 | 20 | Returns: 21 | An instance of an optimizer. 22 | 23 | Raises: 24 | ValueError: if FLAGS.optimizer is not recognized. 
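
  Example (assuming FLAGS.optimizer == 'momentum' and FLAGS.momentum is set):
    optimizer = _configure_optimizer(learning_rate=0.001)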
25 | """ 26 | if FLAGS.optimizer == 'adadelta': 27 | optimizer = tf.train.AdadeltaOptimizer( 28 | learning_rate, 29 | rho=FLAGS.adadelta_rho, 30 | epsilon=FLAGS.opt_epsilon) 31 | elif FLAGS.optimizer == 'adagrad': 32 | optimizer = tf.train.AdagradOptimizer( 33 | learning_rate, 34 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value) 35 | elif FLAGS.optimizer == 'adam': 36 | optimizer = tf.train.AdamOptimizer( 37 | learning_rate, 38 | beta1=FLAGS.adam_beta1, 39 | beta2=FLAGS.adam_beta2, 40 | epsilon=FLAGS.opt_epsilon) 41 | elif FLAGS.optimizer == 'ftrl': 42 | optimizer = tf.train.FtrlOptimizer( 43 | learning_rate, 44 | learning_rate_power=FLAGS.ftrl_learning_rate_power, 45 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, 46 | l1_regularization_strength=FLAGS.ftrl_l1, 47 | l2_regularization_strength=FLAGS.ftrl_l2) 48 | elif FLAGS.optimizer == 'momentum': 49 | optimizer = tf.train.MomentumOptimizer( 50 | learning_rate, 51 | momentum=FLAGS.momentum, 52 | name='Momentum') 53 | elif FLAGS.optimizer == 'rmsprop': 54 | optimizer = tf.train.RMSPropOptimizer( 55 | learning_rate, 56 | decay=FLAGS.rmsprop_decay, 57 | momentum=FLAGS.rmsprop_momentum, 58 | epsilon=FLAGS.opt_epsilon) 59 | elif FLAGS.optimizer == 'sgd': 60 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 61 | else: 62 | raise ValueError('Optimizer [%s] was not recognized', FLAGS.optimizer) 63 | return optimizer 64 | 65 | def _configure_learning_rate(num_samples_per_epoch, global_step): 66 | """Configures the learning rate. 67 | 68 | Args: 69 | num_samples_per_epoch: The number of samples in each epoch of training. 70 | global_step: The global_step tensor. 71 | 72 | Returns: 73 | A `Tensor` representing the learning rate. 74 | 75 | Raises: 76 | ValueError: if 77 | """ 78 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size * 79 | FLAGS.num_epochs_per_decay) 80 | if FLAGS.sync_replicas: 81 | decay_steps /= FLAGS.replicas_to_aggregate 82 | 83 | if FLAGS.learning_rate_decay_type == 'exponential': 84 | return tf.train.exponential_decay(FLAGS.learning_rate, 85 | global_step, 86 | decay_steps, 87 | FLAGS.learning_rate_decay_factor, 88 | staircase=True, 89 | name='exponential_decay_learning_rate') 90 | elif FLAGS.learning_rate_decay_type == 'fixed': 91 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate') 92 | elif FLAGS.learning_rate_decay_type == 'polynomial': 93 | return tf.train.polynomial_decay(FLAGS.learning_rate, 94 | global_step, 95 | decay_steps, 96 | FLAGS.end_learning_rate, 97 | power=0.9, 98 | cycle=False, 99 | name='polynomial_decay_learning_rate') 100 | else: 101 | raise ValueError('learning_rate_decay_type [%s] was not recognized', 102 | FLAGS.learning_rate_decay_type) 103 | 104 | def _get_variables_to_train(): 105 | """Returns a list of variables to train. 106 | 107 | Returns: 108 | A list of variables to train by the optimizer. 109 | """ 110 | if FLAGS.trainable_scopes is None: 111 | return tf.trainable_variables() 112 | else: 113 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')] 114 | 115 | variables_to_train = [] 116 | for scope in scopes: 117 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 118 | variables_to_train.extend(variables) 119 | return variables_to_train 120 | 121 | def _get_init_fn(): 122 | """Returns a function run by the chief worker to warm-start the training. 123 | 124 | Note that the init_fn is only run when initializing the model during the very 125 | first global step. 
126 | 127 | Returns: 128 | An init function run by the supervisor. 129 | """ 130 | if FLAGS.checkpoint_path is None: 131 | return None 132 | 133 | # Warn the user if a checkpoint exists in the train_dir. Then we'll 134 | # ignore the checkpoint anyway. 135 | if tf.train.latest_checkpoint(FLAGS.train_dir): 136 | tf.logging.info( 137 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 138 | % FLAGS.train_dir) 139 | return None 140 | 141 | exclusions = [] 142 | if FLAGS.checkpoint_exclude_scopes: 143 | exclusions = [scope.strip() 144 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 145 | 146 | # TODO(sguada) variables.filter_variables() 147 | variables_to_restore = [] 148 | for var in slim.get_model_variables(): 149 | for exclusion in exclusions: 150 | if var.op.name.startswith(exclusion): 151 | break 152 | else: 153 | variables_to_restore.append(var) 154 | 155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 157 | else: 158 | checkpoint_path = FLAGS.checkpoint_path 159 | 160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 161 | 162 | return slim.assign_from_checkpoint_fn( 163 | checkpoint_path, 164 | variables_to_restore, 165 | ignore_missing_vars=FLAGS.ignore_missing_vars) 166 | 167 | def get_var_list_to_restore(): 168 | """Choose which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 169 | 170 | variables_to_restore = [] 171 | if FLAGS.checkpoint_exclude_scopes is not None: 172 | exclusions = [scope.strip() 173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 174 | 175 | # build restore list 176 | for var in tf.model_variables(): 177 | for exclusion in exclusions: 178 | if var.name.startswith(exclusion): 179 | break 180 | else: 181 | variables_to_restore.append(var) 182 | else: 183 | variables_to_restore = tf.model_variables() 184 | 185 | variables_to_restore_final = [] 186 | if FLAGS.checkpoint_include_scopes is not None: 187 | includes = [ 188 | scope.strip() 189 | for scope in FLAGS.checkpoint_include_scopes.split(',') 190 | ] 191 | for var in variables_to_restore: 192 | for include in includes: 193 | if var.name.startswith(include): 194 | variables_to_restore_final.append(var) 195 | break 196 | else: 197 | variables_to_restore_final = variables_to_restore 198 | 199 | return variables_to_restore_final 200 | -------------------------------------------------------------------------------- /unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/unit_test/__init__.py -------------------------------------------------------------------------------- /unit_test/data_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import functools 7 | 8 | import sys 9 | import os 10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 11 | import numpy as np 12 | import PIL.Image as Image 13 | from PIL import ImageDraw 14 | import tensorflow as tf 15 | import tensorflow.contrib.slim as slim 16 | from libs.logs.log import LOG 17 | import libs.configs.config_v1 as cfg 18 | import libs.nets.resnet_v1 as resnet_v1 19 | import libs.datasets.dataset_factory as dataset_factory 20 | import libs.datasets.coco as coco 21 | 
import libs.preprocessings.coco_v1 as preprocess_coco
22 | from libs.layers import ROIAlign
23 |
24 | resnet50 = resnet_v1.resnet_v1_50
25 | FLAGS = tf.app.flags.FLAGS
26 |
27 | with tf.Graph().as_default():
28 |
29 |     image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \
30 |         coco.read('./data/coco/records/coco_trainval2014_00000-of-00048.tfrecord')
31 |
32 |     image, gt_boxes, gt_masks = \
33 |         preprocess_coco.preprocess_image(image, gt_boxes, gt_masks)
34 |
35 |
36 |
37 |     sess = tf.Session()
38 |     init_op = tf.group(tf.global_variables_initializer(),
39 |                        tf.local_variables_initializer())
40 |     # init_op = tf.initialize_all_variables()
41 |
42 |     boxes = [[100, 100, 200, 200],
43 |              [50, 50, 100, 100],
44 |              [100, 100, 750, 750],
45 |              [50, 50, 60, 60]]
46 |     # boxes = np.zeros((0, 4))
47 |     boxes = tf.constant(boxes, tf.float32)
48 |     feat = ROIAlign(image, boxes, False, 16, 7, 7)
49 |     sess.run(init_op)
50 |
51 |     tf.train.start_queue_runners(sess=sess)
52 |     with sess.as_default():
53 |         for i in range(20000):
54 |             image_np, ih_np, iw_np, gt_boxes_np, gt_masks_np, num_instances_np, img_id_np, \
55 |             feat_np = \
56 |                 sess.run([image, ih, iw, gt_boxes, gt_masks, num_instances, img_id,
57 |                           feat])
58 |             # print (image_np.shape, gt_boxes_np.shape, gt_masks_np.shape)
59 |
60 |             if i % 100 == 0:
61 |                 print ('%d, image_id: %s, instances: %d' % (i, str(img_id_np), num_instances_np))
62 |                 image_np = 256 * (image_np * 0.5 + 0.5)
63 |                 image_np = image_np.astype(np.uint8)
64 |                 image_np = np.squeeze(image_np)
65 |                 print (image_np.shape, ih_np, iw_np)
66 |                 print (feat_np.shape)
67 |                 im = Image.fromarray(image_np)
68 |                 imd = ImageDraw.Draw(im)
69 |                 for j in range(gt_boxes_np.shape[0]):  # separate loop variable; don't clobber the step counter i
70 |                     imd.rectangle(gt_boxes_np[j, :4])  # drop the class column; PIL expects [x1, y1, x2, y2]
71 |                 im.save(str(img_id_np) + '.png')
72 |                 # print (gt_boxes_np)
73 |     sess.close()
74 |
-------------------------------------------------------------------------------- /unit_test/preprocessing_test.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import numpy as np
5 | import sys
6 | import os
7 | import tensorflow as tf
8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
9 |
10 | import libs.preprocessings.coco_v1 as coco_preprocess
11 | import libs.configs.config_v1 as cfg
12 |
13 | ih, iw, ic = 400, 500, 3
14 | N = 3
15 | image = np.random.randint(0, 255, (ih, iw, ic)).astype(np.uint8)
16 | gt_masks = np.zeros((N, ih, iw)).astype(np.int32)
17 | xy = np.random.randint(0, min(iw, ih)-100, (N, 2)).astype(np.float32)
18 | wh = np.random.randint(20, 40, (N, 2)).astype(np.float32)
19 | cls = np.random.randint(1, 6, (N, 1)).astype(np.float32)
20 | gt_boxes = np.hstack((xy, xy + wh, cls)).astype(np.float32)
21 | gt_boxes_np = gt_boxes
22 | image_np = image
23 | gt_masks_np = gt_masks
24 |
25 | for i in range(N):
26 |     box = gt_boxes[i, 0:4]
27 |     gt_masks[i, int(box[1]):int(box[3]),
28 |              int(box[0]):int(box[2])] = 1
29 | image = tf.constant(image)
30 | gt_boxes = tf.constant(gt_boxes)
31 | gt_masks = tf.constant(gt_masks)
32 |
33 | image, gt_boxes, gt_masks = \
34 |     coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True)
35 |
36 | with tf.Session() as sess:
37 |     # print(image.eval())
38 |     image_tf, gt_boxes_tf, gt_masks_tf = \
39 |         sess.run([image, gt_boxes, gt_masks])
40 |     print ('#######################')
41 |     print ('DATA PREPROCESSING TEST')
42 |     print ('#######################')
43 |     print ('gt_boxes shape:', gt_boxes_tf.shape)
44 |     print('mask shape:',
gt_masks_tf.shape) 45 | print(gt_boxes_tf) 46 | for i in range(N): 47 | box = np.round(gt_boxes_tf[i, 0:4]) 48 | box = box.astype(np.int32) 49 | m = gt_masks_tf[i, box[1]:box[3], box[0]:box[2]] 50 | print ('after:', box) 51 | print (np.sum(m)/ (0.0 + m.size)) 52 | print (m) 53 | box = np.round(gt_boxes_np[i, 0:4]) 54 | box = box.astype(np.int32) 55 | m = gt_masks_np[i, box[1]:box[3], box[0]:box[2]] 56 | print ('ori box:', box) 57 | print (np.sum(m)/ (0.0 + m.size)) 58 | -------------------------------------------------------------------------------- /unit_test/resnet50_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import functools 7 | import os, sys 8 | import time 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | import numpy as np 11 | from time import gmtime, strftime 12 | import tensorflow as tf 13 | import tensorflow.contrib.slim as slim 14 | import libs.configs.config_v1 as cfg 15 | import libs.datasets.coco as coco 16 | import libs.preprocessings.coco_v1 as coco_preprocess 17 | import libs.nets.pyramid_network as pyramid_network 18 | import libs.nets.resnet_v1 as resnet_v1 19 | from train.train_utils import _configure_learning_rate, _configure_optimizer, \ 20 | _get_variables_to_train, _get_init_fn, get_var_list_to_restore 21 | 22 | resnet50 = resnet_v1.resnet_v1_50 23 | FLAGS = tf.app.flags.FLAGS 24 | 25 | DEBUG = False 26 | 27 | with tf.Graph().as_default(): 28 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8, 29 | allow_growth=True, 30 | ) 31 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, 32 | allow_soft_placement=True)) as sess: 33 | global_step = slim.create_global_step() 34 | 35 | ## data 36 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \ 37 | coco.read('./data/coco/records/coco_train2014_00000-of-00040.tfrecord') 38 | with tf.control_dependencies([image, gt_boxes, gt_masks]): 39 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True) 40 | 41 | ## network 42 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)): 43 | logits, end_points = resnet50(image, 1000, is_training=False) 44 | end_points['inputs'] = image 45 | 46 | for x in sorted(end_points.keys()): 47 | print (x, end_points[x].name, end_points[x].shape) 48 | 49 | pyramid = pyramid_network.build_pyramid('resnet50', end_points) 50 | # for p in pyramid: 51 | # print (p, pyramid[p]) 52 | 53 | summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) 54 | for p in pyramid: 55 | summaries.add(tf.summary.histogram('pyramid/hist/' + p, pyramid[p])) 56 | summaries.add(tf.summary.scalar('pyramid/means/'+ p, tf.reduce_mean(tf.abs(pyramid[p])))) 57 | 58 | outputs = pyramid_network.build_heads(pyramid, ih, iw, num_classes=81, base_anchors=9, is_training=True, gt_boxes=gt_boxes) 59 | 60 | ## losses 61 | loss, losses, batch_info = pyramid_network.build_losses(pyramid, outputs, 62 | gt_boxes, gt_masks, 63 | num_classes=81, base_anchors=9, 64 | rpn_box_lw =0.1, rpn_cls_lw = 0.2, 65 | refined_box_lw=2.0, refined_cls_lw=0.1, 66 | mask_lw=0.2) 67 | 68 | ## optimization 69 | learning_rate = _configure_learning_rate(82783, global_step) 70 | optimizer = _configure_optimizer(learning_rate) 71 | summaries.add(tf.summary.scalar('learning_rate', learning_rate)) 72 | for loss in 
tf.get_collection(tf.GraphKeys.LOSSES): 73 | summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) 74 | 75 | loss = tf.get_collection(tf.GraphKeys.LOSSES) 76 | regular_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 77 | total_loss = tf.add_n(loss + regular_loss) 78 | reg_loss = tf.add_n(regular_loss) 79 | summaries.add(tf.summary.scalar('total_loss', total_loss)) 80 | summaries.add(tf.summary.scalar('regular_loss', reg_loss)) 81 | 82 | variables_to_train = _get_variables_to_train() 83 | update_op = optimizer.minimize(total_loss) 84 | # gradients = optimizer.compute_gradients(total_loss, var_list=variables_to_train) 85 | # grad_updates = optimizer.apply_gradients(gradients, 86 | # global_step=global_step) 87 | # update_op = tf.group(grad_updates) 88 | 89 | # summary_op = tf.summary.merge(list(summaries), name='summary_op') 90 | summary_op = tf.summary.merge_all() 91 | logdir = os.path.join(FLAGS.train_dir, strftime('%Y%m%d%H%M%S', gmtime())) 92 | if not os.path.exists(logdir): 93 | os.makedirs(logdir) 94 | summary_writer = tf.summary.FileWriter( 95 | logdir, 96 | graph=sess.graph) 97 | 98 | 99 | init_op = tf.group(tf.global_variables_initializer(), 100 | tf.local_variables_initializer()) 101 | 102 | sess.run(init_op) 103 | coord = tf.train.Coordinator() 104 | tf.train.start_queue_runners(sess=sess, coord=coord) 105 | 106 | ## restore pretrained model 107 | # FLAGS.pretrained_model = None 108 | if FLAGS.pretrained_model: 109 | if tf.gfile.IsDirectory(FLAGS.pretrained_model): 110 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.pretrained_model) 111 | else: 112 | checkpoint_path = FLAGS.pretrained_model 113 | FLAGS.checkpoint_exclude_scopes='pyramid' 114 | FLAGS.checkpoint_include_scopes='resnet_v1_50' 115 | vars_to_restore = get_var_list_to_restore() 116 | for var in vars_to_restore: 117 | print ('restoring ', var.name) 118 | 119 | try: 120 | restorer = tf.train.Saver(vars_to_restore) 121 | restorer.restore(sess, checkpoint_path) 122 | print ('Restored %d(%d) vars from %s' %( 123 | len(vars_to_restore), len(tf.global_variables()), 124 | checkpoint_path )) 125 | except: 126 | print ('Checking your params %s' %(checkpoint_path)) 127 | raise 128 | 129 | # import libs.memory_util as memory_util 130 | # memory_util.vlog(1) 131 | # with memory_util.capture_stderr() as stderr: 132 | # sess.run([update_op]) 133 | # memory_util.print_memory_timeline(stderr, ignore_less_than_bytes=1000) 134 | 135 | ## training loop 136 | saver = tf.train.Saver(max_to_keep=20) 137 | for step in range(FLAGS.max_iters): 138 | start_time = time.time() 139 | 140 | _, tot_loss, reg_lossnp, img_id_str, \ 141 | rpn_box_loss, rpn_cls_loss, refined_box_loss, refined_cls_loss, mask_loss, \ 142 | gt_boxesnp, \ 143 | rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch, mask_batch_pos, mask_batch = \ 144 | sess.run([update_op, total_loss, reg_loss, img_id] + 145 | losses + 146 | [gt_boxes] + 147 | batch_info) 148 | # TODO: sampling strategy 149 | 150 | duration_time = time.time() - start_time 151 | if step % 1 == 0: 152 | print ( """iter %d: image-id:%07d, time:%.3f(sec), regular_loss: %.6f, """ 153 | """total-loss %.4f(%.4f, %.4f, %.6f, %.4f, %.4f), """ 154 | """instances: %d, """ 155 | """batch:(%d|%d, %d|%d, %d|%d)""" 156 | % (step, img_id_str, duration_time, reg_lossnp, 157 | tot_loss, rpn_box_loss, rpn_cls_loss, refined_box_loss, refined_cls_loss, mask_loss, 158 | gt_boxesnp.shape[0], 159 | rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch, mask_batch_pos, mask_batch)) 160 | 161 | if 
np.isnan(tot_loss) or np.isinf(tot_loss):
162 |           print (gt_boxesnp)
163 |           raise ValueError('total loss diverged (NaN or Inf)')  # a bare `raise` is invalid outside an except block
164 |
165 |         if step % 100 == 0:
166 |           summary_str = sess.run(summary_op)
167 |           summary_writer.add_summary(summary_str, step)
168 |
169 |         if (step % 1000 == 0 or step + 1 == FLAGS.max_iters) and step != 0:
170 |           checkpoint_path = os.path.join(FLAGS.train_dir,
171 |                                          FLAGS.dataset_name + '_model.ckpt')
172 |           saver.save(sess, checkpoint_path, global_step=step)
173 |
174 |       if coord.should_stop():
175 |         coord.request_stop()
176 |         coord.join()  # threads from start_queue_runners(coord=coord) are registered with the coordinator
177 |
--------------------------------------------------------------------------------
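A quick worked example of the pure-Python NMS baseline in libs/nms/py_cpu_nms.py (values hypothetical; assumes the repo root is on PYTHONPATH):

import numpy as np
from libs.nms.py_cpu_nms import py_cpu_nms

# Rows are [x1, y1, x2, y2, score]; areas use the (x2 - x1 + 1) convention.
dets = np.array([[ 0.,  0., 10., 10., 0.9],   # top-scoring box, kept
                 [ 1.,  1., 11., 11., 0.8],   # IoU with box 0 = 100/142 ~ 0.70 > 0.5, suppressed
                 [20., 20., 30., 30., 0.7]])  # disjoint from box 0, kept
print(py_cpu_nms(dets, thresh=0.5))  # -> [0, 2]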