├── .gitignore ├── LICENSE ├── README.md ├── data └── README.md ├── download_and_convert_data.py ├── libs ├── Makefile ├── __init__.py ├── boxes │ ├── .gitignore │ ├── __init__.py │ ├── anchor.py │ ├── bbox.pyx │ ├── bbox_transform.py │ ├── blob.py │ ├── cython_anchor.py │ ├── cython_anchor.pyx │ ├── cython_bbox.py │ ├── cython_bbox_transform.py │ ├── cython_bbox_transform.pyx │ ├── cython_nms.py │ ├── gprof2dot.py │ ├── nms.py │ ├── nms.pyx │ ├── nms_wrapper.py │ ├── profile │ ├── profile.png │ ├── roi.py │ └── timer.py ├── configs │ ├── __init__.py │ └── config_v1.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── dataset_factory.py │ ├── download_and_convert_coco.py │ └── pycocotools │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ │ ├── mask.py │ │ └── setup.py ├── layers │ ├── __init__.py │ ├── anchor.py │ ├── assign.py │ ├── crop.py │ ├── mask.py │ ├── roi.py │ ├── sample.py │ └── wrapper.py ├── logs │ ├── __init__.py │ └── log.py ├── make.sh ├── memory_util.py ├── nets │ ├── __init__.py │ ├── nets_factory.py │ ├── pyramid_network.py │ ├── resnet_utils.py │ ├── resnet_v1.py │ └── train_utils.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── preprocessings │ ├── __init__.py │ ├── coco_v1.py │ └── utils.py ├── setup.py └── visualization │ ├── __init__.py │ ├── pil_utils.py │ └── summary_utils.py ├── train ├── __init__.py ├── train.py └── train_utils.py └── unit_test ├── __init__.py ├── data_test.py ├── layer_test.py ├── preprocessing_test.py └── resnet50_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | .idea 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | data/coco 93 | data/pretrained_models 94 | tags 95 | output 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mask RCNN 2 | Mask RCNN in TensorFlow 3 | 4 | This repo attempts to reproduce this amazing work by Kaiming He et al. 
:
5 | [Mask R-CNN](https://arxiv.org/abs/1703.06870)
6 | 
7 | ## Requirements
8 | 
9 | - [Tensorflow (>= 1.0.0)](https://www.tensorflow.org/install/install_linux)
10 | - [Numpy](https://github.com/numpy/numpy/blob/master/INSTALL.rst.txt)
11 | - [COCO dataset](http://mscoco.org/dataset/#download)
12 | - [Resnet50](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)
13 | 
14 | ## How-to
15 | 1. Go to `./libs/datasets/pycocotools` and run `make`.
16 | 2. Download the [COCO](http://mscoco.org/dataset/#download) dataset, place it in `./data`, then run `python download_and_convert_data.py` to build the tf-records. This takes a while.
17 | 3. Download the pretrained resnet50 model with `wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz`, extract it, and place it in `./data/pretrained_models/`.
18 | 4. Go to `./libs` and run `make`.
19 | 5. Run `python train/train.py` to start training (the same commands are collected in the Quick start sketch at the end of this README).
20 | 6. There are certainly some bugs; please report them back, and let's solve them together.
21 | 
22 | ## TODO:
23 | - [x] ROIAlign
24 | - [x] COCO Data Provider
25 | - [x] Resnet50
26 | - [x] Feature Pyramid Network
27 | - [x] Anchor and ROI layer
28 | - [x] Mask layer
29 | - [x] Speed up anchor layer with cython
30 | - [x] Combining all modules together
31 | - [x] Testing and debugging (in progress)
32 | - [ ] Training / evaluation on COCO
33 | - [ ] Add image summary to show some results
34 | - [ ] Converting ResneXt
35 | - [ ] Training >2 images
36 | 
37 | ## Call for contributions
38 | - Anything that helps this repo, including **discussion**, **testing**, **promotion** and of course **your awesome code**.
39 | 
40 | ## Acknowledgment
41 | This repo borrows tons of code from
42 | - [TFFRCNN](https://github.com/CharlesShang/TFFRCNN)
43 | - [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn)
44 | - [faster_rcnn](https://github.com/ShaoqingRen/faster_rcnn)
45 | - [tf-models](https://github.com/tensorflow/models)
46 | 
47 | ## License
48 | See [LICENSE](https://github.com/CharlesShang/FastMaskRCNN/blob/master/LICENSE) for details.
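49 | 
50 | ## Quick start
51 | 
52 | The How-to steps above, collected into a single shell sketch for convenience (hedged: it assumes the default paths used in this README, and that COCO is already unzipped under `./data/coco`):
53 | 
54 | ```bash
55 | cd libs/datasets/pycocotools && make && cd ../../..
56 | python download_and_convert_data.py
57 | wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz
58 | mkdir -p data/pretrained_models
59 | tar -xzf resnet_v1_50_2016_08_28.tar.gz -C data/pretrained_models/
60 | cd libs && make && cd ..
61 | python train/train.py
62 | ```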
63 | 
-------------------------------------------------------------------------------- /data/README.md: --------------------------------------------------------------------------------
1 | Place and unzip your COCO dataset in this dir, like so:
2 | 
3 | ```buildoutcfg
4 | ./data
5 |   ./coco
6 |     ./annotations
7 |     ./train2014
8 |     ./val2014
9 | ```
10 | 
-------------------------------------------------------------------------------- /download_and_convert_data.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | 
7 | import os
8 | import tensorflow as tf
9 | 
10 | from libs.datasets import download_and_convert_coco
11 | from libs.configs import config_v1
12 | 
13 | FLAGS = tf.app.flags.FLAGS
14 | 
15 | # tf.app.flags.DEFINE_string(
16 | #     'dataset_name', 'coco',
17 | #     'The name of the dataset to convert, one of "coco", "cifar10", "flowers", "mnist".')
18 | 
19 | # tf.app.flags.DEFINE_string(
20 | #     'dataset_dir', 'data/coco',
21 | #     'The directory where the output TFRecords and temporary files are saved.')
22 | 
23 | 
24 | def main(_):
25 |   if not os.path.isdir('./output/mask_rcnn'):
26 |     os.makedirs('./output/mask_rcnn')
27 |   if not FLAGS.dataset_name:
28 |     raise ValueError('You must supply the dataset name with --dataset_name')
29 |   if not FLAGS.dataset_dir:
30 |     raise ValueError('You must supply the dataset directory with --dataset_dir')
31 | 
32 |   if FLAGS.dataset_name == 'coco':
33 |     download_and_convert_coco.run(FLAGS.dataset_dir, FLAGS.dataset_split_name)
34 |   else:
35 |     raise ValueError(
36 |         'dataset_name [%s] was not recognized.' % FLAGS.dataset_name)
37 | 
38 | if __name__ == '__main__':
39 |   tf.app.run()
40 | 
-------------------------------------------------------------------------------- /libs/Makefile: --------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | 	sh make.sh
-------------------------------------------------------------------------------- /libs/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/__init__.py
-------------------------------------------------------------------------------- /libs/boxes/.gitignore: --------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 
-------------------------------------------------------------------------------- /libs/boxes/__init__.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import cython_nms
8 | from . import cython_bbox
9 | from . import nms
10 | from . import timer
11 | from .anchor import anchors
12 | from .anchor import anchors_plane
13 | from .roi import roi_cropping
14 | from . import cython_anchor
15 | from . import cython_bbox_transform
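16 | 
17 | # Editor's hedged usage sketch (not part of the original file). With the
18 | # extensions built by `make` in ./libs, the anchor helpers re-exported here
19 | # can be used directly, e.g. to enumerate anchors over a 50x63 feature map
20 | # at stride 16 (cf. the timing demo at the bottom of anchor.py):
21 | #   from libs import boxes
22 | #   all_anchors = boxes.anchors_plane(50, 63, stride=16)  # shape (50, 63, 15, 4)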
-------------------------------------------------------------------------------- /libs/boxes/anchor.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import numpy as np
6 | from libs.boxes import cython_anchor
7 | 
8 | def anchors(scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16):
9 |     """Get a set of anchors at one position."""
10 |     return generate_anchors(base_size=base, scales=np.asarray(scales, np.int32), ratios=ratios)
11 | 
12 | def anchors_plane(height, width, stride=1.0,
13 |                   scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16):
14 |     """Get a complete set of anchors in a spatial plane.
15 |     height, width are the plane dimensions;
16 |     stride is the scale ratio of the original image to this plane.
17 |     """
18 |     # TODO: implement in C, or pre-compute them, or set to a fixed input-shape
19 |     # enum all anchors in a plane
20 |     # scales = kwargs.setdefault('scales', [2, 4, 8, 16, 32])
21 |     # ratios = kwargs.setdefault('ratios', [0.5, 1, 2.0])
22 |     # base = kwargs.setdefault('base', 16)
23 |     anc = anchors(scales, ratios, base)
24 |     all_anchors = cython_anchor.anchors_plane(height, width, stride, anc)
25 |     return all_anchors
26 | 
27 | # Written by Ross Girshick and Sean Bell
28 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
29 |                      scales=2 ** np.arange(3, 6)):
30 |     """
31 |     Generate anchor (reference) windows by enumerating aspect ratios X
32 |     scales wrt a reference (0, 0, 15, 15) window.
33 |     """
34 | 
35 |     base_anchor = np.array([1, 1, base_size, base_size]) - 1
36 |     ratio_anchors = _ratio_enum(base_anchor, ratios)
37 |     anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
38 |                          for i in range(ratio_anchors.shape[0])])
39 |     return anchors
40 | 
41 | def _whctrs(anchor):
42 |     """
43 |     Return width, height, x center, and y center for an anchor (window).
44 |     """
45 | 
46 |     w = anchor[2] - anchor[0] + 1
47 |     h = anchor[3] - anchor[1] + 1
48 |     x_ctr = anchor[0] + 0.5 * (w - 1)
49 |     y_ctr = anchor[1] + 0.5 * (h - 1)
50 |     return w, h, x_ctr, y_ctr
51 | 
52 | 
53 | def _mkanchors(ws, hs, x_ctr, y_ctr):
54 |     """
55 |     Given a vector of widths (ws) and heights (hs) around a center
56 |     (x_ctr, y_ctr), output a set of anchors (windows).
57 |     """
58 | 
59 |     ws = ws[:, np.newaxis]
60 |     hs = hs[:, np.newaxis]
61 |     anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
62 |                          y_ctr - 0.5 * (hs - 1),
63 |                          x_ctr + 0.5 * (ws - 1),
64 |                          y_ctr + 0.5 * (hs - 1)))
65 |     return anchors
66 | 
67 | 
68 | def _ratio_enum(anchor, ratios):
69 |     """
70 |     Enumerate a set of anchors for each aspect ratio wrt an anchor.
71 |     """
72 | 
73 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
74 |     size = w * h
75 |     size_ratios = size / ratios
76 |     ws = np.round(np.sqrt(size_ratios))
77 |     hs = np.round(ws * ratios)
78 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
79 |     return anchors
80 | 
81 | 
82 | def _scale_enum(anchor, scales):
83 |     """
84 |     Enumerate a set of anchors for each scale wrt an anchor.
85 |     """
86 | 
87 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
88 |     ws = w * scales
89 |     hs = h * scales
90 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
91 |     return anchors
92 | 
93 | def _unmap(data, count, inds, fill=0):
94 |     """ Unmap a subset of items (data) back to the original set of items (of
95 |     size count) """
96 |     if len(data.shape) == 1:
97 |         ret = np.empty((count,), dtype=np.float32)
98 |         ret.fill(fill)
99 |         ret[inds] = data
100 |     else:
101 |         ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
102 |         ret.fill(fill)
103 |         ret[inds, :] = data
104 |     return ret
105 | 
106 | if __name__ == '__main__':
107 |     import time
108 | 
109 |     t = time.time()
110 |     a = anchors()
111 |     num_anchors = 0
112 | 
113 |     # all_anchors = anchors_plane(200, 250, stride=4, border=0)
114 |     # num_anchors += all_anchors.shape[0]
115 |     for i in range(10):
116 |         ancs = anchors()
117 |         all_anchors = cython_anchor.anchors_plane(200, 250, 4, ancs)
118 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
119 |         all_anchors = cython_anchor.anchors_plane(100, 125, 8, ancs)
120 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
121 |         all_anchors = cython_anchor.anchors_plane(50, 63, 16, ancs)
122 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
123 |         all_anchors = cython_anchor.anchors_plane(25, 32, 32, ancs)
124 |         num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
125 |     print('average time: %f' % ((time.time() - t) / 10))
126 |     print('anchors: %d' % (num_anchors / 10))
127 |     print(a.shape, '\n', a)
128 |     print(all_anchors.shape)
129 |     # from IPython import embed
130 |     # embed()
131 | 
-------------------------------------------------------------------------------- /libs/boxes/bbox.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 | 
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 | 
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 | 
15 | def bbox_overlaps(
16 |         np.ndarray[DTYPE_t, ndim=2] boxes,
17 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 |     """
19 |     Parameters
20 |     ----------
21 |     boxes: (N, 4) ndarray of float
22 |     query_boxes: (K, 4) ndarray of float
23 |     Returns
24 |     -------
25 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 |     """
27 |     cdef unsigned int N = boxes.shape[0]
28 |     cdef unsigned int K = query_boxes.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 |     cdef DTYPE_t iw, ih, box_area
31 |     cdef DTYPE_t ua
32 |     cdef unsigned int k, n
33 |     for k in range(K):
34 |         box_area = (
35 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 |         )
38 |         for n in range(N):
39 |             iw = (
40 |                 min(boxes[n, 2], query_boxes[k, 2]) -
41 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
42 |             )
43 |             if iw > 0:
44 |                 ih = (
45 |                     min(boxes[n, 3], query_boxes[k, 3]) -
46 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
47 |                 )
48 |                 if ih > 0:
49 |                     ua = float(
50 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
51 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
52 |                         box_area - iw * ih
53 |                     )
54 |                     overlaps[n, k] = iw * ih / ua
55 |     return overlaps
56 | 
57 | def bbox_intersections(
58 |         np.ndarray[DTYPE_t,
ndim=2] boxes,
59 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
60 |     """
61 |     For each query box compute the intersection ratio covered by boxes
62 |     ----------
63 |     Parameters
64 |     ----------
65 |     boxes: (N, 4) ndarray of float
66 |     query_boxes: (K, 4) ndarray of float
67 |     Returns
68 |     -------
69 |     intersec: (N, K) ndarray of intersections between boxes and query_boxes
70 |     """
71 |     cdef unsigned int N = boxes.shape[0]
72 |     cdef unsigned int K = query_boxes.shape[0]
73 |     cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
74 |     cdef DTYPE_t iw, ih, box_area
75 |     cdef DTYPE_t ua
76 |     cdef unsigned int k, n
77 |     for k in range(K):
78 |         box_area = (
79 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
80 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
81 |         )
82 |         for n in range(N):
83 |             iw = (
84 |                 min(boxes[n, 2], query_boxes[k, 2]) -
85 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
86 |             )
87 |             if iw > 0:
88 |                 ih = (
89 |                     min(boxes[n, 3], query_boxes[k, 3]) -
90 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
91 |                 )
92 |                 if ih > 0:
93 |                     intersec[n, k] = iw * ih / box_area
94 |     return intersec
-------------------------------------------------------------------------------- /libs/boxes/bbox_transform.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import warnings
10 | 
11 | def bbox_transform(ex_rois, gt_rois):
12 |     """
13 |     computes the distance from ground-truth boxes to the given boxes, normed by their size
14 |     :param ex_rois: n * 4 numpy array, given boxes
15 |     :param gt_rois: n * 4 numpy array, ground-truth boxes
16 |     :return: deltas: n * 4 numpy array, regression targets (dx, dy, dw, dh)
17 |     """
18 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
19 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
20 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
21 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
22 | 
23 |     # assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
24 |     #     'Invalid boxes found: {} {}'. \
25 |     #     format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :])
26 | 
27 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
28 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
29 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
30 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
31 | 
32 |     # warnings.catch_warnings()
33 |     # warnings.filterwarnings('error')
34 |     targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths
35 |     targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights
36 |     targets_dw = 5.0 * np.log(gt_widths / ex_widths)
37 |     targets_dh = 5.0 * np.log(gt_heights / ex_heights)
38 | 
39 |     targets = np.vstack(
40 |         (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
41 |     return targets
42 | 
43 | def bbox_transform_inv(boxes, deltas):
44 |     if boxes.shape[0] == 0:
45 |         return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
46 | 
47 |     boxes = boxes.astype(deltas.dtype, copy=False)
48 | 
49 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
50 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
51 |     ctr_x = boxes[:, 0] + 0.5 * widths
52 |     ctr_y = boxes[:, 1] + 0.5 * heights
53 | 
54 |     dx = deltas[:, 0::4] * 0.1
55 |     dy = deltas[:, 1::4] * 0.1
56 |     dw = deltas[:, 2::4] * 0.2
57 |     dh = deltas[:, 3::4] * 0.2
58 | 
59 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
60 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
61 |     # equivalent to np.exp(dw) * widths; written as exp(dw + log(w)) here
62 |     pred_w = np.exp(dw + np.log(widths[:, np.newaxis]))
63 |     pred_h = np.exp(dh + np.log(heights[:, np.newaxis]))
64 | 
65 |     pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
66 |     # x1
67 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
68 |     # y1
69 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
70 |     # x2
71 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
72 |     # y2
73 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1
74 | 
75 |     return pred_boxes
76 | 
77 | def clip_boxes(boxes, im_shape):
78 |     """
79 |     Clip boxes to image boundaries.
80 |     """
81 | 
82 |     # x1 >= 0
83 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
84 |     # y1 >= 0
85 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
86 |     # x2 < im_shape[1]
87 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
88 |     # y2 < im_shape[0]
89 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
90 |     return boxes
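91 | 
92 | if __name__ == '__main__':
93 |     # Editor's worked example (hedged addition, not in the original file).
94 |     # The 10x/5x scaling applied in bbox_transform matches the 0.1/0.2
95 |     # factors in bbox_transform_inv, so encoding then decoding recovers
96 |     # the ground-truth box exactly:
97 |     ex = np.array([[0., 0., 15., 15.]])
98 |     gt = np.array([[2., 2., 17., 21.]])
99 |     deltas = bbox_transform(ex, gt)        # [[1.5625, 2.8125, 0., 1.1157]]
100 |     print(bbox_transform_inv(ex, deltas))  # [[ 2.  2. 17. 21.]]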
-------------------------------------------------------------------------------- /libs/boxes/blob.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Blob helper functions."""
9 | 
10 | import numpy as np
11 | import cv2
12 | from ..fast_rcnn.config import cfg  # NOTE: no fast_rcnn package ships with this repo; cfg is only needed by the RANDOM_DOWNSAMPLE branch below
13 | 
14 | def im_list_to_blob(ims):
15 |     """Convert a list of images into a network input.
16 | 
17 |     Assumes images are already prepared (means subtracted, BGR order, ...).
18 |     """
19 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
20 |     num_images = len(ims)
21 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
22 |                     dtype=np.float32)
23 |     for i in range(num_images):
24 |         im = ims[i]
25 |         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
26 | 
27 |     return blob
28 | 
29 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
30 |     """Mean subtract and scale an image for use in a blob."""
31 |     im = im.astype(np.float32, copy=False)
32 |     im -= pixel_means
33 |     im_shape = im.shape
34 |     im_size_min = np.min(im_shape[0:2])
35 |     im_size_max = np.max(im_shape[0:2])
36 |     im_scale = float(target_size) / float(im_size_min)
37 |     # Prevent the biggest axis from being more than MAX_SIZE
38 |     if np.round(im_scale * im_size_max) > max_size:
39 |         im_scale = float(max_size) / float(im_size_max)
40 |     if cfg.TRAIN.RANDOM_DOWNSAMPLE:
41 |         r = 0.6 + np.random.rand() * 0.4
42 |         im_scale *= r
43 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
44 |                     interpolation=cv2.INTER_LINEAR)
45 | 
46 |     return im, im_scale
47 | 
-------------------------------------------------------------------------------- /libs/boxes/cython_anchor.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import sys, pkg_resources, imp
6 | 
7 | def __bootstrap__():
8 |     global __bootstrap__, __loader__, __file__
9 |     __file__ = pkg_resources.resource_filename(__name__, 'cython_anchor.so')
10 |     __loader__ = None
11 |     del __bootstrap__, __loader__
12 |     imp.load_dynamic(__name__, __file__)
13 | 
14 | __bootstrap__()
-------------------------------------------------------------------------------- /libs/boxes/cython_anchor.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by CharlesShang@github
5 | # --------------------------------------------------------
6 | 
7 | cimport cython
8 | import numpy as np
9 | cimport numpy as np
10 | 
11 | DTYPE = np.float
12 | ctypedef np.float_t DTYPE_t
13 | 
14 | def anchors_plane(
15 |         int height, int width, int stride,
16 |         np.ndarray[DTYPE_t, ndim=2] anchors_base):
17 |     """
18 |     Parameters
19 |     ----------
20 |     height: height of plane
21 |     width: width of plane
22 |     stride: stride w.r.t. the original image
23 |     anchors_base: (A, 4) a base set of anchors
24 |     Returns
25 |     -------
26 |     all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane
27 |     """
28 |     cdef unsigned int A = anchors_base.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE)
30 |     cdef unsigned int iw, ih
31 |     cdef unsigned int k
32 |     cdef unsigned int A4
33 |     cdef unsigned int sh
34 |     cdef unsigned int sw
35 |     A4 = A * 4
36 |     for iw in range(width):
37 |         sw = iw * stride
38 |         for ih in range(height):
39 |             sh = ih * stride
40 |             for k in range(A):
41 |                 all_anchors[ih, iw, k, 0] = anchors_base[k, 0] + sw
42 |                 all_anchors[ih, iw, k, 1] = anchors_base[k, 1] + sh
43 |                 all_anchors[ih, iw, k, 2] = anchors_base[k, 2] + sw
44 |                 all_anchors[ih, iw, k, 3] = anchors_base[k, 3] + sh
45 |     return all_anchors
-------------------------------------------------------------------------------- /libs/boxes/cython_bbox.py:
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox_transform.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by CharlesShang@github 5 | # -------------------------------------------------------- 6 | 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | DTYPE = np.float 12 | ctypedef np.float_t DTYPE_t 13 | # ctypedef float DTYPE_t 14 | 15 | #def bbox_transform( 16 | # np.ndarray[DTYPE_t, ndim=2] ex_rois, 17 | # np.ndarray[DTYPE_t, ndim=2] gt_rois): 18 | def bbox_transform( 19 | np.ndarray[DTYPE_t, ndim=2] ex_rois, 20 | np.ndarray[DTYPE_t, ndim=2] gt_rois): 21 | """ 22 | Parameters 23 | ---------- 24 | ex_rois: n * 4 numpy array, given boxes 25 | gt_rois: n * 4 numpy array, ground-truth boxes 26 | Returns 27 | ------- 28 | targets: (n, 4) ndarray 29 | """ 30 | cdef unsigned int R = ex_rois.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] targets = np.zeros((R, 4), dtype=DTYPE) 32 | cdef unsigned int i 33 | cdef DTYPE_t gt_w 34 | cdef DTYPE_t gt_h 35 | cdef DTYPE_t gt_cx 36 | cdef DTYPE_t gt_cy 37 | cdef DTYPE_t ex_w 38 | cdef DTYPE_t ex_h 39 | cdef DTYPE_t ex_cx 40 | cdef DTYPE_t ex_cy 41 | for i in range(R): 42 | gt_w = gt_rois[i, 2] - gt_rois[i, 0] + 1.0 43 | gt_h = gt_rois[i, 3] - gt_rois[i, 1] + 1.0 44 | ex_w = ex_rois[i, 2] - ex_rois[i, 0] + 1.0 45 | ex_h = ex_rois[i, 3] - ex_rois[i, 1] + 1.0 46 | gt_cx = gt_rois[i, 0] + gt_w * 0.5 47 | gt_cy = gt_rois[i, 1] + gt_h * 0.5 48 | ex_cx = ex_rois[i, 0] + ex_w * 0.5 49 | ex_cy = ex_rois[i, 1] + ex_h * 0.5 50 | targets[i, 0] = (gt_cx - ex_cx) / ex_w 51 | targets[i, 1] = (gt_cy - ex_cy) / ex_h 52 | targets[i, 2] = np.log(gt_w / ex_w) 53 | targets[i, 3] = np.log(gt_h / ex_h) 54 | return targets 55 | 56 | cdef inline DTYPE_t my_max(DTYPE_t a, DTYPE_t b): return a if a >= b else b 57 | cdef inline DTYPE_t my_min(DTYPE_t a, DTYPE_t b): return a if a <= b else b 58 | 59 | def bbox_transform_inv( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] deltas): 62 | """ 63 | Parameters 64 | ---------- 65 | boxes: n * 4 numpy array, given boxes 66 | deltas: (n, kx4) numpy array, 67 | Returns 68 | ------- 69 | pred_boxes: (n, kx4) 
ndarray 70 | """ 71 | cdef unsigned int R = boxes.shape[0] 72 | cdef unsigned int k4 = deltas.shape[1] 73 | cdef unsigned int k 74 | k = k4 / 4 75 | cdef np.ndarray[DTYPE_t, ndim=2] pred_boxes = np.zeros((R, k4), dtype=DTYPE) 76 | if R == 0: 77 | return pred_boxes 78 | 79 | cdef unsigned int i 80 | cdef unsigned int j 81 | cdef unsigned int j4 82 | cdef DTYPE_t w 83 | cdef DTYPE_t h 84 | cdef DTYPE_t cx 85 | cdef DTYPE_t cy 86 | cdef DTYPE_t px 87 | cdef DTYPE_t py 88 | cdef DTYPE_t pw 89 | cdef DTYPE_t ph 90 | for i in range(R): 91 | w = boxes[i, 2] - boxes[i, 0] + 1.0 92 | h = boxes[i, 3] - boxes[i, 1] + 1.0 93 | cx = boxes[i, 0] + w * 0.5 94 | cy = boxes[i, 1] + h * 0.5 95 | for j in range(k): 96 | j4 = j * 4 97 | px = deltas[i, j4 ] * w + cx 98 | py = deltas[i, j4 + 1] * h + cy 99 | pw = np.exp(deltas[i, j4 + 2]) * w 100 | ph = np.exp(deltas[i, j4 + 3]) * h 101 | pred_boxes[i, j4 ] = px - 0.5 * pw 102 | pred_boxes[i, j4 + 1] = py - 0.5 * ph 103 | pred_boxes[i, j4 + 2] = px + 0.5 * pw 104 | pred_boxes[i, j4 + 3] = py + 0.5 * ph 105 | return pred_boxes 106 | 107 | def clip_boxes( 108 | np.ndarray[DTYPE_t, ndim=2] boxes, 109 | np.ndarray[DTYPE_t, ndim=1] im_shape): 110 | """ 111 | Parameters 112 | ---------- 113 | boxes: (n ,kx4) numpy array, given boxes 114 | im_shape:(2,) numpy array, (image_height, image_width) 115 | Returns 116 | ------- 117 | clipped: (n, kx4) ndarray 118 | """ 119 | cdef unsigned int R = boxes.shape[0] 120 | cdef unsigned int k4 = boxes.shape[1] 121 | cdef unsigned int k = k4 / 4 122 | cdef np.ndarray[DTYPE_t, ndim=2] clipped = np.zeros((R, k4), dtype=DTYPE) 123 | cdef unsigned int i 124 | cdef unsigned int j 125 | cdef unsigned int j4 126 | for i in range(R): 127 | for j in range(k): 128 | j4 = j * 4 129 | clipped[i, j4 ] = my_max(my_min(boxes[i, j4 ], im_shape[1]-1), 0) 130 | clipped[i, j4 + 1] = my_max(my_min(boxes[i, j4 + 1], im_shape[0]-1), 0) 131 | clipped[i, j4 + 2] = my_max(my_min(boxes[i, j4 + 2], im_shape[1]-1), 0) 132 | clipped[i, j4 + 3] = my_max(my_min(boxes[i, j4 + 3], im_shape[0]-1), 0) 133 | return clipped -------------------------------------------------------------------------------- /libs/boxes/cython_nms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_nms.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = 
np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /libs/boxes/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box 
i's (the box currently under consideration)
89 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
90 |     # variables for computing overlap with box j (lower scoring box)
91 |     cdef np.float32_t xx1, yy1, xx2, yy2
92 |     cdef np.float32_t w, h
93 |     cdef np.float32_t inter, ovr
94 | 
95 |     keep = []
96 |     for _i in range(ndets):
97 |         i = order[_i]
98 |         if suppressed[i] == 1:
99 |             continue
100 |         keep.append(i)
101 |         ix1 = x1[i]
102 |         iy1 = y1[i]
103 |         ix2 = x2[i]
104 |         iy2 = y2[i]
105 |         iarea = areas[i]
106 |         for _j in range(_i + 1, ndets):
107 |             j = order[_j]
108 |             if suppressed[j] == 1:
109 |                 continue
110 |             xx1 = max(ix1, x1[j])
111 |             yy1 = max(iy1, y1[j])
112 |             xx2 = min(ix2, x2[j])
113 |             yy2 = min(iy2, y2[j])
114 |             w = max(0.0, xx2 - xx1 + 1)
115 |             h = max(0.0, yy2 - yy1 + 1)
116 |             inter = w * h
117 |             ovr = inter / (iarea + areas[j] - inter)
118 |             ovr1 = inter / iarea
119 |             ovr2 = inter / areas[j]
120 |             if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 |                 suppressed[j] = 1
122 | 
123 |     return keep
124 | 
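125 | # Editor's hedged usage sketch (not part of the original file). Assuming
126 | # libs/setup.py compiles this module into the cython_nms.so loaded by
127 | # cython_nms.py:
128 | #   >>> import numpy as np
129 | #   >>> from libs.boxes import cython_nms
130 | #   >>> dets = np.array([[0, 0, 10, 10, 0.9],
131 | #   ...                  [1, 1, 11, 11, 0.8],
132 | #   ...                  [50, 50, 60, 60, 0.7]], dtype=np.float32)
133 | #   >>> cython_nms.nms(dets, 0.5)   # IoU(box 0, box 1) ~ 0.70 > 0.5
134 | #   [0, 2]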
-------------------------------------------------------------------------------- /libs/boxes/nms_wrapper.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import libs.configs.config_v1 as cfg
10 | import libs.nms.gpu_nms as gpu_nms
11 | import libs.nms.cpu_nms as cpu_nms
12 | 
13 | def nms(dets, thresh, force_cpu=False):
14 |     """Dispatch to either CPU or GPU NMS implementations."""
15 |     if dets.shape[0] == 0:
16 |         return []
17 |     if force_cpu:
18 |         return cpu_nms.cpu_nms(dets, thresh)
19 |     return gpu_nms.gpu_nms(dets, thresh, device_id=0)
20 | 
21 | def nms_wrapper(scores, boxes, threshold=0.7, class_sets=None):
22 |     """
23 |     post-process the results of im_detect
24 |     :param boxes: N * (K * 4) numpy
25 |     :param scores: N * K numpy
26 |     :param class_sets: e.g. CLASSES = ('__background__', 'person', 'bike', 'motorbike', 'car', 'bus')
27 |     :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]}
28 |     """
29 |     num_class = scores.shape[1] if class_sets is None else len(class_sets)
30 |     assert num_class * 4 == boxes.shape[1], \
31 |         "Detection scores and boxes don't match: %d vs %d" % (num_class * 4, boxes.shape[1])
32 |     class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets
33 | 
34 |     res = []
35 |     for ind, cls in enumerate(class_sets[1:]):
36 |         ind += 1  # skip background
37 |         cls_boxes = boxes[:, 4 * ind: 4 * (ind + 1)]
38 |         cls_scores = scores[:, ind]
39 |         dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
40 |         keep = nms(dets, thresh=0.3)
41 |         dets = dets[keep, :]
42 |         dets = dets[np.where(dets[:, 4] > threshold)]
43 |         r = {}
44 |         if dets.shape[0] > 0:
45 |             r['class'], r['dets'] = cls, dets
46 |         else:
47 |             r['class'], r['dets'] = cls, None
48 |         res.append(r)
49 |     return res
50 | 
51 | if __name__ == '__main__':
52 | 
53 |     score = np.random.rand(10, 21)
54 |     boxes = np.random.randint(0, 100, (10, 21, 2))
55 |     s = np.random.randint(0, 100, (10, 21, 2))
56 |     s = boxes + s
57 |     boxes = np.concatenate((boxes, s), axis=2)
58 |     boxes = np.reshape(boxes, [boxes.shape[0], -1])
59 |     # score = np.reshape(score, [score.shape[0], -1])
60 |     res = nms_wrapper(score, boxes)
61 |     print(res)
-------------------------------------------------------------------------------- /libs/boxes/profile: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/boxes/profile
-------------------------------------------------------------------------------- /libs/boxes/profile.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/boxes/profile.png
-------------------------------------------------------------------------------- /libs/boxes/roi.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import functools
5 | 
6 | import numpy as np
7 | import tensorflow as tf
8 | import tensorflow.contrib.slim as slim
9 | 
10 | def roi_align(feat, boxes):
11 |     """Given features and boxes, this function crops features."""
12 |     return
13 | 
14 | def roi_cropping(feat, boxes, clses, anchors, spatial_scale=1.0/16):
15 |     """This function computes final rpn boxes
16 |     and crops areas from the incoming features.
17 |     """
18 |     return
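19 | 
20 | # Editor's hedged sketch (not the repo's implementation; the actual ROI
21 | # cropping layer used in training lives in libs/layers/crop.py). One common
22 | # way to realize roi_align with stock TF ops is tf.image.crop_and_resize:
23 | # take boxes as (x1, y1, x2, y2) in feature-map coordinates, normalize them
24 | # to [0, 1] as (y1/(H-1), x1/(W-1), y2/(H-1), x2/(W-1)), and call
25 | #   tf.image.crop_and_resize(feat, normalized_boxes, box_inds, [7, 7])
26 | # where box_inds maps each box to its image index in the batch.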
-------------------------------------------------------------------------------- /libs/boxes/timer.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 
-------------------------------------------------------------------------------- /libs/configs/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/configs/__init__.py
-------------------------------------------------------------------------------- /libs/configs/config_v1.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import tensorflow as tf
6 | 
7 | ##########################
8 | # restore
9 | ##########################
10 | tf.app.flags.DEFINE_string(
11 |     'train_dir', './output/mask_rcnn/',
12 |     'Directory where checkpoints and event logs are written to.')
13 | 
14 | tf.app.flags.DEFINE_string(
15 |     'pretrained_model', './data/pretrained_models/resnet_v1_50.ckpt',
16 |     'Path to the pretrained model')
17 | 
18 | ##########################
19 | # network
20 | ##########################
21 | tf.app.flags.DEFINE_string(
22 |     'network', 'resnet50',
23 |     'name of the backbone network')
24 | 
25 | ##########################
26 | # dataset
27 | ##########################
28 | tf.app.flags.DEFINE_bool(
29 |     'update_bn', False,
30 |     'Whether or not to update the batch normalization layers')
31 | 
32 | tf.app.flags.DEFINE_integer(
33 |     'num_readers', 4,
34 |     'The number of parallel readers that read data from the dataset.')
35 | 
36 | tf.app.flags.DEFINE_string(
37 |     'dataset_name', 'coco',
38 |     'The name of the dataset to load.')
39 | 
40 | tf.app.flags.DEFINE_string(
41 |     'dataset_split_name', 'train2014',
42 |     'The name of the train/test/val split.')
43 | 
44 | tf.app.flags.DEFINE_string(
45 |     'dataset_dir', 'data/coco/',
46 |     'The directory where the dataset files are stored.')
47 | 
48 | tf.app.flags.DEFINE_integer(
49 |     'im_batch', 1,
50 |     'number of images in a mini-batch')
51 | 
52 | 
53 | tf.app.flags.DEFINE_integer(
54 |     'num_preprocessing_threads', 4,
55 |     'The number of threads used to create the batches.')
56 | 
57 | tf.app.flags.DEFINE_integer(
58 |     'log_every_n_steps', 10,
59 |     'The frequency with which logs are printed.')
60 | 
61 | tf.app.flags.DEFINE_integer(
62 |     'save_summaries_secs', 60,
63 |     'The frequency with which summaries are saved, in seconds.')
64 | 
65 | tf.app.flags.DEFINE_integer(
66 |     'save_interval_secs', 7200,
67 |     'The frequency with which the model is saved, in seconds.')
68 | 
69 | tf.app.flags.DEFINE_integer(
70 |     'max_iters', 2500000,
71 |     'max iterations')
72 | 
73 | ######################
74 | # Optimization Flags #
75 | ######################
76 | 
77 | tf.app.flags.DEFINE_float(
78 |     'weight_decay', 0.00005, 'The weight decay on the model weights.')
79 | 
80 | tf.app.flags.DEFINE_string(
81 |     'optimizer', 'momentum',
82 |     'The name of the optimizer, one of "adadelta", "adagrad", "adam", '
83 |     '"ftrl", "momentum", "sgd" or "rmsprop".')
84 | 
85 | tf.app.flags.DEFINE_float(
86 |     'adadelta_rho', 0.95,
87 |     'The decay rate for adadelta.')
88 | 89 | tf.app.flags.DEFINE_float( 90 | 'adagrad_initial_accumulator_value', 0.1, 91 | 'Starting value for the AdaGrad accumulators.') 92 | 93 | tf.app.flags.DEFINE_float( 94 | 'adam_beta1', 0.9, 95 | 'The exponential decay rate for the 1st moment estimates.') 96 | 97 | tf.app.flags.DEFINE_float( 98 | 'adam_beta2', 0.999, 99 | 'The exponential decay rate for the 2nd moment estimates.') 100 | 101 | tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.') 102 | 103 | tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5, 104 | 'The learning rate power.') 105 | 106 | tf.app.flags.DEFINE_float( 107 | 'ftrl_initial_accumulator_value', 0.1, 108 | 'Starting value for the FTRL accumulators.') 109 | 110 | tf.app.flags.DEFINE_float( 111 | 'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.') 112 | 113 | tf.app.flags.DEFINE_float( 114 | 'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.') 115 | 116 | tf.app.flags.DEFINE_float( 117 | 'momentum', 0.99, 118 | 'The momentum for the MomentumOptimizer and RMSPropOptimizer.') 119 | 120 | tf.app.flags.DEFINE_float('rmsprop_momentum', 0.99, 'Momentum.') 121 | 122 | tf.app.flags.DEFINE_float('rmsprop_decay', 0.99, 'Decay term for RMSProp.') 123 | 124 | ####################### 125 | # Learning Rate Flags # 126 | ####################### 127 | 128 | tf.app.flags.DEFINE_string( 129 | 'learning_rate_decay_type', 'exponential', 130 | 'Specifies how the learning rate is decayed. One of "fixed", "exponential",' 131 | ' or "polynomial"') 132 | 133 | tf.app.flags.DEFINE_float('learning_rate', 0.002, 134 | 'Initial learning rate.') 135 | 136 | tf.app.flags.DEFINE_float( 137 | 'end_learning_rate', 0.00001, 138 | 'The minimal end learning rate used by a polynomial decay learning rate.') 139 | 140 | tf.app.flags.DEFINE_float( 141 | 'label_smoothing', 0.0, 'The amount of label smoothing.') 142 | 143 | tf.app.flags.DEFINE_float( 144 | 'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.') 145 | 146 | tf.app.flags.DEFINE_float( 147 | 'num_epochs_per_decay', 2.0, 148 | 'Number of epochs after which learning rate decays.') 149 | 150 | tf.app.flags.DEFINE_bool( 151 | 'sync_replicas', False, 152 | 'Whether or not to synchronize the replicas during training.') 153 | 154 | tf.app.flags.DEFINE_integer( 155 | 'replicas_to_aggregate', 1, 156 | 'The Number of gradients to collect before updating params.') 157 | 158 | tf.app.flags.DEFINE_float( 159 | 'moving_average_decay', None, 160 | 'The decay to use for the moving average.' 161 | 'If left as None, then moving averages are not used.') 162 | 163 | ####################### 164 | # Dataset Flags # 165 | ####################### 166 | 167 | 168 | tf.app.flags.DEFINE_string( 169 | 'model_name', 'resnet50', 170 | 'The name of the architecture to train.') 171 | 172 | tf.app.flags.DEFINE_string( 173 | 'preprocessing_name', 'coco', 174 | 'The name of the preprocessing to use. 
If left ' 175 | 'as `None`, then the model_name flag is used.') 176 | 177 | tf.app.flags.DEFINE_integer( 178 | 'batch_size', 1, 179 | 'The number of samples in each batch.') 180 | 181 | tf.app.flags.DEFINE_integer( 182 | 'train_image_size', None, 'Train image size') 183 | 184 | tf.app.flags.DEFINE_integer('max_number_of_steps', None, 185 | 'The maximum number of training steps.') 186 | 187 | tf.app.flags.DEFINE_string( 188 | 'classes', None, 189 | 'The classes to classify.') 190 | 191 | tf.app.flags.DEFINE_integer( 192 | 'image_min_size', 640, 193 | 'Resize images so that the shorter edge equals image_min_size') 194 | 195 | ##################### 196 | # Fine-Tuning Flags # 197 | ##################### 198 | 199 | tf.app.flags.DEFINE_string( 200 | 'checkpoint_path', None, 201 | 'The path to a checkpoint from which to fine-tune.') 202 | 203 | tf.app.flags.DEFINE_string( 204 | 'checkpoint_exclude_scopes', None, 205 | 'Comma-separated list of scopes of variables to exclude when restoring ' 206 | 'from a checkpoint.') 207 | 208 | tf.app.flags.DEFINE_string( 209 | 'checkpoint_include_scopes', None, 210 | 'Comma-separated list of scopes of variables to include when restoring ' 211 | 'from a checkpoint.') 212 | 213 | tf.app.flags.DEFINE_string( 214 | 'trainable_scopes', None, 215 | 'Comma-separated list of scopes to filter the set of variables to train. ' 216 | 'By default, None trains all the variables.') 217 | 218 | tf.app.flags.DEFINE_boolean( 219 | 'ignore_missing_vars', False, 220 | 'When restoring a checkpoint, ignore variables that are missing from it.') 221 | 222 | tf.app.flags.DEFINE_boolean( 223 | 'restore_previous_if_exists', True, 224 | 'Whether to restore from a previous checkpoint in train_dir, if one exists.') 225 | 226 | ####################### 227 | # BOX Flags # 228 | ####################### 229 | tf.app.flags.DEFINE_float( 230 | 'rpn_bg_threshold', 0.3, 231 | 'Only anchors whose intersection with groundtruth is less than rpn_bg_threshold are considered to be bg') 232 | 233 | tf.app.flags.DEFINE_float( 234 | 'rpn_fg_threshold', 0.7, 235 | 'Only anchors whose intersection with groundtruth is larger than rpn_fg_threshold are considered to be fg') 236 | 237 | tf.app.flags.DEFINE_float( 238 | 'fg_threshold', 0.7, 239 | 'Only regions whose intersection is larger than fg_threshold are considered to be fg') 240 | 241 | tf.app.flags.DEFINE_float( 242 | 'bg_threshold', 0.3, 243 | 'Only regions whose intersection is less than bg_threshold are considered to be bg') 244 | 245 | tf.app.flags.DEFINE_integer( 246 | 'rois_per_image', 256, 247 | 'Number of rois that should be sampled to train this network') 248 | 249 | tf.app.flags.DEFINE_float( 250 | 'fg_roi_fraction', 0.25, 251 | 'Fraction of the sampled rois that should be foreground') 252 | 253 | tf.app.flags.DEFINE_float( 254 | 'fg_rpn_fraction', 0.25, 255 | 'Fraction of the sampled rpn anchors that should be foreground') 256 | 257 | tf.app.flags.DEFINE_integer( 258 | 'rpn_batch_size', 500, 259 | 'Number of rpn anchors that should be sampled to train this network') 260 | 261 | tf.app.flags.DEFINE_integer( 262 | 'allow_border', 10, 263 | 'How many pixels an anchor may extend beyond the image border') 264 | 265 | ################################## 266 | # NMS # 267 | ################################## 268 | 269 | tf.app.flags.DEFINE_integer( 270 | 'pre_nms_top_n', 12000, 271 | 'Number of rpn anchors that should be kept before nms') 272 | 273 | tf.app.flags.DEFINE_integer( 274 | 'post_nms_top_n', 2000, 275 | 'Number of rpn anchors that should be kept after nms') 276 | 277 | tf.app.flags.DEFINE_float( 278 | 'rpn_nms_threshold', 
0.7, 279 | 'NMS threshold') 280 | 281 | ################################## 282 | # Mask # 283 | ################################## 284 | 285 | tf.app.flags.DEFINE_boolean( 286 | 'mask_allow_bg', True, 287 | 'Whether to allow background masks in the masking stage') 288 | 289 | tf.app.flags.DEFINE_float( 290 | 'mask_threshold', 0.50, 291 | 'Minimum overlap with groundtruth for a roi to be treated as a positive mask') 292 | tf.app.flags.DEFINE_integer( 293 | 'masks_per_image', 64, 294 | 'Number of masks that should be sampled per image to train this network') 295 | 296 | tf.app.flags.DEFINE_float( 297 | 'min_size', 2, 298 | 'Minimum size (in pixels) of an object') 299 | 300 | FLAGS = tf.app.flags.FLAGS 301 | -------------------------------------------------------------------------------- /libs/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/datasets/__init__.py -------------------------------------------------------------------------------- /libs/datasets/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import tensorflow as tf 7 | 8 | import tensorflow.contrib.slim as slim 9 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 10 | 11 | _FILE_PATTERN = 'coco_%s_*.tfrecord' 12 | 13 | SPLITS_TO_SIZES = {'train2014': 82783, 'val2014': 40504} 14 | 15 | _NUM_CLASSES = 81 16 | 17 | _ITEMS_TO_DESCRIPTIONS = { 18 | 'image': 'A color image of varying size.', 19 | 'label': 'An annotation image of varying size. (pixel-level masks)', 20 | 'gt_masks': 'masks of instances in this image. (instance-level masks), of shape (N, image_height, image_width)', 21 | 'gt_boxes': 'bounding boxes and classes of instances in this image, of shape (N, 5), each entry is (x1, y1, x2, y2, class)', 22 | } 23 | 24 | 25 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 26 | if split_name not in SPLITS_TO_SIZES: 27 | raise ValueError('split name %s was not recognized.' % split_name) 28 | 29 | if not file_pattern: 30 | file_pattern = _FILE_PATTERN 31 | file_pattern = os.path.join(dataset_dir, 'records', file_pattern % split_name) 32 | 33 | # Allowing None in the signature so that dataset_factory can use the default. 
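# --- illustrative aside (not part of coco.py): consuming get_split() ---------
# get_split() returns a slim Dataset whose decoder exposes the item names
# defined in items_to_handlers further down. A minimal sketch using the stock
# TF 1.x slim DatasetDataProvider; note that this repo's actual training input
# path goes through coco.read() (ZLIB-compressed records, defined later in
# this file) and dataset_factory.get_dataset(), so this is only for
# orientation:

import tensorflow as tf
import tensorflow.contrib.slim as slim
from libs.datasets import coco

dataset = coco.get_split('train2014', 'data/coco/')
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, num_readers=4, shuffle=True)
# Item names match the items_to_handlers keys defined in get_split().
image, gt_boxes, gt_masks = provider.get(['image', 'gt_boxes', 'gt_masks'])

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, boxes = sess.run([image, gt_boxes])   # one decoded example
    coord.request_stop()
    coord.join(threads)
# ------------------------------------------------------------------------------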
34 | if reader is None: 35 | reader = tf.TFRecordReader 36 | 37 | keys_to_features = { 38 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 39 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 40 | 'label/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 41 | 'label/format': tf.FixedLenFeature((), tf.string, default_value='png'), 42 | 'image/height': tf.FixedLenFeature((), tf.int64), 43 | 'image/width': tf.FixedLenFeature((), tf.int64), 44 | 45 | 'label/num_instances': tf.FixedLenFeature((), tf.int64), 46 | 'label/gt_boxes': tf.FixedLenFeature((), tf.string), 47 | 'label/gt_masks': tf.FixedLenFeature((), tf.string), 48 | } 49 | 50 | def _masks_decoder(keys_to_tensors): 51 | masks = tf.decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8) 52 | width = tf.cast(keys_to_tensors['image/width'], tf.int32) 53 | height = tf.cast(keys_to_tensors['image/height'], tf.int32) 54 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 55 | mask_shape = tf.stack([instances, height, width]) 56 | return tf.reshape(masks, mask_shape) 57 | 58 | def _gt_boxes_decoder(keys_to_tensors): 59 | bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32) 60 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 61 | bboxes_shape = tf.stack([instances, 5]) 62 | return tf.reshape(bboxes, bboxes_shape) 63 | 64 | def _width_decoder(keys_to_tensors): 65 | width = keys_to_tensors['image/width'] 66 | return tf.cast(width, tf.int32) 67 | 68 | def _height_decoder(keys_to_tensors): 69 | height = keys_to_tensors['image/height'] 70 | return tf.cast(height, tf.int32) 71 | 72 | items_to_handlers = { 73 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 74 | 'label': slim.tfexample_decoder.Image('label/encoded', 'label/format', channels=1), 75 | 'gt_masks': slim.tfexample_decoder.ItemHandlerCallback( 76 | ['label/gt_masks', 'label/num_instances', 'image/width', 'image/height'], _masks_decoder), 77 | 'gt_boxes': slim.tfexample_decoder.ItemHandlerCallback(['label/gt_boxes', 'label/num_instances'], _gt_boxes_decoder), 78 | 'width': slim.tfexample_decoder.ItemHandlerCallback(['image/width'], _width_decoder), 79 | 'height': slim.tfexample_decoder.ItemHandlerCallback(['image/height'], _height_decoder), 80 | } 81 | 82 | decoder = slim.tfexample_decoder.TFExampleDecoder( 83 | keys_to_features, items_to_handlers) 84 | 85 | return slim.dataset.Dataset( 86 | data_sources=file_pattern, 87 | reader=reader, 88 | decoder=decoder, 89 | num_samples=SPLITS_TO_SIZES[split_name], 90 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 91 | num_classes=_NUM_CLASSES) 92 | 93 | def read(tfrecords_filename): 94 | 95 | if not isinstance(tfrecords_filename, list): 96 | tfrecords_filename = [tfrecords_filename] 97 | filename_queue = tf.train.string_input_producer( 98 | tfrecords_filename, num_epochs=100) 99 | 100 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 101 | reader = tf.TFRecordReader(options=options) 102 | _, serialized_example = reader.read(filename_queue) 103 | features = tf.parse_single_example( 104 | serialized_example, 105 | features={ 106 | 'image/img_id': tf.FixedLenFeature([], tf.int64), 107 | 'image/encoded': tf.FixedLenFeature([], tf.string), 108 | 'image/height': tf.FixedLenFeature([], tf.int64), 109 | 'image/width': tf.FixedLenFeature([], tf.int64), 110 | 'label/num_instances': tf.FixedLenFeature([], tf.int64), 111 | 'label/gt_masks': tf.FixedLenFeature([], tf.string), 112 | 'label/gt_boxes': 
tf.FixedLenFeature([], tf.string), 113 | 'label/encoded': tf.FixedLenFeature([], tf.string), 114 | }) 115 | # image = tf.image.decode_jpeg(features['image/encoded'], channels=3) 116 | img_id = tf.cast(features['image/img_id'], tf.int32) 117 | ih = tf.cast(features['image/height'], tf.int32) 118 | iw = tf.cast(features['image/width'], tf.int32) 119 | num_instances = tf.cast(features['label/num_instances'], tf.int32) 120 | image = tf.decode_raw(features['image/encoded'], tf.uint8) 121 | imsize = tf.size(image) 122 | image = tf.cond(tf.equal(imsize, ih * iw), \ 123 | lambda: tf.image.grayscale_to_rgb(tf.reshape(image, (ih, iw, 1))), \ 124 | lambda: tf.reshape(image, (ih, iw, 3))) 125 | 126 | gt_boxes = tf.decode_raw(features['label/gt_boxes'], tf.float32) 127 | gt_boxes = tf.reshape(gt_boxes, [num_instances, 5]) 128 | gt_masks = tf.decode_raw(features['label/gt_masks'], tf.uint8) 129 | gt_masks = tf.cast(gt_masks, tf.int32) 130 | gt_masks = tf.reshape(gt_masks, [num_instances, ih, iw]) 131 | 132 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 133 | 134 | -------------------------------------------------------------------------------- /libs/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from libs.visualization.summary_utils import visualize_input 7 | import glob 8 | from libs.datasets import coco 9 | 10 | import libs.preprocessings.coco_v1 as coco_preprocess 11 | 12 | def get_dataset(dataset_name, split_name, dataset_dir, 13 | im_batch=1, is_training=False, file_pattern=None, reader=None): 14 | """Return a preprocessed image with its groundtruth boxes and masks, read from tfrecords.""" 15 | if file_pattern is None: 16 | file_pattern = dataset_name + '_' + split_name + '*.tfrecord' 17 | 18 | tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern) 19 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords) 20 | 21 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training) 22 | #visualize_input(gt_boxes, image, tf.expand_dims(gt_masks, axis=3)) 23 | 24 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 25 | 26 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /libs/datasets/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include <stdlib.h> 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void 
*JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include <stdint.h> 5 | #include <stddef.h> 6 | #include <assert.h> 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include <math.h> 9 | #include <stdlib.h> 10 | 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; } 12 | uint umax( uint a, uint b ) { return (a>b) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import libs.datasets.pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. 
Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /libs/datasets/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['./common/maskApi.c', '_mask.pyx'], 13 | include_dirs = [np.get_include(), './common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': 'pycocotools'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) -------------------------------------------------------------------------------- /libs/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Written by CharlesShang@github 4 | # -------------------------------------------------------- 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | from .wrapper import anchor_decoder 10 | from .wrapper import anchor_encoder 11 | from .wrapper import roi_decoder 12 | from .wrapper import roi_encoder 13 | from .wrapper import mask_decoder 14 | from .wrapper import mask_encoder 15 | from .wrapper import sample_wrapper as sample_rpn_outputs 16 | from .wrapper import sample_with_gt_wrapper as sample_rpn_outputs_with_gt 17 | from .wrapper import gen_all_anchors 18 | from .wrapper import assign_boxes 19 | from .crop import crop as ROIAlign 20 | from .crop import crop_ as ROIAlign_ 21 | -------------------------------------------------------------------------------- /libs/layers/anchor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | 7 | import libs.boxes.cython_bbox as cython_bbox 8 | import libs.configs.config_v1 as cfg 9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 10 | from libs.boxes.anchor import anchors_plane 11 | from libs.logs.log import LOG 12 | # FLAGS = tf.app.flags.FLAGS 13 | 14 | _DEBUG = False 15 | 16 | def encode(gt_boxes, all_anchors, height, width, stride): 17 | """Matching 
and encoding groundtruth boxes into learning targets 18 | Sampling 19 | 20 | Parameters 21 | --------- 22 | gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class] 23 | all_anchors: an array of shape (h, w, A, 4), 24 | width: width of feature 25 | height: height of feature 26 | stride: downscale factor w.r.t the input size, e.g., one of [4, 8, 16, 32] 27 | Returns 28 | -------- 29 | labels: (N,) array in {-1, 0, 1}: -1 ignore, 0 background, 1 foreground 30 | bbox_targets: N x 4 regression targets 31 | bbox_inside_weights: N x 4, non-zero only for anchors labelled as foreground. 32 | """ 33 | # TODO: speed up this module 34 | # if all_anchors is None: 35 | # all_anchors = anchors_plane(height, width, stride=stride) 36 | 37 | # # anchors, inds_inside, total_anchors 38 | # border = cfg.FLAGS.allow_border 39 | # all_anchors = all_anchors.reshape((-1, 4)) 40 | # inds_inside = np.where( 41 | # (all_anchors[:, 0] >= -border) & 42 | # (all_anchors[:, 1] >= -border) & 43 | # (all_anchors[:, 2] < (width * stride) + border) & 44 | # (all_anchors[:, 3] < (height * stride) + border))[0] 45 | # anchors = all_anchors[inds_inside, :] 46 | all_anchors = all_anchors.reshape([-1, 4]) 47 | anchors = all_anchors 48 | total_anchors = all_anchors.shape[0] 49 | 50 | # labels = np.zeros((anchors.shape[0], ), dtype=np.float32) 51 | labels = np.empty((anchors.shape[0], ), dtype=np.float32) 52 | labels.fill(-1) 53 | 54 | if gt_boxes.size > 0: 55 | overlaps = cython_bbox.bbox_overlaps( 56 | np.ascontiguousarray(anchors, dtype=np.float), 57 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 58 | 59 | # if _DEBUG: 60 | # print ('gt_boxes shape: ', gt_boxes.shape) 61 | # print ('anchors shape: ', anchors.shape) 62 | # print ('overlaps shape: ', overlaps.shape) 63 | 64 | gt_assignment = overlaps.argmax(axis=1) # (A) 65 | max_overlaps = overlaps[np.arange(total_anchors), gt_assignment] 66 | gt_argmax_overlaps = overlaps.argmax(axis=0) # G 67 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 68 | np.arange(overlaps.shape[1])] 69 | 70 | labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0 71 | 72 | if True: 73 | # this is sensitive to boxes with little overlap, no need! 
74 | # gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 75 | 76 | # fg label: for each gt, hard-assign anchor with highest overlap despite its overlaps 77 | labels[gt_argmax_overlaps] = 1 78 | 79 | # exclude examples with little overlaps 80 | # added later 81 | # excludes = np.where(gt_max_overlaps < cfg.FLAGS.bg_threshold)[0] 82 | # labels[gt_argmax_overlaps[excludes]] = -1 83 | 84 | if _DEBUG: 85 | min_ov = np.min(gt_max_overlaps) 86 | max_ov = np.max(gt_max_overlaps) 87 | mean_ov = np.mean(gt_max_overlaps) 88 | if min_ov < cfg.FLAGS.bg_threshold: 89 | LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)' 90 | % (min_ov, mean_ov, max_ov, stride, height, width)) 91 | worst = gt_boxes[np.argmin(gt_max_overlaps)] 92 | anc = anchors[gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :] 93 | LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)' 94 | % (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4], 95 | anc[0], anc[1], anc[2], anc[3])) 96 | 97 | 98 | # fg label: above threshold IOU 99 | labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1 100 | # print (np.min(labels), np.max(labels)) 101 | 102 | # subsample positive labels if there are too many 103 | num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size) 104 | fg_inds = np.where(labels == 1)[0] 105 | if len(fg_inds) > num_fg: 106 | disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 107 | labels[disable_inds] = -1 108 | else: 109 | # if there is no gt 110 | labels[:] = 0 111 | 112 | # TODO: mild hard negative mining 113 | # subsample negative labels if there are too many 114 | num_fg = np.sum(labels == 1) 115 | num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8) 116 | bg_inds = np.where(labels == 0)[0] 117 | if len(bg_inds) > num_bg: 118 | disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 119 | labels[disable_inds] = -1 120 | 121 | bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32) 122 | if gt_boxes.size > 0: 123 | bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :]) 124 | bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32) 125 | bbox_inside_weights[labels == 1, :] = 0.1 126 | 127 | # # mapping to whole outputs 128 | # labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 129 | # bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 130 | # bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 131 | 132 | labels = labels.reshape((1, height, width, -1)) 133 | bbox_targets = bbox_targets.reshape((1, height, width, -1)) 134 | bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1)) 135 | 136 | return labels, bbox_targets, bbox_inside_weights 137 | 138 | def decode(boxes, scores, all_anchors, ih, iw): 139 | """Decode outputs into boxes 140 | Parameters 141 | --------- 142 | boxes: an array of shape (1, h, w, Ax4) 143 | scores: an array of shape (1, h, w, Ax2), 144 | all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2] 145 | 146 | Returns 147 | -------- 148 | final_boxes: of shape (R x 4) 149 | classes: of shape (R) in {0,1,2,3... 
K-1} 150 | scores: of shape (R) in [0 ~ 1] 151 | """ 152 | # h, w = boxes.shape[1], boxes.shape[2] 153 | # if all_anchors is None: 154 | # stride = 2 ** int(round(np.log2((iw + 0.0) / w))) 155 | # all_anchors = anchors_plane(h, w, stride=stride) 156 | all_anchors = all_anchors.reshape((-1, 4)) 157 | boxes = boxes.reshape((-1, 4)) 158 | scores = scores.reshape((-1, 2)) 159 | assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \ 160 | 'Anchor layer shape error %d vs %d vs %d' % (scores.shape[0], boxes.shape[0], all_anchors.shape[0]) 161 | boxes = bbox_transform_inv(all_anchors, boxes) 162 | classes = np.argmax(scores, axis=1) 163 | scores = scores[:, 1] 164 | final_boxes = boxes 165 | final_boxes = clip_boxes(final_boxes, (ih, iw)) 166 | classes = classes.astype(np.int32) 167 | return final_boxes, classes, scores 168 | 169 | def sample(boxes, scores, ih, iw, is_training): 170 | """ 171 | Sample the anchor layer outputs for the next stage (mask or roi prediction) 172 | 173 | Params 174 | ---------- 175 | boxes: of shape (? ,4) 176 | scores: foreground prob 177 | ih: image height 178 | iw: image width 179 | is_training: True when sampling for training, False for testing 180 | 181 | Returns 182 | ---------- 183 | rois: of shape (N, 4) 184 | scores: of shape (N, 1) 185 | batch_ids: 186 | """ 187 | return 188 | 189 | 190 | def _unmap(data, count, inds, fill=0): 191 | """ Unmap a subset of items (data) back to the original set of items (of 192 | size count) """ 193 | if len(data.shape) == 1: 194 | ret = np.empty((count,), dtype=np.float32) 195 | ret.fill(fill) 196 | ret[inds] = data 197 | else: 198 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 199 | ret.fill(fill) 200 | ret[inds, :] = data 201 | return ret 202 | 203 | def _compute_targets(ex_rois, gt_rois): 204 | """Compute bounding-box regression targets for an image.""" 205 | 206 | assert ex_rois.shape[0] == gt_rois.shape[0] 207 | assert ex_rois.shape[1] == 4 208 | assert gt_rois.shape[1] == 5 209 | 210 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 211 | 212 | if __name__ == '__main__': 213 | 214 | import time 215 | t = time.time() 216 | 217 | for i in range(10): 218 | cfg.FLAGS.fg_threshold = 0.1 219 | classes = np.random.randint(0, 3, (50, 1)) 220 | boxes = np.random.randint(10, 50, (50, 2)) 221 | s = np.random.randint(20, 50, (50, 2)) 222 | s = boxes + s 223 | boxes = np.concatenate((boxes, s), axis=1) 224 | gt_boxes = np.hstack((boxes, classes)) 225 | # gt_boxes = boxes 226 | rois = np.random.randint(10, 50, (20, 2)) 227 | s = np.random.randint(0, 20, (20, 2)) 228 | s = rois + s 229 | rois = np.concatenate((rois, s), axis=1) 230 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=200, width=300, stride=4) 231 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=100, width=150, stride=8) 232 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=50, width=75, stride=16) 233 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors=None, height=25, width=37, stride=32) 234 | # anchors, _, _ = anchors_plane(200, 300, stride=4, border=0) 235 | 236 | print('average time: %f' % ((time.time() - t)/10.0)) 237 | -------------------------------------------------------------------------------- /libs/layers/assign.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | from __future__ import absolute_import 4 | from 
__future__ import division 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | 9 | import libs.boxes.cython_bbox as cython_bbox 10 | import libs.configs.config_v1 as cfg 11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 12 | from libs.boxes.anchor import anchors_plane 13 | from libs.logs.log import LOG 14 | # FLAGS = tf.app.flags.FLAGS 15 | 16 | _DEBUG = False 17 | 18 | def assign_boxes(gt_boxes, min_k=2, max_k=5): 19 | """assigning boxes to layers in a pyramid according to its area 20 | Params 21 | ----- 22 | gt_boxes: of shape (N, 5), each entry is [x1, y1, x2, y2, cls] 23 | strides: the stride of each layer, like [4, 8, 16, 32] 24 | 25 | Returns 26 | ----- 27 | layer_ids: of shape (N,), each entry is a id indicating the assigned layer id 28 | """ 29 | k0 = 4 30 | if gt_boxes.size > 0: 31 | layer_ids = np.zeros((gt_boxes.shape[0], ), dtype=np.int32) 32 | ws = gt_boxes[:, 2] - gt_boxes[:, 0] 33 | hs = gt_boxes[:, 3] - gt_boxes[:, 1] 34 | areas = ws * hs 35 | k = np.floor(k0 + np.log2(np.sqrt(areas) / 224)) 36 | inds = np.where(k < min_k)[0] 37 | k[inds] = min_k 38 | inds = np.where(k > max_k)[0] 39 | k[inds] = max_k 40 | if _DEBUG: 41 | print ("### boxes and layer ids") 42 | print (np.hstack((gt_boxes[:, 0:4], k[:, np.newaxis]))) 43 | return k.astype(np.int32) 44 | 45 | else: 46 | return np.asarray([], dtype=np.int32) 47 | -------------------------------------------------------------------------------- /libs/layers/crop.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | def crop(images, boxes, batch_inds, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'): 8 | """Cropping areas of features into fixed size 9 | Params: 10 | -------- 11 | images: a 4-d Tensor of shape (N, H, W, C) 12 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2] 13 | batch_inds: 14 | 15 | Returns: 16 | -------- 17 | A Tensor of shape (N, pooled_height, pooled_width, C) 18 | """ 19 | with tf.name_scope(scope): 20 | # 21 | boxes = boxes / (stride + 0.0) 22 | boxes = tf.reshape(boxes, [-1, 4]) 23 | 24 | # normalize the boxes and swap x y dimensions 25 | shape = tf.shape(images) 26 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y) 27 | xs = boxes[:, 0] 28 | ys = boxes[:, 1] 29 | xs = xs / tf.cast(shape[2], tf.float32) 30 | ys = ys / tf.cast(shape[1], tf.float32) 31 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1) 32 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2) 33 | 34 | # if batch_inds is False: 35 | # num_boxes = tf.shape(boxes)[0] 36 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds') 37 | # batch_inds = boxes[:, 0] * 0 38 | # batch_inds = tf.cast(batch_inds, tf.int32) 39 | 40 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds]) 41 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds]) 42 | with tf.control_dependencies([assert_op, images, batch_inds]): 43 | return tf.image.crop_and_resize(images, boxes, batch_inds, 44 | [pooled_height, pooled_width], 45 | method='bilinear', 46 | name='Crop') 47 | 48 | def crop_(images, boxes, batch_inds, ih, iw, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'): 49 | """Cropping areas of features into fixed size 50 | Params: 51 | -------- 52 | images: 
a 4-d Tensor of shape (N, H, W, C) 53 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2] 54 | batch_inds: 55 | 56 | Returns: 57 | -------- 58 | A Tensor of shape (N, pooled_height, pooled_width, C) 59 | """ 60 | with tf.name_scope(scope): 61 | # 62 | boxes = boxes / (stride + 0.0) 63 | boxes = tf.reshape(boxes, [-1, 4]) 64 | 65 | # normalize the boxes and swap x y dimensions 66 | shape = tf.shape(images) 67 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y) 68 | xs = boxes[:, 0] 69 | ys = boxes[:, 1] 70 | xs = xs / tf.cast(shape[2], tf.float32) 71 | ys = ys / tf.cast(shape[1], tf.float32) 72 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1) 73 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2) 74 | 75 | # if batch_inds is False: 76 | # num_boxes = tf.shape(boxes)[0] 77 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds') 78 | # batch_inds = boxes[:, 0] * 0 79 | # batch_inds = tf.cast(batch_inds, tf.int32) 80 | 81 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds]) 82 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds]) 83 | with tf.control_dependencies([assert_op, images, batch_inds]): 84 | return [tf.image.crop_and_resize(images, boxes, batch_inds, 85 | [pooled_height, pooled_width], 86 | method='bilinear', 87 | name='Crop')] + [boxes] 88 | 89 | -------------------------------------------------------------------------------- /libs/layers/mask.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import cv2 8 | import libs.boxes.cython_bbox as cython_bbox 9 | import libs.configs.config_v1 as cfg 10 | from libs.logs.log import LOG 11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 12 | 13 | _DEBUG = False 14 | def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width): 15 | """Encode masks groundtruth into learnable targets 16 | Sample some examples 17 | 18 | Params 19 | ------ 20 | gt_masks: image_height x image_width {0, 1} matrix, of shape (G, imh, imw) 21 | gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class] 22 | rois: the bounding boxes of shape (N, 4), 23 | ## scores: scores of shape (N, 1) 24 | num_classes: K 25 | mask_height, mask_width: height and width of output masks 26 | 27 | Returns 28 | ------- 29 | # rois: boxes sampled for cropping masks, of shape (M, 4) 30 | labels: class-ids of shape (M, 1) 31 | mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) in {0, 1} values 32 | mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1} indicating which mask is sampled 33 | """ 34 | total_masks = rois.shape[0] 35 | if gt_boxes.size > 0: 36 | # B x G 37 | overlaps = cython_bbox.bbox_overlaps( 38 | np.ascontiguousarray(rois[:, 0:4], dtype=np.float), 39 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 40 | gt_assignment = overlaps.argmax(axis=1) # shape is N 41 | max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment] # N 42 | # note: this will assign every rois with a positive label 43 | # labels = gt_boxes[gt_assignment, 4] # N 44 | labels = np.zeros((total_masks, ), np.float32) 45 | labels[:] = -1 46 | 47 | # sample positive rois whose intersection is more than 0.5 48 | keep_inds = 
np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0] 49 | num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image)) 50 | if keep_inds.size > 0 and num_masks < keep_inds.size: 51 | keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False) 52 | LOG('Masks: %d of %d rois are considered positive masks. Number of masks %d'\ 53 | %(num_masks, rois.shape[0], gt_masks.shape[0])) 54 | 55 | labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1] 56 | 57 | # rois = rois[inds] 58 | # labels = labels[inds].astype(np.int32) 59 | # gt_assignment = gt_assignment[inds] 60 | 61 | # ignore rois with overlaps between fg_threshold and bg_threshold 62 | # masks are only defined on positive rois 63 | ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0] 64 | labels[ignore_inds] = -1 65 | 66 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32) 67 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32) 68 | rois[rois < 0] = 0 69 | 70 | # TODO: speed bottleneck? 71 | for i in keep_inds: 72 | roi = rois[i, :4] 73 | cropped = gt_masks[gt_assignment[i], int(roi[1]):int(roi[3])+1, int(roi[0]):int(roi[2])+1] 74 | cropped = cv2.resize(cropped, (mask_width, mask_height), interpolation=cv2.INTER_NEAREST) 75 | 76 | mask_targets[i, :, :, int(labels[i])] = cropped 77 | mask_inside_weights[i, :, :, int(labels[i])] = 1 78 | else: 79 | # there is no gt 80 | labels = np.zeros((total_masks, ), np.float32) 81 | labels[:] = -1 82 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32) 83 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32) 84 | return labels, mask_targets, mask_inside_weights 85 | 86 | def decode(mask_targets, rois, classes, ih, iw): 87 | """Decode outputs into final masks 88 | Params 89 | ------ 90 | mask_targets: of shape (N, h, w, K) 91 | rois: of shape (N, 4) [x1, y1, x2, y2] 92 | classes: of shape (N, 1) the class-id of each roi 93 | ih: image height 94 | iw: image width 95 | 96 | Returns 97 | ------ 98 | M: a painted image with all masks, of shape (height, width), in [0, K] 99 | """ 100 | Mask = np.zeros((ih, iw), dtype=np.float32) 101 | assert rois.shape[0] == mask_targets.shape[0], \ 102 | '%d rois vs %d masks' %(rois.shape[0], mask_targets.shape[0]) 103 | num = rois.shape[0] 104 | rois = clip_boxes(rois, (ih, iw)) 105 | for i in np.arange(num): 106 | k = classes[i] 107 | mask = mask_targets[i, :, :, k] 108 | h, w = rois[i, 3] - rois[i, 1] + 1, rois[i, 2] - rois[i, 0] + 1 109 | x, y = rois[i, 0], rois[i, 1] 110 | mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST) 111 | mask *= k 112 | 113 | # paint 114 | Mask[y:y+h, x:x+w] = mask 115 | 116 | return Mask 117 | 118 | 119 | 120 | if __name__ == '__main__': 121 | 122 | import time 123 | import matplotlib.pyplot as plt 124 | 125 | t = time.time() 126 | 127 | for i in range(10): 128 | cfg.FLAGS.mask_threshold = 0.2 129 | N = 50 130 | W, H = 200, 200 131 | M = 50 132 | 133 | gt_masks = np.zeros((2, H, W), dtype=np.int32) 134 | gt_masks[0, 50:150, 50:150] = 1 135 | gt_masks[1, 100:150, 50:150] = 1 136 | gt_boxes = np.asarray( 137 | [ 138 | [20, 20, 100, 100, 1], 139 | [100, 100, 180, 180, 2] 140 | ]) 141 | rois = gt_boxes[:, :4] 142 | print (rois) 143 | labels, mask_targets, mask_inside_weights = encode(gt_masks, gt_boxes, rois, 3, 7, 7) 144 | print (rois) 145 | Mask = decode(mask_targets, rois, labels.astype(np.int32), H, W) 146 | if True: 147 
| plt.figure(1) 148 | plt.imshow(Mask) 149 | plt.show() 150 | time.sleep(2) 151 | print(labels) 152 | print('average time: %f' % ((time.time() - t) / 10.0)) 153 | 154 | -------------------------------------------------------------------------------- /libs/layers/roi.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | 7 | import libs.boxes.cython_bbox as cython_bbox 8 | import libs.configs.config_v1 as cfg 9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes 10 | from libs.logs.log import LOG 11 | 12 | # FLAGS = tf.app.flags.FLAGS 13 | 14 | _DEBUG = False 15 | 16 | def encode(gt_boxes, rois, num_classes): 17 | """Matching and Encoding groundtruth boxes (gt_boxes) into learning targets to boxes 18 | Sampling 19 | Parameters 20 | --------- 21 | gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class] 22 | rois an array of shape (R x 4), [x1, y1, x2, y2] 23 | num_classes: scalar, number of classes 24 | 25 | Returns 26 | -------- 27 | labels: Nx1 array in [0, num_classes) 28 | bbox_targets: of shape (N, Kx4) regression targets 29 | bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned. 30 | """ 31 | 32 | all_rois = rois 33 | num_rois = rois.shape[0] 34 | if gt_boxes.size > 0: 35 | # R x G matrix 36 | overlaps = cython_bbox.bbox_overlaps( 37 | np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float), 38 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 39 | gt_assignment = overlaps.argmax(axis=1) # R 40 | # max_overlaps = overlaps.max(axis=1) # R 41 | max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment] 42 | # note: this will assign every rois with a positive label 43 | # labels = gt_boxes[gt_assignment, 4] 44 | labels = np.zeros([num_rois], dtype=np.float32) 45 | labels[:] = -1 46 | 47 | # if _DEBUG: 48 | # print ('gt_assignment') 49 | # print (gt_assignment) 50 | 51 | # sample rois as to 1:3 52 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0] 53 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction)) 54 | if fg_inds.size > 0 and fg_rois < fg_inds.size: 55 | fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False) 56 | labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4] 57 | 58 | # TODO: sampling strategy 59 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0] 60 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64) 61 | if bg_inds.size > 0 and bg_rois < bg_inds.size: 62 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False) 63 | labels[bg_inds] = 0 64 | 65 | # ignore rois with overlaps between fg_threshold and bg_threshold 66 | ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &\ 67 | (max_overlaps < cfg.FLAGS.fg_threshold)))[0] 68 | labels[ignore_inds] = -1 69 | 70 | keep_inds = np.append(fg_inds, bg_inds) 71 | if _DEBUG: 72 | print ('keep_inds') 73 | print (keep_inds) 74 | print ('fg_inds') 75 | print (fg_inds) 76 | print ('bg_inds') 77 | print (bg_inds) 78 | print ('bg_rois:', bg_rois) 79 | print ('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold) 80 | # print (max_overlaps) 81 | 82 | LOG('ROIEncoder: %d positive rois, %d negative rois' % (len(fg_inds), len(bg_inds))) 83 | 84 | bbox_targets, bbox_inside_weights = _compute_targets( 85 | rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4], 
labels[keep_inds], num_classes)
86 | bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
87 | bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
88 |
89 | else:
90 | # there is no gt
91 | labels = np.zeros((num_rois, ), np.float32)
92 | bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
93 | bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
94 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
95 | if bg_rois < num_rois:
96 | bg_inds = np.arange(num_rois)
97 | ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
98 | labels[ignore_inds] = -1
99 |
100 | return labels, bbox_targets, bbox_inside_weights
101 |
102 | def decode(boxes, scores, rois, ih, iw):
103 | """Decode prediction targets into boxes, keeping only the box of highest probability for each roi
104 | Parameters
105 | ---------
106 | boxes: an array of shape (R, Kx4), [x1, y1, x2, y2] repeated K times per row
107 | scores: an array of shape (R, K),
108 | rois: an array of shape (R, 4), [x1, y1, x2, y2]
109 |
110 | Returns
111 | --------
112 | final_boxes: of shape (R x 4)
113 | classes: of shape (R) in {0,1,2,3... K-1}
114 | scores: of shape (R) in [0, 1]
115 | """
116 | boxes = bbox_transform_inv(rois, deltas=boxes)
117 | classes = np.argmax(scores, axis=1)
118 | classes = classes.astype(np.int32)
119 | scores = np.max(scores, axis=1)
120 | final_boxes = np.zeros((boxes.shape[0], 4), dtype=np.float32)
121 | for i in np.arange(0, boxes.shape[0]):
122 | ind = classes[i]*4
123 | final_boxes[i, 0:4] = boxes[i, ind:ind+4]
124 | final_boxes = clip_boxes(final_boxes, (ih, iw))
125 | return final_boxes, classes, scores
126 |
127 | def _compute_targets(ex_rois, gt_rois, labels, num_classes):
128 | """Compute bounding-box regression targets for an image.
129 | This function expands those targets into the 4-of-4*K representation used
130 | by the network (i.e. only one class has non-zero targets).
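    Example (illustrative, not from the original docstring): with K = 3 classes
    and a roi labeled class 2, the four targets (dx, dy, dw, dh) produced by
    bbox_transform are written into columns 8:12 of the 4*K-wide row, and
    bbox_inside_weights is set to 1 on exactly those four columns.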
131 |
132 | Returns:
133 | bbox_target (ndarray): N x 4K blob of regression targets
134 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
135 | """
136 |
137 | assert ex_rois.shape[0] == gt_rois.shape[0]
138 | assert ex_rois.shape[1] == 4
139 | assert gt_rois.shape[1] == 4
140 |
141 | targets = bbox_transform(ex_rois, gt_rois)
142 |
143 | clss = labels
144 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
145 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
146 | inds = np.where(clss > 0)[0]
147 | for ind in inds:
148 | cls = int(clss[ind])
149 | start = 4 * cls
150 | end = start + 4
151 | bbox_targets[ind, start:end] = targets[ind, 0:4]
152 | bbox_inside_weights[ind, start:end] = 1
153 | return bbox_targets, bbox_inside_weights
154 |
155 | def _unmap(data, count, inds, fill=0):
156 | """ Unmap a subset of items (data) back to the original set of items (of
157 | size count) """
158 | if len(data.shape) == 1:
159 | ret = np.empty((count,), dtype=np.float32)
160 | ret.fill(fill)
161 | ret[inds] = data
162 | else:
163 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
164 | ret.fill(fill)
165 | ret[inds, :] = data
166 | return ret
167 |
168 | if __name__ == '__main__':
169 | cfg.FLAGS.fg_threshold = 0.1
170 | classes = np.random.randint(0, 3, (10, 1))
171 | boxes = np.random.randint(10, 50, (10, 2))
172 | s = np.random.randint(10, 20, (10, 2))
173 | s = boxes + s
174 | boxes = np.concatenate((boxes, s), axis=1)
175 | gt_boxes = np.hstack((boxes, classes))
176 | noise = np.random.randint(-3, 3, (10, 4))
177 | rois = gt_boxes[:, :4] + noise
178 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, rois, num_classes=3)
179 | print (labels)
180 | print (bbox_inside_weights)
181 |
182 | ls = np.zeros((labels.shape[0], 3))
183 | for i in range(labels.shape[0]):
184 | ls[i, int(labels[i])] = 1
185 | final_boxes, classes, scores = decode(bbox_targets, ls, rois, 100, 100)
186 | print('gt_boxes:\n', gt_boxes)
187 | print ('final boxes:\n', np.hstack((final_boxes, np.expand_dims(classes, axis=1))).astype(np.int32))
188 | # print (final_boxes.astype(np.int32))
189 |
--------------------------------------------------------------------------------
/libs/layers/sample.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 |
8 | import libs.configs.config_v1 as cfg
9 | import libs.boxes.nms_wrapper as nms_wrapper
10 | import libs.boxes.cython_bbox as cython_bbox
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 | from libs.logs.log import LOG
13 |
14 | _DEBUG = False
15 |
16 | def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
17 | """Sample boxes according to scores and some learning strategies
18 | assuming the first class is background
19 | Params:
20 | boxes: of shape (..., Ax4), each entry is [x1, y1, x2, y2], the last axis has k*4 dims
21 | scores: of shape (..., A), probs of fg, in [0, 1]
22 | """
23 | min_size = cfg.FLAGS.min_size
24 | rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
25 | pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
26 | post_nms_top_n = cfg.FLAGS.post_nms_top_n
27 |
28 | # training: 12000, 2000
29 | # testing: 6000, 400
30 | if not is_training:
31 | pre_nms_top_n = int(pre_nms_top_n / 2)
32 | post_nms_top_n = int(post_nms_top_n / 5)
33 |
34
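# Worked example (illustrative, assuming the defaults named in the comment
# above): with pre_nms_top_n = 12000 and post_nms_top_n = 2000 configured for
# training, inference keeps 12000 / 2 = 6000 proposals before NMS and
# 2000 / 5 = 400 after NMS, matching the "testing: 6000, 400" note.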
| boxes = boxes.reshape((-1, 4))
35 | scores = scores.reshape((-1, 1))
36 | assert scores.shape[0] == boxes.shape[0], "scores and boxes don't match"
37 |
38 | # filter backgrounds
39 | # Hope this will filter out most of the background anchors, since an argsort is too slow.
40 | if only_positive:
41 | keeps = np.where(scores > 0.5)[0]
42 | boxes = boxes[keeps, :]
43 | scores = scores[keeps]
44 |
45 | # filter minimum size
46 | keeps = _filter_boxes(boxes, min_size=min_size)
47 | boxes = boxes[keeps, :]
48 | scores = scores[keeps]
49 |
50 | # filter with scores
51 | order = scores.ravel().argsort()[::-1]
52 | if pre_nms_top_n > 0:
53 | order = order[:pre_nms_top_n]
54 | boxes = boxes[order, :]
55 | scores = scores[order]
56 |
57 | # filter with nms
58 | det = np.hstack((boxes, scores)).astype(np.float32)
59 | keeps = nms_wrapper.nms(det, rpn_nms_threshold)
60 |
61 | if post_nms_top_n > 0:
62 | keeps = keeps[:post_nms_top_n]
63 | boxes = boxes[keeps, :]
64 | scores = scores[keeps]
65 | batch_inds = np.zeros([boxes.shape[0]], dtype=np.int32)
66 |
67 | # # random sample boxes
68 | ## try early sample later
69 | # fg_inds = np.where(scores > 0.5)[0]
70 | # num_fgs = min(fg_inds.size, int(rois_per_image * fg_roi_fraction))
71 |
72 | if _DEBUG:
73 | LOG('SAMPLE: %d rois have been chosen' % len(scores))
74 | LOG('SAMPLE: a positive box: %d %d %d %d %.4f' % (boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3], scores[0]))
75 | LOG('SAMPLE: a negative box: %d %d %d %d %.4f' % (boxes[-1, 0], boxes[-1, 1], boxes[-1, 2], boxes[-1, 3], scores[-1]))
76 | hs = boxes[:, 3] - boxes[:, 1]
77 | ws = boxes[:, 2] - boxes[:, 0]
78 | assert min(np.min(hs), np.min(ws)) > 0, 'invalid boxes'
79 |
80 | return boxes, scores.astype(np.float32), batch_inds
81 |
82 | def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
83 | """sample boxes for refined output"""
84 | boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)
85 |
86 | if gt_boxes.size > 0:
87 | overlaps = cython_bbox.bbox_overlaps(
88 | np.ascontiguousarray(boxes[:, 0:4], dtype=np.float),
89 | np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float))
90 | gt_assignment = overlaps.argmax(axis=1) # B
91 | max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment] # B
92 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
93 | if _DEBUG and np.argmax(overlaps[fg_inds], axis=1).size < gt_boxes.size/5.0:
94 | print("gt_size")
95 | print(gt_boxes)
96 | gt_widths = (gt_boxes[:, 2] - gt_boxes[:, 0])
97 | gt_heights = (gt_boxes[:, 3] - gt_boxes[:, 1])
98 | gt_dim = np.vstack((gt_widths, gt_heights))
99 | print(np.transpose(gt_dim))
100 | #print(gt_widths)
101 | #print(gt_heights)
102 |
103 | print('SAMPLE: %d after overlaps by %s' % (len(fg_inds), cfg.FLAGS.fg_threshold))
104 | print("detected object no.")
105 | print(np.argmax(overlaps[fg_inds], axis=1))
106 | print("total object")
107 | print(gt_boxes.size/5.0)
108 |
109 | mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
110 | if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
111 | mask_fg_inds = np.random.choice(mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)
112 |
113 | if True:
114 | gt_argmax_overlaps = overlaps.argmax(axis=0) # G
115 | fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)
116 |
117 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
118 | if fg_inds.size > 0 and fg_rois < fg_inds.size:
119 | fg_inds = np.random.choice(fg_inds, size=fg_rois,
replace=False)
120 |
121 | # TODO: sampling strategy
122 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
123 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)  # 64
124 | if bg_inds.size > 0 and bg_rois < bg_inds.size:
125 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
126 |
127 | keep_inds = np.append(fg_inds, bg_inds)
128 | #print(gt_boxes[np.argmax(overlaps[fg_inds],axis=1),4])
129 | else:
130 | bg_inds = np.arange(boxes.shape[0])
131 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1-cfg.FLAGS.fg_roi_fraction)), 8)  # 64
132 | if bg_rois < bg_inds.size:
133 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
134 |
135 | keep_inds = bg_inds
136 | mask_fg_inds = np.arange(0)
137 |
138 | return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds],\
139 | boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
140 |
141 | def _jitter_boxes(boxes, jitter=0.1):
142 | """ jitter the boxes before appending them into rois
143 | """
144 | jittered_boxes = boxes.copy()
145 | ws = jittered_boxes[:, 2] - jittered_boxes[:, 0] + 1.0
146 | hs = jittered_boxes[:, 3] - jittered_boxes[:, 1] + 1.0
147 | width_offset = (np.random.rand(jittered_boxes.shape[0]) - 0.5) * jitter * ws
148 | height_offset = (np.random.rand(jittered_boxes.shape[0]) - 0.5) * jitter * hs
149 | jittered_boxes[:, 0] += width_offset
150 | jittered_boxes[:, 2] += width_offset
151 | jittered_boxes[:, 1] += height_offset
152 | jittered_boxes[:, 3] += height_offset
153 |
154 | return jittered_boxes
155 |
156 | def _filter_boxes(boxes, min_size):
157 | """Remove all boxes with any side smaller than min_size."""
158 | ws = boxes[:, 2] - boxes[:, 0] + 1
159 | hs = boxes[:, 3] - boxes[:, 1] + 1
160 | keep = np.where((ws >= min_size) & (hs >= min_size))[0]
161 | return keep
162 |
163 | def _apply_nms(boxes, scores, threshold = 0.5):
164 | """After this only positive boxes are left
165 | Applying this class-wise
166 | """
167 | num_class = scores.shape[1]
168 | assert boxes.shape[0] == scores.shape[0], \
169 | 'Shape mismatch {} vs {}'.format(boxes.shape, scores.shape)
170 |
171 | final_boxes = []
172 | final_scores = []
173 | for cls in np.arange(1, num_class):
174 | cls_boxes = boxes[:, 4*cls: 4*cls+4]
175 | cls_scores = scores[:, cls]
176 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
177 | keep = nms_wrapper.nms(dets, thresh=0.3)
178 | dets = dets[keep, :]
179 | dets = dets[np.where(dets[:, 4] > threshold)]
180 | final_boxes.append(dets[:, :4])
181 | final_scores.append(dets[:, 4])
182 |
183 | final_boxes = np.vstack(final_boxes)
184 | final_scores = np.hstack(final_scores)
185 |
186 | return final_boxes, final_scores
187 |
188 | if __name__ == '__main__':
189 | import time
190 | t = time.time()
191 |
192 | for i in range(10):
193 | N = 200000
194 | boxes = np.random.randint(0, 50, (N, 2))
195 | s = np.random.randint(10, 40, (N, 2))
196 | s = boxes + s
197 | boxes = np.hstack((boxes, s))
198 |
199 | scores = np.random.rand(N, 1)
200 | # scores_ = 1 - np.random.rand(N, 1)
201 | # scores = np.hstack((scores, scores_))
202 |
203 | boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, only_positive=False)
204 |
205 | print ('average time %f' % ((time.time() - t) / 10))
206 |
--------------------------------------------------------------------------------
/libs/layers/wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | #
Written by CharlesShang@github 4 | # -------------------------------------------------------- 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | from . import anchor 11 | from . import roi 12 | from . import mask 13 | from . import sample 14 | from . import assign 15 | from libs.boxes.anchor import anchors_plane 16 | 17 | def anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder'): 18 | 19 | with tf.name_scope(scope) as sc: 20 | labels, bbox_targets, bbox_inside_weights = \ 21 | tf.py_func(anchor.encode, 22 | [gt_boxes, all_anchors, height, width, stride], 23 | [tf.float32, tf.float32, tf.float32]) 24 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 25 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 26 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 27 | labels = tf.reshape(labels, (1, height, width, -1)) 28 | bbox_targets = tf.reshape(bbox_targets, (1, height, width, -1)) 29 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (1, height, width, -1)) 30 | 31 | return labels, bbox_targets, bbox_inside_weights 32 | 33 | 34 | def anchor_decoder(boxes, scores, all_anchors, ih, iw, scope='AnchorDecoder'): 35 | 36 | with tf.name_scope(scope) as sc: 37 | final_boxes, classes, scores = \ 38 | tf.py_func(anchor.decode, 39 | [boxes, scores, all_anchors, ih, iw], 40 | [tf.float32, tf.int32, tf.float32]) 41 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 42 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 43 | scores = tf.convert_to_tensor(scores, name='scores') 44 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 45 | classes = tf.reshape(classes, (-1, )) 46 | scores = tf.reshape(scores, (-1, )) 47 | 48 | return final_boxes, classes, scores 49 | 50 | 51 | def roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder'): 52 | 53 | with tf.name_scope(scope) as sc: 54 | labels, bbox_targets, bbox_inside_weights = \ 55 | tf.py_func(roi.encode, 56 | [gt_boxes, rois, num_classes], 57 | [tf.float32, tf.float32, tf.float32]) 58 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 59 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 60 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 61 | labels = tf.reshape(labels, (-1, )) 62 | bbox_targets = tf.reshape(bbox_targets, (-1, num_classes * 4)) 63 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (-1, num_classes * 4)) 64 | 65 | return labels, bbox_targets, bbox_inside_weights 66 | 67 | 68 | def roi_decoder(boxes, scores, rois, ih, iw, scope='ROIDecoder'): 69 | 70 | with tf.name_scope(scope) as sc: 71 | final_boxes, classes, scores = \ 72 | tf.py_func(roi.decode, 73 | [boxes, scores, rois, ih, iw], 74 | [tf.float32, tf.int32, tf.float32]) 75 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 76 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 77 | scores = tf.convert_to_tensor(scores, name='scores') 78 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 79 | 80 | return final_boxes, classes, scores 81 | 82 | def mask_encoder(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width, scope='MaskEncoder'): 83 | 84 | with tf.name_scope(scope) as sc: 85 | labels, mask_targets, mask_inside_weights = \ 86 | tf.py_func(mask.encode, 87 | [gt_masks, gt_boxes, 
rois, num_classes, mask_height, mask_width], 88 | [tf.float32, tf.int32, tf.float32]) 89 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='classes') 90 | mask_targets = tf.convert_to_tensor(mask_targets, name='mask_targets') 91 | mask_inside_weights = tf.convert_to_tensor(mask_inside_weights, name='mask_inside_weights') 92 | labels = tf.reshape(labels, (-1,)) 93 | mask_targets = tf.reshape(mask_targets, (-1, mask_height, mask_width, num_classes)) 94 | mask_inside_weights = tf.reshape(mask_inside_weights, (-1, mask_height, mask_width, num_classes)) 95 | 96 | return labels, mask_targets, mask_inside_weights 97 | 98 | def mask_decoder(mask_targets, rois, classes, ih, iw, scope='MaskDecoder'): 99 | 100 | with tf.name_scope(scope) as sc: 101 | Mask = \ 102 | tf.py_func(mask.decode, 103 | [mask_targets, rois, classes, ih, iw,], 104 | [tf.float32]) 105 | Mask = tf.convert_to_tensor(Mask, name='MaskImage') 106 | Mask = tf.reshape(Mask, (ih, iw)) 107 | 108 | return Mask 109 | 110 | 111 | def sample_wrapper(boxes, scores, is_training=True, scope='SampleBoxes'): 112 | 113 | with tf.name_scope(scope) as sc: 114 | boxes, scores, batch_inds = \ 115 | tf.py_func(sample.sample_rpn_outputs, 116 | [boxes, scores, is_training], 117 | [tf.float32, tf.float32, tf.int32]) 118 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 119 | scores = tf.convert_to_tensor(scores, name='Scores') 120 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 121 | boxes = tf.reshape(boxes, (-1, 4)) 122 | batch_inds = tf.reshape(batch_inds, [-1]) 123 | 124 | return boxes, scores, batch_inds 125 | 126 | def sample_with_gt_wrapper(boxes, scores, gt_boxes, is_training=True, scope='SampleBoxesWithGT'): 127 | 128 | with tf.name_scope(scope) as sc: 129 | boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds = \ 130 | tf.py_func(sample.sample_rpn_outputs_wrt_gt_boxes, 131 | [boxes, scores, gt_boxes, is_training], 132 | [tf.float32, tf.float32, tf.int32, tf.float32, tf.float32, tf.int32]) 133 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 134 | scores = tf.convert_to_tensor(scores, name='Scores') 135 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 136 | 137 | mask_boxes = tf.convert_to_tensor(mask_boxes, name='MaskBoxes') 138 | mask_scores = tf.convert_to_tensor(mask_scores, name='MaskScores') 139 | mask_batch_inds = tf.convert_to_tensor(mask_batch_inds, name='MaskBatchInds') 140 | 141 | return boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds 142 | 143 | def gen_all_anchors(height, width, stride, scales, scope='GenAnchors'): 144 | 145 | with tf.name_scope(scope) as sc: 146 | all_anchors = \ 147 | tf.py_func(anchors_plane, 148 | [height, width, stride, scales], 149 | [tf.float64] 150 | ) 151 | all_anchors = tf.convert_to_tensor(tf.cast(all_anchors, tf.float32), name='AllAnchors') 152 | all_anchors = tf.reshape(all_anchors, (height, width, -1)) 153 | 154 | return all_anchors 155 | 156 | def assign_boxes(gt_boxes, tensors, layers, scope='AssignGTBoxes'): 157 | 158 | with tf.name_scope(scope) as sc: 159 | min_k = layers[0] 160 | max_k = layers[-1] 161 | assigned_layers = \ 162 | tf.py_func(assign.assign_boxes, 163 | [ gt_boxes, min_k, max_k ], 164 | tf.int32) 165 | assigned_layers = tf.reshape(assigned_layers, [-1]) 166 | 167 | assigned_tensors = [] 168 | for t in tensors: 169 | split_tensors = [] 170 | for l in layers: 171 | tf.cast(l, tf.int32) 172 | inds = tf.where(tf.equal(assigned_layers, l)) 173 | inds = tf.reshape(inds, [-1]) 174 | 
split_tensors.append(tf.gather(t, inds))
175 | assigned_tensors.append(split_tensors)
176 |
177 | return assigned_tensors + [assigned_layers]
--------------------------------------------------------------------------------
/libs/logs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/logs/__init__.py
--------------------------------------------------------------------------------
/libs/logs/log.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import logging
6 | import libs.configs.config_v1 as cfg
7 |
8 | def LOG(mssg):
9 | logging.basicConfig(filename=cfg.FLAGS.train_dir + '/maskrcnn.log',
10 | level=logging.INFO,
11 | datefmt='%m/%d/%Y %I:%M:%S %p', format='%(asctime)s %(message)s')
12 | logging.info(mssg)
--------------------------------------------------------------------------------
/libs/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # build pycocotools
4 | cd datasets/pycocotools
5 | make
6 | cd -
7 |
--------------------------------------------------------------------------------
/libs/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/nets/__init__.py
--------------------------------------------------------------------------------
/libs/nets/nets_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import functools
5 |
6 | import tensorflow as tf
7 |
8 | from . import resnet_v1
9 | from .resnet_v1 import resnet_v1_50 as resnet50
10 | from .resnet_utils import resnet_arg_scope
11 | from .resnet_v1 import resnet_v1_101 as resnet101
12 |
13 | slim = tf.contrib.slim
14 |
15 | pyramid_maps = {
16 | 'resnet50': {'C1':'resnet_v1_50/conv1/Relu:0',
17 | 'C2':'resnet_v1_50/block1/unit_2/bottleneck_v1',
18 | 'C3':'resnet_v1_50/block2/unit_3/bottleneck_v1',
19 | 'C4':'resnet_v1_50/block3/unit_5/bottleneck_v1',
20 | 'C5':'resnet_v1_50/block4/unit_3/bottleneck_v1',
21 | },
22 | 'resnet101': {'C1': '', 'C2': '',
23 | 'C3': '', 'C4': '',
24 | 'C5': '',
25 | }
26 | }
27 |
28 | def get_network(name, image, weight_decay=0.000005, is_training=False):
29 |
30 | if name == 'resnet50':
31 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
32 | logits, end_points = resnet50(image, 1000, is_training=is_training)
33 |
34 | if name == 'resnet101':
35 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
36 | logits, end_points = resnet101(image, 1000, is_training=is_training)
37 |
38 | if name == 'resnext50':
39 | raise NotImplementedError('resnext50 is not supported yet')
40 |
41 | end_points['input'] = image
42 | return logits, end_points, pyramid_maps[name]
43 |
--------------------------------------------------------------------------------
/libs/nets/resnet_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains building blocks for various versions of Residual Networks. 16 | 17 | Residual networks (ResNets) were proposed in: 18 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 20 | 21 | More variants were introduced in: 22 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 23 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 24 | 25 | We can obtain different ResNet variants by changing the network depth, width, 26 | and form of residual unit. This module implements the infrastructure for 27 | building them. Concrete ResNet units and full ResNet networks are implemented in 28 | the accompanying resnet_v1.py and resnet_v2.py modules. 29 | 30 | Compared to https://github.com/KaimingHe/deep-residual-networks, in the current 31 | implementation we subsample the output activations in the last residual unit of 32 | each block, instead of subsampling the input activations in the first residual 33 | unit of each block. The two implementations give identical results but our 34 | implementation is more memory efficient. 35 | """ 36 | from __future__ import absolute_import 37 | from __future__ import division 38 | from __future__ import print_function 39 | 40 | import collections 41 | import tensorflow as tf 42 | 43 | # slim = tf.contrib.slim 44 | import tensorflow.contrib.slim as slim 45 | 46 | 47 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 48 | """A named tuple describing a ResNet block. 49 | 50 | Its parts are: 51 | scope: The scope of the `Block`. 52 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 53 | returns another `Tensor` with the output of the ResNet unit. 54 | args: A list of length equal to the number of units in the `Block`. The list 55 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 56 | block to serve as argument to unit_fn. 57 | """ 58 | 59 | 60 | def subsample(inputs, factor, scope=None): 61 | """Subsamples the input along the spatial dimensions. 62 | 63 | Args: 64 | inputs: A `Tensor` of size [batch, height_in, width_in, channels]. 65 | factor: The subsampling factor. 66 | scope: Optional variable_scope. 67 | 68 | Returns: 69 | output: A `Tensor` of size [batch, height_out, width_out, channels] with the 70 | input, either intact (if factor == 1) or subsampled (if factor > 1). 71 | """ 72 | if factor == 1: 73 | return inputs 74 | else: 75 | return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) 76 | 77 | 78 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): 79 | """Strided 2-D convolution with 'SAME' padding. 80 | 81 | When stride > 1, then we do explicit zero-padding, followed by conv2d with 82 | 'VALID' padding. 
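  Worked example (illustrative): with kernel_size=3, rate=1 and stride=2, the
  effective kernel size is 3, so pad_total = 2 and one row/column of zeros is
  added on each side. A 224x224 input is padded to 226x226, and the VALID
  convolution yields (226 - 3) // 2 + 1 = 112 outputs per dimension, i.e.
  exactly input_size / stride.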
83 | 84 | Note that 85 | 86 | net = conv2d_same(inputs, num_outputs, 3, stride=stride) 87 | 88 | is equivalent to 89 | 90 | net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME') 91 | net = subsample(net, factor=stride) 92 | 93 | whereas 94 | 95 | net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME') 96 | 97 | is different when the input's height or width is even, which is why we add the 98 | current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). 99 | 100 | Args: 101 | inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. 102 | num_outputs: An integer, the number of output filters. 103 | kernel_size: An int with the kernel_size of the filters. 104 | stride: An integer, the output stride. 105 | rate: An integer, rate for atrous convolution. 106 | scope: Scope. 107 | 108 | Returns: 109 | output: A 4-D tensor of size [batch, height_out, width_out, channels] with 110 | the convolution output. 111 | """ 112 | if stride == 1: 113 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, 114 | padding='SAME', scope=scope) 115 | else: 116 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 117 | pad_total = kernel_size_effective - 1 118 | pad_beg = pad_total // 2 119 | pad_end = pad_total - pad_beg 120 | inputs = tf.pad(inputs, 121 | [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) 122 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, 123 | rate=rate, padding='VALID', scope=scope) 124 | 125 | 126 | @slim.add_arg_scope 127 | def stack_blocks_dense(net, blocks, output_stride=None, 128 | outputs_collections=None): 129 | """Stacks ResNet `Blocks` and controls output feature density. 130 | 131 | First, this function creates scopes for the ResNet in the form of 132 | 'block_name/unit_1', 'block_name/unit_2', etc. 133 | 134 | Second, this function allows the user to explicitly control the ResNet 135 | output_stride, which is the ratio of the input to output spatial resolution. 136 | This is useful for dense prediction tasks such as semantic segmentation or 137 | object detection. 138 | 139 | Most ResNets consist of 4 ResNet blocks and subsample the activations by a 140 | factor of 2 when transitioning between consecutive ResNet blocks. This results 141 | to a nominal ResNet output_stride equal to 8. If we set the output_stride to 142 | half the nominal network stride (e.g., output_stride=4), then we compute 143 | responses twice. 144 | 145 | Control of the output feature density is implemented by atrous convolution. 146 | 147 | Args: 148 | net: A `Tensor` of size [batch, height, width, channels]. 149 | blocks: A list of length equal to the number of ResNet `Blocks`. Each 150 | element is a ResNet `Block` object describing the units in the `Block`. 151 | output_stride: If `None`, then the output will be computed at the nominal 152 | network stride. If output_stride is not `None`, it specifies the requested 153 | ratio of input to output spatial resolution, which needs to be equal to 154 | the product of unit strides from the start up to some level of the ResNet. 155 | For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, 156 | then valid values for the output_stride are 1, 2, 6, 24 or None (which 157 | is equivalent to output_stride=24). 158 | outputs_collections: Collection to add the ResNet block outputs. 159 | 160 | Returns: 161 | net: Output tensor with stride equal to the specified output_stride. 
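    For instance (an illustrative trace, not part of the original notes): with
    unit strides (1, 2, 2, 2) and output_stride=4, the first two stride-2 units
    run normally until current_stride reaches 4; each remaining stride-2 unit
    is then applied with stride=1 while the atrous rate doubles, so the
    feature resolution stays fixed at 1/4 of the input.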
162 | 163 | Raises: 164 | ValueError: If the target output_stride is not valid. 165 | """ 166 | # The current_stride variable keeps track of the effective stride of the 167 | # activations. This allows us to invoke atrous convolution whenever applying 168 | # the next residual unit would result in the activations having stride larger 169 | # than the target output_stride. 170 | current_stride = 1 171 | 172 | # The atrous convolution rate parameter. 173 | rate = 1 174 | 175 | for block in blocks: 176 | with tf.variable_scope(block.scope, 'block', [net]) as sc: 177 | for i, unit in enumerate(block.args): 178 | if output_stride is not None and current_stride > output_stride: 179 | raise ValueError('The target output_stride cannot be reached.') 180 | 181 | with tf.variable_scope('unit_%d' % (i + 1), values=[net]): 182 | unit_depth, unit_depth_bottleneck, unit_stride = unit 183 | 184 | # If we have reached the target output_stride, then we need to employ 185 | # atrous convolution with stride=1 and multiply the atrous rate by the 186 | # current unit's stride for use in subsequent layers. 187 | if output_stride is not None and current_stride == output_stride: 188 | net = block.unit_fn(net, 189 | depth=unit_depth, 190 | depth_bottleneck=unit_depth_bottleneck, 191 | stride=1, 192 | rate=rate) 193 | rate *= unit_stride 194 | 195 | else: 196 | net = block.unit_fn(net, 197 | depth=unit_depth, 198 | depth_bottleneck=unit_depth_bottleneck, 199 | stride=unit_stride, 200 | rate=1) 201 | current_stride *= unit_stride 202 | net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) 203 | 204 | if output_stride is not None and current_stride != output_stride: 205 | raise ValueError('The target output_stride cannot be reached.') 206 | 207 | return net 208 | 209 | 210 | def resnet_arg_scope(weight_decay=0.0001, 211 | batch_norm_decay=0.997, 212 | batch_norm_epsilon=1e-5, 213 | batch_norm_scale=True): 214 | """Defines the default ResNet arg scope. 215 | 216 | TODO(gpapan): The batch-normalization related default values above are 217 | appropriate for use in conjunction with the reference ResNet models 218 | released at https://github.com/KaimingHe/deep-residual-networks. When 219 | training ResNets from scratch, they might need to be tuned. 220 | 221 | Args: 222 | weight_decay: The weight decay to use for regularizing the model. 223 | batch_norm_decay: The moving average decay when estimating layer activation 224 | statistics in batch normalization. 225 | batch_norm_epsilon: Small constant to prevent division by zero when 226 | normalizing activations by their variance in batch normalization. 227 | batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the 228 | activations in the batch normalization layer. 229 | 230 | Returns: 231 | An `arg_scope` to use for the resnet models. 232 | """ 233 | batch_norm_params = { 234 | 'decay': batch_norm_decay, 235 | 'epsilon': batch_norm_epsilon, 236 | 'scale': batch_norm_scale, 237 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 238 | } 239 | 240 | with slim.arg_scope( 241 | [slim.conv2d], 242 | weights_regularizer=slim.l2_regularizer(weight_decay), 243 | weights_initializer=slim.variance_scaling_initializer(), 244 | activation_fn=tf.nn.relu, 245 | normalizer_fn=slim.batch_norm, 246 | normalizer_params=batch_norm_params): 247 | with slim.arg_scope([slim.batch_norm], **batch_norm_params): 248 | # The following implies padding='SAME' for pool1, which makes feature 249 | # alignment easier for dense prediction tasks. 
This is also used in 250 | # https://github.com/facebook/fb.resnet.torch. However the accompanying 251 | # code of 'Deep Residual Learning for Image Recognition' uses 252 | # padding='VALID' for pool1. You can switch to that choice by setting 253 | # slim.arg_scope([slim.max_pool2d], padding='VALID'). 254 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: 255 | return arg_sc 256 | -------------------------------------------------------------------------------- /libs/nets/train_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import collections 6 | import tensorflow as tf 7 | import libs.configs.config_v1 as cfg 8 | 9 | slim = tf.contrib.slim 10 | FLAGS = tf.app.flags.FLAGS 11 | 12 | def _configure_optimizer(learning_rate): 13 | """Configures the optimizer used for training. 14 | 15 | Args: 16 | learning_rate: A scalar or `Tensor` learning rate. 17 | 18 | Returns: 19 | An instance of an optimizer. 20 | 21 | Raises: 22 | ValueError: if FLAGS.optimizer is not recognized. 23 | """ 24 | if FLAGS.optimizer == 'adadelta': 25 | optimizer = tf.train.AdadeltaOptimizer( 26 | learning_rate, 27 | rho=FLAGS.adadelta_rho, 28 | epsilon=FLAGS.opt_epsilon) 29 | elif FLAGS.optimizer == 'adagrad': 30 | optimizer = tf.train.AdagradOptimizer( 31 | learning_rate, 32 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value) 33 | elif FLAGS.optimizer == 'adam': 34 | optimizer = tf.train.AdamOptimizer( 35 | learning_rate, 36 | beta1=FLAGS.adam_beta1, 37 | beta2=FLAGS.adam_beta2, 38 | epsilon=FLAGS.opt_epsilon) 39 | elif FLAGS.optimizer == 'ftrl': 40 | optimizer = tf.train.FtrlOptimizer( 41 | learning_rate, 42 | learning_rate_power=FLAGS.ftrl_learning_rate_power, 43 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, 44 | l1_regularization_strength=FLAGS.ftrl_l1, 45 | l2_regularization_strength=FLAGS.ftrl_l2) 46 | elif FLAGS.optimizer == 'momentum': 47 | optimizer = tf.train.MomentumOptimizer( 48 | learning_rate, 49 | momentum=FLAGS.momentum, 50 | name='Momentum') 51 | elif FLAGS.optimizer == 'rmsprop': 52 | optimizer = tf.train.RMSPropOptimizer( 53 | learning_rate, 54 | decay=FLAGS.rmsprop_decay, 55 | momentum=FLAGS.rmsprop_momentum, 56 | epsilon=FLAGS.opt_epsilon) 57 | elif FLAGS.optimizer == 'sgd': 58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 59 | else: 60 | raise ValueError('Optimizer [%s] was not recognized', FLAGS.optimizer) 61 | return optimizer 62 | 63 | def _configure_learning_rate(num_samples_per_epoch, global_step): 64 | """Configures the learning rate. 65 | 66 | Args: 67 | num_samples_per_epoch: The number of samples in each epoch of training. 68 | global_step: The global_step tensor. 69 | 70 | Returns: 71 | A `Tensor` representing the learning rate. 
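  Worked example (illustrative numbers, not from the config): with
  num_samples_per_epoch=120000, FLAGS.batch_size=1 and
  FLAGS.num_epochs_per_decay=2, decay_steps = int(120000 / 1 * 2) = 240000,
  so the 'exponential' schedule (staircase=True below) multiplies the rate by
  FLAGS.learning_rate_decay_factor once every 240000 global steps.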
72 | 73 | Raises: 74 | ValueError: if 75 | """ 76 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size * 77 | FLAGS.num_epochs_per_decay) 78 | if FLAGS.sync_replicas: 79 | decay_steps /= FLAGS.replicas_to_aggregate 80 | 81 | if FLAGS.learning_rate_decay_type == 'exponential': 82 | return tf.train.exponential_decay(FLAGS.learning_rate, 83 | global_step, 84 | decay_steps, 85 | FLAGS.learning_rate_decay_factor, 86 | staircase=True, 87 | name='exponential_decay_learning_rate') 88 | elif FLAGS.learning_rate_decay_type == 'fixed': 89 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate') 90 | elif FLAGS.learning_rate_decay_type == 'polynomial': 91 | return tf.train.polynomial_decay(FLAGS.learning_rate, 92 | global_step, 93 | decay_steps, 94 | FLAGS.end_learning_rate, 95 | power=0.9, 96 | cycle=False, 97 | name='polynomial_decay_learning_rate') 98 | else: 99 | raise ValueError('learning_rate_decay_type [%s] was not recognized', 100 | FLAGS.learning_rate_decay_type) 101 | 102 | def _get_variables_to_train(): 103 | """Returns a list of variables to train. 104 | 105 | Returns: 106 | A list of variables to train by the optimizer. 107 | """ 108 | if FLAGS.trainable_scopes is None: 109 | return tf.trainable_variables() 110 | else: 111 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')] 112 | 113 | variables_to_train = [] 114 | for scope in scopes: 115 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 116 | variables_to_train.extend(variables) 117 | return variables_to_train 118 | 119 | def _get_init_fn(): 120 | """Returns a function run by the chief worker to warm-start the training. 121 | 122 | Note that the init_fn is only run when initializing the model during the very 123 | first global step. 124 | 125 | Returns: 126 | An init function run by the supervisor. 127 | """ 128 | if FLAGS.checkpoint_path is None: 129 | return None 130 | 131 | # Warn the user if a checkpoint exists in the train_dir. Then we'll be 132 | # ignoring the checkpoint anyway. 
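# Usage sketch (illustrative, assuming slim.learning.train drives training):
#
#   init_fn = _get_init_fn()
#   slim.learning.train(train_op, logdir=FLAGS.train_dir, init_fn=init_fn)
#
# The returned callable restores the checkpoint exactly once, when the model
# is initialized at the very first global step.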
133 | if tf.train.latest_checkpoint(FLAGS.train_dir): 134 | tf.logging.info( 135 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 136 | % FLAGS.train_dir) 137 | return None 138 | 139 | exclusions = [] 140 | if FLAGS.checkpoint_exclude_scopes: 141 | exclusions = [scope.strip() 142 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 143 | 144 | # TODO(sguada) variables.filter_variables() 145 | variables_to_restore = [] 146 | for var in slim.get_model_variables(): 147 | excluded = False 148 | for exclusion in exclusions: 149 | if var.op.name.startswith(exclusion): 150 | excluded = True 151 | break 152 | if not excluded: 153 | variables_to_restore.append(var) 154 | 155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 157 | else: 158 | checkpoint_path = FLAGS.checkpoint_path 159 | 160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 161 | 162 | return slim.assign_from_checkpoint_fn( 163 | checkpoint_path, 164 | variables_to_restore, 165 | ignore_missing_vars=FLAGS.ignore_missing_vars) 166 | 167 | def get_var_list_to_restore(): 168 | """Choosing which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 169 | 170 | variables_to_restore = [] 171 | if FLAGS.checkpoint_exclude_scopes is not None: 172 | exclusions = [scope.strip() 173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 174 | 175 | # build restore list 176 | for var in tf.model_variables(): 177 | excluded = False 178 | for exclusion in exclusions: 179 | if var.name.startswith(exclusion): 180 | excluded = True 181 | break 182 | if not excluded: 183 | variables_to_restore.append(var) 184 | else: 185 | variables_to_restore = tf.model_variables() 186 | 187 | variables_to_restore_final = [] 188 | if FLAGS.checkpoint_include_scopes is not None: 189 | includes = [ 190 | scope.strip() 191 | for scope in FLAGS.checkpoint_include_scopes.split(',') 192 | ] 193 | for var in variables_to_restore: 194 | included = False 195 | for include in includes: 196 | if var.name.startswith(include): 197 | included = True 198 | break 199 | if included: 200 | variables_to_restore_final.append(var) 201 | else: 202 | variables_to_restore_final = variables_to_restore 203 | 204 | return variables_to_restore_final 205 | -------------------------------------------------------------------------------- /libs/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /libs/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/nms/__init__.py -------------------------------------------------------------------------------- /libs/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b 
else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /libs/nms/nms_kernel.cu: 
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/libs/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
/libs/preprocessings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/preprocessings/__init__.py
--------------------------------------------------------------------------------
/libs/preprocessings/coco_v1.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import time 9 | import tensorflow as tf 10 | import libs.configs.config_v1 as cfg 11 | from . import utils as preprocess_utils 12 | 13 | FLAGS = tf.app.flags.FLAGS 14 | 15 | def preprocess_image(image, gt_boxes, gt_masks, is_training=False): 16 | """preprocess image for coco 17 | 1. random flipping 18 | 2. min size resizing 19 | 3. zero mean 20 | 4. ... 21 | """ 22 | if is_training: 23 | return preprocess_for_training(image, gt_boxes, gt_masks) 24 | else: 25 | return preprocess_for_test(image, gt_boxes, gt_masks) 26 | 27 | 28 | def preprocess_for_training(image, gt_boxes, gt_masks): 29 | 30 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 31 | ## random flipping 32 | coin = tf.to_float(tf.random_uniform([1]))[0] 33 | image, gt_boxes, gt_masks =\ 34 | tf.cond(tf.greater_equal(coin, 0.5), 35 | lambda: (preprocess_utils.flip_image(image), 36 | preprocess_utils.flip_gt_boxes(gt_boxes, ih, iw), 37 | preprocess_utils.flip_gt_masks(gt_masks)), 38 | lambda: (image, gt_boxes, gt_masks)) 39 | 40 | ## min size resizing 41 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) 42 | image = tf.expand_dims(image, 0) 43 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False) 44 | image = tf.squeeze(image, axis=[0]) 45 | 46 | gt_masks = tf.expand_dims(gt_masks, -1) 47 | gt_masks = tf.cast(gt_masks, tf.float32) 48 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 49 | gt_masks = tf.cast(gt_masks, tf.int32) 50 | gt_masks = tf.squeeze(gt_masks, axis=[-1]) 51 | 52 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 53 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 54 | 55 | ## random flip image 56 | # val_lr = tf.to_float(tf.random_uniform([1]))[0] 57 | # image = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_image(image), lambda: image) 58 | # gt_masks = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_masks(gt_masks), lambda: gt_masks) 59 | # gt_boxes = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_boxes(gt_boxes, new_ih, new_iw), lambda: gt_boxes) 60 | 61 | ## zero mean image 62 | image = tf.cast(image, tf.float32) 63 | image = image / 256.0 64 | image = (image - 0.5) * 2.0 65 | image = tf.expand_dims(image, axis=0) 66 | 67 | ## rgb to bgr 68 | image = tf.reverse(image, axis=[-1]) 69 | 70 | return image, gt_boxes, gt_masks 71 | 72 | def preprocess_for_test(image, gt_boxes, gt_masks): 73 | 74 | 75 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 76 | 77 | ## min size resizing 78 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) 79 | image = tf.expand_dims(image, 0) 80 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False) 81 | image = tf.squeeze(image, axis=[0]) 82 | 83 | gt_masks = tf.expand_dims(gt_masks, -1) 84 | gt_masks = tf.cast(gt_masks, tf.float32) 85 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 86 | gt_masks = tf.cast(gt_masks, tf.int32) 87 | gt_masks = tf.squeeze(gt_masks, axis=[-1]) 88 | 89 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 90 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 91 | 92 | ## zero mean image 93 | image = tf.cast(image, 
tf.float32) 94 | image = image / 256.0 95 | image = (image - 0.5) * 2.0 96 | image = tf.expand_dims(image, axis=0) 97 | 98 | ## rgb to bgr 99 | image = tf.reverse(image, axis=[-1]) 100 | 101 | return image, gt_boxes, gt_masks 102 | -------------------------------------------------------------------------------- /libs/preprocessings/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | from tensorflow.python.ops import control_flow_ops 8 | from tensorflow.contrib import slim 9 | 10 | 11 | def _crop(image, offset_height, offset_width, crop_height, crop_width): 12 | original_shape = tf.shape(image) 13 | 14 | rank_assertion = tf.Assert( 15 | tf.equal(tf.rank(image), 3), 16 | ['Rank of image must be equal to 3.']) 17 | cropped_shape = control_flow_ops.with_dependencies( 18 | [rank_assertion], 19 | tf.stack([crop_height, crop_width, original_shape[2]])) 20 | 21 | size_assertion = tf.Assert( 22 | tf.logical_and( 23 | tf.greater_equal(original_shape[0], crop_height), 24 | tf.greater_equal(original_shape[1], crop_width)), 25 | ['Crop size greater than the image size.']) 26 | 27 | offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) 28 | 29 | # Use tf.slice instead of crop_to_bounding box as it accepts tensors to 30 | # define the crop size. 31 | image = control_flow_ops.with_dependencies( 32 | [size_assertion], 33 | tf.slice(image, offsets, cropped_shape)) 34 | return tf.reshape(image, cropped_shape) 35 | 36 | 37 | def _random_crop(image_list, label_list, crop_height, crop_width): 38 | if not image_list: 39 | raise ValueError('Empty image_list.') 40 | 41 | # Compute the rank assertions. 42 | rank_assertions = [] 43 | for i in range(len(image_list)): 44 | image_rank = tf.rank(image_list[i]) 45 | rank_assert = tf.Assert( 46 | tf.equal(image_rank, 3), 47 | ['Wrong rank for tensor %s [expected] [actual]', 48 | image_list[i].name, 3, image_rank]) 49 | rank_assertions.append(rank_assert) 50 | 51 | image_shape = control_flow_ops.with_dependencies( 52 | [rank_assertions[0]], 53 | tf.shape(image_list[0])) 54 | image_height = image_shape[0] 55 | image_width = image_shape[1] 56 | crop_size_assert = tf.Assert( 57 | tf.logical_and( 58 | tf.greater_equal(image_height, crop_height), 59 | tf.greater_equal(image_width, crop_width)), 60 | ['Crop size greater than the image size.', image_height, image_width, crop_height, crop_width]) 61 | 62 | asserts = [rank_assertions[0], crop_size_assert] 63 | 64 | for i in range(1, len(image_list)): 65 | image = image_list[i] 66 | asserts.append(rank_assertions[i]) 67 | shape = control_flow_ops.with_dependencies([rank_assertions[i]], 68 | tf.shape(image)) 69 | height = shape[0] 70 | width = shape[1] 71 | 72 | height_assert = tf.Assert( 73 | tf.equal(height, image_height), 74 | ['Wrong height for tensor %s [expected][actual]', 75 | image.name, height, image_height]) 76 | width_assert = tf.Assert( 77 | tf.equal(width, image_width), 78 | ['Wrong width for tensor %s [expected][actual]', 79 | image.name, width, image_width]) 80 | asserts.extend([height_assert, width_assert]) 81 | 82 | # Create a random bounding box. 83 | # 84 | # Use tf.random_uniform and not numpy.random.rand as doing the former would 85 | # generate random numbers at graph eval time, unlike the latter which 86 | # generates random numbers at graph definition time. 
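# Note: maxval is exclusive for integer-dtype tf.random_uniform, which is why
# the bounds below add 1 — offsets then cover the full valid range
# [0, image_height - crop_height] and [0, image_width - crop_width].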
87 | max_offset_height = control_flow_ops.with_dependencies( 88 | asserts, tf.reshape(image_height - crop_height + 1, [])) 89 | max_offset_width = control_flow_ops.with_dependencies( 90 | asserts, tf.reshape(image_width - crop_width + 1, [])) 91 | offset_height = tf.random_uniform( 92 | [], maxval=max_offset_height, dtype=tf.int32) 93 | offset_width = tf.random_uniform( 94 | [], maxval=max_offset_width, dtype=tf.int32) 95 | 96 | cropped_images = [_crop(image, offset_height, offset_width, 97 | crop_height, crop_width) for image in image_list] 98 | cropped_labels = [_crop(label, offset_height, offset_width, 99 | crop_height, crop_width) for label in label_list] 100 | return cropped_images, cropped_labels 101 | 102 | 103 | def _central_crop(image_list, label_list, crop_height, crop_width): 104 | output_images = [] 105 | output_labels = [] 106 | for image, label in zip(image_list, label_list): 107 | image_height = tf.shape(image)[0] 108 | image_width = tf.shape(image)[1] 109 | 110 | offset_height = (image_height - crop_height) / 2 111 | offset_width = (image_width - crop_width) / 2 112 | 113 | output_images.append(_crop(image, offset_height, offset_width, 114 | crop_height, crop_width)) 115 | output_labels.append(_crop(label, offset_height, offset_width, 116 | crop_height, crop_width)) 117 | return output_images, output_labels 118 | 119 | 120 | def _smallest_size_at_least(height, width, smallest_side): 121 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 122 | 123 | height = tf.to_float(height) 124 | width = tf.to_float(width) 125 | smallest_side = tf.to_float(smallest_side) 126 | 127 | scale = tf.cond(tf.greater(height, width), 128 | lambda: smallest_side / width, 129 | lambda: smallest_side / height) 130 | new_height = tf.to_int32(height * scale) 131 | new_width = tf.to_int32(width * scale) 132 | return new_height, new_width 133 | 134 | def _aspect_preserving_resize(image, label, smallest_side): 135 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 136 | 137 | shape = tf.shape(image) 138 | height = shape[0] 139 | width = shape[1] 140 | new_height, new_width = _smallest_size_at_least(height, width, smallest_side) 141 | 142 | image = tf.expand_dims(image, 0) 143 | resized_image = tf.image.resize_bilinear(image, [new_height, new_width], 144 | align_corners=False) 145 | resized_image = tf.squeeze(resized_image, axis=[0]) 146 | resized_image.set_shape([None, None, 3]) 147 | 148 | label = tf.expand_dims(label, 0) 149 | resized_label = tf.image.resize_nearest_neighbor(label, [new_height, new_width], 150 | align_corners=False) 151 | resized_label = tf.squeeze(resized_label, axis=[0]) 152 | resized_label.set_shape([None, None, 1]) 153 | return resized_image, resized_label 154 | 155 | def flip_gt_boxes(gt_boxes, ih, iw): 156 | x1s, y1s, x2s, y2s, cls = \ 157 | gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2], gt_boxes[:, 3], gt_boxes[:, 4] 158 | x1s = tf.to_float(iw) - x1s 159 | x2s = tf.to_float(iw) - x2s 160 | return tf.concat(values=(x2s[:, tf.newaxis], 161 | y1s[:, tf.newaxis], 162 | x1s[:, tf.newaxis], 163 | y2s[:, tf.newaxis], 164 | cls[:, tf.newaxis]), axis=1) 165 | 166 | def flip_gt_masks(gt_masks): 167 | return tf.reverse(gt_masks, axis=[2]) 168 | 169 | def flip_image(image): 170 | return tf.reverse(image, axis=[1]) 171 | 172 | def resize_gt_boxes(gt_boxes, scale_ratio): 173 | xys, cls = \ 174 | gt_boxes[:, 0:4], gt_boxes[:, 4] 175 | xys = xys * scale_ratio 176 | return tf.concat(values=(xys, cls[:, tf.newaxis]), axis=1) 177 | 178 | 
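The geometry helpers above are easiest to sanity-check with concrete numbers. A minimal NumPy sketch (hypothetical values, mirroring the arithmetic of _smallest_size_at_least and flip_gt_boxes above; not part of the repo):

import numpy as np

# Aspect-preserving resize: scale so the smaller side becomes `smallest_side`
# (same arithmetic as _smallest_size_at_least).
height, width, smallest_side = 480.0, 640.0, 600.0
scale = smallest_side / height if height < width else smallest_side / width
new_height, new_width = int(height * scale), int(width * scale)  # -> 600, 800

# Horizontal flip of one [x1, y1, x2, y2, cls] box on a `width`-pixel image
# (same reflect-and-swap as flip_gt_boxes: new box is (iw - x2, y1, iw - x1, y2)).
box = np.array([50., 60., 120., 200., 3.])
flipped = np.array([width - box[2], box[1], width - box[0], box[3], box[4]])
# -> [520., 60., 590., 200., 3.]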
-------------------------------------------------------------------------------- /libs/setup.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 |     "Find a file in a search path"
17 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 |     for dir in path.split(os.pathsep):
19 |         binpath = pjoin(dir, name)
20 |         if os.path.exists(binpath):
21 |             return os.path.abspath(binpath)
22 |     return None
23 |
24 | def locate_cuda():
25 |     """Locate the CUDA environment on the system
26 |
27 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 |     and values giving the absolute path to each directory.
29 |
30 |     Starts by looking for the CUDAHOME env variable. If not found, everything
31 |     is based on finding 'nvcc' in the PATH.
32 |     """
33 |
34 |     # first check if the CUDAHOME env variable is in use
35 |     if 'CUDAHOME' in os.environ:
36 |         home = os.environ['CUDAHOME']
37 |         nvcc = pjoin(home, 'bin', 'nvcc')
38 |     else:
39 |         # otherwise, search the PATH for NVCC
40 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 |         if nvcc is None:
43 |             raise EnvironmentError('The nvcc binary could not be '
44 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 |         home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 |     cudaconfig = {'home':home, 'nvcc':nvcc,
48 |                   'include': pjoin(home, 'include'),
49 |                   'lib64': pjoin(home, 'lib64')}
50 |     for k, v in cudaconfig.iteritems():
51 |         if not os.path.exists(v):
52 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 |     return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 |     numpy_include = np.get_include()
60 | except AttributeError:
61 |     numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 |     """inject deep into distutils to customize how the dispatch
65 |     to gcc/nvcc works.
66 |
67 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
68 |     injected in, and still have the right customizations (i.e.
69 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
70 |     the OO route, I have this. Note, it's kind of like a weird functional
71 |     subclassing going on."""
72 |
73 |     # tell the compiler it can process .cu
74 |     self.src_extensions.append('.cu')
75 |
76 |     # save references to the default compiler_so and _compile methods
77 |     default_compiler_so = self.compiler_so
78 |     super = self._compile
79 |
80 |     # now redefine the _compile method. This gets executed for each
81 |     # object but distutils doesn't have the ability to change compilers
82 |     # based on source extension: we add it.
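# In short: sources ending in .cu are handed to nvcc with the 'nvcc' entry of
# extra_compile_args, while every other source keeps the default compiler and
# the 'gcc' entry (see the ext_modules definitions below).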
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print extra_postargs 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | 102 | # run the customize_compiler 103 | class custom_build_ext(build_ext): 104 | def build_extensions(self): 105 | customize_compiler_for_nvcc(self.compiler) 106 | build_ext.build_extensions(self) 107 | 108 | ext_modules = [ 109 | Extension( 110 | "boxes.cython_bbox", 111 | ["boxes/bbox.pyx"], 112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 113 | include_dirs = [numpy_include] 114 | ), 115 | Extension( 116 | "boxes.cython_anchor", 117 | ["boxes/cython_anchor.pyx"], 118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 119 | include_dirs = [numpy_include] 120 | ), 121 | Extension( 122 | "boxes.cython_bbox_transform", 123 | ["boxes/cython_bbox_transform.pyx"], 124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 125 | include_dirs=[numpy_include] 126 | ), 127 | Extension( 128 | "boxes.cython_nms", 129 | ["boxes/nms.pyx"], 130 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 131 | include_dirs = [numpy_include] 132 | ), 133 | Extension( 134 | "nms.cpu_nms", 135 | ["nms/cpu_nms.pyx"], 136 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 137 | include_dirs = [numpy_include] 138 | ), 139 | Extension( 140 | 'nms.gpu_nms', 141 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 142 | library_dirs=[CUDA['lib64']], 143 | libraries=['cudart'], 144 | language='c++', 145 | runtime_library_dirs=[CUDA['lib64']], 146 | # this syntax is specific to this build system 147 | # we're only going to use certain compiler args with nvcc and not with gcc 148 | # the implementation of this trick is in customize_compiler() below 149 | extra_compile_args={'gcc': ["-Wno-unused-function"], 150 | 'nvcc': ['-arch=sm_52', 151 | '--ptxas-options=-v', 152 | '-c', 153 | '--compiler-options', 154 | "'-fPIC'"]}, 155 | include_dirs = [numpy_include, CUDA['include']] 156 | ), 157 | ] 158 | 159 | setup( 160 | name='fast_rcnn', 161 | ext_modules=ext_modules, 162 | # inject our custom trigger 163 | cmdclass={'build_ext': custom_build_ext}, 164 | ) 165 | -------------------------------------------------------------------------------- /libs/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/libs/visualization/__init__.py -------------------------------------------------------------------------------- /libs/visualization/pil_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 4 | 5 | FLAGS = tf.app.flags.FLAGS 6 | _DEBUG = False 7 | 8 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None): 9 | 
#print("image") 10 | #print(image) 11 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0) 12 | norm_image = np.uint8(image/0.1*127.0 + 127.0) 13 | #print("norm_image") 14 | #print(norm_image) 15 | source_img = Image.fromarray(norm_image) 16 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG') 17 | 18 | def draw_bbox(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None): 19 | #print(prob[:,label]) 20 | source_img = Image.fromarray(image) 21 | b, g, r = source_img.split() 22 | source_img = Image.merge("RGB", (r, g, b)) 23 | draw = ImageDraw.Draw(source_img) 24 | color = '#0000ff' 25 | if bbox is not None: 26 | for i, box in enumerate(bbox): 27 | if label is not None: 28 | if prob is not None: 29 | if (prob[i,label[i]] > 0.5) and (label[i] > 0): 30 | if gt_label is not None: 31 | text = cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]) 32 | if label[i] != gt_label[i]: 33 | color = '#ff0000'#draw.text((2+bbox[i,0], 2+bbox[i,1]), cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]), fill='#ff0000') 34 | else: 35 | color = '#0000ff' 36 | else: 37 | text = cat_id_to_cls_name(label[i]) 38 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color) 39 | if _DEBUG is True: 40 | print("plot",label[i], prob[i,label[i]]) 41 | draw.rectangle(box,fill=None,outline=color) 42 | else: 43 | if _DEBUG is True: 44 | print("skip",label[i], prob[i,label[i]]) 45 | else: 46 | text = cat_id_to_cls_name(label[i]) 47 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color) 48 | draw.rectangle(box,fill=None,outline=color) 49 | 50 | 51 | return source_img.save(FLAGS.train_dir + '/est_imgs/test_' + name + '_' + str(step) +'.jpg', 'JPEG') 52 | 53 | def cat_id_to_cls_name(catId): 54 | cls_name = np.array([ 'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 55 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 56 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 57 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 58 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 59 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 60 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 61 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 62 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 63 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 64 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 65 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 66 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 67 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']) 68 | return cls_name[catId] -------------------------------------------------------------------------------- /libs/visualization/summary_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def visualize_input(boxes, image, masks): 5 | image_sum_sample = image[:1] 6 | visualize_masks(masks, "input_image_gt_mask") 7 | visualize_bb(image, boxes, "input_image_gt_bb") 8 | visualize_input_image(image_sum_sample) 9 | 10 | 11 | def visualize_rpn_predictions(boxes, image): 12 | image_sum_sample = image[:1] 13 | visualize_bb(image_sum_sample, boxes, "rpn_pred_bb") 14 | 15 | # TODO: Present all masks in different colors 16 | def visualize_masks(masks, name): 17 | masks = tf.cast(masks, tf.float32) 
18 | tf.summary.image(name=name, tensor=masks, max_outputs=1) 19 | 20 | 21 | def visualize_bb(image, boxes, name): 22 | image_sum_sample_shape = tf.shape(image)[1:] 23 | gt_x_min = boxes[:, 0] / tf.cast(image_sum_sample_shape[1], tf.float32) 24 | gt_y_min = boxes[:, 1] / tf.cast(image_sum_sample_shape[0], tf.float32) 25 | gt_x_max = boxes[:, 2] / tf.cast(image_sum_sample_shape[1], tf.float32) 26 | gt_y_max = boxes[:, 3] / tf.cast(image_sum_sample_shape[0], tf.float32) 27 | bb = tf.stack([gt_y_min, gt_x_min, gt_y_max, gt_x_max], axis=1) 28 | tf.summary.image(name=name, 29 | tensor=tf.image.draw_bounding_boxes(image, tf.expand_dims(bb, 0), name=None), 30 | max_outputs=1) 31 | 32 | 33 | def visualize_input_image(image): 34 | tf.summary.image(name="input_image", tensor=image, max_outputs=1) 35 | 36 | 37 | def visualize_final_predictions(boxes, image, masks): 38 | visualize_masks(masks, "pred_mask") 39 | visualize_bb(image, boxes, "final_bb_pred") 40 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from . import train_utils 5 | -------------------------------------------------------------------------------- /train/train_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import collections 8 | import tensorflow as tf 9 | import libs.configs.config_v1 as cfg 10 | 11 | slim = tf.contrib.slim 12 | FLAGS = tf.app.flags.FLAGS 13 | 14 | def _configure_optimizer(learning_rate): 15 | """Configures the optimizer used for training. 16 | 17 | Args: 18 | learning_rate: A scalar or `Tensor` learning rate. 19 | 20 | Returns: 21 | An instance of an optimizer. 22 | 23 | Raises: 24 | ValueError: if FLAGS.optimizer is not recognized. 
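
  Example (assuming FLAGS.optimizer == 'momentum' and FLAGS.momentum is set):
    optimizer = _configure_optimizer(learning_rate=0.001)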
25 | """ 26 | if FLAGS.optimizer == 'adadelta': 27 | optimizer = tf.train.AdadeltaOptimizer( 28 | learning_rate, 29 | rho=FLAGS.adadelta_rho, 30 | epsilon=FLAGS.opt_epsilon) 31 | elif FLAGS.optimizer == 'adagrad': 32 | optimizer = tf.train.AdagradOptimizer( 33 | learning_rate, 34 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value) 35 | elif FLAGS.optimizer == 'adam': 36 | optimizer = tf.train.AdamOptimizer( 37 | learning_rate, 38 | beta1=FLAGS.adam_beta1, 39 | beta2=FLAGS.adam_beta2, 40 | epsilon=FLAGS.opt_epsilon) 41 | elif FLAGS.optimizer == 'ftrl': 42 | optimizer = tf.train.FtrlOptimizer( 43 | learning_rate, 44 | learning_rate_power=FLAGS.ftrl_learning_rate_power, 45 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, 46 | l1_regularization_strength=FLAGS.ftrl_l1, 47 | l2_regularization_strength=FLAGS.ftrl_l2) 48 | elif FLAGS.optimizer == 'momentum': 49 | optimizer = tf.train.MomentumOptimizer( 50 | learning_rate, 51 | momentum=FLAGS.momentum, 52 | name='Momentum') 53 | elif FLAGS.optimizer == 'rmsprop': 54 | optimizer = tf.train.RMSPropOptimizer( 55 | learning_rate, 56 | decay=FLAGS.rmsprop_decay, 57 | momentum=FLAGS.rmsprop_momentum, 58 | epsilon=FLAGS.opt_epsilon) 59 | elif FLAGS.optimizer == 'sgd': 60 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 61 | else: 62 | raise ValueError('Optimizer [%s] was not recognized', FLAGS.optimizer) 63 | return optimizer 64 | 65 | def _configure_learning_rate(num_samples_per_epoch, global_step): 66 | """Configures the learning rate. 67 | 68 | Args: 69 | num_samples_per_epoch: The number of samples in each epoch of training. 70 | global_step: The global_step tensor. 71 | 72 | Returns: 73 | A `Tensor` representing the learning rate. 74 | 75 | Raises: 76 | ValueError: if 77 | """ 78 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size * 79 | FLAGS.num_epochs_per_decay) 80 | if FLAGS.sync_replicas: 81 | decay_steps /= FLAGS.replicas_to_aggregate 82 | 83 | if FLAGS.learning_rate_decay_type == 'exponential': 84 | return tf.train.exponential_decay(FLAGS.learning_rate, 85 | global_step, 86 | decay_steps, 87 | FLAGS.learning_rate_decay_factor, 88 | staircase=True, 89 | name='exponential_decay_learning_rate') 90 | elif FLAGS.learning_rate_decay_type == 'fixed': 91 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate') 92 | elif FLAGS.learning_rate_decay_type == 'polynomial': 93 | return tf.train.polynomial_decay(FLAGS.learning_rate, 94 | global_step, 95 | decay_steps, 96 | FLAGS.end_learning_rate, 97 | power=0.9, 98 | cycle=False, 99 | name='polynomial_decay_learning_rate') 100 | else: 101 | raise ValueError('learning_rate_decay_type [%s] was not recognized', 102 | FLAGS.learning_rate_decay_type) 103 | 104 | def _get_variables_to_train(): 105 | """Returns a list of variables to train. 106 | 107 | Returns: 108 | A list of variables to train by the optimizer. 109 | """ 110 | if FLAGS.trainable_scopes is None: 111 | return tf.trainable_variables() 112 | else: 113 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')] 114 | 115 | variables_to_train = [] 116 | for scope in scopes: 117 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) 118 | variables_to_train.extend(variables) 119 | return variables_to_train 120 | 121 | def _get_init_fn(): 122 | """Returns a function run by the chief worker to warm-start the training. 123 | 124 | Note that the init_fn is only run when initializing the model during the very 125 | first global step. 
126 | 127 | Returns: 128 | An init function run by the supervisor. 129 | """ 130 | if FLAGS.checkpoint_path is None: 131 | return None 132 | 133 | # Warn the user if a checkpoint exists in the train_dir. Then we'll 134 | # ignore the checkpoint anyway. 135 | if tf.train.latest_checkpoint(FLAGS.train_dir): 136 | tf.logging.info( 137 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 138 | % FLAGS.train_dir) 139 | return None 140 | 141 | exclusions = [] 142 | if FLAGS.checkpoint_exclude_scopes: 143 | exclusions = [scope.strip() 144 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 145 | 146 | # TODO(sguada) variables.filter_variables() 147 | variables_to_restore = [] 148 | for var in slim.get_model_variables(): 149 | for exclusion in exclusions: 150 | if var.op.name.startswith(exclusion): 151 | break 152 | else: 153 | variables_to_restore.append(var) 154 | 155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 157 | else: 158 | checkpoint_path = FLAGS.checkpoint_path 159 | 160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 161 | 162 | return slim.assign_from_checkpoint_fn( 163 | checkpoint_path, 164 | variables_to_restore, 165 | ignore_missing_vars=FLAGS.ignore_missing_vars) 166 | 167 | def get_var_list_to_restore(): 168 | """Choose which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 169 | 170 | variables_to_restore = [] 171 | if FLAGS.checkpoint_exclude_scopes is not None: 172 | exclusions = [scope.strip() 173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 174 | 175 | # build restore list 176 | for var in tf.model_variables(): 177 | for exclusion in exclusions: 178 | if var.name.startswith(exclusion): 179 | break 180 | else: 181 | variables_to_restore.append(var) 182 | else: 183 | variables_to_restore = tf.model_variables() 184 | 185 | variables_to_restore_final = [] 186 | if FLAGS.checkpoint_include_scopes is not None: 187 | includes = [ 188 | scope.strip() 189 | for scope in FLAGS.checkpoint_include_scopes.split(',') 190 | ] 191 | for var in variables_to_restore: 192 | for include in includes: 193 | if var.name.startswith(include): 194 | variables_to_restore_final.append(var) 195 | break 196 | else: 197 | variables_to_restore_final = variables_to_restore 198 | 199 | return variables_to_restore_final 200 | -------------------------------------------------------------------------------- /unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CharlesShang/FastMaskRCNN/bdae07702acccd85803e658f5e49690981efcdb2/unit_test/__init__.py -------------------------------------------------------------------------------- /unit_test/data_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import functools 7 | 8 | import sys 9 | import os 10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 11 | import numpy as np 12 | import PIL.Image as Image 13 | from PIL import ImageDraw 14 | import tensorflow as tf 15 | import tensorflow.contrib.slim as slim 16 | from libs.logs.log import LOG 17 | import libs.configs.config_v1 as cfg 18 | import libs.nets.resnet_v1 as resnet_v1 19 | import libs.datasets.dataset_factory as dataset_factory 20 | import libs.datasets.coco as coco 21 | 
import libs.preprocessings.coco_v1 as preprocess_coco
22 | from libs.layers import ROIAlign
23 |
24 | resnet50 = resnet_v1.resnet_v1_50
25 | FLAGS = tf.app.flags.FLAGS
26 |
27 | with tf.Graph().as_default():
28 |
29 |     image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \
30 |         coco.read('./data/coco/records/coco_trainval2014_00000-of-00048.tfrecord')
31 |
32 |     image, gt_boxes, gt_masks = \
33 |         preprocess_coco.preprocess_image(image, gt_boxes, gt_masks)
34 |
35 |
36 |
37 |     sess = tf.Session()
38 |     init_op = tf.group(tf.global_variables_initializer(),
39 |                        tf.local_variables_initializer())
40 |     # init_op = tf.initialize_all_variables()
41 |
42 |     boxes = [[100, 100, 200, 200],
43 |              [50, 50, 100, 100],
44 |              [100, 100, 750, 750],
45 |              [50, 50, 60, 60]]
46 |     # boxes = np.zeros((0, 4))
47 |     boxes = tf.constant(boxes, tf.float32)
48 |     feat = ROIAlign(image, boxes, False, 16, 7, 7)
49 |     sess.run(init_op)
50 |
51 |     tf.train.start_queue_runners(sess=sess)
52 |     with sess.as_default():
53 |         for i in range(20000):
54 |             image_np, ih_np, iw_np, gt_boxes_np, gt_masks_np, num_instances_np, img_id_np, \
55 |             feat_np = \
56 |                 sess.run([image, ih, iw, gt_boxes, gt_masks, num_instances, img_id,
57 |                           feat])
58 |             # print (image_np.shape, gt_boxes_np.shape, gt_masks_np.shape)
59 |
60 |             if i % 100 == 0:
61 |                 print ('%d, image_id: %s, instances: %d' % (i, str(img_id_np), num_instances_np))
62 |                 image_np = 256 * (image_np * 0.5 + 0.5)
63 |                 image_np = image_np.astype(np.uint8)
64 |                 image_np = np.squeeze(image_np)
65 |                 print (image_np.shape, ih_np, iw_np)
66 |                 print (feat_np.shape)
67 |                 im = Image.fromarray(image_np)
68 |                 imd = ImageDraw.Draw(im)
69 |                 for j in range(gt_boxes_np.shape[0]):  # separate loop variable; don't clobber the step counter i
70 |                     imd.rectangle(gt_boxes_np[j, :4])  # drop the class column; PIL expects [x1, y1, x2, y2]
71 |                 im.save(str(img_id_np) + '.png')
72 |                 # print (gt_boxes_np)
73 |     sess.close()
74 |
-------------------------------------------------------------------------------- /unit_test/preprocessing_test.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import numpy as np
5 | import sys
6 | import os
7 | import tensorflow as tf
8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
9 |
10 | import libs.preprocessings.coco_v1 as coco_preprocess
11 | import libs.configs.config_v1 as cfg
12 |
13 | ih, iw, ic = 400, 500, 3
14 | N = 3
15 | image = np.random.randint(0, 255, (ih, iw, ic)).astype(np.uint8)
16 | gt_masks = np.zeros((N, ih, iw)).astype(np.int32)
17 | xy = np.random.randint(0, min(iw, ih)-100, (N, 2)).astype(np.float32)
18 | wh = np.random.randint(20, 40, (N, 2)).astype(np.float32)
19 | cls = np.random.randint(1, 6, (N, 1)).astype(np.float32)
20 | gt_boxes = np.hstack((xy, xy + wh, cls)).astype(np.float32)
21 | gt_boxes_np = gt_boxes
22 | image_np = image
23 | gt_masks_np = gt_masks
24 |
25 | for i in range(N):
26 |     box = gt_boxes[i, 0:4]
27 |     gt_masks[i, int(box[1]):int(box[3]),
28 |              int(box[0]):int(box[2])] = 1
29 | image = tf.constant(image)
30 | gt_boxes = tf.constant(gt_boxes)
31 | gt_masks = tf.constant(gt_masks)
32 |
33 | image, gt_boxes, gt_masks = \
34 |     coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True)
35 |
36 | with tf.Session() as sess:
37 |     # print(image.eval())
38 |     image_tf, gt_boxes_tf, gt_masks_tf = \
39 |         sess.run([image, gt_boxes, gt_masks])
40 |     print ('#######################')
41 |     print ('DATA PREPROCESSING TEST')
42 |     print ('#######################')
43 |     print ('gt_boxes shape:', gt_boxes_tf.shape)
44 |     print('mask shape:',
gt_masks_tf.shape) 45 | print(gt_boxes_tf) 46 | for i in range(N): 47 | box = np.round(gt_boxes_tf[i, 0:4]) 48 | box = box.astype(np.int32) 49 | m = gt_masks_tf[i, box[1]:box[3], box[0]:box[2]] 50 | print ('after:', box) 51 | print (np.sum(m)/ (0.0 + m.size)) 52 | print (m) 53 | box = np.round(gt_boxes_np[i, 0:4]) 54 | box = box.astype(np.int32) 55 | m = gt_masks_np[i, box[1]:box[3], box[0]:box[2]] 56 | print ('ori box:', box) 57 | print (np.sum(m)/ (0.0 + m.size)) 58 | -------------------------------------------------------------------------------- /unit_test/resnet50_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import functools 7 | import os, sys 8 | import time 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | import numpy as np 11 | from time import gmtime, strftime 12 | import tensorflow as tf 13 | import tensorflow.contrib.slim as slim 14 | import libs.configs.config_v1 as cfg 15 | import libs.datasets.coco as coco 16 | import libs.preprocessings.coco_v1 as coco_preprocess 17 | import libs.nets.pyramid_network as pyramid_network 18 | import libs.nets.resnet_v1 as resnet_v1 19 | from train.train_utils import _configure_learning_rate, _configure_optimizer, \ 20 | _get_variables_to_train, _get_init_fn, get_var_list_to_restore 21 | 22 | resnet50 = resnet_v1.resnet_v1_50 23 | FLAGS = tf.app.flags.FLAGS 24 | 25 | DEBUG = False 26 | 27 | with tf.Graph().as_default(): 28 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8, 29 | allow_growth=True, 30 | ) 31 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, 32 | allow_soft_placement=True)) as sess: 33 | global_step = slim.create_global_step() 34 | 35 | ## data 36 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \ 37 | coco.read('./data/coco/records/coco_train2014_00000-of-00040.tfrecord') 38 | with tf.control_dependencies([image, gt_boxes, gt_masks]): 39 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True) 40 | 41 | ## network 42 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)): 43 | logits, end_points = resnet50(image, 1000, is_training=False) 44 | end_points['inputs'] = image 45 | 46 | for x in sorted(end_points.keys()): 47 | print (x, end_points[x].name, end_points[x].shape) 48 | 49 | pyramid = pyramid_network.build_pyramid('resnet50', end_points) 50 | # for p in pyramid: 51 | # print (p, pyramid[p]) 52 | 53 | summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) 54 | for p in pyramid: 55 | summaries.add(tf.summary.histogram('pyramid/hist/' + p, pyramid[p])) 56 | summaries.add(tf.summary.scalar('pyramid/means/'+ p, tf.reduce_mean(tf.abs(pyramid[p])))) 57 | 58 | outputs = pyramid_network.build_heads(pyramid, ih, iw, num_classes=81, base_anchors=9, is_training=True, gt_boxes=gt_boxes) 59 | 60 | ## losses 61 | loss, losses, batch_info = pyramid_network.build_losses(pyramid, outputs, 62 | gt_boxes, gt_masks, 63 | num_classes=81, base_anchors=9, 64 | rpn_box_lw =0.1, rpn_cls_lw = 0.2, 65 | refined_box_lw=2.0, refined_cls_lw=0.1, 66 | mask_lw=0.2) 67 | 68 | ## optimization 69 | learning_rate = _configure_learning_rate(82783, global_step) 70 | optimizer = _configure_optimizer(learning_rate) 71 | summaries.add(tf.summary.scalar('learning_rate', learning_rate)) 72 | for loss in 
tf.get_collection(tf.GraphKeys.LOSSES): 73 | summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) 74 | 75 | loss = tf.get_collection(tf.GraphKeys.LOSSES) 76 | regular_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 77 | total_loss = tf.add_n(loss + regular_loss) 78 | reg_loss = tf.add_n(regular_loss) 79 | summaries.add(tf.summary.scalar('total_loss', total_loss)) 80 | summaries.add(tf.summary.scalar('regular_loss', reg_loss)) 81 | 82 | variables_to_train = _get_variables_to_train() 83 | update_op = optimizer.minimize(total_loss) 84 | # gradients = optimizer.compute_gradients(total_loss, var_list=variables_to_train) 85 | # grad_updates = optimizer.apply_gradients(gradients, 86 | # global_step=global_step) 87 | # update_op = tf.group(grad_updates) 88 | 89 | # summary_op = tf.summary.merge(list(summaries), name='summary_op') 90 | summary_op = tf.summary.merge_all() 91 | logdir = os.path.join(FLAGS.train_dir, strftime('%Y%m%d%H%M%S', gmtime())) 92 | if not os.path.exists(logdir): 93 | os.makedirs(logdir) 94 | summary_writer = tf.summary.FileWriter( 95 | logdir, 96 | graph=sess.graph) 97 | 98 | 99 | init_op = tf.group(tf.global_variables_initializer(), 100 | tf.local_variables_initializer()) 101 | 102 | sess.run(init_op) 103 | coord = tf.train.Coordinator() 104 | tf.train.start_queue_runners(sess=sess, coord=coord) 105 | 106 | ## restore pretrained model 107 | # FLAGS.pretrained_model = None 108 | if FLAGS.pretrained_model: 109 | if tf.gfile.IsDirectory(FLAGS.pretrained_model): 110 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.pretrained_model) 111 | else: 112 | checkpoint_path = FLAGS.pretrained_model 113 | FLAGS.checkpoint_exclude_scopes='pyramid' 114 | FLAGS.checkpoint_include_scopes='resnet_v1_50' 115 | vars_to_restore = get_var_list_to_restore() 116 | for var in vars_to_restore: 117 | print ('restoring ', var.name) 118 | 119 | try: 120 | restorer = tf.train.Saver(vars_to_restore) 121 | restorer.restore(sess, checkpoint_path) 122 | print ('Restored %d(%d) vars from %s' %( 123 | len(vars_to_restore), len(tf.global_variables()), 124 | checkpoint_path )) 125 | except: 126 | print ('Checking your params %s' %(checkpoint_path)) 127 | raise 128 | 129 | # import libs.memory_util as memory_util 130 | # memory_util.vlog(1) 131 | # with memory_util.capture_stderr() as stderr: 132 | # sess.run([update_op]) 133 | # memory_util.print_memory_timeline(stderr, ignore_less_than_bytes=1000) 134 | 135 | ## training loop 136 | saver = tf.train.Saver(max_to_keep=20) 137 | for step in range(FLAGS.max_iters): 138 | start_time = time.time() 139 | 140 | _, tot_loss, reg_lossnp, img_id_str, \ 141 | rpn_box_loss, rpn_cls_loss, refined_box_loss, refined_cls_loss, mask_loss, \ 142 | gt_boxesnp, \ 143 | rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch, mask_batch_pos, mask_batch = \ 144 | sess.run([update_op, total_loss, reg_loss, img_id] + 145 | losses + 146 | [gt_boxes] + 147 | batch_info) 148 | # TODO: sampling strategy 149 | 150 | duration_time = time.time() - start_time 151 | if step % 1 == 0: 152 | print ( """iter %d: image-id:%07d, time:%.3f(sec), regular_loss: %.6f, """ 153 | """total-loss %.4f(%.4f, %.4f, %.6f, %.4f, %.4f), """ 154 | """instances: %d, """ 155 | """batch:(%d|%d, %d|%d, %d|%d)""" 156 | % (step, img_id_str, duration_time, reg_lossnp, 157 | tot_loss, rpn_box_loss, rpn_cls_loss, refined_box_loss, refined_cls_loss, mask_loss, 158 | gt_boxesnp.shape[0], 159 | rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch, mask_batch_pos, mask_batch)) 160 | 161 | if 
np.isnan(tot_loss) or np.isinf(tot_loss):
162 |           print (gt_boxesnp)
163 |           raise ValueError('total loss diverged (NaN or Inf)')  # a bare `raise` is invalid outside an except block
164 |
165 |         if step % 100 == 0:
166 |           summary_str = sess.run(summary_op)
167 |           summary_writer.add_summary(summary_str, step)
168 |
169 |         if (step % 1000 == 0 or step + 1 == FLAGS.max_iters) and step != 0:
170 |           checkpoint_path = os.path.join(FLAGS.train_dir,
171 |                                          FLAGS.dataset_name + '_model.ckpt')
172 |           saver.save(sess, checkpoint_path, global_step=step)
173 |
174 |       if coord.should_stop():
175 |         coord.request_stop()
176 |         coord.join()  # threads from start_queue_runners(coord=coord) are registered with the coordinator
177 |
--------------------------------------------------------------------------------
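A quick worked example of the pure-Python NMS baseline in libs/nms/py_cpu_nms.py (values hypothetical; assumes the repo root is on PYTHONPATH):

import numpy as np
from libs.nms.py_cpu_nms import py_cpu_nms

# Rows are [x1, y1, x2, y2, score]; areas use the (x2 - x1 + 1) convention.
dets = np.array([[ 0.,  0., 10., 10., 0.9],   # top-scoring box, kept
                 [ 1.,  1., 11., 11., 0.8],   # IoU with box 0 = 100/142 ~ 0.70 > 0.5, suppressed
                 [20., 20., 30., 30., 0.7]])  # disjoint from box 0, kept
print(py_cpu_nms(dets, thresh=0.5))  # -> [0, 2]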