├── .gitignore
├── LICENSE
├── README.md
├── ThirdPartyNOtices.txt
├── experiments
│   └── relation_rcnn
│       ├── cfgs
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_dcn_end2end_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_dcn_end2end_relation_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_dcn_end2end_relation_learn_nms_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_end2end_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_end2end_learn_nms_3epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_end2end_relation_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_end2end_relation_learn_nms_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_fpn_8epoch.yaml
│       │   ├── resnet_v1_101_coco_trainvalminus_rcnn_fpn_relation_8epoch.yaml
│       │   └── resnet_v1_101_coco_trainvalminus_rcnn_fpn_relation_learn_nms_8epoch.yaml
│       ├── rcnn_end2end_train_test.py
│       ├── rcnn_test.py
│       └── rcnn_train_test.py
├── init.sh
├── lib
│   ├── Makefile
│   ├── __init__.py
│   ├── bbox
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── bbox.pyx
│   │   ├── bbox_regression.py
│   │   ├── bbox_transform.py
│   │   └── setup_linux.py
│   ├── dataset
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── imdb.py
│   │   └── pycocotools
│   │       ├── .gitignore
│   │       ├── UPSTREAM_REV
│   │       ├── __init__.py
│   │       ├── _mask.pyx
│   │       ├── coco.py
│   │       ├── cocoeval.py
│   │       ├── mask.py
│   │       ├── maskApi.c
│   │       ├── maskApi.h
│   │       └── setup_linux.py
│   ├── nms
│   │   ├── __init__.py
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.cu
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms.py
│   │   ├── nms_kernel.cu
│   │   └── setup_linux.py
│   ├── rpn
│   │   ├── __init__.py
│   │   ├── generate_anchor.py
│   │   └── rpn.py
│   └── utils
│       ├── PrefetchingIter.py
│       ├── __init__.py
│       ├── create_logger.py
│       ├── image.py
│       ├── load_data.py
│       ├── load_model.py
│       ├── lr_scheduler.py
│       └── symbol.py
├── relation_rcnn
│   ├── __init__.py
│   ├── _init_paths.py
│   ├── config
│   │   ├── __init__.py
│   │   └── config.py
│   ├── core
│   │   ├── DataParallelExecutorGroup.py
│   │   ├── __init__.py
│   │   ├── callback.py
│   │   ├── loader.py
│   │   ├── metric.py
│   │   ├── module.py
│   │   ├── rcnn.py
│   │   └── tester.py
│   ├── function
│   │   ├── __init__.py
│   │   ├── test_rcnn.py
│   │   ├── test_rpn.py
│   │   ├── train_rcnn.py
│   │   └── train_rpn.py
│   ├── operator_cxx
│   │   ├── deformable_convolution-inl.h
│   │   ├── deformable_convolution.cc
│   │   ├── deformable_convolution.cu
│   │   ├── deformable_psroi_pooling-inl.h
│   │   ├── deformable_psroi_pooling.cc
│   │   ├── deformable_psroi_pooling.cu
│   │   ├── nn
│   │   │   ├── deformable_im2col.cuh
│   │   │   └── deformable_im2col.h
│   │   ├── psroi_pooling-inl.h
│   │   ├── psroi_pooling.cc
│   │   └── psroi_pooling.cu
│   ├── operator_py
│   │   ├── __init__.py
│   │   ├── box_annotator_ohem.py
│   │   ├── learn_nms.py
│   │   ├── monitor_op.py
│   │   ├── nms_multi_target.py
│   │   ├── proposal.py
│   │   └── proposal_target.py
│   ├── symbols
│   │   ├── __init__.py
│   │   ├── resnet_v1_101_rcnn.py
│   │   ├── resnet_v1_101_rcnn_attention_1024_pairwise_position_multi_head_16.py
│   │   ├── resnet_v1_101_rcnn_attention_1024_pairwise_position_multi_head_16_learn_nms.py
│   │   ├── resnet_v1_101_rcnn_base.py
│   │   ├── resnet_v1_101_rcnn_dcn.py
│   │   ├── resnet_v1_101_rcnn_dcn_attention_1024_pairwise_position_multi_head_16.py
│   │   ├── resnet_v1_101_rcnn_dcn_attention_1024_pairwise_position_multi_head_16_learn_nms.py
│   │   ├── resnet_v1_101_rcnn_fpn.py
│   │   ├── resnet_v1_101_rcnn_fpn_attention_1024_pairwise_position_multi_head_16.py
│   │   ├── resnet_v1_101_rcnn_fpn_attention_1024_pairwise_position_multi_head_16_learn_nms.py
│   │   ├── resnet_v1_101_rcnn_learn_nms_1024_attention_1024_pairwise_position_multi_head_16.py
│   │   └── resnet_v1_101_rcnn_learn_nms_base.py
│   ├── test.py
│   ├── train_end2end.py
│   └── train_rcnn.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
# IntelliJ project files
.idea
*.iml
out
gen

### Vim template
[._]*.s[a-w][a-z]
[._]s[a-w][a-z]
*.un~
Session.vim
.netrwhist
*~

### IPythonNotebook template
# Temporary data
.ipynb_checkpoints/

### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
#lib/
#lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

*.ipynb
*.params
*.json
.vscode/

lib/dataset/pycocotools/*.cpp
lib/nms/*.c
lib/nms/*.cpp

data
external
output
model

.db
cache
proposal
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Microsoft

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/ThirdPartyNOtices.txt:
--------------------------------------------------------------------------------
************************************************************************

THIRD-PARTY SOFTWARE NOTICES AND INFORMATION

MXNet

Copyright (c) 2015-2016 by Contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


Fast R-CNN

Copyright (c) Microsoft Corporation

All rights reserved.

MIT License

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.


Faster R-CNN

The MIT License (MIT)

Copyright (c) 2015 Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


Caffe

COPYRIGHT

All contributions by the University of California:
Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
All rights reserved.

All other contributions:
Copyright (c) 2014, 2015, the respective contributors
All rights reserved.

Caffe uses a shared copyright model: each contributor holds copyright over
their contributions to Caffe. The project versioning records all such
contribution and copyright details. If a contributor wants to further mark
their specific copyright on a particular contribution, they should indicate
their copyright solely in the commit message of the change when it is
committed.

LICENSE

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CONTRIBUTION AGREEMENT

By contributing to the BVLC/caffe repository through pull-request, comment,
or otherwise, the contributor releases their content to the
license and copyright terms herein.


MS COCO API

Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_dcn_end2end_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_dcn
gpus: '0,1,2,3'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 0.6
  SOFTNMS: true
  test_epoch: 8
  max_per_image: 100

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_dcn_end2end_relation_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_dcn_attention_1024_pairwise_position_multi_head_16
gpus: '0,1,2,3'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 0.3
  test_epoch: 8
  max_per_image: 100

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_dcn_end2end_relation_learn_nms_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_dcn_attention_1024_pairwise_position_multi_head_16_learn_nms
gpus: '4,5,6,7'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
  NMS_TARGET_THRESH: '0.5, 0.6, 0.7, 0.8, 0.9'
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
  LEARN_NMS: true
  FIRST_N: 100
  JOINT_TRAINING: true
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 10.0
  SOFTNMS: true
  test_epoch: 8
  max_per_image: 100
  # Learn nms
  LEARN_NMS: true
  LEARN_NMS_CLASS_SCORE_TH: 0.01
  FIRST_N: 100

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_end2end_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn
gpus: '0,1,2,3'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  #image_set: minival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 0.6
  SOFTNMS: true
  test_epoch: 8
  max_per_image: 100

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_end2end_learn_nms_3epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_learn_nms_1024_attention_1024_pairwise_position_multi_head_16
gpus: '0,1,2,3'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/coco_resnet_v1_101_rcnn"
  pretrained_epoch: 8
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  - rpn_conv_3x3
  - res5
  - bn5
  - fc_new
  - conv_new_1
  - cls_score
  - bbox_pred
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
  NMS_TARGET_THRESH: '0.5, 0.6, 0.7, 0.8, 0.9'
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '2.0'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 3
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: false
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
  LEARN_NMS: true
  FIRST_N: 100
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 10.0
  SOFTNMS: true
  test_epoch: 3
  max_per_image: 100
  # Learn nms
  LEARN_NMS: true
  LEARN_NMS_CLASS_SCORE_TH: 0.01
  FIRST_N: 100
--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_end2end_relation_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_attention_1024_pairwise_position_multi_head_16
gpus: '4,5,6,7'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 0.6
  SOFTNMS: true
  test_epoch: 7
  max_per_image: 100

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_end2end_relation_learn_nms_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_attention_1024_pairwise_position_multi_head_16_learn_nms
gpus: '0,1,2,3'
CLASS_AGNOSTIC: true
SCALES:
- 600
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 0
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
  NMS_TARGET_THRESH: '0.5, 0.6, 0.7, 0.8, 0.9'
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  #image_set: minival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
TRAIN:
  lr: 0.0005
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.00005
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: true
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 128
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
  LEARN_NMS: true
  FIRST_N: 100
  JOINT_TRAINING: true
TEST:
  # use rpn to generate proposal
  HAS_RPN: true
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 10.0
  SOFTNMS: true
  test_epoch: 8
  max_per_image: 100
  # Learn nms
  LEARN_NMS: true
  LEARN_NMS_CLASS_SCORE_TH: 0.01
  FIRST_N: 100

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_fpn_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_fpn
gpus: '4,5,6,7'
CLASS_AGNOSTIC: true
SCALES:
- 800
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 32
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
  ROIDispatch: true
  USE_NONGT_INDEX: false
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  # image_set: minival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
  proposal_cache: "./proposal/resnet_v1_101_fpn"
TRAIN:
  lr: 0.00125
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.000125
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_fpn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: false
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  TOP_ROIS: 1000
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 512
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
TEST:
  # use rpn to generate proposal
  HAS_RPN: false
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 0.6
  SOFTNMS: true
  test_epoch: 8
  max_per_image: 100
  TOP_ROIS: 1000

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_fpn_relation_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_fpn_attention_1024_pairwise_position_multi_head_16
gpus: '0, 1, 2, 3'
CLASS_AGNOSTIC: true
SCALES:
- 800
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 32
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
  ROIDispatch: true
  USE_NONGT_INDEX: true
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  # image_set: minival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
  proposal_cache: "./proposal/resnet_v1_101_fpn"
TRAIN:
  lr: 0.00125
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.000125
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_fpn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: false
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  TOP_ROIS: 1000
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 512
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
TEST:
  # use rpn to generate proposal
  HAS_RPN: false
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 0.3
  test_epoch: 8
  max_per_image: 100
  TOP_ROIS: 1000

--------------------------------------------------------------------------------
/experiments/relation_rcnn/cfgs/resnet_v1_101_coco_trainvalminus_rcnn_fpn_relation_learn_nms_8epoch.yaml:
--------------------------------------------------------------------------------
---
MXNET_VERSION: "mxnet_v1.1.0"
output_path: "./output/rcnn/coco"
symbol: resnet_v1_101_rcnn_fpn_attention_1024_pairwise_position_multi_head_16_learn_nms
gpus: '4,5,6,7'
CLASS_AGNOSTIC: true
SCALES:
- 800
- 1000
default:
  frequent: 100
  kvstore: device
network:
  pretrained: "./model/pretrained_model/resnet_v1_101"
  pretrained_epoch: 0
  PIXEL_MEANS:
  - 103.06
  - 115.90
  - 123.15
  IMAGE_STRIDE: 32
  RCNN_FEAT_STRIDE: 16
  RPN_FEAT_STRIDE: 16
  FIXED_PARAMS:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - gamma
  - beta
  FIXED_PARAMS_SHARED:
  - conv1
  - bn_conv1
  - res2
  - bn2
  - res3
  - bn3
  - res4
  - bn4
  - gamma
  - beta
  ANCHOR_RATIOS:
  - 0.5
  - 1
  - 2
  ANCHOR_SCALES:
  - 4
  - 8
  - 16
  - 32
  NUM_ANCHORS: 12
  ROIDispatch: true
  USE_NONGT_INDEX: true
  NMS_TARGET_THRESH: '0.5, 0.6, 0.7, 0.8, 0.9'
dataset:
  NUM_CLASSES: 81
  dataset: coco
  dataset_path: "./data/coco"
  image_set: train2014+valminusminival2014
  #image_set: minival2014
  root_path: "./"
  test_image_set: minival2014
  proposal: rpn
  proposal_cache: "./proposal/resnet_v1_101_fpn"
TRAIN:
  lr: 0.00125
  lr_step: '5.33'
  warmup: false
  warmup_lr: 0.000125
  # typically we will use 8000 warmup step for single GPU for COCO
  warmup_step: 1000
  begin_epoch: 0
  end_epoch: 8
  model_prefix: 'rcnn_fpn_coco'
  # whether resume training
  RESUME: false
  # whether flip image
  FLIP: true
  # whether shuffle image
  SHUFFLE: true
  # whether use OHEM
  ENABLE_OHEM: true
  # size of images for each device, 2 for rcnn, 1 for rpn and e2e
  BATCH_IMAGES: 1
  # e2e changes behavior of anchor loader and metric
  END2END: false
  # group images with similar aspect ratio
  ASPECT_GROUPING: true
  # R-CNN
  # rcnn rois batch size
  TOP_ROIS: 1000
  BATCH_ROIS: -1
  BATCH_ROIS_OHEM: 512
  # rcnn rois sampling params
  FG_FRACTION: 0.25
  FG_THRESH: 0.5
  BG_THRESH_HI: 0.5
  BG_THRESH_LO: 0
  # rcnn bounding box regression params
  BBOX_REGRESSION_THRESH: 0.5
  BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0

  # RPN anchor loader
  # rpn anchors batch size
  RPN_BATCH_SIZE: 256
  # rpn anchors sampling params
  RPN_FG_FRACTION: 0.5
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_NEGATIVE_OVERLAP: 0.3
  RPN_CLOBBER_POSITIVES: false
  # rpn bounding box regression params
  RPN_BBOX_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  RPN_POSITIVE_WEIGHT: -1.0
  # used for end2end training
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # approximate bounding box regression
  BBOX_NORMALIZATION_PRECOMPUTED: true
  BBOX_MEANS:
  - 0.0
  - 0.0
  - 0.0
  - 0.0
  BBOX_STDS:
  - 0.1
  - 0.1
  - 0.2
  - 0.2
  LEARN_NMS: true
  FIRST_N: 150
  JOINT_TRAINING: true
TEST:
  # use rpn to generate proposal
  HAS_RPN: false
  # size of images for each device
  BATCH_IMAGES: 1
  # RPN proposal
  CXX_PROPOSAL: false
  RPN_NMS_THRESH: 0.7
  RPN_PRE_NMS_TOP_N: 6000
  RPN_POST_NMS_TOP_N: 300
  RPN_MIN_SIZE: 0
  # RPN generate proposal
  PROPOSAL_NMS_THRESH: 0.7
  PROPOSAL_PRE_NMS_TOP_N: 20000
  PROPOSAL_POST_NMS_TOP_N: 2000
  PROPOSAL_MIN_SIZE: 0
  # RCNN nms
  NMS: 10.0
  SOFTNMS: true
  test_epoch: 8
  max_per_image: 100
  # Learn nms
  LEARN_NMS: true
  LEARN_NMS_CLASS_SCORE_TH: 0.05
  FIRST_N: 150
  TOP_ROIS: 1000

--------------------------------------------------------------------------------
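A note on how these experiment files are consumed: each training/testing script receives one of the YAML files above and merges it over the default hyper-parameters (the repository's own loader lives in relation_rcnn/config/config.py). The sketch below is a minimal illustration of that merge pattern, assuming PyYAML and the easydict package are available; load_experiment_config and defaults are hypothetical names for this illustration, not part of this repository.

import yaml
from easydict import EasyDict as edict

def load_experiment_config(yaml_path, defaults):
    """Merge an experiment YAML (e.g. a file from experiments/relation_rcnn/cfgs)
    over a dict of default options, rejecting keys the defaults do not define."""
    with open(yaml_path) as f:
        exp = edict(yaml.safe_load(f))
    for key, value in exp.items():
        if key not in defaults:
            raise KeyError('unknown config key: {}'.format(key))
        if isinstance(value, dict) and isinstance(defaults[key], dict):
            defaults[key].update(value)   # merge nested sections such as TRAIN / TEST
        else:
            defaults[key] = value         # scalar override, e.g. symbol or gpus
    return defaults

Merging section-by-section rather than replacing the whole dict is what lets each YAML above stay short: it only has to name the values that differ from the shared defaults.
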
/experiments/relation_rcnn/rcnn_end2end_train_test.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Relation Networks for Object Detection
# Copyright (c) 2017 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Jiayuan Gu, Dazhi Cheng, Guodong Zhang
# --------------------------------------------------------

import os
import sys
os.environ['PYTHONUNBUFFERED'] = '1'
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
#os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
this_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(this_dir, '..', '..', 'relation_rcnn'))

import train_end2end
import test

if __name__ == "__main__":
    train_end2end.main()
    test.main()

--------------------------------------------------------------------------------
/experiments/relation_rcnn/rcnn_test.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Relation Networks for Object Detection
# Copyright (c) 2017 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Jiayuan Gu, Dazhi Cheng, Guodong Zhang
# --------------------------------------------------------

import cv2
import os
import sys
os.environ['PYTHONUNBUFFERED'] = '1'
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
this_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(this_dir, '..', '..', 'relation_rcnn'))

import test

if __name__ == "__main__":
    test.main()
--------------------------------------------------------------------------------
/experiments/relation_rcnn/rcnn_train_test.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Relation Networks for Object Detection
# Copyright (c) 2017 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Jiayuan Gu, Dazhi Cheng, Guodong Zhang
# --------------------------------------------------------

import cv2
import os
import sys
os.environ['PYTHONUNBUFFERED'] = '1'
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
os.environ['MXNET_GPU_MEM_POOL_RESERVE'] = '10'
os.environ['MXNET_BACKWARD_DO_MIRROR'] = '1'
# os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
this_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(this_dir, '..', '..', 'relation_rcnn'))

import train_rcnn
import test

if __name__ == "__main__":
    train_rcnn.main()
    test.main()

--------------------------------------------------------------------------------
/init.sh:
--------------------------------------------------------------------------------
#!/bin/bash

mkdir -p ./data
mkdir -p ./output
mkdir -p ./external/mxnet
mkdir -p ./model/pretrained_model

cd lib/bbox
python setup_linux.py build_ext --inplace
cd ../dataset/pycocotools
python setup_linux.py build_ext --inplace
cd ../../nms
python setup_linux.py build_ext --inplace
cd ../..
15 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | cd bbox/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 4 | cd dataset/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 5 | clean: 6 | cd nms/; rm *.so *.c *.cpp; cd ../../ 7 | cd bbox/; rm *.so *.c *.cpp; cd ../../ 8 | cd dataset/pycocotools/; rm *.so; cd ../../ 9 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/lib/__init__.py -------------------------------------------------------------------------------- /lib/bbox/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp -------------------------------------------------------------------------------- /lib/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/lib/bbox/__init__.py -------------------------------------------------------------------------------- /lib/bbox/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps_cython( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /lib/bbox/bbox_regression.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 
Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 12 | # -------------------------------------------------------- 13 | 14 | 15 | """ 16 | This file has functions about generating bounding box regression targets 17 | """ 18 | 19 | import numpy as np 20 | 21 | from bbox_transform import bbox_overlaps, bbox_transform 22 | 23 | 24 | def compute_bbox_regression_targets(rois, overlaps, labels, cfg): 25 | """ 26 | given rois, overlaps, gt labels, compute bounding box regression targets 27 | :param rois: roidb[i]['boxes'] k * 4 28 | :param overlaps: roidb[i]['max_overlaps'] k * 1 29 | :param labels: roidb[i]['max_classes'] k * 1 30 | :return: targets[i][class, dx, dy, dw, dh] k * 5 31 | """ 32 | # Ensure ROIs are floats 33 | rois = rois.astype(np.float, copy=False) 34 | 35 | # Sanity check 36 | if len(rois) != len(overlaps): 37 | print 'bbox regression: this should not happen' 38 | 39 | # Indices of ground-truth ROIs 40 | gt_inds = np.where(overlaps == 1)[0] 41 | if len(gt_inds) == 0: 42 | print 'something wrong : zero ground truth rois' 43 | # Indices of examples for which we try to make predictions 44 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0] 45 | # Get IoU overlap between each ex ROI and gt ROI 46 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 47 | 48 | # Find which gt ROI each ex ROI has max overlap with: 49 | # this will be the ex ROI's gt target 50 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 51 | gt_rois = rois[gt_inds[gt_assignment], :] 52 | ex_rois = rois[ex_inds, :] 53 | 54 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 55 | targets[ex_inds, 0] = labels[ex_inds] 56 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 57 | return targets 58 | 59 | 60 | def add_bbox_regression_targets(roidb, cfg): 61 | """ 62 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 63 | :param roidb: roidb to be processed. 
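:param cfg: config; TRAIN.BBOX_NORMALIZATION_PRECOMPUTED selects the fixed TRAIN.BBOX_MEANS / TRAIN.BBOX_STDS over empirically computed statistics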
64 | :return: means, stds of targets 65 | """ 66 | print 'add bounding box regression targets' 67 | assert len(roidb) > 0 68 | assert 'max_classes' in roidb[0] 69 | 70 | num_images = len(roidb) 71 | num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1] 72 | 73 | for im_i in range(num_images): 74 | rois = roidb[im_i]['boxes'] 75 | max_overlaps = roidb[im_i]['max_overlaps'] 76 | max_classes = roidb[im_i]['max_classes'] 77 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg) 78 | 79 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 80 | # use fixed / precomputed means and stds instead of empirical values 81 | means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1)) 82 | stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1)) 83 | else: 84 | # compute mean, std values 85 | class_counts = np.zeros((num_classes, 1)) + 1e-14 86 | sums = np.zeros((num_classes, 4)) 87 | squared_sums = np.zeros((num_classes, 4)) 88 | for im_i in range(num_images): 89 | targets = roidb[im_i]['bbox_targets'] 90 | for cls in range(1, num_classes): 91 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 92 | if cls_indexes.size > 0: 93 | class_counts[cls] += cls_indexes.size 94 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 95 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 96 | 97 | means = sums / class_counts 98 | # var(x) = E(x^2) - E(x)^2 99 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 100 | 101 | print 'bbox target means:' 102 | print means 103 | print means[1:, :].mean(axis=0) # ignore bg class 104 | print 'bbox target stdevs:' 105 | print stds 106 | print stds[1:, :].mean(axis=0) # ignore bg class 107 | 108 | 109 | # normalize targets 110 | for im_i in range(num_images): 111 | targets = roidb[im_i]['bbox_targets'] 112 | for cls in range(1, num_classes): 113 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] # [0] added: np.where returns a tuple; index as in the stats loop above 114 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 115 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 116 | 117 | return means.ravel(), stds.ravel() 118 | 119 | 120 | def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg): 121 | """ 122 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 123 | :param bbox_targets_data: [k * 5] 124 | :param num_classes: number of classes 125 | :return: bbox target processed [k * 4 num_classes] 126 | bbox_weights ! only foreground boxes have bbox regression computation!
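Example (illustrative): with CLASS_AGNOSTIC off and a foreground RoI labeled cls = 3, its four targets land in columns 4*cls:4*cls+4 = 12:16 and bbox_weights[12:16] = cfg.TRAIN.BBOX_WEIGHTS; every other column, and every background RoI (cls = 0), stays zero.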
127 | """ 128 | classes = bbox_targets_data[:, 0] 129 | if cfg.CLASS_AGNOSTIC: 130 | num_classes = 2 131 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 132 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 133 | indexes = np.where(classes > 0)[0] 134 | for index in indexes: 135 | cls = classes[index] 136 | start = int(4 * 1 if cls > 0 else 0) if cfg.CLASS_AGNOSTIC else int(4 * cls) 137 | end = start + 4 138 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 139 | bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 140 | return bbox_targets, bbox_weights 141 | 142 | -------------------------------------------------------------------------------- /lib/bbox/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 12 | # -------------------------------------------------------- 13 | 14 | import numpy as np 15 | from bbox import bbox_overlaps_cython 16 | 17 | 18 | def bbox_overlaps(boxes, query_boxes): 19 | return bbox_overlaps_cython(boxes, query_boxes) 20 | 21 | 22 | def bbox_overlaps_py(boxes, query_boxes): 23 | """ 24 | determine overlaps between boxes and query_boxes 25 | :param boxes: n * 4 bounding boxes 26 | :param query_boxes: k * 4 bounding boxes 27 | :return: overlaps: n * k overlaps 28 | """ 29 | n_ = boxes.shape[0] 30 | k_ = query_boxes.shape[0] 31 | overlaps = np.zeros((n_, k_), dtype=np.float) 32 | for k in range(k_): 33 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 34 | for n in range(n_): 35 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 36 | if iw > 0: 37 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 38 | if ih > 0: 39 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 40 | all_area = float(box_area + query_box_area - iw * ih) 41 | overlaps[n, k] = iw * ih / all_area 42 | return overlaps 43 | 44 | 45 | def clip_boxes(boxes, im_shape): 46 | """ 47 | Clip boxes to image boundaries. 48 | :param boxes: [N, 4* num_classes] 49 | :param im_shape: tuple of 2 50 | :return: [N, 4* num_classes] 51 | """ 52 | # x1 >= 0 53 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 54 | # y1 >= 0 55 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 56 | # x2 < im_shape[1] 57 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 58 | # y2 < im_shape[0] 59 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 60 | return boxes 61 | 62 | def filter_boxes(boxes, min_size): 63 | """ 64 | filter small boxes. 
65 | :param boxes: [N, 4* num_classes] 66 | :param min_size: 67 | :return: keep: 68 | """ 69 | ws = boxes[:, 2] - boxes[:, 0] + 1 70 | hs = boxes[:, 3] - boxes[:, 1] + 1 71 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 72 | return keep 73 | 74 | def nonlinear_transform(ex_rois, gt_rois): 75 | """ 76 | compute bounding box regression targets from ex_rois to gt_rois 77 | :param ex_rois: [N, 4] 78 | :param gt_rois: [N, 4] 79 | :return: [N, 4] 80 | """ 81 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 82 | 83 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 84 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 85 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 86 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 87 | 88 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 89 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 90 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 91 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 92 | 93 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 94 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 95 | targets_dw = np.log(gt_widths / ex_widths) 96 | targets_dh = np.log(gt_heights / ex_heights) 97 | 98 | targets = np.vstack( 99 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 100 | return targets 101 | 102 | 103 | def nonlinear_pred(boxes, box_deltas): 104 | """ 105 | Transform the set of class-agnostic boxes into class-specific boxes 106 | by applying the predicted offsets (box_deltas) 107 | :param boxes: !important [N 4] 108 | :param box_deltas: [N, 4 * num_classes] 109 | :return: [N 4 * num_classes] 110 | """ 111 | if boxes.shape[0] == 0: 112 | return np.zeros((0, box_deltas.shape[1])) 113 | 114 | boxes = boxes.astype(np.float, copy=False) 115 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 116 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 117 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 118 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 119 | 120 | dx = box_deltas[:, 0::4] 121 | dy = box_deltas[:, 1::4] 122 | dw = box_deltas[:, 2::4] 123 | dh = box_deltas[:, 3::4] 124 | 125 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 126 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 127 | pred_w = np.exp(dw) * widths[:, np.newaxis] 128 | pred_h = np.exp(dh) * heights[:, np.newaxis] 129 | 130 | pred_boxes = np.zeros(box_deltas.shape) 131 | # x1 132 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 133 | # y1 134 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 135 | # x2 136 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 137 | # y2 138 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 139 | 140 | return pred_boxes 141 | 142 | 143 | def iou_transform(ex_rois, gt_rois): 144 | """ return bbox targets, IoU loss uses gt_rois as gt """ 145 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 146 | return gt_rois 147 | 148 | 149 | def iou_pred(boxes, box_deltas): 150 | """ 151 | Transform the set of class-agnostic boxes into class-specific boxes 152 | by applying the predicted offsets (box_deltas) 153 | :param boxes: !important [N 4] 154 | :param box_deltas: [N, 4 * num_classes] 155 | :return: [N 4 * num_classes] 156 | """ 157 | if boxes.shape[0] == 0: 158 | return np.zeros((0, box_deltas.shape[1])) 159 | 160 | boxes = boxes.astype(np.float, copy=False) 161 | x1 = boxes[:, 0] 162 | y1 = boxes[:, 1] 163 | x2 = boxes[:, 2] 164 | y2 = boxes[:, 3] 165 | 166 | dx1 = box_deltas[:, 0::4] 167 | dy1 = 
box_deltas[:, 1::4] 168 | dx2 = box_deltas[:, 2::4] 169 | dy2 = box_deltas[:, 3::4] 170 | 171 | pred_boxes = np.zeros(box_deltas.shape) 172 | # x1 173 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 174 | # y1 175 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 176 | # x2 177 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 178 | # y2 179 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 180 | 181 | return pred_boxes 182 | 183 | 184 | # define bbox_transform and bbox_pred 185 | bbox_transform = nonlinear_transform 186 | bbox_pred = nonlinear_pred 187 | -------------------------------------------------------------------------------- /lib/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 12 | # -------------------------------------------------------- 13 | 14 | import os 15 | from os.path import join as pjoin 16 | from setuptools import setup 17 | from distutils.extension import Extension 18 | from Cython.Distutils import build_ext 19 | import numpy as np 20 | 21 | # Obtain the numpy include directory. This logic works across numpy versions. 22 | try: 23 | numpy_include = np.get_include() 24 | except AttributeError: 25 | numpy_include = np.get_numpy_include() 26 | 27 | 28 | def customize_compiler_for_nvcc(self): 29 | """inject deep into distutils to customize how the dispatch 30 | to gcc/nvcc works. 31 | If you subclass UnixCCompiler, it's not trivial to get your subclass 32 | injected in, and still have the right customizations (i.e. 33 | distutils.sysconfig.customize_compiler) run on it. So instead of going 34 | the OO route, I have this. Note, it's kindof like a wierd functional 35 | subclassing going on.""" 36 | 37 | # tell the compiler it can processes .cu 38 | self.src_extensions.append('.cu') 39 | 40 | # save references to the default compiler_so and _comple methods 41 | default_compiler_so = self.compiler_so 42 | super = self._compile 43 | 44 | # now redefine the _compile method. This gets executed for each 45 | # object but distutils doesn't have the ability to change compilers 46 | # based on source extension: we add it. 
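# (Note: the bbox extension below passes only a 'gcc' list in extra_compile_args,
# so the nvcc branch in _compile is never taken here; the dict form is kept so the
# same build_ext customization also serves the CUDA modules under lib/nms.)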
47 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 48 | if os.path.splitext(src)[1] == '.cu': 49 | # use the cuda for .cu files 50 | self.set_executable('compiler_so', CUDA['nvcc']) 51 | # use only a subset of the extra_postargs, which are 1-1 translated 52 | # from the extra_compile_args in the Extension class 53 | postargs = extra_postargs['nvcc'] 54 | else: 55 | postargs = extra_postargs['gcc'] 56 | 57 | super(obj, src, ext, cc_args, postargs, pp_opts) 58 | # reset the default compiler_so, which we might have changed for cuda 59 | self.compiler_so = default_compiler_so 60 | 61 | # inject our redefined _compile method into the class 62 | self._compile = _compile 63 | 64 | 65 | # run the customize_compiler 66 | class custom_build_ext(build_ext): 67 | def build_extensions(self): 68 | customize_compiler_for_nvcc(self.compiler) 69 | build_ext.build_extensions(self) 70 | 71 | 72 | ext_modules = [ 73 | Extension( 74 | "bbox", 75 | ["bbox.pyx"], 76 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 77 | include_dirs=[numpy_include] 78 | ), 79 | ] 80 | 81 | setup( 82 | name='bbox_cython', 83 | ext_modules=ext_modules, 84 | # inject our custom trigger 85 | cmdclass={'build_ext': custom_build_ext}, 86 | ) 87 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from imdb import IMDB 2 | from coco import coco 3 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/.gitignore: -------------------------------------------------------------------------------- 1 | _mask.c 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import _mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. 
Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | #decode = _mask.decode 78 | def decode(rleObjs): 79 | if type(rleObjs) == list: 80 | return _mask.decode(rleObjs) 81 | else: 82 | return _mask.decode([rleObjs])[:,:,0] 83 | iou = _mask.iou 84 | merge = _mask.merge 85 | area = _mask.area 86 | toBbox = _mask.toBbox 87 | frPyObjects = _mask.frPyObjects 88 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | if(cnts) for(siz j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(siz i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && ad?1:c=dy && xs>xe) || (dxye); 151 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 152 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 153 | if(dx>=dy) for( int d=0; d<=dx; d++ ) { 154 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 155 | } else for( int d=0; d<=dy; d++ ) { 156 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 157 | } 158 | } 159 | // get points along y-boundary and downsample 160 | free(x); free(y); k=m; m=0; double xd, yd; 161 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 162 | for( j=1; jw-1 ) continue; 165 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 167 | x[m]=(int) xd; y[m]=(int) yd; m++; 168 | } 169 | // compute rle encoding given y-boundary points 170 | k=m; a=malloc(sizeof(uint)*(k+1)); 171 | for( j=0; j0) b[m++]=a[j++]; else { 177 | j++; if(jm, p=0; long x; bool more; 184 | char *s=malloc(sizeof(char)*m*6); 185 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 187 | while( more ) { 188 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? 
x!=-1 : x!=0; 189 | if(more) c |= 0x20; c+=48; s[p++]=c; 190 | } 191 | } 192 | s[p]=0; return s; 193 | } 194 | 195 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 196 | siz m=0, p=0, k; long x; bool more; uint *cnts; 197 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 198 | while( s[p] ) { 199 | x=0; k=0; more=1; 200 | while( more ) { 201 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 202 | more = c & 0x20; p++; k++; 203 | if(!more && (c & 0x10)) x |= -1 << 5*k; 204 | } 205 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 206 | } 207 | rleInit(R,h,w,m,cnts); free(cnts); 208 | } 209 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_linux.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - 
yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int32_t, ndim=1] \ 26 | order = scores.argsort()[::-1].astype(np.int32) 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 12 | # -------------------------------------------------------- 13 | 14 | 15 | import numpy as np 16 | 17 | from cpu_nms import cpu_nms 18 | from gpu_nms import gpu_nms 19 | 20 | 21 | def py_nms_wrapper(thresh): 22 | def _nms(dets): 23 | return nms(dets, thresh) 24 | return _nms 25 | 26 | 27 | def py_softnms_wrapper(thresh, max_dets=-1): 28 | def _nms(dets): 29 | return soft_nms(dets, thresh, max_dets) 30 | return _nms 31 | 32 | 33 | def cpu_nms_wrapper(thresh): 34 | def _nms(dets): 35 | return cpu_nms(dets, thresh) 36 | return _nms 37 | 38 | 39 | def gpu_nms_wrapper(thresh, device_id): 40 | def _nms(dets): 41 | return gpu_nms(dets, thresh, device_id) 42 | return _nms 43 | 44 | 45 | def nms(dets, thresh): 46 | """ 47 | greedily select boxes with high confidence and overlap with current maximum <= thresh 48 | rule out overlap >= thresh 49 | :param dets: [[x1, y1, x2, y2 score]] 50 | :param thresh: retain overlap < thresh 51 | 
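Example (illustrative): dets = np.array([[0, 0, 10, 10, 0.9], [1, 1, 11, 11, 0.8]]) with thresh = 0.5 returns [0]; the lower-scoring box overlaps the kept one with IoU = 100 / 142 ~ 0.70 and is suppressed.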
:return: indexes to keep 52 | """ 53 | if dets.shape[0] == 0: 54 | return [] 55 | 56 | x1 = dets[:, 0] 57 | y1 = dets[:, 1] 58 | x2 = dets[:, 2] 59 | y2 = dets[:, 3] 60 | scores = dets[:, 4] 61 | 62 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 63 | order = scores.argsort()[::-1] 64 | 65 | keep = [] 66 | while order.size > 0: 67 | i = order[0] 68 | keep.append(i) 69 | xx1 = np.maximum(x1[i], x1[order[1:]]) 70 | yy1 = np.maximum(y1[i], y1[order[1:]]) 71 | xx2 = np.minimum(x2[i], x2[order[1:]]) 72 | yy2 = np.minimum(y2[i], y2[order[1:]]) 73 | 74 | w = np.maximum(0.0, xx2 - xx1 + 1) 75 | h = np.maximum(0.0, yy2 - yy1 + 1) 76 | inter = w * h 77 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 78 | 79 | inds = np.where(ovr <= thresh)[0] 80 | order = order[inds + 1] 81 | 82 | return keep 83 | 84 | 85 | def rescore(overlap, scores, thresh, type='gaussian'): 86 | assert overlap.shape[0] == scores.shape[0] 87 | if type == 'linear': 88 | inds = np.where(overlap >= thresh)[0] 89 | scores[inds] = scores[inds] * (1 - overlap[inds]) 90 | else: 91 | scores = scores * np.exp(- overlap**2 / thresh) 92 | 93 | return scores 94 | 95 | 96 | def soft_nms(dets, thresh, max_dets): 97 | if dets.shape[0] == 0: 98 | return np.zeros((0, 5)) 99 | 100 | x1 = dets[:, 0] 101 | y1 = dets[:, 1] 102 | x2 = dets[:, 2] 103 | y2 = dets[:, 3] 104 | scores = dets[:, 4] 105 | 106 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 107 | order = scores.argsort()[::-1] 108 | scores = scores[order] 109 | 110 | if max_dets == -1: 111 | max_dets = order.size 112 | 113 | keep = np.zeros(max_dets, dtype=np.intp) 114 | keep_cnt = 0 115 | 116 | while order.size > 0 and keep_cnt < max_dets: 117 | i = order[0] 118 | dets[i, 4] = scores[0] 119 | xx1 = np.maximum(x1[i], x1[order[1:]]) 120 | yy1 = np.maximum(y1[i], y1[order[1:]]) 121 | xx2 = np.minimum(x2[i], x2[order[1:]]) 122 | yy2 = np.minimum(y2[i], y2[order[1:]]) 123 | 124 | w = np.maximum(0.0, xx2 - xx1 + 1) 125 | h = np.maximum(0.0, yy2 - yy1 + 1) 126 | inter = w * h 127 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 128 | 129 | order = order[1:] 130 | scores = rescore(ovr, scores[1:], thresh) 131 | 132 | tmp = scores.argsort()[::-1] 133 | order = order[tmp] 134 | scores = scores[tmp] 135 | 136 | keep[keep_cnt] = i 137 | keep_cnt += 1 138 | 139 | keep = keep[:keep_cnt] 140 | dets = dets[keep, :] 141 | return dets 142 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Deformable Convolutional Networks 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License 5 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3],
b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
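// (Layout note on the suppression mask used below: dev_mask holds boxes_num rows of
// col_blocks 64-bit words; in row i, bit j of word b is set when box b*64+j overlaps
// box i above the threshold. The host loop in _nms ORs the rows of kept boxes into
// remv, so a box whose bit is already set there was suppressed by a better box.)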
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH.
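For example (illustrative): with CUDAHOME=/usr/local/cuda it returns {'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc', 'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.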
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.iteritems(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/lib/rpn/__init__.py -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | 15 | """ 16 | Generate base anchors on index 0 17 | """ 18 | 19 | import numpy as np 20 | 21 | 22 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 23 | scales=2 ** np.arange(3, 6)): 24 | """ 25 | Generate anchor (reference) windows by enumerating aspect ratios X 26 | scales wrt a reference (0, 0, 15, 15) window. 
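With the defaults this yields 9 anchors; for reference (values as in py-faster-rcnn), the first (ratio 0.5, scale 8) is [-84, -40, 99, 55] and the last (ratio 2, scale 32) is [-168, -344, 183, 359].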
27 | """ 28 | 29 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 30 | ratio_anchors = _ratio_enum(base_anchor, ratios) 31 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 32 | for i in xrange(ratio_anchors.shape[0])]) 33 | return anchors 34 | 35 | 36 | def _whctrs(anchor): 37 | """ 38 | Return width, height, x center, and y center for an anchor (window). 39 | """ 40 | 41 | w = anchor[2] - anchor[0] + 1 42 | h = anchor[3] - anchor[1] + 1 43 | x_ctr = anchor[0] + 0.5 * (w - 1) 44 | y_ctr = anchor[1] + 0.5 * (h - 1) 45 | return w, h, x_ctr, y_ctr 46 | 47 | 48 | def _mkanchors(ws, hs, x_ctr, y_ctr): 49 | """ 50 | Given a vector of widths (ws) and heights (hs) around a center 51 | (x_ctr, y_ctr), output a set of anchors (windows). 52 | """ 53 | 54 | ws = ws[:, np.newaxis] 55 | hs = hs[:, np.newaxis] 56 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 57 | y_ctr - 0.5 * (hs - 1), 58 | x_ctr + 0.5 * (ws - 1), 59 | y_ctr + 0.5 * (hs - 1))) 60 | return anchors 61 | 62 | 63 | def _ratio_enum(anchor, ratios): 64 | """ 65 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 66 | """ 67 | 68 | w, h, x_ctr, y_ctr = _whctrs(anchor) 69 | size = w * h 70 | size_ratios = size / ratios 71 | ws = np.round(np.sqrt(size_ratios)) 72 | hs = np.round(ws * ratios) 73 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 74 | return anchors 75 | 76 | 77 | def _scale_enum(anchor, scales): 78 | """ 79 | Enumerate a set of anchors for each scale wrt an anchor. 80 | """ 81 | 82 | w, h, x_ctr, y_ctr = _whctrs(anchor) 83 | ws = w * scales 84 | hs = h * scales 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Han Hu, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import mxnet as mx 15 | from mxnet.io import DataDesc, DataBatch 16 | import threading 17 | 18 | 19 | class PrefetchingIterV2(mx.io.DataIter): 20 | """Base class for prefetching iterators. Takes one or more DataIters ( 21 | or any class with "reset" and "next" methods) and combine them with 22 | prefetching. For example: 23 | 24 | Parameters 25 | ---------- 26 | iters : DataIter or list of DataIter 27 | one or more DataIters (or any class with "reset" and "next" methods) 28 | rename_data : None or list of dict 29 | i-th element is a renaming map for i-th iter, in the form of 30 | {'original_name' : 'new_name'}. 
Should have one entry for each entry 31 | in iter[i].provide_data 32 | rename_label : None or list of dict 33 | Similar to rename_data 34 | 35 | Examples 36 | -------- 37 | iter = PrefetchingIter([NDArrayIter({'data': X1}), NDArrayIter({'data': X2})], 38 | rename_data=[{'data': 'data1'}, {'data': 'data2'}]) 39 | """ 40 | def __init__(self, iters, rename_data=None, rename_label=None, prefetch_n_iter=4): 41 | super(PrefetchingIterV2, self).__init__() 42 | if not isinstance(iters, list): 43 | iters = [iters] 44 | self.n_iter = len(iters) 45 | self.prefetch_n_iter = prefetch_n_iter 46 | assert self.n_iter == 1, "Our prefetching iter only support 1 DataIter" 47 | self.iters = iters 48 | self.rename_data = rename_data 49 | self.rename_label = rename_label 50 | self.batch_size = len(self.provide_data) * self.provide_data[0][0][1][0] 51 | self.data_ready = [threading.Event() for i in range(self.prefetch_n_iter)] 52 | self.data_taken = [threading.Event() for i in range(self.prefetch_n_iter)] 53 | 54 | self.cur_id = 0 55 | for e in self.data_taken: 56 | e.set() 57 | self.started = True 58 | self.current_batch = None 59 | self.next_batch = [[None for _ in range(self.n_iter)] for _ in range(self.prefetch_n_iter)] 60 | 61 | def prefetch_func(self, i): 62 | """Thread entry""" 63 | while True: 64 | self.data_taken[i].wait() 65 | if not self.started: 66 | break 67 | try: 68 | self.next_batch[i][0] = self.iters[0].next() 69 | except StopIteration: 70 | self.next_batch[i][0] = None 71 | self.data_taken[i].clear() 72 | self.data_ready[i].set() 73 | self.prefetch_threads = [threading.Thread(target=prefetch_func, args=[self, i]) \ 74 | for i in range(self.prefetch_n_iter)] 75 | for thread in self.prefetch_threads: 76 | thread.setDaemon(True) 77 | thread.start() 78 | 79 | def __del__(self): 80 | self.started = False 81 | for e in self.data_taken: 82 | e.set() 83 | for thread in self.prefetch_threads: 84 | thread.join() 85 | 86 | @property 87 | def provide_data(self): 88 | """The name and shape of data provided by this iterator""" 89 | if self.rename_data is None: 90 | return sum([i.provide_data for i in self.iters], []) 91 | else: 92 | return sum([[ 93 | DataDesc(r[x.name], x.shape, x.dtype) 94 | if isinstance(x, DataDesc) else DataDesc(*x) 95 | for x in i.provide_data 96 | ] for r, i in zip(self.rename_data, self.iters)], []) 97 | 98 | @property 99 | def provide_label(self): 100 | """The name and shape of label provided by this iterator""" 101 | if self.rename_label is None: 102 | return sum([i.provide_label for i in self.iters], []) 103 | else: 104 | return sum([[ 105 | DataDesc(r[x.name], x.shape, x.dtype) 106 | if isinstance(x, DataDesc) else DataDesc(*x) 107 | for x in i.provide_label 108 | ] for r, i in zip(self.rename_label, self.iters)], []) 109 | 110 | def reset(self): 111 | for e in self.data_ready: 112 | e.wait() 113 | for i in self.iters: 114 | i.reset() 115 | for e in self.data_ready: 116 | e.clear() 117 | for e in self.data_taken: 118 | e.set() 119 | 120 | def iter_next(self): 121 | self.data_ready[self.cur_id].wait() 122 | if self.next_batch[self.cur_id][0] is None: 123 | self.cur_id = (self.cur_id + 1) % self.prefetch_n_iter 124 | return False 125 | else: 126 | self.current_batch = self.next_batch[self.cur_id][0] 127 | self.data_ready[self.cur_id].clear() 128 | self.data_taken[self.cur_id].set() 129 | 130 | self.cur_id = (self.cur_id + 1) % self.prefetch_n_iter 131 | return True 132 | 133 | def next(self): 134 | if self.iter_next(): 135 | return self.current_batch 136 | else: 137 | raise 
StopIteration 138 | 139 | def getdata(self): 140 | return self.current_batch.data 141 | 142 | def getlabel(self): 143 | return self.current_batch.label 144 | 145 | def getindex(self): 146 | return self.current_batch.index 147 | 148 | def getpad(self): 149 | return self.current_batch.pad 150 | 151 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/create_logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Bin Xiao 6 | # -------------------------------------------------------- 7 | 8 | 9 | import os 10 | import logging 11 | import time 12 | 13 | def create_logger(root_output_path, cfg, image_set): 14 | # set up logger 15 | if not os.path.exists(root_output_path): 16 | os.makedirs(root_output_path) 17 | assert os.path.exists(root_output_path), '{} does not exist'.format(root_output_path) 18 | 19 | cfg_name = os.path.basename(cfg).split('.')[0] 20 | config_output_path = os.path.join(root_output_path, '{}'.format(cfg_name)) 21 | if not os.path.exists(config_output_path): 22 | os.makedirs(config_output_path) 23 | 24 | image_sets = [iset for iset in image_set.split('+')] 25 | final_output_path = os.path.join(config_output_path, '{}'.format('_'.join(image_sets))) 26 | if not os.path.exists(final_output_path): 27 | os.makedirs(final_output_path) 28 | 29 | log_file = '{}_{}.log'.format(cfg_name, time.strftime('%Y-%m-%d-%H-%M')) 30 | head = '%(asctime)-15s %(message)s' 31 | logging.basicConfig(filename=os.path.join(final_output_path, log_file), format=head) 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | 35 | return logger, final_output_path 36 | 37 | -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | 9 | import numpy as np 10 | import os 11 | import cv2 12 | import random 13 | from PIL import Image 14 | from bbox.bbox_transform import clip_boxes 15 | 16 | 17 | # TODO: These two functions should be merged with the individual data loaders 18 | def get_image(roidb, config): 19 | """ 20 | preprocess image and return processed roidb 21 | :param roidb: a list of roidb 22 | :return: list of img as in mxnet format 23 | roidb add new item['im_info'] 24 | 0 --- x (width, second dim of im) 25 | | 26 | y (height, first dim of im) 27 | """ 28 | num_images = len(roidb) 29 | processed_ims = [] 30 | processed_roidb = [] 31 | for i in range(num_images): 32 | roi_rec = roidb[i] 33 | assert os.path.exists(roi_rec['image']), '{} does not exist'.format(roi_rec['image']) 34 | im = cv2.imread(roi_rec['image'],
cv2.IMREAD_COLOR|cv2.IMREAD_IGNORE_ORIENTATION) 35 | if roidb[i]['flipped']: 36 | im = im[:, ::-1, :] 37 | new_rec = roi_rec.copy() 38 | scale_ind = random.randrange(len(config.SCALES)) 39 | target_size = config.SCALES[scale_ind][0] 40 | max_size = config.SCALES[scale_ind][1] 41 | im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) 42 | im_tensor = transform(im, config.network.PIXEL_MEANS) 43 | processed_ims.append(im_tensor) 44 | im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale] 45 | new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2]) 46 | new_rec['im_info'] = im_info 47 | processed_roidb.append(new_rec) 48 | return processed_ims, processed_roidb 49 | 50 | 51 | def get_segmentation_image(segdb, config): 52 | """ 53 | preprocess image and return segdb 54 | :param segdb: a list of segdb 55 | :return: list of img as mxnet format 56 | """ 57 | num_images = len(segdb) 58 | assert num_images > 0, 'No images' 59 | processed_ims = [] 60 | processed_segdb = [] 61 | processed_seg_cls_gt = [] 62 | for i in range(num_images): 63 | seg_rec = segdb[i] 64 | assert os.path.exists(seg_rec['image']), '{} does not exist'.format(seg_rec['image']) 65 | im = np.array(cv2.imread(seg_rec['image'])) 66 | 67 | new_rec = seg_rec.copy() 68 | 69 | scale_ind = random.randrange(len(config.SCALES)) 70 | target_size = config.SCALES[scale_ind][0] 71 | max_size = config.SCALES[scale_ind][1] 72 | im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) 73 | im_tensor = transform(im, config.network.PIXEL_MEANS) 74 | im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale] 75 | new_rec['im_info'] = im_info 76 | 77 | seg_cls_gt = np.array(Image.open(seg_rec['seg_cls_path'])) 78 | seg_cls_gt, seg_cls_gt_scale = resize( 79 | seg_cls_gt, target_size, max_size, stride=config.network.IMAGE_STRIDE, interpolation=cv2.INTER_NEAREST) 80 | seg_cls_gt_tensor = transform_seg_gt(seg_cls_gt) 81 | 82 | processed_ims.append(im_tensor) 83 | processed_segdb.append(new_rec) 84 | processed_seg_cls_gt.append(seg_cls_gt_tensor) 85 | 86 | return processed_ims, processed_seg_cls_gt, processed_segdb 87 | 88 | def resize(im, target_size, max_size, stride=0, interpolation=cv2.INTER_LINEAR): 89 | """ 90 | only resize input image to target size and return scale 91 | :param im: BGR image input by opencv 92 | :param target_size: one dimensional size (the short side) 93 | :param max_size: one dimensional max size (the long side) 94 | :param stride: if given, pad the image to a multiple of the designated stride 95 | :param interpolation: interpolation method used to resize the image 96 | :return: resized (and possibly padded) image and im_scale 97 | """ 98 | im_shape = im.shape 99 | im_size_min = np.min(im_shape[0:2]) 100 | im_size_max = np.max(im_shape[0:2]) 101 | im_scale = float(target_size) / float(im_size_min) 102 | # prevent bigger axis from being more than max_size: 103 | if np.round(im_scale * im_size_max) > max_size: 104 | im_scale = float(max_size) / float(im_size_max) 105 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=interpolation) 106 | 107 | if stride == 0: 108 | return im, im_scale 109 | else: 110 | # pad to a multiple of stride 111 | im_height = int(np.ceil(im.shape[0] / float(stride)) * stride) 112 | im_width = int(np.ceil(im.shape[1] / float(stride)) * stride) 113 | im_channel = im.shape[2] 114 | padded_im = np.zeros((im_height, im_width, im_channel)) 115 | padded_im[:im.shape[0], :im.shape[1], :] = im 116 | return padded_im, im_scale 117 | 118 | def
transform(im, pixel_means): 119 | """ 120 | transform into mxnet tensor 121 | subtract pixel means and transform to the correct format 122 | :param im: [height, width, channel] in BGR 123 | :param pixel_means: [B, G, R pixel means] 124 | :return: [batch, channel, height, width] 125 | """ 126 | im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) 127 | for i in range(3): 128 | im_tensor[0, i, :, :] = im[:, :, 2 - i] - pixel_means[2 - i] 129 | return im_tensor 130 | 131 | def transform_seg_gt(gt): 132 | """ 133 | transform segmentation gt image into mxnet tensor 134 | :param gt: [height, width, channel = 1] 135 | :return: [batch, channel = 1, height, width] 136 | """ 137 | gt_tensor = np.zeros((1, 1, gt.shape[0], gt.shape[1])) 138 | gt_tensor[0, 0, :, :] = gt[:, :] 139 | 140 | return gt_tensor 141 | 142 | def transform_inverse(im_tensor, pixel_means): 143 | """ 144 | transform from mxnet im_tensor to ordinary RGB image 145 | im_tensor is limited to one image 146 | :param im_tensor: [batch, channel, height, width] 147 | :param pixel_means: [B, G, R pixel means] 148 | :return: im [height, width, channel(RGB)] 149 | """ 150 | assert im_tensor.shape[0] == 1 151 | im_tensor = im_tensor.copy() 152 | # put channel back 153 | channel_swap = (0, 2, 3, 1) 154 | im_tensor = im_tensor.transpose(channel_swap) 155 | im = im_tensor[0] 156 | assert im.shape[2] == 3 157 | im += pixel_means[[2, 1, 0]] 158 | im = im.astype(np.uint8) 159 | return im 160 | 161 | def tensor_vstack(tensor_list, pad=0): 162 | """ 163 | vertically stack tensors 164 | :param tensor_list: list of tensor to be stacked vertically 165 | :param pad: label to pad with 166 | :return: tensor with max shape 167 | """ 168 | ndim = len(tensor_list[0].shape) 169 | dtype = tensor_list[0].dtype 170 | islice = tensor_list[0].shape[0] 171 | dimensions = [] 172 | first_dim = sum([tensor.shape[0] for tensor in tensor_list]) 173 | dimensions.append(first_dim) 174 | for dim in range(1, ndim): 175 | dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) 176 | if pad == 0: 177 | all_tensor = np.zeros(tuple(dimensions), dtype=dtype) 178 | elif pad == 1: 179 | all_tensor = np.ones(tuple(dimensions), dtype=dtype) 180 | else: 181 | all_tensor = np.full(tuple(dimensions), pad, dtype=dtype) 182 | if ndim == 1: 183 | for ind, tensor in enumerate(tensor_list): 184 | all_tensor[ind*islice:(ind+1)*islice] = tensor 185 | elif ndim == 2: 186 | for ind, tensor in enumerate(tensor_list): 187 | all_tensor[ind*islice:(ind+1)*islice, :tensor.shape[1]] = tensor 188 | elif ndim == 3: 189 | for ind, tensor in enumerate(tensor_list): 190 | all_tensor[ind*islice:(ind+1)*islice, :tensor.shape[1], :tensor.shape[2]] = tensor 191 | elif ndim == 4: 192 | for ind, tensor in enumerate(tensor_list): 193 | all_tensor[ind*islice:(ind+1)*islice, :tensor.shape[1], :tensor.shape[2], :tensor.shape[3]] = tensor 194 | else: 195 | raise Exception('Sorry, unimplemented.') 196 | return all_tensor 197 | -------------------------------------------------------------------------------- /lib/utils/load_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu 6 | # Written by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | 10 | import numpy as np 11 | from dataset import * 12 |
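# Usage sketch (illustrative; the dataset name and paths here are assumptions,
# not values taken from this repo's configs):
#   roidb = load_gt_roidb('coco', 'train2014', './data', './data/coco', flip=True)
#   roidb = filter_roidb(merge_roidb([roidb]), config)
# 'coco' must resolve to a dataset class brought in by `from dataset import *`
# above; merge_roidb and filter_roidb below are how the training entry points
# combine and prune the per-image records.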
13 | 14 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 15 | flip=False): 16 | """ load ground truth roidb """ 17 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 18 | roidb = imdb.gt_roidb() 19 | if flip: 20 | roidb = imdb.append_flipped_images(roidb) 21 | return roidb 22 | 23 | 24 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, rpn_path=None, 25 | proposal='rpn', append_gt=True, flip=False, top_roi=-1): 26 | """ load proposal roidb (append_gt when training) """ 27 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path, rpn_path) 28 | 29 | gt_roidb = imdb.gt_roidb() 30 | roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb, append_gt, top_roi) 31 | if flip: 32 | roidb = imdb.append_flipped_images(roidb) 33 | return roidb 34 | 35 | 36 | 37 | def merge_roidb(roidbs): 38 | """ roidbs are lists; concatenate them together """ 39 | roidb = roidbs[0] 40 | for r in roidbs[1:]: 41 | roidb.extend(r) 42 | return roidb 43 | 44 | 45 | def filter_roidb(roidb, config): 46 | """ remove roidb entries without usable rois """ 47 | 48 | def is_valid(entry): 49 | """ valid images have at least 1 fg or bg roi """ 50 | 51 | if all(entry['gt_classes'] == 0): 52 | valid = False 53 | else: 54 | overlaps = entry['max_overlaps'] 55 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 56 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 57 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 58 | return valid 59 | 60 | num = len(roidb) 61 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 62 | num_after = len(filtered_roidb) 63 | print('filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) 64 | 65 | return filtered_roidb 66 | -------------------------------------------------------------------------------- /lib/utils/load_model.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | 9 | import mxnet as mx 10 | 11 | 12 | def load_checkpoint(prefix, epoch): 13 | """ 14 | Load model checkpoint from file. 15 | :param prefix: Prefix of model name. 16 | :param epoch: Epoch number of model we would like to load. 17 | :return: (arg_params, aux_params) 18 | arg_params : dict of str to NDArray 19 | Model parameter, dict of name to NDArray of net's weights. 20 | aux_params : dict of str to NDArray 21 | Model parameter, dict of name to NDArray of net's auxiliary states.
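Example (illustrative; the prefix is hypothetical): load_checkpoint('model/rcnn_coco', 8)
reads 'model/rcnn_coco-0008.params', following the '%s-%04d.params' naming used below.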
22 | """ 23 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 24 | arg_params = {} 25 | aux_params = {} 26 | for k, v in save_dict.items(): 27 | tp, name = k.split(':', 1) 28 | if tp == 'arg': 29 | arg_params[name] = v 30 | if tp == 'aux': 31 | aux_params[name] = v 32 | return arg_params, aux_params 33 | 34 | 35 | def convert_context(params, ctx): 36 | """ 37 | :param params: dict of str to NDArray 38 | :param ctx: the context to convert to 39 | :return: dict of str of NDArray with context ctx 40 | """ 41 | new_params = dict() 42 | for k, v in params.items(): 43 | new_params[k] = v.as_in_context(ctx) 44 | return new_params 45 | 46 | 47 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 48 | """ 49 | wrapper for load checkpoint 50 | :param prefix: Prefix of model name. 51 | :param epoch: Epoch number of model we would like to load. 52 | :param convert: reference model should be converted to GPU NDArray first 53 | :param ctx: if convert then ctx must be designated. 54 | :param process: model should drop any test 55 | :return: (arg_params, aux_params) 56 | """ 57 | arg_params, aux_params = load_checkpoint(prefix, epoch) 58 | if convert: 59 | if ctx is None: 60 | ctx = mx.cpu() 61 | arg_params = convert_context(arg_params, ctx) 62 | aux_params = convert_context(aux_params, ctx) 63 | if process: 64 | tests = [k for k in arg_params.keys() if '_test' in k] 65 | for test in tests: 66 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 67 | return arg_params, aux_params 68 | -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import logging 9 | from mxnet.lr_scheduler import LRScheduler 10 | 11 | class WarmupMultiFactorScheduler(LRScheduler): 12 | """Reduce learning rate in factor at steps specified in a list 13 | 14 | Assume the weight has been updated by n times, then the learning rate will 15 | be 16 | 17 | base_lr * factor^(sum((step/n)<=1)) # step is an array 18 | 19 | Parameters 20 | ---------- 21 | step: list of int 22 | schedule learning rate after n updates 23 | factor: float 24 | the factor for reducing the learning rate 25 | """ 26 | def __init__(self, step, factor=1, warmup=False, warmup_lr=0, warmup_step=0): 27 | super(WarmupMultiFactorScheduler, self).__init__() 28 | assert isinstance(step, list) and len(step) >= 1 29 | for i, _step in enumerate(step): 30 | if i != 0 and step[i] <= step[i-1]: 31 | raise ValueError("Schedule step must be an increasing integer list") 32 | if _step < 1: 33 | raise ValueError("Schedule step must be greater or equal than 1 round") 34 | if factor > 1.0: 35 | raise ValueError("Factor must be no more than 1 to make lr reduce") 36 | self.step = step 37 | self.cur_step_ind = 0 38 | self.factor = factor 39 | self.count = 0 40 | self.warmup = warmup 41 | self.warmup_lr = warmup_lr 42 | self.warmup_step = warmup_step 43 | 44 | def __call__(self, num_update): 45 | """ 46 | Call to schedule current learning rate 47 | 48 | Parameters 49 | ---------- 50 | num_update: int 51 | the maximal number of updates applied to a weight. 
52 | """ 53 | 54 | # NOTE: use while rather than if (for continuing training via load_epoch) 55 | if self.warmup and num_update < self.warmup_step: 56 | return self.warmup_lr 57 | while self.cur_step_ind <= len(self.step)-1: 58 | if num_update > self.step[self.cur_step_ind]: 59 | self.count = self.step[self.cur_step_ind] 60 | self.cur_step_ind += 1 61 | self.base_lr *= self.factor 62 | logging.info("Update[%d]: Change learning rate to %0.5e", 63 | num_update, self.base_lr) 64 | else: 65 | return self.base_lr 66 | return self.base_lr 67 | -------------------------------------------------------------------------------- /lib/utils/symbol.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu 6 | # Written by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | class Symbol: 11 | def __init__(self): 12 | self.arg_shape_dict = None 13 | self.out_shape_dict = None 14 | self.aux_shape_dict = None 15 | self.sym = None 16 | 17 | @property 18 | def symbol(self): 19 | return self.sym 20 | 21 | def get_symbol(self, cfg, is_train=True): 22 | """ 23 | return a generated symbol, it also need to be assigned to self.sym 24 | """ 25 | raise NotImplementedError() 26 | 27 | def init_weights(self, cfg, arg_params, aux_params): 28 | raise NotImplementedError() 29 | 30 | def get_msra_std(self, shape): 31 | fan_in = float(shape[1]) 32 | if len(shape) > 2: 33 | fan_in *= np.prod(shape[2:]) 34 | print(np.sqrt(2 / fan_in)) 35 | return np.sqrt(2 / fan_in) 36 | 37 | def infer_shape(self, data_shape_dict): 38 | # infer shape 39 | arg_shape, out_shape, aux_shape = self.sym.infer_shape(**data_shape_dict) 40 | self.arg_shape_dict = dict(zip(self.sym.list_arguments(), arg_shape)) 41 | self.out_shape_dict = dict(zip(self.sym.list_outputs(), out_shape)) 42 | self.aux_shape_dict = dict(zip(self.sym.list_auxiliary_states(), aux_shape)) 43 | 44 | def check_parameter_shapes(self, arg_params, aux_params, data_shape_dict, is_train=True): 45 | for k in self.sym.list_arguments(): 46 | if k in data_shape_dict or (False if is_train else 'label' in k): 47 | continue 48 | assert k in arg_params, k + ' not initialized' 49 | assert arg_params[k].shape == self.arg_shape_dict[k], \ 50 | 'shape inconsistent for ' + k + ' inferred ' + str(self.arg_shape_dict[k]) + ' provided ' + str( 51 | arg_params[k].shape) 52 | for k in self.sym.list_auxiliary_states(): 53 | assert k in aux_params, k + ' not initialized' 54 | assert aux_params[k].shape == self.aux_shape_dict[k], \ 55 | 'shape inconsistent for ' + k + ' inferred ' + str(self.aux_shape_dict[k]) + ' provided ' + str( 56 | aux_params[k].shape) 57 | -------------------------------------------------------------------------------- /relation_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/relation_rcnn/__init__.py -------------------------------------------------------------------------------- /relation_rcnn/_init_paths.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object 
Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Jiayuan Gu, Dazhi Cheng 6 | # -------------------------------------------------------- 7 | 8 | import os.path as osp 9 | import sys 10 | 11 | def add_path(path): 12 | if path not in sys.path: 13 | sys.path.insert(0, path) 14 | 15 | this_dir = osp.dirname(__file__) 16 | 17 | lib_path = osp.join(this_dir, '..', 'lib') 18 | add_path(lib_path) 19 | -------------------------------------------------------------------------------- /relation_rcnn/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/relation_rcnn/config/__init__.py -------------------------------------------------------------------------------- /relation_rcnn/config/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu, Yuwen Xiong, Bin Xiao 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # py-faster-rcnn 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The MIT License 11 | # py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 12 | # -------------------------------------------------------- 13 | 14 | import yaml 15 | import numpy as np 16 | from easydict import EasyDict as edict 17 | 18 | config = edict() 19 | 20 | config.MXNET_VERSION = '' 21 | config.output_path = '' 22 | config.symbol = '' 23 | config.gpus = '' 24 | config.CLASS_AGNOSTIC = True 25 | config.SCALES = [(600, 1000)] # first is scale (the shorter side); second is max size 26 | 27 | # default training 28 | config.default = edict() 29 | config.default.frequent = 20 30 | config.default.kvstore = 'device' 31 | 32 | # network related params 33 | config.network = edict() 34 | config.network.pretrained = '' 35 | config.network.pretrained_epoch = 0 36 | config.network.PIXEL_MEANS = np.array([0, 0, 0]) 37 | config.network.IMAGE_STRIDE = 0 38 | config.network.RPN_FEAT_STRIDE = 16 39 | config.network.RCNN_FEAT_STRIDE = 16 40 | config.network.FIXED_PARAMS = ['gamma', 'beta'] 41 | config.network.FIXED_PARAMS_SHARED = ['gamma', 'beta'] 42 | config.network.ANCHOR_SCALES = (8, 16, 32) 43 | config.network.ANCHOR_RATIOS = (0.5, 1, 2) 44 | config.network.NUM_ANCHORS = len(config.network.ANCHOR_SCALES) * len(config.network.ANCHOR_RATIOS) 45 | config.network.ROIDispatch = False 46 | config.network.USE_NONGT_INDEX = False 47 | config.network.NMS_TARGET_THRESH = '0.5' 48 | 49 | # dataset related params 50 | config.dataset = edict() 51 | config.dataset.dataset = 'PascalVOC' 52 | config.dataset.image_set = '2007_trainval' 53 | config.dataset.test_image_set = '2007_test' 54 | config.dataset.root_path = './data' 55 | config.dataset.dataset_path = './data/VOCdevkit' 56 | config.dataset.NUM_CLASSES = 21 57 | 58 | 59 | config.TRAIN = edict() 60 | 61 | config.TRAIN.lr = 0 62 | config.TRAIN.lr_step = '' 63 | config.TRAIN.lr_factor = 0.1 64 | config.TRAIN.warmup = False 65 | config.TRAIN.warmup_lr = 0 66 | config.TRAIN.warmup_step = 0 67 | config.TRAIN.momentum = 0.9 68 | config.TRAIN.wd = 0.0005 69 | config.TRAIN.begin_epoch = 0 70 | config.TRAIN.end_epoch = 0 71 | 
config.TRAIN.model_prefix = '' 72 | config.TRAIN.rpn_loss_scale = 3.0 73 | config.TRAIN.nms_loss_scale = 1.0 74 | config.TRAIN.nms_pos_scale = 4.0 75 | 76 | config.TRAIN.ALTERNATE = edict() 77 | config.TRAIN.ALTERNATE.RPN_BATCH_IMAGES = 0 78 | config.TRAIN.FC_DROPOUT_RATIO = 0 79 | config.TRAIN.ATTENTION_DROPOUT_RATIO = 0 80 | config.TRAIN.ATTENTION_SCALE_METHOD = 0 81 | # whether resume training 82 | config.TRAIN.RESUME = False 83 | # whether flip image 84 | config.TRAIN.FLIP = True 85 | # whether shuffle image 86 | config.TRAIN.SHUFFLE = True 87 | # whether use OHEM 88 | config.TRAIN.ENABLE_OHEM = False 89 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 90 | config.TRAIN.BATCH_IMAGES = 2 91 | # e2e changes behavior of anchor loader and metric 92 | config.TRAIN.END2END = False 93 | # group images with similar aspect ratio 94 | config.TRAIN.ASPECT_GROUPING = True 95 | 96 | # R-CNN 97 | # rcnn rois batch size 98 | config.TRAIN.TOP_ROIS = -1 99 | config.TRAIN.BATCH_ROIS = 128 100 | config.TRAIN.BATCH_ROIS_OHEM = 128 101 | # rcnn rois sampling params 102 | config.TRAIN.FG_FRACTION = 0.25 103 | config.TRAIN.FG_THRESH = 0.5 104 | config.TRAIN.BG_THRESH_HI = 0.5 105 | config.TRAIN.BG_THRESH_LO = 0.0 106 | # rcnn bounding box regression params 107 | config.TRAIN.BBOX_REGRESSION_THRESH = 0.5 108 | config.TRAIN.BBOX_WEIGHTS = np.array([1.0, 1.0, 1.0, 1.0]) 109 | 110 | # RPN anchor loader 111 | # rpn anchors batch size 112 | config.TRAIN.RPN_BATCH_SIZE = 256 113 | # rpn anchors sampling params 114 | config.TRAIN.RPN_FG_FRACTION = 0.5 115 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 116 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 117 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 118 | # rpn bounding box regression params 119 | config.TRAIN.RPN_BBOX_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 120 | config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 121 | 122 | # used for end2end training 123 | # RPN proposal 124 | config.TRAIN.CXX_PROPOSAL = True 125 | config.TRAIN.RPN_NMS_THRESH = 0.7 126 | config.TRAIN.RPN_PRE_NMS_TOP_N = 12000 127 | config.TRAIN.RPN_POST_NMS_TOP_N = 2000 128 | config.TRAIN.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 129 | # approximate bounding box regression 130 | config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = False 131 | config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0) 132 | config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2) 133 | # Learn NMS 134 | config.TRAIN.LEARN_NMS = False 135 | config.TRAIN.JOINT_TRAINING = False 136 | config.TRAIN.FIRST_N = 100 137 | 138 | config.TEST = edict() 139 | 140 | # R-CNN testing 141 | # use rpn to generate proposal 142 | config.TEST.HAS_RPN = False 143 | # size of images for each device 144 | config.TEST.BATCH_IMAGES = 1 145 | config.TEST.TOP_ROIS = 2000 146 | 147 | # RPN proposal 148 | config.TEST.CXX_PROPOSAL = True 149 | config.TEST.RPN_NMS_THRESH = 0.7 150 | config.TEST.RPN_PRE_NMS_TOP_N = 6000 151 | config.TEST.RPN_POST_NMS_TOP_N = 300 152 | config.TEST.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 153 | 154 | # RPN generate proposal 155 | config.TEST.PROPOSAL_NMS_THRESH = 0.7 156 | config.TEST.PROPOSAL_PRE_NMS_TOP_N = 20000 157 | config.TEST.PROPOSAL_POST_NMS_TOP_N = 2000 158 | config.TEST.PROPOSAL_MIN_SIZE = config.network.RPN_FEAT_STRIDE 159 | 160 | # whether to use softnms 161 | config.TEST.SOFTNMS = False 162 | # whether to use LEARN_NMS 163 | config.TEST.LEARN_NMS = False 164 | config.TEST.FIRST_N = 0 165 | config.TEST.MERGE_METHOD = -1 166 | # RCNN nms 167 | config.TEST.NMS = 0.3 168 | 169 | config.TEST.max_per_image = 300 170 | 171 | # Test Model 
Epoch 172 | config.TEST.test_epoch = 0 173 | # increasing this threshold will speed up the test-time learn-nms module, but may hurt performance 174 | config.TEST.LEARN_NMS_CLASS_SCORE_TH = 0.01 175 | 176 | 177 | def update_config(config_file): 178 | exp_config = None 179 | with open(config_file) as f: 180 | exp_config = edict(yaml.load(f)) 181 | for k, v in exp_config.items(): 182 | if k in config: 183 | if isinstance(v, dict): 184 | if k == 'TRAIN': 185 | if 'BBOX_WEIGHTS' in v: 186 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 187 | elif k == 'network': 188 | if 'PIXEL_MEANS' in v: 189 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 190 | for vk, vv in v.items(): 191 | config[k][vk] = vv 192 | else: 193 | if k == 'SCALES': 194 | config[k][0] = tuple(v) 195 | else: 196 | config[k] = v 197 | else: 198 | raise ValueError("key '{}' must exist in config.py".format(k)) 199 | -------------------------------------------------------------------------------- /relation_rcnn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/relation_rcnn/core/__init__.py -------------------------------------------------------------------------------- /relation_rcnn/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Dazhi Cheng, Jiayuan Gu, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import time 15 | import logging 16 | import mxnet as mx 17 | 18 | 19 | class Speedometer(object): 20 | def __init__(self, batch_size, frequent=50): 21 | self.batch_size = batch_size 22 | self.frequent = frequent 23 | self.init = False 24 | self.tic = 0 25 | self.last_count = 0 26 | 27 | def __call__(self, param): 28 | """Callback to show speed.""" 29 | count = param.nbatch 30 | if self.last_count > count: 31 | self.init = False 32 | self.last_count = count 33 | 34 | if self.init: 35 | if count % self.frequent == 0: 36 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 37 | s = '' 38 | if param.eval_metric is not None: 39 | name, value = param.eval_metric.get() 40 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 41 | for n, v in zip(name, value): 42 | s += "%s=%f,\t" % (n, v) 43 | else: 44 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 45 | 46 | logging.info(s) 47 | print(s) 48 | self.tic = time.time() 49 | else: 50 | self.init = True 51 | self.tic = time.time() 52 | 53 | 54 | def do_checkpoint(prefix, means, stds): 55 | def _callback(iter_no, sym, arg, aux): 56 | arg['bbox_pred_weight_test'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T 57 | arg['bbox_pred_bias_test'] = arg['bbox_pred_bias'] * mx.nd.array(stds) + mx.nd.array(means) 58 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 59 | arg.pop('bbox_pred_weight_test') 60 | arg.pop('bbox_pred_bias_test') 61 | return _callback 62 |
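# Note: do_checkpoint saves extra 'bbox_pred_weight_test'/'bbox_pred_bias_test' entries
# with the bbox-regression normalization (stds, means) folded in, so a deployed model can
# emit un-normalized box deltas directly; the temporaries are popped afterwards so the
# training parameters stay untouched. A minimal usage sketch (the prefix is hypothetical;
# means/stds come from add_bbox_regression_targets, as in train_rcnn.py):
#   cb = do_checkpoint('output/rcnn_coco', means, stds)
#   mod.fit(train_data, epoch_end_callback=[cb], ...)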
-------------------------------------------------------------------------------- /relation_rcnn/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/relation_rcnn/function/__init__.py -------------------------------------------------------------------------------- /relation_rcnn/function/test_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | 15 | import argparse 16 | import pprint 17 | import logging 18 | import time 19 | import os 20 | import mxnet as mx 21 | 22 | from symbols import * 23 | from dataset import * 24 | from core.loader import TestLoader 25 | from core.tester import Predictor, pred_eval 26 | from utils.load_model import load_param 27 | 28 | 29 | def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, 30 | ctx, prefix, epoch, 31 | vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None): 32 | if not logger: 33 | assert False, 'require a logger' 34 | 35 | # print cfg 36 | pprint.pprint(cfg) 37 | logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg))) 38 | 39 | # load symbol and testing data 40 | if has_rpn: 41 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 42 | sym = sym_instance.get_symbol(cfg, is_train=False) 43 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 44 | roidb = imdb.gt_roidb() 45 | else: 46 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 47 | sym = sym_instance.get_symbol_rcnn(cfg, is_train=False) 48 | rpn_path = cfg.dataset.proposal_cache 49 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path, rpn_path=rpn_path) 50 | gt_roidb = imdb.gt_roidb() 51 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb, top_roi=cfg.TEST.TOP_ROIS) 52 | 53 | # get test data iter 54 | test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn) 55 | 56 | # load model 57 | arg_params, aux_params = load_param(prefix, epoch, process=True) 58 | 59 | # infer shape 60 | data_shape_dict = dict(test_data.provide_data_single) 61 | #sym_instance.infer_shape(data_shape_dict) 62 | 63 | #sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 64 | 65 | # decide maximum shape 66 | data_names = [k[0] for k in test_data.provide_data_single] 67 | label_names = None 68 | #max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] 69 | max_height = max([v[0] for v in cfg.SCALES]) 70 | max_width = max([v[1] for v in cfg.SCALES]) 71 | if cfg.network.IMAGE_STRIDE > 0: 72 | max_height = max_height + cfg.network.IMAGE_STRIDE - max_height%cfg.network.IMAGE_STRIDE 73 | max_width = max_width + cfg.network.IMAGE_STRIDE - max_width % cfg.network.IMAGE_STRIDE 74 | 75 | max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max_height, max_width))] 76 | 77 | if not has_rpn: 78 | #max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 79 | if cfg.network.ROIDispatch: 80 | max_data_shape.append(('rois_0', (1, cfg.TEST.PROPOSAL_POST_NMS_TOP_N/4, 5))) 81 | max_data_shape.append(('rois_1', (1, cfg.TEST.PROPOSAL_POST_NMS_TOP_N/4, 5))) 82 | max_data_shape.append(('rois_2', (1, cfg.TEST.PROPOSAL_POST_NMS_TOP_N/4, 5))) 83 | max_data_shape.append(('rois_3', (1, cfg.TEST.PROPOSAL_POST_NMS_TOP_N/4, 5))) 84 | 85 | max_data_shape = [max_data_shape] 86 | # create predictor 87 | #test_data.provide_label 88 | predictor = Predictor(sym, data_names, label_names, 89 | context=ctx, max_data_shapes=max_data_shape, 90 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 91 | arg_params=arg_params, aux_params=aux_params) 92 | 93 | # start detection 94 | pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger) 95 | 96 | -------------------------------------------------------------------------------- /relation_rcnn/function/test_rpn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import pprint 16 | import logging 17 | import mxnet as mx 18 | 19 | from symbols import * 20 | from dataset import * 21 | from core.loader import TestLoader 22 | from core.tester import Predictor, generate_proposals 23 | from utils.load_model import load_param 24 | 25 | 26 | def test_rpn(cfg, dataset, image_set, root_path, dataset_path, 27 | ctx, prefix, epoch, 28 | vis, shuffle, thresh, logger=None, output_path=None): 29 | # set up logger 30 | if not logger: 31 | logging.basicConfig() 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | 35 | # rpn generate proposal cfg 36 | cfg.TEST.HAS_RPN = True 37 | 38 | # print cfg 39 | 
pprint.pprint(cfg) 40 | logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg))) 41 | 42 | # load symbol 43 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 44 | sym = sym_instance.get_symbol_rpn(cfg, is_train=False) 45 | 46 | # load dataset and prepare imdb for training 47 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 48 | roidb = imdb.gt_roidb() 49 | test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True) 50 | 51 | # load model 52 | arg_params, aux_params = load_param(prefix, epoch) 53 | 54 | # infer shape 55 | data_shape_dict = dict(test_data.provide_data_single) 56 | sym_instance.infer_shape(data_shape_dict) 57 | 58 | # check parameters 59 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 60 | 61 | # decide maximum shape 62 | data_names = [k[0] for k in test_data.provide_data[0]] 63 | label_names = None if test_data.provide_label[0] is None else [k[0] for k in test_data.provide_label[0]] 64 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] 65 | 66 | # create predictor 67 | predictor = Predictor(sym, data_names, label_names, 68 | context=ctx, max_data_shapes=max_data_shape, 69 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 70 | arg_params=arg_params, aux_params=aux_params) 71 | 72 | # start testing 73 | imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh) 74 | 75 | all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 76 | logger.info(all_log_info) 77 | -------------------------------------------------------------------------------- /relation_rcnn/function/train_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import logging 16 | import pprint 17 | import os 18 | import mxnet as mx 19 | import numpy as np 20 | 21 | from symbols import * 22 | from core import callback, metric 23 | from core.loader import ROIIter 24 | from core.module import MutableModule 25 | from bbox.bbox_regression import add_bbox_regression_targets 26 | from utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 27 | from utils.load_model import load_param 28 | from utils.PrefetchingIter import PrefetchingIterV2 as PrefetchingIter 29 | from utils.lr_scheduler import WarmupMultiFactorScheduler 30 | 31 | 32 | def train_rcnn(cfg, dataset, image_set, root_path, dataset_path, 33 | frequent, kvstore, flip, shuffle, resume, 34 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 35 | train_shared, lr, lr_step, proposal, logger=None, output_path=None): 36 | mx.random.seed(0) 37 | np.random.seed(0) 38 | # set up logger 39 | if not logger: 40 | logging.basicConfig() 41 | logger = logging.getLogger() 42 | logger.setLevel(logging.INFO) 43 | 44 | # load symbol 45 | sym_instance = eval(cfg.symbol + '.' 
+ cfg.symbol)() 46 | sym = sym_instance.get_symbol_rcnn(cfg, is_train=True) 47 | 48 | # setup multi-gpu 49 | batch_size = len(ctx) 50 | input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size 51 | 52 | # print cfg 53 | pprint.pprint(cfg) 54 | logger.info('training rcnn cfg:{}\n'.format(pprint.pformat(cfg))) 55 | 56 | rpn_path = cfg.dataset.proposal_cache 57 | # load dataset and prepare imdb for training 58 | image_sets = [iset for iset in image_set.split('+')] 59 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 60 | proposal=proposal, append_gt=True, flip=flip, result_path=output_path, 61 | rpn_path=rpn_path, top_roi=cfg.TRAIN.TOP_ROIS) 62 | for image_set in image_sets] 63 | roidb = merge_roidb(roidbs) 64 | roidb = filter_roidb(roidb, cfg) 65 | means, stds = add_bbox_regression_targets(roidb, cfg) 66 | 67 | # load training data 68 | train_data = ROIIter(roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, 69 | ctx=ctx, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) 70 | 71 | # infer max shape 72 | max_height = max([v[0] for v in cfg.SCALES]) 73 | max_width = max([v[1] for v in cfg.SCALES]) 74 | padded_max_height = max_height + cfg.network.IMAGE_STRIDE - max_height % cfg.network.IMAGE_STRIDE 75 | padded_max_width = max_width + cfg.network.IMAGE_STRIDE - max_width % cfg.network.IMAGE_STRIDE 76 | 77 | max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, padded_max_height, padded_max_width))] 78 | # infer shape 79 | data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 80 | sym_instance.infer_shape(data_shape_dict) 81 | # print shape 82 | pprint.pprint(sym_instance.arg_shape_dict) 83 | logging.info(pprint.pformat(sym_instance.arg_shape_dict)) 84 | 85 | max_batch_roi = cfg.TRAIN.TOP_ROIS if cfg.TRAIN.BATCH_ROIS == -1 else cfg.TRAIN.BATCH_ROIS 86 | num_class = 2 if cfg.CLASS_AGNOSTIC else cfg.dataset.NUM_CLASSES 87 | max_label_shape = [('label', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi)), 88 | ('bbox_target', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi, num_class * 4)), 89 | ('bbox_weight', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi, num_class * 4))] 90 | 91 | if cfg.network.USE_NONGT_INDEX: 92 | max_label_shape.append(('nongt_index', (2000,))) 93 | 94 | if cfg.network.ROIDispatch: 95 | max_data_shape.append(('rois_0', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi / 4, 5))) 96 | max_data_shape.append(('rois_1', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi / 4, 5))) 97 | max_data_shape.append(('rois_2', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi / 4, 5))) 98 | max_data_shape.append(('rois_3', (cfg.TRAIN.BATCH_IMAGES, max_batch_roi / 4, 5))) 99 | else: 100 | max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 101 | 102 | #dot = mx.viz.plot_network(sym, node_attrs={'shape': 'rect', 'fixedsize': 'false'}) 103 | #dot.render(os.path.join('./output/rcnn/network_vis', cfg.symbol + cfg.TRAIN.model_prefix)) 104 | 105 | # load and initialize params 106 | if resume: 107 | print('continue training from ', begin_epoch) 108 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 109 | else: 110 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 111 | sym_instance.init_weight_rcnn(cfg, arg_params, aux_params) 112 | 113 | # check parameter shapes 114 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 115 | 116 | # prepare training 117 | # create solver 118 | data_names = [k[0] for k in train_data.provide_data_single] 119 | label_names = [k[0] for k in train_data.provide_label_single] 120 |
if train_shared: 121 | fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED 122 | else: 123 | fixed_param_prefix = cfg.network.FIXED_PARAMS 124 | 125 | if cfg.network.ROIDispatch: 126 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 127 | logger=logger, context=ctx, 128 | max_data_shapes=[max_data_shape for _ in range(batch_size)], 129 | max_label_shapes=[max_label_shape for _ in range(batch_size)], 130 | fixed_param_prefix=fixed_param_prefix) 131 | else: 132 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 133 | logger=logger, context=ctx, 134 | max_data_shapes=[max_data_shape for _ in range(batch_size)], 135 | max_label_shapes=[max_label_shape for _ in range(batch_size)], 136 | fixed_param_prefix=fixed_param_prefix) 137 | if cfg.TRAIN.RESUME: 138 | mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) 139 | 140 | # decide training params 141 | # metric 142 | eval_metric = metric.RCNNAccMetric(cfg) 143 | cls_metric = metric.RCNNLogLossMetric(cfg) 144 | bbox_metric = metric.RCNNL1LossMetric(cfg) 145 | eval_metrics = mx.metric.CompositeEvalMetric() 146 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 147 | eval_metrics.add(child_metric) 148 | if cfg.TRAIN.LEARN_NMS: 149 | eval_metrics.add(metric.NMSLossMetric(cfg, 'pos')) 150 | eval_metrics.add(metric.NMSLossMetric(cfg, 'neg')) 151 | eval_metrics.add(metric.NMSAccMetric(cfg)) 152 | # callback 153 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) 154 | epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), 155 | callback.do_checkpoint(prefix, means, stds)] 156 | # decide learning rate 157 | base_lr = lr 158 | lr_factor = cfg.TRAIN.lr_factor 159 | lr_epoch = [float(epoch) for epoch in lr_step.split(',')] 160 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 161 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 162 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 163 | print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) 164 | lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, 165 | cfg.TRAIN.warmup_step) 166 | # optimizer 167 | optimizer_params = {'momentum': cfg.TRAIN.momentum, 168 | 'wd': cfg.TRAIN.wd, 169 | 'learning_rate': lr, 170 | 'lr_scheduler': lr_scheduler, 171 | 'rescale_grad': 1.0, 172 | 'clip_gradient': None} 173 | 174 | # train 175 | 176 | if not isinstance(train_data, PrefetchingIter): 177 | train_data = PrefetchingIter(train_data) 178 | 179 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 180 | batch_end_callback=batch_end_callback, kvstore=kvstore, 181 | optimizer='sgd', optimizer_params=optimizer_params, 182 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 183 | 184 | -------------------------------------------------------------------------------- /relation_rcnn/function/train_rpn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 
by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import argparse 15 | import logging 16 | import pprint 17 | import mxnet as mx 18 | 19 | from symbols import * 20 | from core import callback, metric 21 | from core.loader import AnchorLoader 22 | from core.module import MutableModule 23 | from utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 24 | from utils.load_model import load_param 25 | from utils.PrefetchingIter import PrefetchingIterV2 as PrefetchingIter 26 | from utils.lr_scheduler import WarmupMultiFactorScheduler 27 | 28 | 29 | def train_rpn(cfg, dataset, image_set, root_path, dataset_path, 30 | frequent, kvstore, flip, shuffle, resume, 31 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 32 | train_shared, lr, lr_step, logger=None, output_path=None): 33 | # set up logger 34 | if not logger: 35 | logging.basicConfig() 36 | logger = logging.getLogger() 37 | logger.setLevel(logging.INFO) 38 | 39 | # set up config 40 | cfg.TRAIN.BATCH_IMAGES = cfg.TRAIN.ALTERNATE.RPN_BATCH_IMAGES 41 | 42 | # load symbol 43 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 44 | sym = sym_instance.get_symbol_rpn(cfg, is_train=True) 45 | feat_sym = sym.get_internals()['rpn_cls_score_output'] 46 | 47 | # setup multi-gpu 48 | batch_size = len(ctx) 49 | input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size 50 | 51 | # print cfg 52 | pprint.pprint(cfg) 53 | logger.info('training rpn cfg:{}\n'.format(pprint.pformat(cfg))) 54 | 55 | # load dataset and prepare imdb for training 56 | image_sets = [iset for iset in image_set.split('+')] 57 | roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, result_path=output_path, 58 | flip=flip) 59 | for image_set in image_sets] 60 | roidb = merge_roidb(roidbs) 61 | roidb = filter_roidb(roidb, cfg) 62 | 63 | # load training data 64 | train_data = AnchorLoader(feat_sym, roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, 65 | ctx=ctx, feat_stride=cfg.network.RPN_FEAT_STRIDE, anchor_scales=cfg.network.ANCHOR_SCALES, 66 | anchor_ratios=cfg.network.ANCHOR_RATIOS, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) 67 | 68 | # infer max shape 69 | max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))] 70 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 71 | print('providing maximum shape', max_data_shape, max_label_shape) 72 | 73 | # infer shape 74 | data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 75 | sym_instance.infer_shape(data_shape_dict) 76 | 77 | # load and initialize params 78 | if resume: 79 | print('continue training from ', begin_epoch) 80 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 81 | else: 82 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 83 | sym_instance.init_weight_rpn(cfg, arg_params, aux_params) 84 | 85 | # check parameter shapes 86 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 87 | 88 | # create solver 89 | data_names = [k[0] for k in train_data.provide_data_single] 90 | label_names = [k[0] for k in train_data.provide_label_single] 91 | if train_shared: 92 | fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED 93 | else: 94 | fixed_param_prefix = cfg.network.FIXED_PARAMS 95 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 96 | logger=logger, 
context=ctx, max_data_shapes=[max_data_shape for _ in xrange(batch_size)], 97 | max_label_shapes=[max_label_shape for _ in xrange(batch_size)], fixed_param_prefix=fixed_param_prefix) 98 | 99 | # decide training params 100 | # metric 101 | eval_metric = metric.RPNAccMetric() 102 | cls_metric = metric.RPNLogLossMetric() 103 | bbox_metric = metric.RPNL1LossMetric() 104 | eval_metrics = mx.metric.CompositeEvalMetric() 105 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 106 | eval_metrics.add(child_metric) 107 | # callback 108 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) 109 | # epoch_end_callback = mx.callback.do_checkpoint(prefix) 110 | epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True) 111 | # decide learning rate 112 | base_lr = lr 113 | lr_factor = cfg.TRAIN.lr_factor 114 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 115 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 116 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 117 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 118 | print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) 119 | lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step) 120 | # optimizer 121 | optimizer_params = {'momentum': cfg.TRAIN.momentum, 122 | 'wd': cfg.TRAIN.wd, 123 | 'learning_rate': lr, 124 | 'lr_scheduler': lr_scheduler, 125 | 'rescale_grad': 1.0, 126 | 'clip_gradient': None} 127 | 128 | if not isinstance(train_data, PrefetchingIter): 129 | train_data = PrefetchingIter(train_data) 130 | 131 | # train 132 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 133 | batch_end_callback=batch_end_callback, kvstore=kvstore, 134 | optimizer='sgd', optimizer_params=optimizer_params, 135 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 136 | 137 | -------------------------------------------------------------------------------- /relation_rcnn/operator_cxx/deformable_convolution.cc: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_convolution.cc 5 | * \brief 6 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai 7 | */ 8 | 9 | #include "./deformable_convolution-inl.h" 10 | 11 | namespace mxnet { 12 | namespace op { 13 | DMLC_REGISTER_PARAMETER(DeformableConvolutionParam); 14 | 15 | template<> 16 | Operator* CreateOp<cpu>(DeformableConvolutionParam param, int dtype, 17 | std::vector<TShape> *in_shape, 18 | std::vector<TShape> *out_shape, 19 | Context ctx) { 20 | Operator *op = NULL; 21 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 22 | op = new DeformableConvolutionOp<cpu, DType>(param); 23 | }) 24 | return op; 25 | } 26 | 27 | // DO_BIND_DISPATCH comes from operator_common.h 28 | Operator *DeformableConvolutionProp::CreateOperatorEx(Context ctx, 29 | std::vector<TShape> *in_shape, 30 | std::vector<int> *in_type) const { 31 | std::vector<TShape> out_shape, aux_shape; 32 | std::vector<int> out_type, aux_type; 33 | CHECK(InferType(in_type, &out_type, &aux_type)); 34 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 35 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], in_shape, &out_shape, ctx); 36 | } 37 | 38 | MXNET_REGISTER_OP_PROPERTY(_contrib_DeformableConvolution, DeformableConvolutionProp) 39 | .describe(R"code(Compute *N*-D convolution on *(N+2)*-D input. 40 | 41 | In the 2-D convolution, given input data with shape *(batch_size, 42 | channel, height, width)*, the output is computed by 43 | 44 | .. math:: 45 | 46 | out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star 47 | weight[i,j,:,:] 48 | 49 | where :math:`\star` is the 2-D cross-correlation operator. 50 | 51 | For general 2-D convolution, the shapes are 52 | 53 | - **data**: *(batch_size, channel, height, width)* 54 | - **weight**: *(num_filter, channel, kernel[0], kernel[1])* 55 | - **bias**: *(num_filter,)* 56 | - **out**: *(batch_size, num_filter, out_height, out_width)*. 57 | 58 | Define:: 59 | 60 | f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1 61 | 62 | then we have:: 63 | 64 | out_height=f(height, kernel[0], pad[0], stride[0], dilate[0]) 65 | out_width=f(width, kernel[1], pad[1], stride[1], dilate[1]) 66 | 67 | If ``no_bias`` is set to be true, then the ``bias`` term is ignored. 68 | 69 | The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height, 70 | width)*. We can choose other layouts such as *NHWC*. 71 | 72 | If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data`` 73 | evenly into *g* parts along the channel axis, and also evenly split ``weight`` 74 | along the first dimension. Next compute the convolution on the *i*-th part of 75 | the data with the *i*-th weight part. The output is obtained by concatenating all 76 | the *g* results. 77 | 78 | Both ``weight`` and ``bias`` are learnable parameters. 79 | 80 | 81 | )code" ADD_FILELINE) 82 | .add_argument("data", "NDArray-or-Symbol", "Input data to the DeformableConvolutionOp.") 83 | .add_argument("offset", "NDArray-or-Symbol", "Input offset to the DeformableConvolutionOp.") 84 | .add_argument("weight", "NDArray-or-Symbol", "Weight matrix.") 85 | .add_argument("bias", "NDArray-or-Symbol", "Bias parameter.") 86 | .add_arguments(DeformableConvolutionParam::__FIELDS__()); 87 | 88 | } // namespace op 89 | } // namespace mxnet 90 | -------------------------------------------------------------------------------- /relation_rcnn/operator_cxx/deformable_convolution.cu: -------------------------------------------------------------------------------- 1 | /*!
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_convolution.cu 5 | * \brief 6 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai 7 | */ 8 | 9 | #include "./deformable_convolution-inl.h" 10 | #include <vector> 11 | 12 | namespace mxnet { 13 | namespace op { 14 | 15 | template<> 16 | Operator* CreateOp<gpu>(DeformableConvolutionParam param, int dtype, 17 | std::vector<TShape> *in_shape, 18 | std::vector<TShape> *out_shape, 19 | Context ctx) { 20 | Operator *op = NULL; 21 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 22 | op = new DeformableConvolutionOp<gpu, DType>(param); 23 | }) 24 | return op; 25 | } 26 | 27 | } // namespace op 28 | } // namespace mxnet 29 | 30 | -------------------------------------------------------------------------------- /relation_rcnn/operator_cxx/deformable_psroi_pooling.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cc 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | #include "./deformable_psroi_pooling-inl.h" 9 | #include <mshadow/base.h> 10 | #include <mshadow/tensor.h> 11 | #include <mshadow/packet-inl.h> 12 | #include <mshadow/dot_engine-inl.h> 13 | #include <cassert> 14 | 15 | using std::max; 16 | using std::min; 17 | using std::floor; 18 | using std::ceil; 19 | 20 | namespace mshadow { 21 | template<typename DType> 22 | inline void DeformablePSROIPoolForward(const Tensor<cpu, 4, DType> &out, 23 | const Tensor<cpu, 4, DType> &data, 24 | const Tensor<cpu, 2, DType> &bbox, 25 | const Tensor<cpu, 4, DType> &trans, 26 | const Tensor<cpu, 4, DType> &top_count, 27 | const bool no_trans, 28 | const float spatial_scale, 29 | const int output_dim, 30 | const int group_size, 31 | const int pooled_size, 32 | const int part_size, 33 | const int sample_per_part, 34 | const float trans_std) { 35 | // NOT_IMPLEMENTED; 36 | return; 37 | } 38 | 39 | template<typename DType> 40 | inline void DeformablePSROIPoolBackwardAcc(const Tensor<cpu, 4, DType> &in_grad, 41 | const Tensor<cpu, 4, DType> &trans_grad, 42 | const Tensor<cpu, 4, DType> &out_grad, 43 | const Tensor<cpu, 4, DType> &data, 44 | const Tensor<cpu, 2, DType> &bbox, 45 | const Tensor<cpu, 4, DType> &trans, 46 | const Tensor<cpu, 4, DType> &top_count, 47 | const bool no_trans, 48 | const float spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const float trans_std) { 55 | // NOT_IMPLEMENTED; 56 | return; 57 | } 58 | } // namespace mshadow 59 | 60 | namespace mxnet { 61 | namespace op { 62 | 63 | template<> 64 | Operator *CreateOp<cpu>(DeformablePSROIPoolingParam param, int dtype) { 65 | Operator* op = NULL; 66 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 67 | op = new DeformablePSROIPoolingOp<cpu, DType>(param); 68 | }); 69 | return op; 70 | } 71 | 72 | Operator *DeformablePSROIPoolingProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape, 73 | std::vector<int> *in_type) const { 74 | std::vector<TShape> out_shape, aux_shape; 75 | std::vector<int> out_type, aux_type; 76 | CHECK(InferType(in_type, &out_type, &aux_type)); 77 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 78 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 79 | } 80 | 81 | DMLC_REGISTER_PARAMETER(DeformablePSROIPoolingParam); 82 | 83 | MXNET_REGISTER_OP_PROPERTY(_contrib_DeformablePSROIPooling, DeformablePSROIPoolingProp) 84 | .describe("Performs region-of-interest pooling on inputs. Resize bounding box coordinates by " 85 | "spatial_scale and crop input feature maps accordingly. The cropped feature maps are pooled " 86 | "by max pooling to a fixed size output indicated by pooled_size.
87 | "the number of region bounding boxes after DeformablePSROIPooling")
88 | .add_argument("data", "Symbol", "Input data to the pooling operator, a 4D feature map")
89 | .add_argument("rois", "Symbol", "Bounding box coordinates, a 2D array of "
90 | "[[batch_index, x1, y1, x2, y2]]. (x1, y1) and (x2, y2) are the top-left and bottom-right corners "
91 | "of the designated region of interest. batch_index indicates the index of the corresponding image "
92 | "in the input data")
93 | .add_argument("trans", "Symbol", "transformation parameter")
94 | .add_arguments(DeformablePSROIPoolingParam::__FIELDS__());
95 | } // namespace op
96 | } // namespace mxnet
97 |
--------------------------------------------------------------------------------
/relation_rcnn/operator_cxx/nn/deformable_im2col.h:
--------------------------------------------------------------------------------
1 | /*!
2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
3 | *
4 | * COPYRIGHT
5 | *
6 | * All contributions by the University of California:
7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
8 | * All rights reserved.
9 | *
10 | * All other contributions:
11 | * Copyright (c) 2014-2017, the respective contributors
12 | * All rights reserved.
13 | *
14 | * Caffe uses a shared copyright model: each contributor holds copyright over
15 | * their contributions to Caffe. The project versioning records all such
16 | * contribution and copyright details. If a contributor wants to further mark
17 | * their specific copyright on a particular contribution, they should indicate
18 | * their copyright solely in the commit message of the change when it is
19 | * committed.
20 | *
21 | * LICENSE
22 | *
23 | * Redistribution and use in source and binary forms, with or without
24 | * modification, are permitted provided that the following conditions are met:
25 | *
26 | * 1. Redistributions of source code must retain the above copyright notice, this
27 | * list of conditions and the following disclaimer.
28 | * 2. Redistributions in binary form must reproduce the above copyright notice,
29 | * this list of conditions and the following disclaimer in the documentation
30 | * and/or other materials provided with the distribution.
31 | *
32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | *
43 | * CONTRIBUTION AGREEMENT
44 | *
45 | * By contributing to the BVLC/caffe repository through pull-request, comment,
46 | * or otherwise, the contributor releases their content to the
47 | * license and copyright terms herein.
48 | *
49 | ***************** END Caffe Copyright Notice and Disclaimer ********************
50 | *
51 | * Copyright (c) 2017 Microsoft
52 | * Licensed under The MIT License [see LICENSE for details]
53 | * \file deformable_im2col.h
54 | * \brief Function definitions of converting an image to
55 | * column matrix based on kernel, padding, dilation, and offset.
56 | * These functions are mainly used in deformable convolution operators.
57 | * \ref: https://arxiv.org/abs/1703.06211
58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai
59 | */
60 |
61 | #ifndef MXNET_OPERATOR_CONTRIB_NN_DEFORMABLE_IM2COL_H_
62 | #define MXNET_OPERATOR_CONTRIB_NN_DEFORMABLE_IM2COL_H_
63 |
64 | #include <mxnet/base.h>
65 | #include <mxnet/operator.h>
66 | #include <cstring>
67 | #include <vector>
68 | #include "../../mxnet_op.h"
69 |
70 | namespace mxnet {
71 | namespace op {
72 |
73 | /*!\brief
74 |  * cpu function of deformable_im2col algorithm
75 |  * \param s device stream
76 |  * \param data_im pointer of an image (C, H, W, ...) in the image batch
77 |  * \param data_offset pointer of offset (C, H, W, ...) in the offset batch
78 |  * \param im_shape input image shape in dimensions (N, C, H, W,)
79 |  * \param col_shape column buffer shape (#channels, output_im_height, output_im_width, ...)
80 |  * \param kernel_shape kernel filter shape
81 |  * \param pad pad shape
82 |  * \param stride stride shape
83 |  * \param dilation dilation shape
84 |  * \param deformable_group #offset group that deformable convolution use
85 |  * \param data_col column buffer pointer
86 |  */
87 | template <typename DType>
88 | inline void deformable_im2col(mshadow::Stream<cpu>* s,
89 |   const DType* data_im, const DType* data_offset,
90 |   const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape,
91 |   const TShape& pad, const TShape& stride, const TShape& dilation,
92 |   const uint32_t deformable_group, DType* data_col) {
93 |   if (2 == kernel_shape.ndim()) {
94 |     LOG(FATAL) << "not implemented";
95 |   } else {
96 |     LOG(FATAL) << "not implemented";
97 |   }
98 | }
99 |
100 |
101 | /*!\brief
102 |  * cpu function of deformable_col2im algorithm
103 |  * \param s device stream
104 |  * \param data_col start pointer of the column buffer to be filled
105 |  * \param data_offset pointer of offset (C, H, W, ...) in the offset batch
106 |  * \param im_shape input image shape in dimensions (N, C, H, W,)
107 |  * \param col_shape column buffer shape
108 |  * \param kernel_shape kernel filter shape
109 |  * \param pad pad shape
110 |  * \param stride stride shape
111 |  * \param dilation dilation shape
112 |  * \param deformable_group #offset group that deformable convolution use
113 |  * \param grad_im pointer of an image (C, H, W, ...) in the image batch
114 |  */
115 | template <typename DType>
116 | inline void deformable_col2im(mshadow::Stream<cpu>* s,
117 |   const DType* data_col, const DType* data_offset,
118 |   const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape,
119 |   const TShape& pad, const TShape& stride,
120 |   const TShape& dilation, const uint32_t deformable_group,
121 |   DType* grad_im, OpReqType req) {
122 |   index_t num_spatial_axes = kernel_shape.ndim();
123 |   LOG(FATAL) << "not implemented";
124 | }
125 |
126 |
127 | /*!\brief
128 |  * cpu function of deformable_col2im_coord algorithm
129 |  * \param s device stream
130 |  * \param data_col start pointer of the column buffer to be filled
131 |  * \param data_im pointer of an image (C, H, W, ...) in the image batch
132 |  * \param data_offset pointer of offset (C, H, W, ...) in the offset batch
133 |  * \param im_shape input image shape in dimensions (N, C, H, W,)
134 |  * \param col_shape column buffer shape
135 |  * \param kernel_shape kernel filter shape
136 |  * \param pad pad shape
137 |  * \param stride stride shape
138 |  * \param dilation dilation shape
139 |  * \param deformable_group #offset group that deformable convolution use
140 |  * \param grad_offset pointer of the offset (C, H, W, ...) in the offset batch
141 |  */
142 |
143 | template <typename DType>
144 | inline void deformable_col2im_coord(mshadow::Stream<cpu>* s,
145 |   const DType* data_col, const DType* data_im, const DType* data_offset, const TShape& im_shape,
146 |   const TShape& col_shape, const TShape& kernel_shape,
147 |   const TShape& pad, const TShape& stride,
148 |   const TShape& dilation, const uint32_t deformable_group, DType* grad_offset, OpReqType req) {
149 |   LOG(FATAL) << "not implemented";
150 | }
151 |
152 | } // namespace op
153 | } // namespace mxnet
154 | #ifdef __CUDACC__
155 | #include "./deformable_im2col.cuh"
156 | #endif
157 | #endif // MXNET_OPERATOR_CONTRIB_NN_DEFORMABLE_IM2COL_H_
158 |
--------------------------------------------------------------------------------
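[Editor's note] The CPU functions above are stubs; the real kernels live in deformable_im2col.cuh. For orientation, the column buffer they describe follows the standard im2col layout: each output location becomes one column of C*k_h*k_w values (sampled at bilinearly interpolated offset positions in the deformable case). A minimal offset-free im2col sketch in numpy, just to pin down that layout (illustrative, not repository code):

import numpy as np

def im2col(img, k, stride=1):
    # img: (C, H, W) -> col: (C*k*k, H_out*W_out), one column per output pixel
    C, H, W = img.shape
    H_out = (H - k) // stride + 1
    W_out = (W - k) // stride + 1
    col = np.empty((C * k * k, H_out * W_out), dtype=img.dtype)
    for y in range(H_out):
        for x in range(W_out):
            patch = img[:, y * stride:y * stride + k, x * stride:x * stride + k]
            col[:, y * W_out + x] = patch.ravel()
    return col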
/relation_rcnn/operator_cxx/psroi_pooling.cc:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2017 by Contributors
3 | * Copyright (c) 2017 Microsoft
4 | * Licensed under The MIT License [see LICENSE for details]
5 | * \file psroi_pooling.cc
6 | * \brief psroi pooling operator
7 | * \author Yi Li, Tairui Chen, Guodong Zhang, Jifeng Dai
8 | */
9 | #include "./psroi_pooling-inl.h"
10 | #include <mshadow/base.h>
11 | #include <mshadow/tensor.h>
12 | #include <mshadow/packet-inl.h>
13 | #include <mshadow/dot_engine-inl.h>
14 | #include <cassert>
15 |
16 | using std::max;
17 | using std::min;
18 | using std::floor;
19 | using std::ceil;
20 |
21 | namespace mshadow {
22 | template<typename DType>
23 | inline void PSROIPoolForward(const Tensor<cpu, 4, DType> &out,
24 |                              const Tensor<cpu, 4, DType> &data,
25 |                              const Tensor<cpu, 2, DType> &bbox,
26 |                              const Tensor<cpu, 4, DType> &mapping_channel,
27 |                              const float spatial_scale_,
28 |                              const int output_dim_,
29 |                              const int group_size_) {
30 |   // NOT_IMPLEMENTED;
31 |   return;
32 | }
33 |
34 | template<typename DType>
35 | inline void PSROIPoolBackwardAcc(const Tensor<cpu, 4, DType> &in_grad,
36 |                                  const Tensor<cpu, 4, DType> &out_grad,
37 |                                  const Tensor<cpu, 2, DType> &bbox,
38 |                                  const Tensor<cpu, 4, DType> &mapping_channel,
39 |                                  const float spatial_scale_,
40 |                                  const int output_dim_) {
41 |   // NOT_IMPLEMENTED;
42 |   return;
43 | }
44 | } // namespace mshadow
45 |
46 | namespace mxnet {
47 | namespace op {
48 |
49 | template<>
50 | Operator *CreateOp<cpu>(PSROIPoolingParam param, int dtype) {
51 |   Operator* op = NULL;
52 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
53 |     op = new PSROIPoolingOp<cpu, DType>(param);
54 |   });
55 |   return op;
56 | }
57 |
58 | Operator *PSROIPoolingProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
59 |                                              std::vector<int> *in_type) const {
60 |   std::vector<TShape> out_shape, aux_shape;
61 |   std::vector<int> out_type, aux_type;
62 |   CHECK(InferType(in_type, &out_type, &aux_type));
63 |   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
64 |   DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
65 | }
66 |
67 | DMLC_REGISTER_PARAMETER(PSROIPoolingParam);
68 |
69 | MXNET_REGISTER_OP_PROPERTY(_contrib_PSROIPooling, PSROIPoolingProp)
70 | .describe("Performs region-of-interest pooling on inputs. Resize bounding box coordinates by "
71 | "spatial_scale and crop input feature maps accordingly. The cropped feature maps are pooled "
72 | "by max pooling to a fixed size output indicated by pooled_size. batch_size will change to "
73 | "the number of region bounding boxes after PSROIPooling")
74 | .add_argument("data", "Symbol", "Input data to the pooling operator, a 4D feature map")
75 | .add_argument("rois", "Symbol", "Bounding box coordinates, a 2D array of "
76 | "[[batch_index, x1, y1, x2, y2]]. (x1, y1) and (x2, y2) are the top-left and bottom-right corners "
77 | "of the designated region of interest. batch_index indicates the index of the corresponding image "
78 | "in the input data")
79 | .add_arguments(PSROIPoolingParam::__FIELDS__());
80 | } // namespace op
81 | } // namespace mxnet
--------------------------------------------------------------------------------
/relation_rcnn/operator_py/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msracver/Relation-Networks-for-Object-Detection/e83e911d828e3c86624ce0aeb8d742d5ee67d5ba/relation_rcnn/operator_py/__init__.py
--------------------------------------------------------------------------------
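[Editor's note] The next file implements online hard example mining (OHEM) for the detection head: every ROI is scored by the sum of its classification and bbox-regression losses, and only the roi_per_img hardest ROIs keep their training targets. The selection rule in isolation, as a numpy sketch (function and variable names are illustrative, not from the repository):

import numpy as np

def ohem_select(per_roi_loss, labels, bbox_weights, roi_per_img):
    # keep the roi_per_img highest-loss rois; silence the rest
    drop = np.argsort(per_roi_loss)[::-1][roi_per_img:]
    labels = labels.copy()
    bbox_weights = bbox_weights.copy()
    labels[drop] = -1       # label -1 is ignored by the softmax loss
    bbox_weights[drop] = 0  # zero weight disables the smooth-L1 term
    return labels, bbox_weights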
/relation_rcnn/operator_py/box_annotator_ohem.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Relation Networks for Object Detection
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Yuwen Xiong
6 | # --------------------------------------------------------
7 |
8 | """
9 | Box Annotator OHEM Operator selects the hardest rois online and assigns ohem labels and bbox weights to them.
10 | """
11 |
12 | import mxnet as mx
13 | import numpy as np
14 | from distutils.util import strtobool
15 |
16 |
17 |
18 |
19 | class BoxAnnotatorOHEMOperator(mx.operator.CustomOp):
20 |     def __init__(self, num_classes, num_reg_classes, roi_per_img):
21 |         super(BoxAnnotatorOHEMOperator, self).__init__()
22 |         self._num_classes = num_classes
23 |         self._num_reg_classes = num_reg_classes
24 |         self._roi_per_img = roi_per_img
25 |
26 |     def forward(self, is_train, req, in_data, out_data, aux):
27 |
28 |         cls_score = in_data[0]
29 |         bbox_pred = in_data[1]
30 |         labels = in_data[2].asnumpy()
31 |         bbox_targets = in_data[3]
32 |         bbox_weights = in_data[4]
33 |
34 |         per_roi_loss_cls = mx.nd.SoftmaxActivation(cls_score) + 1e-14
35 |         per_roi_loss_cls = per_roi_loss_cls.asnumpy()
36 |         per_roi_loss_cls = per_roi_loss_cls[np.arange(per_roi_loss_cls.shape[0], dtype='int'), labels.astype('int')]
37 |         per_roi_loss_cls = -1 * np.log(per_roi_loss_cls)
38 |         per_roi_loss_cls = np.reshape(per_roi_loss_cls, newshape=(-1,))
39 |
40 |         per_roi_loss_bbox = bbox_weights * mx.nd.smooth_l1((bbox_pred - bbox_targets), scalar=1.0)
41 |         per_roi_loss_bbox = mx.nd.sum(per_roi_loss_bbox, axis=1).asnumpy()
42 |
43 |         top_k_per_roi_loss = np.argsort(per_roi_loss_cls + per_roi_loss_bbox)
44 |         labels_ohem = labels
45 |         labels_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = -1
46 |         bbox_weights_ohem = bbox_weights.asnumpy()
47 |         bbox_weights_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = 0
48 |
49 |         labels_ohem = mx.nd.array(labels_ohem)
50 |         bbox_weights_ohem = mx.nd.array(bbox_weights_ohem)
51 |
52 |         for ind, val in enumerate([labels_ohem, bbox_weights_ohem]):
53 |             self.assign(out_data[ind], req[ind], val)
54 |
55 |
56 |     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
57 |         for i in range(len(in_grad)):
58 |             self.assign(in_grad[i], req[i], 0)
59 |
60 |
61 | @mx.operator.register('BoxAnnotatorOHEM')
62 | class BoxAnnotatorOHEMProp(mx.operator.CustomOpProp):
63 |     def __init__(self, num_classes, num_reg_classes, roi_per_img):
64 |         super(BoxAnnotatorOHEMProp, self).__init__(need_top_grad=False)
65 |         self._num_classes = int(num_classes)
66 |         self._num_reg_classes = int(num_reg_classes)
67 |         self._roi_per_img = int(roi_per_img)
68 |
69 |     def list_arguments(self):
70 |         return ['cls_score', 'bbox_pred', 'labels', 'bbox_targets', 'bbox_weights']
71 |
72 |     def list_outputs(self):
73 |         return ['labels_ohem', 'bbox_weights_ohem']
74 |
75 |     def infer_shape(self, in_shape):
76 |         labels_shape = in_shape[2]
77 |         bbox_weights_shape = in_shape[4]
78 |
79 |         return in_shape, \
80 |                [labels_shape, bbox_weights_shape]
81 |
82 |     def create_operator(self, ctx, shapes, dtypes):
83 |         return BoxAnnotatorOHEMOperator(self._num_classes, self._num_reg_classes, self._roi_per_img)
84 |
85 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
86 |         return []
87 |
--------------------------------------------------------------------------------
/relation_rcnn/operator_py/monitor_op.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Relation Networks for Object Detection
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Jiayuan Gu, Dazhi Cheng
6 | # --------------------------------------------------------
7 | import mxnet as mx
8 | import numpy as np
9 | from distutils.util import strtobool
10 | from easydict import EasyDict as edict
11 | import cPickle
12 |
13 | DEBUG = False
14 |
15 |
16 | class MonitorOperator(mx.operator.CustomOp):
17 |     def __init__(self, nickname):
18 |         super(MonitorOperator, self).__init__()
19 |         self.nickname = nickname
20 |
21 |     def forward(self, is_train, req, in_data, out_data, aux):
22 |         self.assign(out_data[0], req[0], in_data[0])
23 |
24 |     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
25 |         self.assign(in_grad[0], req[0], out_grad[0])
26 |
27 |
28 | @mx.operator.register('monitor')
29 | class MonitorProp(mx.operator.CustomOpProp):
30 |     def __init__(self, nickname):
31 |         super(MonitorProp, self).__init__(need_top_grad=False)
32 |         self.nickname = nickname
33 |
34 |     def list_arguments(self):
35 |         return ['input']
36 |
37 |     def list_outputs(self):
38 |         return ['output']
39 |
40 |     def infer_shape(self, in_shape):
41 |         output_shape = in_shape[0]
42 |         return [output_shape], [output_shape]
43 |
44 |     def create_operator(self, ctx, shapes, dtypes):
45 |         return MonitorOperator(self.nickname)
46 |
47 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
48 |         return [out_grad[0]]
49 |
50 |
51 | def monitor_wrapper(sym_instance, name):
52 |     return mx.sym.Custom(input=sym_instance,
53 |                          op_type='monitor',
54 |                          nickname=name)
--------------------------------------------------------------------------------
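[Editor's note] monitor_wrapper above is an identity pass-through that tags a symbol so its intermediate values can be inspected. A sketch of how it might be dropped into a symbol definition; the fc_feat symbol and the import path are assumptions for illustration:

import mxnet as mx
from operator_py.monitor_op import monitor_wrapper

data = mx.sym.Variable('data')
fc_feat = mx.sym.FullyConnected(data=data, num_hidden=1024, name='fc_feat')
fc_feat = monitor_wrapper(fc_feat, 'fc_feat')  # identity in forward and backward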
/relation_rcnn/operator_py/nms_multi_target.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Relation Networks for Object Detection
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Jiayuan Gu, Dazhi Cheng
6 | # --------------------------------------------------------
7 | """
8 | Nms Multi-thresh Target Operator matches detection boxes against ground truth
9 | and assigns a positive NMS target at each overlap threshold.
10 | """
11 |
12 | import mxnet as mx
13 | import numpy as np
14 |
15 | from bbox.bbox_transform import bbox_overlaps
16 |
17 |
18 | class NmsMultiTargetOp(mx.operator.CustomOp):
19 |     def __init__(self, target_thresh):
20 |         super(NmsMultiTargetOp, self).__init__()
21 |         self._target_thresh = target_thresh
22 |         self._num_thresh = len(target_thresh)
23 |
24 |     def forward(self, is_train, req, in_data, out_data, aux):
25 |         # bbox, [first_n, num_fg_classes, 4]
26 |         bbox = in_data[0].asnumpy()
27 |         num_boxes = bbox.shape[0]
28 |         num_fg_classes = bbox.shape[1]
29 |         gt_box = in_data[1].asnumpy()
30 |         # score, [first_n, num_fg_classes]
31 |         score = in_data[2].asnumpy()
32 |
33 |         batch_image, num_gt, code_size = gt_box.shape
34 |         assert batch_image == 1, 'only support batch_image=1, but receive %d' % batch_image
35 |         assert code_size == 5, 'code_size of gt should be 5, but receive %d' % code_size
36 |         assert len(score.shape) == 2, 'shape of score is %d instead of 2.' % len(score.shape)
37 |         assert score.shape[1] == num_fg_classes, 'number of fg classes should be same for boxes and scores'
38 |
39 |         output_list = []
40 |         for cls_idx in range(0, num_fg_classes):
41 |             valid_gt_mask = (gt_box[0, :, -1].astype(np.int32) == (cls_idx + 1))
42 |             valid_gt_box = gt_box[0, valid_gt_mask, :]
43 |             num_valid_gt = len(valid_gt_box)
44 |
45 |             if num_valid_gt == 0:
46 |                 output = np.zeros(shape=(num_boxes, self._num_thresh), dtype=np.float32)
47 |                 output_list.append(output)
48 |             else:
49 |                 bbox_per_class = bbox[:, cls_idx, :]
50 |                 score_per_class = score[:, cls_idx:cls_idx + 1]
51 |                 overlap_mat = bbox_overlaps(bbox_per_class.astype(np.float),
52 |                                             valid_gt_box[:, :-1].astype(np.float))
53 |
54 |                 eye_matrix = np.eye(num_valid_gt)
55 |                 output_list_per_class = []
56 |
57 |                 for thresh in self._target_thresh:
58 |                     # following mAP metric
59 |                     overlap_mask = (overlap_mat > thresh)
60 |                     valid_bbox_indices = np.where(overlap_mask)[0]
61 |                     # require score to be 2-dim
62 |                     overlap_score = np.tile(score_per_class, (1, num_valid_gt))
63 |                     overlap_score *= overlap_mask
64 |                     max_overlap_indices = np.argmax(overlap_mat, axis=1)
65 |                     max_overlap_mask = eye_matrix[max_overlap_indices]
66 |                     overlap_score *= max_overlap_mask
67 |                     max_score_indices = np.argmax(overlap_score, axis=0)
68 |                     output = np.zeros((num_boxes,))
69 |                     output[np.intersect1d(max_score_indices, valid_bbox_indices)] = 1
70 |                     output_list_per_class.append(output)
71 |                 output_per_class = np.stack(output_list_per_class, axis=-1)
72 |                 output_list.append(output_per_class)
73 |         blob = np.stack(output_list, axis=1).astype(np.float32, copy=False)
74 |         self.assign(out_data[0], req[0], blob)
75 |
76 |     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
77 |         self.assign(in_grad[0], req[0], 0)
78 |         self.assign(in_grad[1], req[1], 0)
79 |         self.assign(in_grad[2], req[2], 0)
80 |
81 |
82 | @mx.operator.register("nms_multi_target")
83 | class NmsMultiTargetProp(mx.operator.CustomOpProp):
84 |     def __init__(self, target_thresh):
85 |         super(NmsMultiTargetProp, self).__init__(need_top_grad=False)
86 |         self._target_thresh = np.fromstring(target_thresh[1:-1], dtype=float, sep=' ')
87 |         self._num_thresh = len(self._target_thresh)
88 |
89 |     def list_arguments(self):
90 |         return ['bbox', 'gt_bbox', 'score']
91 |
92 |     def list_outputs(self):
93 |         return ['nms_multi_target']
94 |
95 |     def infer_shape(self, in_shape):
96 |         bbox_shape = in_shape[0]
97 |         # gt_box_shape = in_shape[1]
98 |         score_shape = in_shape[2]
99 |
100 |         assert bbox_shape[0] == score_shape[0], 'ROI number should be same for bbox and score'
101 |
102 |         num_boxes = bbox_shape[0]
103 |         num_fg_classes = bbox_shape[1]
104 |         output_shape = (num_boxes, num_fg_classes, self._num_thresh)
105 |
106 |         return in_shape, [output_shape]
107 |
108 |     def create_operator(self, ctx, shapes, dtypes):
109 |         return NmsMultiTargetOp(self._target_thresh)
110 |
111 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
112 |         return []
113 |
--------------------------------------------------------------------------------
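[Editor's note] The matching rule in NmsMultiTargetOp.forward is easiest to see on toy numbers: at a given threshold, a box becomes a positive target only if its IoU with some ground-truth box exceeds the threshold, that ground truth is the box's argmax match, and the box has the highest score among such candidates. A toy walk-through (editor's sketch, not repository code):

import numpy as np

overlap_mat = np.array([[0.9, 0.1],
                        [0.7, 0.2],
                        [0.0, 0.6]])   # IoU of 3 boxes against 2 ground truths
score = np.array([0.8, 0.9, 0.5])     # per-box scores for one class
thresh = 0.5

mask = overlap_mat > thresh                          # candidate (box, gt) pairs
best_gt = np.eye(2)[np.argmax(overlap_mat, axis=1)]  # each box's argmax gt
cand = score[:, None] * mask * best_gt               # scores of valid pairs
targets = np.zeros(3)
targets[np.argmax(cand, axis=0)] = 1                 # box 1 wins gt0, box 2 wins gt1
# (the real operator also guards against gts with no candidate above the threshold)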
/relation_rcnn/operator_py/proposal_target.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Relation Networks for Object Detection
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Modified by Yuwen Xiong
6 | # --------------------------------------------------------
7 | # Based on:
8 | # MX-RCNN
9 | # Copyright (c) 2016 by Contributors
10 | # Licence under The Apache 2.0 License
11 | # https://github.com/ijkguo/mx-rcnn/
12 | # --------------------------------------------------------
13 |
14 | """
15 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them.
16 | """
17 |
18 | import mxnet as mx
19 | import numpy as np
20 | from distutils.util import strtobool
21 | from easydict import EasyDict as edict
22 | import cPickle
23 |
24 |
25 | from core.rcnn import sample_rois, sample_rois_v2
26 |
27 | DEBUG = False
28 |
29 |
30 | class ProposalTargetOperator(mx.operator.CustomOp):
31 |     def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction):
32 |         super(ProposalTargetOperator, self).__init__()
33 |         self._num_classes = num_classes
34 |         self._batch_images = batch_images
35 |         self._batch_rois = batch_rois
36 |         self._cfg = cfg
37 |         self._fg_fraction = fg_fraction
38 |
39 |         if DEBUG:
40 |             self._count = 0
41 |             self._fg_num = 0
42 |             self._bg_num = 0
43 |
44 |     def forward(self, is_train, req, in_data, out_data, aux):
45 |         assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \
46 |             'batch_images {} must divide batch_rois {}'.format(self._batch_images, self._batch_rois)
47 |         all_rois = in_data[0].asnumpy()
48 |         gt_boxes = in_data[1].asnumpy()
49 |
50 |         if self._batch_rois == -1:
51 |             rois_per_image = all_rois.shape[0] + gt_boxes.shape[0]
52 |             fg_rois_per_image = rois_per_image
53 |         elif self._batch_rois == -2:
54 |             rois_per_image = all_rois.shape[0]
55 |             fg_rois_per_image = rois_per_image
56 |         elif self._batch_rois < -10:
57 |             rois_per_image = -self._batch_rois / self._batch_images
58 |             fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int)
59 |         else:
60 |             rois_per_image = self._batch_rois / self._batch_images
61 |             fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int)
62 |
63 |
64 |         # Include ground-truth boxes in the set of candidate rois
65 |         zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
66 |         if self._batch_rois >= -1:
67 |             all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
68 |         # Sanity check: single batch only
69 |         assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported'
70 |
71 |         if self._batch_rois == -1 or self._batch_rois == -2:
72 |             #rois, labels, bbox_targets, bbox_weights = \
73 |             #    sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes, self._cfg, gt_boxes=gt_boxes)
74 |             rois, labels, bbox_targets, bbox_weights = \
75 |                 sample_rois_v2(all_rois,
self._num_classes, self._cfg, gt_boxes=gt_boxes) 76 | else: 77 | rois, labels, bbox_targets, bbox_weights = \ 78 | sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes, self._cfg, gt_boxes=gt_boxes) 79 | 80 | if DEBUG: 81 | print "labels=", labels 82 | print 'num fg: {}'.format((labels > 0).sum()) 83 | print 'num bg: {}'.format((labels == 0).sum()) 84 | self._count += 1 85 | self._fg_num += (labels > 0).sum() 86 | self._bg_num += (labels == 0).sum() 87 | print "self._count=", self._count 88 | print 'num fg avg: {}'.format(self._fg_num / self._count) 89 | print 'num bg avg: {}'.format(self._bg_num / self._count) 90 | print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 91 | 92 | for ind, val in enumerate([rois, labels, bbox_targets, bbox_weights]): 93 | self.assign(out_data[ind], req[ind], val) 94 | 95 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 96 | self.assign(in_grad[0], req[0], 0) 97 | self.assign(in_grad[1], req[1], 0) 98 | 99 | 100 | @mx.operator.register('proposal_target') 101 | class ProposalTargetProp(mx.operator.CustomOpProp): 102 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction='0.25'): 103 | super(ProposalTargetProp, self).__init__(need_top_grad=False) 104 | self._num_classes = int(num_classes) 105 | self._batch_images = int(batch_images) 106 | self._batch_rois = int(batch_rois) 107 | self._cfg = cPickle.loads(cfg) 108 | self._fg_fraction = float(fg_fraction) 109 | 110 | def list_arguments(self): 111 | return ['rois', 'gt_boxes'] 112 | 113 | def list_outputs(self): 114 | return ['rois_output', 'label', 'bbox_target', 'bbox_weight'] 115 | 116 | def infer_shape(self, in_shape): 117 | rpn_rois_shape = in_shape[0] 118 | gt_boxes_shape = in_shape[1] 119 | 120 | if self._batch_rois == -1: 121 | rois = rpn_rois_shape[0] + gt_boxes_shape[0] 122 | elif self._batch_rois == -2: 123 | rois = rpn_rois_shape[0] 124 | elif self._batch_rois < -10: 125 | rois = -self._batch_rois 126 | else: 127 | rois = self._batch_rois 128 | 129 | #rois = rpn_rois_shape[0] + gt_boxes_shape[0] if self._batch_rois == -1 else self._batch_rois 130 | 131 | output_rois_shape = (rois, 5) 132 | label_shape = (rois, ) 133 | bbox_target_shape = (rois, self._num_classes * 4) 134 | bbox_weight_shape = (rois, self._num_classes * 4) 135 | 136 | return [rpn_rois_shape, gt_boxes_shape], \ 137 | [output_rois_shape, label_shape, bbox_target_shape, bbox_weight_shape] 138 | 139 | def create_operator(self, ctx, shapes, dtypes): 140 | return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._cfg, self._fg_fraction) 141 | 142 | def declare_backward_dependency(self, out_grad, in_data, out_data): 143 | return [] 144 | -------------------------------------------------------------------------------- /relation_rcnn/symbols/__init__.py: -------------------------------------------------------------------------------- 1 | import resnet_v1_101_rcnn_attention_1024_pairwise_position_multi_head_16 2 | import resnet_v1_101_rcnn_dcn_attention_1024_pairwise_position_multi_head_16 3 | import resnet_v1_101_rcnn_attention_1024_pairwise_position_multi_head_16_learn_nms 4 | import resnet_v1_101_rcnn_dcn_attention_1024_pairwise_position_multi_head_16_learn_nms 5 | import resnet_v1_101_rcnn_fpn_attention_1024_pairwise_position_multi_head_16 6 | import resnet_v1_101_rcnn_fpn_attention_1024_pairwise_position_multi_head_16_learn_nms 7 | import resnet_v1_101_rcnn_learn_nms_1024_attention_1024_pairwise_position_multi_head_16 8 | 
import resnet_v1_101_rcnn_dcn
9 | import resnet_v1_101_rcnn_fpn
10 | import resnet_v1_101_rcnn
11 |
--------------------------------------------------------------------------------
/relation_rcnn/test.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Relation Networks for Object Detection
3 | # Copyright (c) 2017 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong
6 | # --------------------------------------------------------
7 | # Based on:
8 | # MX-RCNN
9 | # Copyright (c) 2016 by Contributors
10 | # Licence under The Apache 2.0 License
11 | # https://github.com/ijkguo/mx-rcnn/
12 | # --------------------------------------------------------
13 |
14 | import _init_paths
15 | import argparse
16 | import os
17 | import sys
18 | import time
19 | import logging
20 | from config.config import config, update_config
21 |
22 |
23 | def parse_args():
24 |     parser = argparse.ArgumentParser(description='Test a Faster R-CNN network')
25 |     # general
26 |     parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str)
27 |
28 |     args, rest = parser.parse_known_args()
29 |     update_config(args.cfg)
30 |
31 |     # rcnn
32 |     parser.add_argument('--vis', help='turn on visualization', action='store_true')
33 |     parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true')
34 |     parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float)
35 |     parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true')
36 |     parser.add_argument('--test_epoch', help='the epoch of the model to be tested', default=config.TEST.test_epoch, type=int)
37 |     # nms
38 |     parser.add_argument('--nms', help='params for nms or softnms', default=config.TEST.NMS, type=float)
39 |     parser.add_argument('--softnms', help='whether to enable softnms', default=config.TEST.SOFTNMS, action='store_true')
40 |     parser.add_argument('--naive_nms', help='whether to enable naive nms', default=False, action='store_true')
41 |     parser.add_argument('--first_n', help='first_n for learn nms or nms', default=config.TEST.FIRST_N, type=int)
42 |     parser.add_argument('--merge', help='merge method for learn nms', default=config.TEST.MERGE_METHOD, type=int)
43 |     parser.add_argument('--debug', help='whether to enable debug mode', default=False, action='store_true')
44 |     # dataset
45 |     parser.add_argument('--test_set', help='which set to be tested', default=config.dataset.test_image_set, type=str)
46 |     args, rest = parser.parse_known_args()
47 |     # update config
48 |     config.TEST.test_epoch = args.test_epoch
49 |     config.TEST.NMS = args.nms
50 |     config.TEST.SOFTNMS = args.softnms and (not args.naive_nms)
51 |     config.TEST.FIRST_N = args.first_n
52 |     config.TEST.MERGE_METHOD = args.merge
53 |     config.dataset.test_image_set = args.test_set
54 |     return args
55 |
56 |
57 | args = parse_args()
58 | curr_path = os.path.abspath(os.path.dirname(__file__))
59 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION))
60 |
61 | import mxnet as mx
62 | import numpy as np
63 | from function.test_rcnn import test_rcnn
64 | from utils.create_logger import create_logger
65 |
66 |
67 | def main():
68 |     ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
69 |     print args
70 |     np.random.seed(0)
71 |     mx.random.seed(0)
72 |     logger, final_output_path = create_logger(config.output_path,
args.cfg, config.dataset.test_image_set) 73 | 74 | test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path, 75 | ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch, 76 | args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path) 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /relation_rcnn/train_end2end.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import _init_paths 15 | 16 | import time 17 | import argparse 18 | import logging 19 | import pprint 20 | import os 21 | import sys 22 | from config.config import config, update_config 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='Train Faster-RCNN network') 26 | # general 27 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 28 | 29 | args, rest = parser.parse_known_args() 30 | # update config 31 | update_config(args.cfg) 32 | 33 | # training 34 | parser.add_argument('--frequent', help='frequency of logging', default=config.default.frequent, type=int) 35 | args, rest = parser.parse_known_args() 36 | return args 37 | 38 | args = parse_args() 39 | curr_path = os.path.abspath(os.path.dirname(__file__)) 40 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 41 | 42 | import shutil 43 | import numpy as np 44 | import mxnet as mx 45 | 46 | from symbols import * 47 | from core import callback, metric 48 | from core.loader import AnchorLoader 49 | from core.module import MutableModule 50 | from utils.create_logger import create_logger 51 | from utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 52 | from utils.load_model import load_param 53 | from utils.PrefetchingIter import PrefetchingIterV2 as PrefetchingIter 54 | from utils.lr_scheduler import WarmupMultiFactorScheduler 55 | 56 | 57 | def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): 58 | np.random.seed(0) 59 | mx.random.seed(0) 60 | logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) 61 | prefix = os.path.join(final_output_path, prefix) 62 | 63 | # load symbol 64 | shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) 65 | sym_instance = eval(config.symbol + '.' 
+ config.symbol)() 66 | sym = sym_instance.get_symbol(config, is_train=True) 67 | feat_sym = sym.get_internals()['rpn_cls_score_output'] 68 | 69 | # setup multi-gpu 70 | batch_size = len(ctx) 71 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 72 | 73 | # print config 74 | pprint.pprint(config) 75 | logger.info('training config:{}\n'.format(pprint.pformat(config))) 76 | 77 | # load dataset and prepare imdb for training 78 | image_sets = [iset for iset in config.dataset.image_set.split('+')] 79 | roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, 80 | flip=config.TRAIN.FLIP) 81 | for image_set in image_sets] 82 | roidb = merge_roidb(roidbs) 83 | roidb = filter_roidb(roidb, config) 84 | 85 | # load training data 86 | train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, 87 | feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, 88 | anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 89 | 90 | # infer max shape 91 | max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 92 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 93 | max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) 94 | print 'providing maximum shape', max_data_shape, max_label_shape 95 | 96 | data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 97 | pprint.pprint(data_shape_dict) 98 | sym_instance.infer_shape(data_shape_dict) 99 | pprint.pprint(sym_instance.arg_shape_dict) 100 | logging.info(pprint.pformat(sym_instance.arg_shape_dict)) 101 | #dot = mx.viz.plot_network(sym, node_attrs={'shape': 'rect', 'fixedsize': 'false'}) 102 | #dot.render(os.path.join('./output/rcnn/network_vis', config.symbol + '_rcnn')) 103 | 104 | # load and initialize params 105 | if config.TRAIN.RESUME: 106 | print('continue training from ', begin_epoch) 107 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 108 | else: 109 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 110 | sym_instance.init_weight(config, arg_params, aux_params) 111 | 112 | # check parameter shapes 113 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 114 | 115 | # create solver 116 | fixed_param_prefix = config.network.FIXED_PARAMS 117 | data_names = [k[0] for k in train_data.provide_data_single] 118 | label_names = [k[0] for k in train_data.provide_label_single] 119 | 120 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 121 | logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], 122 | max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) 123 | 124 | if config.TRAIN.RESUME: 125 | mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) 126 | 127 | # decide training params 128 | # metric 129 | eval_metric = metric.RCNNAccMetric(config) 130 | cls_metric = metric.RCNNLogLossMetric(config) 131 | bbox_metric = metric.RCNNL1LossMetric(config) 132 | eval_metrics = mx.metric.CompositeEvalMetric() 133 | # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric 134 | if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS): 135 | rpn_eval_metric = metric.RPNAccMetric() 136 | rpn_cls_metric = 
metric.RPNLogLossMetric() 137 | rpn_bbox_metric = metric.RPNL1LossMetric() 138 | for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]: 139 | eval_metrics.add(child_metric) 140 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 141 | eval_metrics.add(child_metric) 142 | if config.TRAIN.LEARN_NMS: 143 | eval_metrics.add(metric.NMSLossMetric(config, 'pos')) 144 | eval_metrics.add(metric.NMSLossMetric(config, 'neg')) 145 | eval_metrics.add(metric.NMSAccMetric(config)) 146 | 147 | # callback 148 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) 149 | means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) 150 | stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) 151 | epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), 152 | callback.do_checkpoint(prefix, means, stds)] 153 | # decide learning rate 154 | base_lr = lr 155 | lr_factor = config.TRAIN.lr_factor 156 | lr_epoch = [float(epoch) for epoch in lr_step.split(',')] 157 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 158 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 159 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 160 | print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) 161 | lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) 162 | # optimizer 163 | optimizer_params = {'momentum': config.TRAIN.momentum, 164 | 'wd': config.TRAIN.wd, 165 | 'learning_rate': lr, 166 | 'lr_scheduler': lr_scheduler, 167 | 'rescale_grad': 1.0, 168 | 'clip_gradient': None} 169 | 170 | if not isinstance(train_data, PrefetchingIter): 171 | train_data = PrefetchingIter(train_data) 172 | 173 | # train 174 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 175 | batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, 176 | optimizer='sgd', optimizer_params=optimizer_params, 177 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 178 | 179 | 180 | def main(): 181 | print('Called with argument:', args) 182 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 183 | train_net(args, ctx, config.network.pretrained, config.network.pretrained_epoch, config.TRAIN.model_prefix, 184 | config.TRAIN.begin_epoch, config.TRAIN.end_epoch, config.TRAIN.lr, config.TRAIN.lr_step) 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /relation_rcnn/train_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Relation Networks for Object Detection 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified by Jiayuan Gu, Dazhi Cheng, Yuwen Xiong 6 | # -------------------------------------------------------- 7 | # Based on: 8 | # MX-RCNN 9 | # Copyright (c) 2016 by Contributors 10 | # Licence under The Apache 2.0 License 11 | # https://github.com/ijkguo/mx-rcnn/ 12 | # -------------------------------------------------------- 13 | 14 | import _init_paths 15 | 16 | import time 17 | import argparse 18 | import logging 19 | import pprint 20 | 
import os 21 | import sys 22 | from config.config import config, update_config 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='Train Faster-RCNN network') 26 | # general 27 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 28 | 29 | args, rest = parser.parse_known_args() 30 | # update config 31 | update_config(args.cfg) 32 | 33 | # training 34 | parser.add_argument('--frequent', help='frequency of logging', default=config.default.frequent, type=int) 35 | args = parser.parse_args() 36 | return args 37 | 38 | args = parse_args() 39 | curr_path = os.path.abspath(os.path.dirname(__file__)) 40 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 41 | 42 | import shutil 43 | import numpy as np 44 | import mxnet as mx 45 | 46 | from function.train_rpn import train_rpn 47 | from function.test_rpn import test_rpn 48 | from function.train_rcnn import train_rcnn 49 | from utils.create_logger import create_logger 50 | 51 | 52 | def main(): 53 | print ('Called with argument:', args) 54 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 55 | logger, output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) 56 | shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), output_path) 57 | 58 | assert config.TRAIN.END2END == False 59 | prefix = os.path.join(output_path, config.TRAIN.model_prefix) 60 | logging.info('########## TRAIN rcnn WITH IMAGENET INIT AND RPN DETECTION') 61 | train_rcnn(config, config.dataset.dataset, config.dataset.image_set, config.dataset.root_path, config.dataset.dataset_path, 62 | args.frequent, config.default.kvstore, config.TRAIN.FLIP, config.TRAIN.SHUFFLE, config.TRAIN.RESUME, 63 | ctx, config.network.pretrained, config.network.pretrained_epoch, prefix, config.TRAIN.begin_epoch, 64 | config.TRAIN.end_epoch, train_shared=False, lr=config.TRAIN.lr, lr_step=config.TRAIN.lr_step, 65 | proposal=config.dataset.proposal, logger=logger, output_path=output_path) 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | EasyDict 3 | opencv-python 4 | mxnet-cu80 5 | --------------------------------------------------------------------------------