├── config
    ├── __init__.py
    └── NASFPN
    │   └── __init__.py
├── core
    ├── __init__.py
    └── detection_metric.py
├── models
    ├── __init__.py
    ├── NASFPN
    │   ├── __init__.py
    │   └── README.md
    ├── maskrcnn
    │   ├── __init__.py
    │   ├── metric.py
    │   ├── process_output.py
    │   ├── utils.py
    │   ├── README.md
    │   └── bbox_post_processing.py
    ├── retinanet
    │   ├── __init__.py
    │   ├── metric.py
    │   └── README.md
    ├── cascade_rcnn
    │   ├── __init__.py
    │   └── README.md
    ├── tridentnet
    │   ├── __init__.py
    │   ├── README.md
    │   └── input.py
    ├── efficientnet
    │   ├── README.md
    │   └── builder.py
    ├── FPN
    │   ├── README.md
    │   ├── get_top_proposal.py
    │   ├── assign_layer_fpn.py
    │   └── input.py
    └── dcn
    │   └── builder.py
├── symbol
    └── __init__.py
├── utils
    ├── __init__.py
    ├── logger.py
    ├── load_model.py
    ├── patch_config.py
    ├── contrib
    │   ├── edit_model_weight.py
    │   └── data_to_coco.py
    ├── generate_roidb.py
    ├── callback.py
    └── graph_optimize.py
├── operator_py
    ├── __init__.py
    ├── cython
    │   ├── __init__.py
    │   ├── .gitignore
    │   ├── gpu_nms.hpp
    │   ├── gpu_nms.pyx
    │   ├── bbox.pyx
    │   ├── nms_kernel.cu
    │   └── setup.py
    └── nms.py
├── doc
    ├── image
    │   ├── demo83.jpg
    │   ├── loss1.png
    │   ├── loss2.png
    │   ├── diagram.png
    │   ├── groupsoftmax.png
    │   ├── trident_block.png
    │   └── trident_block_details.png
    ├── DISTRIBUTED.md
    └── INSTALL.md
├── Makefile
├── scripts
    ├── dist_worker.sh
    ├── train_hpc.sh
    ├── terminate.sh
    └── launch.sh
├── operator_cxx
    └── contrib
    │   ├── bbox_norm.cu
    │   ├── sync_batch_norm.cu
    │   ├── focal_loss.cu
    │   ├── broadcast_scale.cu
    │   ├── sync_inplace_activation_batch_norm.cu
    │   ├── focal_loss.cc
    │   ├── bbox_norm.cc
    │   ├── broadcast_scale.cc
    │   ├── axpy.cc
    │   ├── sigmoid_cross_entropy.cc
    │   ├── global_average_pooling.cc
    │   ├── group_norm.cc
    │   ├── generate_anchor.cc
    │   ├── decodebbox-inl.h
    │   ├── nms-inl.h
    │   ├── global_average_pooling.cu
    │   ├── sync_batch_norm.cc
    │   ├── sync_inplace_activation_batch_norm.cc
    │   ├── fixed_divisor.h
    │   ├── generate_proposal-inl.h
    │   ├── group_norm_helper.h
    │   ├── sigmoid_cross_entropy.cu
    │   └── generate_anchor-inl.h
├── .gitignore
├── detection_infer_speed.py
├── unittest
    └── test_loader.py
└── detection_img.py


/config/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/symbol/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/operator_py/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/config/NASFPN/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/NASFPN/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/maskrcnn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/retinanet/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/cascade_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/tridentnet/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/operator_py/cython/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/operator_py/cython/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/doc/image/demo83.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/demo83.jpg


--------------------------------------------------------------------------------
/doc/image/loss1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/loss1.png


--------------------------------------------------------------------------------
/doc/image/loss2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/loss2.png


--------------------------------------------------------------------------------
/doc/image/diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/diagram.png


--------------------------------------------------------------------------------
/doc/image/groupsoftmax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/groupsoftmax.png


--------------------------------------------------------------------------------
/doc/image/trident_block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/trident_block.png


--------------------------------------------------------------------------------
/doc/image/trident_block_details.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/trident_block_details.png


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Build / clean targets for the Cython extensions in operator_py/cython.
# Neither target produces a file of its own name, so both are phony.
.PHONY: all clean

# Build the extensions in place, then drop the temporary build directory.
# Steps are chained with && so a failed `cd` or a failed build stops the
# recipe and is reported by make instead of being masked by a later command.
all:
	cd operator_py/cython/ && python3 setup.py build_ext --inplace && rm -rf build

# Remove generated artifacts. -f keeps the target from failing when some of
# the patterns match nothing (e.g. on a fresh checkout).
clean:
	cd operator_py/cython/ && rm -f *.so *.c *.cpp
5 | 


--------------------------------------------------------------------------------
/operator_py/cython/gpu_nms.hpp:
--------------------------------------------------------------------------------
// Declaration of the NMS entry point that gpu_nms.pyx binds through
// `cdef extern from "gpu_nms.hpp"`. The definition is not part of this header.
// NOTE(review): the parameter roles below are read off the names and the call
// site in gpu_nms.pyx; confirm against the implementation.
//   keep_out / num_out  - receive the kept box indices and how many were kept
//   boxes_host          - boxes_num x boxes_dim float buffer (row-major at the
//                         call site)
//   nms_overlap_thresh  - overlap threshold passed through from the caller
//   device_id           - presumably the GPU to run on; TODO confirm
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
3 | 


--------------------------------------------------------------------------------
/models/maskrcnn/metric.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import mxnet as mx
 3 | 
 4 | class SigmoidCELossMetric(mx.metric.EvalMetric):
 5 |     def __init__(self, name, output_names, label_names):
 6 |         super().__init__(name, output_names, label_names)
 7 | 
 8 |     def update(self, labels, preds):
 9 |         self.sum_metric += preds[0].mean().asscalar()
10 |         self.num_inst += 1


--------------------------------------------------------------------------------
/scripts/dist_worker.sh:
--------------------------------------------------------------------------------
 1 | root_dir=$1
 2 | singularity_image=$2
 3 | conffile=$3
 4 | 
 5 | if test $(which singularity); then 
 6 |     singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "python -u detection_train.py --config ${conffile}"
 7 | else 
 8 |     singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "python -u detection_train.py"
 9 | fi
10 | 


--------------------------------------------------------------------------------
/scripts/train_hpc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | gpucount=8
 4 | num_node=2
 5 | num_servers=${num_node}
 6 | root_dir="/path/to/simpledet"
 7 | sync_dir="/tmp/simpledet_sync"
 8 | 
 9 | hostfile=hostfile.txt
10 | conffile=faster_r50v2c4_c5_256roi_1x
11 | singularity_image=simpledet.img
12 | 
13 | export DMLC_INTERFACE=eth0
14 | python -u ../../launcher/tools/launch.py \
15 |     -n ${num_node} \
16 |     --num-servers ${num_servers} \
17 |     --sync-dst-dir ${sync_dir} \
18 |     --launcher ssh -H ${hostfile} \
19 |     scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \
20 |     2>&1 | tee -a ${root_dir}/log/${conffile}.log
21 | 


--------------------------------------------------------------------------------
/models/maskrcnn/process_output.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from pycocotools import mask as mask_util
 3 | 
 4 | from models.maskrcnn.utils import segm_results
 5 | 
 6 | def process_output(all_outputs, roidb):
 7 |     for output_record in all_outputs:
 8 |         rec_id = int(output_record['rec_id'])
 9 |         bbox_xyxy = output_record['bbox_xyxy']
10 |         cls_score = output_record['cls_score']
11 |         cls = output_record['cls']
12 |         mask = output_record['mask']
13 | 
14 |         im_h = roidb[rec_id]["h"]
15 |         im_w = roidb[rec_id]["w"]
16 |         segm = segm_results(bbox_xyxy, cls, mask, im_h, im_w)
17 |         output_record['segm'] = segm
18 |         del output_record['mask']
19 |     return all_outputs
20 | 


--------------------------------------------------------------------------------
/scripts/terminate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 1 ]; then
 4 | echo "usage: $0 comma_separated_worker_hostnames"
 5 | exit -1
 6 | fi
 7 | 
 8 | hosts=$1
 9 | 
10 | # extract worker and check reachablity
11 | IFS=, read -r -a host_array <<< $hosts
12 | for host in ${host_array[@]}; do
13 |     # check reachability
14 |     echo "check reachability of $host"
15 |     ssh -q $host exit
16 |     if [ $? -ne 0 ]; then
17 |         echo "$host is not reachable"
18 | 	exit -1
19 |     fi
20 | 
21 |     # check availablity (retreat if remote host is in use)
22 |     echo "check availability of $host"
23 |     for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 
24 | 	x="${x//[$'\t\r\n ']}"  # remove trailing whitespace
25 | 	if [ $x -gt 5 ]; then 
26 | 	    echo "$host has gpu utilization of $x%"; 
27 |         fi;  
28 |     done
29 |     
30 |     # cleanup potentially dead python process (march since we checked it)
31 |     ssh $host ps aux | grep python
32 |     echo -e "\n"
33 |     echo "Terminate tasks on $host in 5s"
34 |     sleep 5
35 |     ssh -q $host pkill python
36 | done
37 | 


--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from datetime import datetime
 3 | from pytz import utc, timezone
 4 | 
 5 | 
 6 | def config_logger(path):
 7 |     def custom_time(*args):
 8 |         utc_dt = utc.localize(datetime.utcnow())
 9 |         my_tz = timezone("Asia/Shanghai")
10 |         converted = utc_dt.astimezone(my_tz)
11 |         return converted.timetuple()
12 | 
13 |     logging.basicConfig()
14 |     logging.getLogger().handlers.pop()
15 | 
16 |     fmt = '%(asctime)s %(message)s'
17 |     date_fmt = '%m-%d %H:%M:%S'
18 |     formatter = logging.Formatter(fmt=fmt, datefmt=date_fmt)
19 |     formatter.converter = custom_time
20 | 
21 |     logging.getLogger().setLevel(logging.INFO)
22 | 
23 |     log_file_save_name = path
24 |     file_handler = logging.FileHandler(filename=log_file_save_name, mode='w')
25 |     file_handler.setLevel(logging.INFO)
26 |     file_handler.setFormatter(formatter)
27 |     logging.getLogger().addHandler(file_handler)
28 | 
29 |     console = logging.StreamHandler()
30 |     console.setLevel(logging.INFO)
31 |     console.setFormatter(formatter)
32 |     logging.getLogger().addHandler(console)


--------------------------------------------------------------------------------
/operator_py/cython/gpu_nms.pyx:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Faster R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import numpy as np
 9 | cimport numpy as np
10 | 
# The extern below declares the first argument as np.int32_t* while
# gpu_nms.hpp declares it as int*; this assert guards that both have the same
# size.
assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

# Run NMS through the external _nms routine.
#
# dets      : 2-D float32 array; column 4 is used as the score.
# thresh    : overlap threshold forwarded to _nms.
# device_id : forwarded to _nms (default 0).
#
# Returns a Python list of row indices into the original `dets` that _nms
# kept, expressed in score-descending order.
#
# NOTE(review): `np.float` as an argument type and `np.int_t` as the dtype of
# the argsort result may not be accepted / may mismatch on newer NumPy or on
# platforms where C long is not 64-bit -- confirm with the toolchain in use.
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    # Output buffer: at most one kept index per input box.
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # Indices that sort the boxes by descending score.
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    # Only the first num_out entries of keep were filled in.
    keep = keep[:num_out]
    # keep indexes sorted_dets; map back to indices into the caller's dets.
    return list(order[keep])
32 | 


--------------------------------------------------------------------------------
/doc/DISTRIBUTED.md:
--------------------------------------------------------------------------------
 1 | #### Requirement
 2 | Here we only provide a guide for launching distributed training with Singularity. Please make sure your Singularity installation works by checking [INSTALL.md](./INSTALL.md).
 3 | 
 4 | #### Setup
 5 | 1. obtain the mxnet launcher and place it in the parent directory of the simpledet working directory
 6 | ```bash
 7 | git clone https://github.com/RogerChern/mxnet-dist-lancher.git lancher
 8 | ```
 9 | 
10 | 2. Move `data`, `pretrain_model`, and `experiments` outside of simpledet and symlink them back.
11 | This step is to avoid unnecessary `rsync` of large binary files in the working directory during launching.
12 | 
13 | 3. after step 1 and 2, your directory should be as following
14 | ```
15 | lancher/
16 | simpledet/
17 |   data -> /path/to/data
18 |   pretrain_model -> /path/to/pretrain_model
19 |   experiments -> /path/to/experiments
20 |   ...
21 | ```
22 | 
23 | 4. Make a hostfile containing the hostnames of all nodes. The launch node must be able to reach each of these nodes over ssh without a password.
24 | simpledet/hostfile.txt
25 | ```
26 | node1
27 | node2
28 | ```
29 | 
30 | 5. Change the Singularity mounting point in `scripts/dist_worker.sh`
31 | 
32 | 6. Change the working directories in `scripts/train_hpc.sh`
33 | 
34 | 7. launch distributed training with scripts
35 | ```bash
36 | bash scripts/train_hpc.sh
37 | ```
38 | 


--------------------------------------------------------------------------------
/models/cascade_rcnn/README.md:
--------------------------------------------------------------------------------
 1 | ## Cascade R-CNN
 2 | 
 3 | This repository implements [**Cascade R-CNN**](https://arxiv.org/abs/1712.00726) in the **SimpleDet** framework. Cascade R-CNN is a multi-stage object detector, aiming to reduce the overfitting problem by resampling of progressively improved hypotheses.
 4 | 
 5 | ### How we build Cascade R-CNN
 6 | 
 7 | #### Input
 8 | 
 9 | Cascade R-CNN can share the original Faster R-CNN input, so there is no need to implement an extra one.
10 | 
11 | #### Symbol
12 | 
13 | - ```CascadeRcnn```: detector with three ```R-CNN``` stages
14 | - ```CascadeBbox2fcHead```: head for the ```R-CNN``` stages. Note that it is also required to generate proposals for the next ```R-CNN``` stage, thus we add ```get_all_proposal``` to decode the boxes predicted in this stage and ```get_sampled_proposal``` to generate ```bbox_target```.
15 | 
16 | #### Config
17 | 
18 | - ```BboxParam```, ```BboxParam2nd```, ```BboxParam3rd```: config for the ```R-CNN``` stages. ```mean``` and ```std``` in ```regress_target``` are used to decode the boxes predicted in this stage, and those in ```bbox_target``` are used to generate ```bbox_target``` for the next ```R-CNN``` stage. Note that we add a ```stage``` field to specify the weight used by ```R-CNN```, as in the **test phase** ```bbox_head_1st``` and ```bbox_head_2nd``` forward twice with different input features.
19 | 
20 | 


--------------------------------------------------------------------------------
/models/efficientnet/README.md:
--------------------------------------------------------------------------------
 1 | ## EfficientNet for object detection
 2 | This repository implements [**EfficientNet**](https://arxiv.org/abs/1905.11946) in the SimpleDet framework. EfficientNet-B5 achieves the same mAP with **~1/10 FLOPs** compared with ResNet-50.
 3 | 
 4 | ### Quick Start
 5 | ```bash
 6 | # train faster r-cnn with efficientnet fpn backbone
 7 | python3 detection_train.py --config config/efficientnet/efficientnet_b5_fpn_bn_scratch_400_6x.py
 8 | ```
 9 | 
10 | ### Results and Models
11 | All AP results are reported on minival of the [COCO dataset](http://cocodataset.org).
12 | 
13 | |Model|InputSize|Backbone|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP|Link|
14 | |-----|-----|--------|--------------|---|---------|----|---------|-----------|---------------|----|
15 | |Faster|400x600|B5-FPN|36 epoch(6X)|8X 1080Ti|8|yes|-|75 img/s|37.2|[model](http://simpledet.alarge.space:1234/?/efficientnet_b5_fpn_bn_scratch_400_6x.zip)|
16 | |Faster|400x600|B5-FPN|54 epoch(9X)|8X 1080Ti|8|yes|-|75 img/s|37.9|-|
17 | |Faster|400x600|B5-FPN|72 epoch(12X)|8X 1080Ti|8|yes|-|75 img/s|38.3|-|
18 | 
19 | ### Reference
20 | ```
21 | @inproceedings{tan2019,
22 |   title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
23 |   author={Tan, Mingxing and Le, Quoc V},
24 |   booktitle={ICML},
25 |   year={2019}
26 | }
27 | ```
28 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/bbox_norm.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * \file bbox_norm.cu
22 |  * \brief
23 |  * \author Chenxia Han
24 | */
25 | 
26 | #include "./bbox_norm-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | 
31 | template<>
32 | Operator *CreateOp<gpu>(BBoxNormParam param, int dtype) {
33 |   Operator *op = NULL;
34 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
35 |     op = new BBoxNormOp<gpu, DType>(param);
36 |   });
37 |   return op;
38 | }
39 | 
40 | }  // namespace op
41 | }  // namespace mxnet
42 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/sync_batch_norm.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | /*!
20 |  * Copyright (c) 2018 by Contributors
21 |  * \file sync_batch_norm.cu
22 |  * \brief Synchronized BatchNorm modified from BatchNormV1
23 |  * \author Hang Zhang
24 | */
25 | 
26 | #include "sync_batch_norm-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | template<>
31 | Operator *CreateOp<gpu>(SyncBatchNormParam param, int dtype) {
32 |   return new SyncBatchNorm<gpu>(param);
33 | }
34 | 
35 | }  // namespace op
36 | }  // namespace mxnet
37 | 
38 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/focal_loss.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * \file focal_loss.cu
22 |  * \brief
23 |  * \author Chenxia Han
24 | */
25 | 
26 | #include "./focal_loss-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | 
31 | template<>
32 | Operator *CreateOp<gpu>(FocalLossParam param, int dtype) {
33 |   Operator *op = NULL;
34 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
35 |     op = new FocalLossOp<gpu, DType>(param);
36 |   });
37 |   return op;
38 | }
39 | 
40 | }  // namespace op
41 | }  // namespace mxnet
42 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/broadcast_scale.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * \file broadcast_scale.cu
22 |  * \brief
23 |  * \author Yuntao Chen
24 | */
25 | 
26 | #include "./broadcast_scale-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | 
31 | template<>
32 | Operator *CreateOp<gpu>(BroadcastScaleParam param, int dtype) {
33 |   Operator *op = NULL;
34 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
35 |     op = new BroadcastScaleOp<gpu, DType>(param);
36 |   });
37 |   return op;
38 | }
39 | 
40 | }  // namespace op
41 | }  // namespace mxnet
42 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/sync_inplace_activation_batch_norm.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | /*!
20 |  * Copyright (c) 2018 by Contributors
21 |  * \file sync_inplace_activation_batch_norm.cu
22 |  * \brief Synchronized BatchNorm modified from BatchNormV1
23 |  * \author Yuntao Chen
24 | */
25 | 
26 | #include "sync_inplace_activation_batch_norm-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | template<>
31 | Operator *CreateOp<gpu>(SyncInplaceABNParam param, int dtype) {
32 |   return new SyncInplaceABN<gpu>(param);
33 | }
34 | 
35 | }  // namespace op
36 | }  // namespace mxnet
37 | 
38 | 


--------------------------------------------------------------------------------
/models/retinanet/metric.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import mxnet as mx
 3 | 
 4 | 
 5 | class FGAccMetric(mx.metric.EvalMetric):
 6 |     def __init__(self, name, output_names, label_names, threshold=0):
 7 |         super().__init__(name, output_names, label_names)
 8 |         self.thr = threshold
 9 | 
10 |     def update(self, labels, preds):
11 |         if len(preds) == 1 and len(labels) == 1:
12 |             pred = preds[0]
13 |             label = labels[0]
14 |         elif len(preds) == 2:
15 |             pred = preds[0]
16 |             label = preds[1]
17 |         else:
18 |             raise Exception(
19 |                 "unknown loss output: len(preds): {}, len(labels): {}".format(
20 |                     len(preds), len(labels)
21 |                 )
22 |             )
23 | 
24 |         label = label.asnumpy().astype('int32')
25 |         keep_inds = np.where(label >= 1)
26 | 
27 |         # treat as foreground if score larger than threshold
28 |         # select class with maximum score as prediction
29 |         pred_score = pred.max(axis=-1)
30 |         pred_label = pred.argmax(axis=-1) + 1
31 |         if self.thr != 0:
32 |             pred_label *= pred_score > self.thr
33 | 
34 |         pred_label = pred_label.asnumpy().astype('int32')
35 | 
36 |         pred_label = pred_label[keep_inds]
37 |         label = label[keep_inds]
38 | 
39 |         self.sum_metric += np.sum(pred_label.flat == label.flat)
40 |         self.num_inst += len(pred_label.flat)
41 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # IntelliJ project files
 2 | .idea
 3 | *.iml
 4 | out
 5 | gen
 6 | 
 7 | ### Vim template
 8 | [._]*.s[a-w][a-z]
 9 | [._]s[a-w][a-z]
10 | *.un~
11 | Session.vim
12 | .netrwhist
13 | *~
14 | 
15 | ### IPythonNotebook template
16 | # Temporary data
17 | .ipynb_checkpoints/
18 | 
19 | ### Python template
20 | # Byte-compiled / optimized / DLL files
21 | __pycache__/
22 | *.py[cod]
23 | *$py.class
24 | 
25 | # C extensions
26 | *.so
27 | 
28 | # Distribution / packaging
29 | .Python
30 | env/
31 | build/
32 | develop-eggs/
33 | dist/
34 | downloads/
35 | eggs/
36 | .eggs/
37 | lib/
38 | lib64/
39 | parts/
40 | sdist/
41 | var/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 | 
46 | # PyInstaller
47 | #  Usually these files are written by a python script from a template
48 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
49 | *.manifest
50 | *.spec
51 | 
52 | # Installer logs
53 | pip-log.txt
54 | pip-delete-this-directory.txt
55 | 
56 | # Unit test / coverage reports
57 | htmlcov/
58 | .tox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *,cover
65 | 
66 | # Translations
67 | *.mo
68 | *.pot
69 | 
70 | # Django stuff:
71 | *.log
72 | 
73 | # Sphinx documentation
74 | docs/_build/
75 | 
76 | # PyBuilder
77 | target/
78 | 
79 | *.ipynb
80 | *.params
81 | *.json
82 | .vscode/
83 | 
84 | model
85 | model/
86 | visimg
87 | mxnet/
88 | mxnext/
89 | data
90 | experiments
91 | pretrain_model
92 | !data/cache/coco_micro_test.roidb
93 | 


--------------------------------------------------------------------------------
/utils/load_model.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import mxnet as mx
 3 | import glob
 4 | 
 5 | 
 6 | def get_latest_ckpt_epoch(prefix):
 7 |     """
 8 |     Get latest checkpoint epoch by prefix
 9 |     """
10 |     def get_checkpoint_epoch(prefix):
11 |         return int(prefix[prefix.rfind('.params')-4:prefix.rfind('.params')])
12 | 
13 |     checkpoints = glob.glob(prefix + '*.params')
14 |     assert len(checkpoints), 'can not find params startswith {}'.prefix
15 |     return max([get_checkpoint_epoch(x) for x in checkpoints])
16 | 
17 | 
def load_checkpoint(prefix, epoch):
    """
    Load the parameters saved in ``<prefix>-<epoch, 4 digits>.params``.

    The loaded dict is split by the part of each key before the first ``:``;
    ``arg`` entries and ``aux`` entries are returned separately, keyed by the
    remainder of the name. Entries with any other tag are dropped.

    :param prefix: Prefix of model name.
    :param epoch: Epoch number of model we would like to load.
    :return: (arg_params, aux_params)
    arg_params : dict of str to NDArray
        Model parameter, dict of name to NDArray of net's weights.
    aux_params : dict of str to NDArray
        Model parameter, dict of name to NDArray of net's auxiliary states.
    """
    print('load %s-%04d.params' % (prefix, epoch))
    loaded = mx.nd.load('%s-%04d.params' % (prefix, epoch))

    by_kind = {'arg': {}, 'aux': {}}
    for key, value in loaded.items():
        kind, name = key.split(':', 1)
        if kind in by_kind:
            by_kind[kind][name] = value
    return by_kind['arg'], by_kind['aux']
40 | 
41 | 
def convert_context(params, ctx):
    """
    Build a copy of a parameter dict whose values live on another context.

    :param params: dict of str to NDArray
    :param ctx: the context to convert to
    :return: new dict with the same keys; each value is ``v.as_in_context(ctx)``
    """
    return {name: array.as_in_context(ctx) for name, array in params.items()}
52 | 
53 | 


--------------------------------------------------------------------------------
/models/retinanet/README.md:
--------------------------------------------------------------------------------
 1 | ## RetinaNet
 2 | 
 3 | This repository implements [**RetinaNet**](https://arxiv.org/abs/1708.02002) in the **SimpleDet** framework. RetinaNet is a state-of-the-art single-stage detector that uses focal loss to prevent the vast number of easy negatives from overwhelming the detector.
 4 | 
 5 | ### How we build RetinaNet
 6 | 
 7 | #### Input
 8 | 
 9 | The pyramid label part of **RetinaNet** is similar to that of **Feature Pyramid Network**; you can refer to the [FPN README](../FPN/README.md). In addition, the label assignment method differs from **Faster R-CNN**, so we override ```_assign_label_to_anchor``` and ```apply``` of ```AnchorTarget2D``` in a class named ```PyramidAnchorTarget2DBase``` to obtain class-aware labels and avoid sampling RoIs.
10 | 
11 | #### Operators
12 | 
13 | - **bbox_norm**, passes data in forward, and normalizes gradient by number of positive samples in backward
14 | - **focal_loss**, acts the same as Sigmoid in forward, and returns the focal loss gradient in backward
15 | - **decode_retina**, reuses the code from [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test_retinanet.py) to decode boxes and scores. Note that ```min_det_score``` is moved to ```RpnParam.proposal``` as it requires different threshold for results from **P7** level.
16 | 
17 | #### Symbol
18 | 
19 | - ``` RetinaNet```, detector only with RPN
20 | - ```RetinaNetHead```, classification and regression header with sharing weights
21 | - ```RetinaNetNeck```, top-down pathway for **FPN** in **RetinaNet**
22 | 
23 | #### Config
24 | 
25 | - ```min_det_score``` in ```TestParam``` is set to 0 to remove those appended boxes with zero scores
26 | - To avoid sharing parameter of the same field from config in pyramid levels, i.e. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass ```AnchorTarget2DParam``` instance rather than class for anchor generation.
27 | 


--------------------------------------------------------------------------------
/utils/patch_config.py:
--------------------------------------------------------------------------------
 1 | import types
 2 | import inspect
 3 | 
 4 | 
 5 | class NoThrowBase:
 6 |     def __getattr__(self, item):
 7 |         return None
 8 | 
 9 | 
10 | class NoThrowMeta(type):
11 |     def __getattr__(self, item):
12 |         return None
13 | 
14 | 
15 | def patch_config_as_nothrow(instance):
16 |     if "NoThrow" in [instance.__name__, instance.__class__.__name__]:
17 |         return instance
18 | 
19 |     if type(instance) == type:
20 |         instance = types.new_class(instance.__name__ + "NoThrow", (instance, ), dict(metaclass=NoThrowMeta))
21 |         for (k, v) in inspect.getmembers(instance):
22 |             if not k.startswith("__") and type(v) == type:
23 |                 type.__setattr__(instance, k, patch_config_as_nothrow(v))
24 |     else:
25 |         for (k, v) in inspect.getmembers(instance.__class__):
26 |             if not k.startswith("__") and type(v) == type:
27 |                 type.__setattr__(instance.__class__, k, patch_config_as_nothrow(v))
28 |         instance.__class__ = type(instance.__class__.__name__ + "NoThrow", (instance.__class__, NoThrowBase), {})
29 | 
30 |     return instance
31 | 
32 | 
if __name__ == "__main__":
    # Self-test 1: a flat config class keeps its values and answers None
    # for anything it does not define.
    class A:
        a = 1

    A = patch_config_as_nothrow(A)
    assert A.a == 1
    assert A.non_exist is None

    # Self-test 2: a nested config class is patched together with its parent.
    class B:
        b = 1

        class B1:
            b1 = 2

    B = patch_config_as_nothrow(B)
    assert B.b == 1
    assert B.B1.b1 == 2
    assert B.non_exist is None
    assert B.B1.non_exist is None

    # Self-test 3: the same for an *instance* of a config class; functions
    # defined on the nested class must stay callable.
    class B:
        b = 1

        class B1:
            b1 = 2

            def b1f():
                return 3

    b = patch_config_as_nothrow(B())
    assert b.b == 1
    assert b.B1.b1 == 2
    assert b.B1.b1f() == 3
    assert b.non_exist is None
    assert b.B1.non_exist is None
66 | 


--------------------------------------------------------------------------------
/scripts/launch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -ne 2 ]; then
 4 | echo "usage: $0 config_path comma_separated_worker_hostnames"
 5 | exit -1
 6 | fi
 7 | 
 8 | conffile=$1
 9 | hosts=$2
10 | 
11 | # extract worker and check reachablity
12 | IFS=, read -r -a host_array <<< $hosts
13 | for host in ${host_array[@]}; do
14 |     # check reachability
15 |     echo "check reachability of $host"
16 |     ssh -q $host exit
17 |     if [ $? -ne 0 ]; then
18 |         echo "$host is not reachable"
19 | 	exit -1
20 |     fi
21 | 
22 |     # check availablity (retreat if remote host is in use)
23 |     echo "check availability of $host"
24 |     for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 
25 | 	x="${x//[$'\t\r\n ']}"  # remove trailing whitespace
26 | 	if [ $x -gt 10 ]; then 
27 | 	    echo "$host has gpu utilization of $x%"; 
28 | 	    exit -1
29 |         fi;  
30 |     done
31 |     
32 |     # cleanup potentially dead python process (march since we checked it)
33 |     ssh -q $host pkill python
34 | done
35 | 
36 | gpucount=8
37 | num_node=${#host_array[@]}
38 | num_servers=${num_node}
39 | root_dir="/mnt/tscpfs/yuntao.chen/simpledet/simpledet_open"
40 | sync_dir="/tmp/simpledet_sync"
41 | singularity_image=/mnt/tscpfs/yuntao.chen/simpledet.img
42 | 
43 | # check existence of config file
44 | if [ ! -f ${conffile} ]; then
45 | echo "${conffile} does not exsit"
46 | exit -1
47 | fi
48 | 
49 | # dump hosts in a hostfile for launch.py
50 | IFS=,
51 | output=""
52 | for id in $hosts 
53 | do output+="${id}\n"
54 | done
55 | unset IFS
56 | echo -e ${output::-2} > scripts/hosts.txt
57 | sleep 1
58 | 
59 | logfile=${conffile#config/}
60 | logfile=${logfile%.py}
61 | 
62 | export DMLC_INTERFACE=eth0
63 | python -u /mnt/tscpfs/yuntao.chen/dist-mxnet/tools/launch.py \
64 |     -n ${num_node} \
65 |     -s ${num_servers} \
66 |     --launcher ssh \
67 |     -H scripts/hosts.txt \
68 |     scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \
69 |     2>&1 | tee -a ${root_dir}/log/${logfile}.log
70 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/focal_loss.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * \file focal_loss.cc
22 |  * \brief
23 |  * \author Chenxia Han
24 | */
25 | 
26 | #include "./focal_loss-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | 
31 | template<>
32 | Operator *CreateOp<cpu>(FocalLossParam param, int dtype) {
33 |   Operator *op = nullptr;
34 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
35 |     op = new FocalLossOp<cpu, DType>(param);
36 |   });
37 |   return op;
38 | }
39 | 
40 | Operator *FocalLossProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
41 |                                          std::vector<int> *in_type) const {
42 |   std::vector<TShape> out_shape, aux_shape;
43 |   std::vector<int> out_type, aux_type;
44 |   CHECK(InferType(in_type, &out_type, &aux_type));
45 |   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
46 |   DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
47 | }
48 | 
49 | DMLC_REGISTER_PARAMETER(FocalLossParam);
50 | 
51 | MXNET_REGISTER_OP_PROPERTY(_contrib_FocalLoss, FocalLossProp)
52 | .describe("Focal loss for dense object detection")
53 | .add_argument("data", "NDArray-or-Symbol", "Data")
54 | .add_argument("label", "NDArray-or-Symbol", "Label")
55 | .add_arguments(FocalLossParam::__FIELDS__());
56 | 
57 | }  // namespace op
58 | }  // namespace mxnet
59 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/bbox_norm.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * \file bbox_norm.cc
22 |  * \brief
23 |  * \author Chenxia Han
24 | */
25 | 
26 | #include "./bbox_norm-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | 
31 | template<>
32 | Operator *CreateOp<cpu>(BBoxNormParam param, int dtype) {
33 |   Operator *op = nullptr;
34 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
35 |     op = new BBoxNormOp<cpu, DType>(param);
36 |   });
37 |   return op;
38 | }
39 | 
40 | Operator *BBoxNormProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
41 |                                          std::vector<int> *in_type) const {
42 |   std::vector<TShape> out_shape, aux_shape;
43 |   std::vector<int> out_type, aux_type;
44 |   CHECK(InferType(in_type, &out_type, &aux_type));
45 |   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
46 |   DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
47 | }
48 | 
49 | DMLC_REGISTER_PARAMETER(BBoxNormParam);
50 | 
51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BBoxNorm, BBoxNormProp)
52 | .describe("Normalize those boxes with positive label")
53 | .add_argument("data", "NDArray-or-Symbol", "Data to normalize")
54 | .add_argument("label", "NDArray-or-Symbol", "Label")
55 | .add_arguments(BBoxNormParam::__FIELDS__());
56 | 
57 | }  // namespace op
58 | }  // namespace mxnet
59 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/broadcast_scale.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * \file broadcast_scale.cc
22 |  * \brief
23 |  * \author Yuntao Chen
24 | */
25 | 
26 | #include "./broadcast_scale-inl.h"
27 | 
28 | namespace mxnet {
29 | namespace op {
30 | 
31 | template<>
32 | Operator *CreateOp<cpu>(BroadcastScaleParam param, int dtype) {
33 |   Operator *op = nullptr;
34 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
35 |     op = new BroadcastScaleOp<cpu, DType>(param);
36 |   });
37 |   return op;
38 | }
39 | 
40 | Operator *BroadcastScaleProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
41 |                                          std::vector<int> *in_type) const {
42 |   std::vector<TShape> out_shape, aux_shape;
43 |   std::vector<int> out_type, aux_type;
44 |   CHECK(InferType(in_type, &out_type, &aux_type));
45 |   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
46 |   DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
47 | }
48 | 
49 | DMLC_REGISTER_PARAMETER(BroadcastScaleParam);
50 | 
51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BroadcastScale, BroadcastScaleProp)
52 | .describe("Broadcast_scale to enable in-place scaling of tensor")
53 | .add_argument("data", "NDArray-or-Symbol", "Data")
54 | .add_argument("label", "NDArray-or-Symbol", "Label")
55 | .add_arguments(BroadcastScaleParam::__FIELDS__());
56 | 
57 | }  // namespace op
58 | }  // namespace mxnet
59 | 


--------------------------------------------------------------------------------
/models/FPN/README.md:
--------------------------------------------------------------------------------
 1 | ## Feature Pyramid Networks for Object Detection
 2 | 
 3 | Here we introduce how [**Feature Pyramid Network**](https://arxiv.org/abs/1612.03144) is built in the **simpledet** framework. The following sections explain the implementation in detail.
 4 | 
 5 | #### AnchorTarget
 6 | 
 7 | Since **FPN** uses a **Feature Pyramid** as backbone, we cannot use ```AnchorTarget2D``` directly, because it only generates anchor targets for the single stride declared in ```RpnParam```. Instead, we implement ```PyramidAnchorTarget2D``` to create a list of ```AnchorTarget2D```, each generating anchor targets for a single pyramid stride, and then collect them together. More specifically, we create an instance for each pyramid stride to generate anchors. To collect anchors from different pyramid levels, we override the ```v_all_anchor``` and ```h_all_anchor``` properties, which return the concatenation of anchors from different levels, and then assign them to primary instances. We also override the ```apply``` function to obtain label, sample anchor, target and weight from primary instances, and then split and concat them along a certain axis.
 8 | 
 9 | #### Operators
10 | 
11 | - **get_top_proposal**, since **FPN** has multi-scale proposals, we concat the multi-scale proposals together and take the top-K proposals for roi-pooling or roi-align
12 | - **assign_layer_fpn**, **FPN** assigns the proposals to target levels (P2, P3, P4, P5) according to their areas, so we use this operator to assign feature levels to proposals
13 | 
14 | 
15 | #### Symbol
16 | 
17 | - ``` Detector```, detector is the same as FasterRcnn
18 | - ```FPNNeck```, top-down pathway for **Feature Pyramid Network**
19 | - ```FPNRpnHead```, classification and regression header with sharing weights for FPN-RPN
20 | - ```FPNRoiAlign```, we use this module to get the proposal feature for the proposals of different levels respectively, then add the feature from different level proposals together for next rcnn head
21 | 
22 | #### Config
23 | 
24 | - ```TestParam``` is the same as the setting in [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md)
25 | - To avoid sharing parameter of the same field from config in pyramid levels, i.e. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass ```AnchorTarget2DParam``` instance rather than class for anchor generation.
26 | 


--------------------------------------------------------------------------------
/models/NASFPN/README.md:
--------------------------------------------------------------------------------
 1 | ## NAS-FPN
 2 | 
 3 | This repository implements [**NAS-FPN**](https://arxiv.org/abs/1904.07392) in the SimpleDet framework.
 4 | 
 5 | ### Quick Start
 6 | ```bash
 7 | # train baseline retinanet following the setting of NAS-FPN
 8 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_fpn_640_1@256_25epoch.py
 9 | 
10 | # train NAS-FPN
11 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_640_7@256_25epoch.py
12 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1024_7@256_25epoch.py
13 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1280_7@384_25epoch.py
14 | 
15 | # train hand-crafted neck
16 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_tdbu_1280_3@384_25epoch.py
17 | ```
18 | 
19 | ### Results and Models
20 | All AP results are reported on test-dev of the [COCO dataset](http://cocodataset.org).
21 | 
22 | |Model|InputSize|Backbone|Neck|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP(Mask AP)|Link|
23 | |-----|-----|--------|----|--------------|---|---------|----|---------|-----------|---------------|----|
24 | |RetinaNet|640|R50v1b-FPN|1@256|25 epoch|8X 1080Ti|8|yes|6.6G|85 img/s|37.4|[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/retina_r50v1b_fpn_640640_25epoch.zip)|
25 | |NAS-FPN|640|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|8|yes|7.8G|66 img/s|40.1|[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/retina_r50v1b_nasfpn_640640_25epoch.zip)|
26 | |NAS-FPN|1024|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|4|yes|9.1G|17 img/s|44.2|[model](http://simpledet.alarge.space:1234/?/retina_r50v1b_nasfpn_1024_7%40256_25epoch.zip)|
27 | |NAS-FPN|1280|R50v1b-FPN|7@384|25 epoch|8X 1080Ti|2|yes|8.9G|10 img/s|45.3|[model](http://simpledet.alarge.space:1234/?/retina_r50v1b_nasfpn_1280_7%40384_25epoch.zip)|
28 | |TD-BU*|1280|R50v1b-FPN|3@384|25 epoch|8X 1080Ti|3|yes|10.5G|12 img/s|44.7|[model](http://simpledet.alarge.space:1234/?/retina_r50v1b_tdbu_1280_3%40384_25epoch.zip)|
29 | 
30 | \* Short for TopDown-BottomUp, a highly symmetric neck proposed by Zehao.
31 | ### Reference
32 | ```
33 | @inproceedings{ghiasi2019fpn,
34 |   title={NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection},
35 |   author={Ghiasi, Golnaz and Lin, Tsung-Yi and Pang, Ruoming and Le, Quoc V},
36 |   booktitle={CVPR},
37 |   year={2019}
38 | }
39 | ```
40 | 


--------------------------------------------------------------------------------
/detection_infer_speed.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import importlib
 3 | import time
 4 | 
 5 | from core.detection_module import DetModule
 6 | 
 7 | import mxnet as mx
 8 | 
 9 | 
def parse_args():
    """
    Parse the command line and import the requested config module.

    ``--config`` is a ``/``-separated path such as ``config/foo.py``; it is
    turned into the dotted module name ``config.foo`` and imported.

    :return: (config module, GPU index, [SHORT, LONG] input shape, number of runs)
    """
    parser = argparse.ArgumentParser(description='Test detector inference speed')
    # general
    parser.add_argument('--config', help='config file path', type=str, required=True)
    parser.add_argument('--shape', help='specify input 2d image shape', metavar=('SHORT', 'LONG'), type=int, nargs=2, required=True)
    parser.add_argument('--gpu', help='GPU index', type=int, default=0)
    parser.add_argument('--count', help='number of runs, final result will be averaged', type=int, default=100)
    args = parser.parse_args()

    # Strip only a trailing ".py": str.replace('.py', '') would also delete a
    # ".py" that occurs in the middle of the path and import the wrong module.
    module_path = args.config
    if module_path.endswith('.py'):
        module_path = module_path[:-len('.py')]
    config = importlib.import_module(module_path.replace('/', '.'))
    return config, args.gpu, args.shape, args.count
21 | 
22 | 
if __name__ == "__main__":
    config, gpu, shape, count = parse_args()

    # get_config returns 13 values; only pModel and pTest are used below
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=False)

    # save the test symbol next to the model prefix
    sym = pModel.test_symbol
    sym.save(pTest.model.prefix + "_infer_speed.json")

    # create dummy data batch: (input name, input shape, dummy array) per input
    image_shape = [1, 3] + shape
    dummy_inputs = [
        ("data", image_shape, mx.nd.ones(shape=image_shape)),
        ("im_info", [1, 3], mx.nd.array([x / 2.0 for x in shape] + [2.0]).reshape(1, 3)),
        ("im_id", [1], mx.nd.array([1])),
        ("rec_id", [1], mx.nd.array([1])),
    ]
    data_names = [name for name, _, _ in dummy_inputs]
    data_shape = [(name, dims) for name, dims, _ in dummy_inputs]
    data_batch = mx.io.DataBatch(data=[array for _, _, array in dummy_inputs])

    mod = DetModule(sym, data_names=data_names, context=mx.gpu(gpu))
    mod.bind(data_shapes=data_shape, for_training=False)
    mod.set_params({}, {}, True)

    def forward_and_wait():
        """Run one forward pass and call wait_to_read() on every output."""
        mod.forward(data_batch, is_train=False)
        for output in mod.get_outputs():
            output.wait_to_read()

    # let AUTOTUNE run for once, outside of the timed region
    forward_and_wait()

    tic = time.time()
    for _ in range(count):
        forward_and_wait()
    toc = time.time()

    # average time per forward pass, in milliseconds
    print((toc - tic) / count * 1000)
60 | 
61 | 


--------------------------------------------------------------------------------
/models/maskrcnn/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | 
 4 | from pycocotools import mask as mask_util
 5 | 
 6 | 
 7 | def expand_boxes(boxes, scale):
 8 |     """Expand an array of boxes by a given scale."""
 9 |     w_half = (boxes[:, 2] - boxes[:, 0]) * .5
10 |     h_half = (boxes[:, 3] - boxes[:, 1]) * .5
11 |     x_c = (boxes[:, 2] + boxes[:, 0]) * .5
12 |     y_c = (boxes[:, 3] + boxes[:, 1]) * .5
13 | 
14 |     w_half *= scale
15 |     h_half *= scale
16 | 
17 |     boxes_exp = np.zeros(boxes.shape)
18 |     boxes_exp[:, 0] = x_c - w_half
19 |     boxes_exp[:, 2] = x_c + w_half
20 |     boxes_exp[:, 1] = y_c - h_half
21 |     boxes_exp[:, 3] = y_c + h_half
22 | 
23 |     return boxes_exp
24 | 
25 | 
def segm_results(bbox_xyxy, cls, masks, im_h, im_w):
    """
    Paste per-box masks into full-image masks and RLE-encode them.

    Modified from Detectron. To work around an issue with cv2.resize (it seems
    to automatically pad with repeated border values), each mask is zero-padded
    by 1 pixel before it is resized back to image resolution, which prevents
    "top hat" artifacts. The reference boxes are expanded by the matching
    factor ``(M + 2) / M``, where ``M`` is the size of the last mask axis.

    :param bbox_xyxy: boxes, passed to ``expand_boxes``
    :param cls: one class index per box, used to pick ``mask[cls, :, :]``
    :param masks: one mask stack per box, indexed as ``[class, row, col]``
    :param im_h: height of the output masks
    :param im_w: width of the output masks
    :return: numpy array of the RLE objects returned by ``mask_util.encode``
    """
    mask_size = masks.shape[-1]
    pad_scale = (mask_size + 2.0) / mask_size
    int_boxes = expand_boxes(bbox_xyxy, pad_scale).astype(np.int32)
    # reused for every box; only the interior is overwritten, the border stays 0
    canvas = np.zeros((mask_size + 2, mask_size + 2), dtype=np.float32)

    encoded = []
    for box, class_masks, label in zip(int_boxes, masks, cls):
        canvas[1:-1, 1:-1] = class_masks[label, :, :]

        # box size in pixels, at least 1 in each direction
        box_w = np.maximum(box[2] - box[0] + 1, 1)
        box_h = np.maximum(box[3] - box[1] + 1, 1)

        resized = cv2.resize(canvas, (box_w, box_h))
        binary = np.array(resized > 0.5, dtype=np.uint8)

        # clip the box to the image
        left = max(box[0], 0)
        right = min(box[2] + 1, im_w)
        top = max(box[1], 0)
        bottom = min(box[3] + 1, im_h)

        full_mask = np.zeros((im_h, im_w), dtype=np.uint8)
        full_mask[top:bottom, left:right] = binary[
            (top - box[1]):(bottom - box[1]),
            (left - box[0]):(right - box[0])
        ]

        # Get RLE encoding used by the COCO evaluation API
        fortran_mask = np.array(full_mask[:, :, np.newaxis], order='F')
        encoded.append(mask_util.encode(fortran_mask)[0])

    return np.array(encoded)


--------------------------------------------------------------------------------
/models/FPN/get_top_proposal.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Collect top proposals across all levels for FPN
 3 | author: Yi Jiang, Chenxia Han
 4 | """
 5 | 
 6 | import mxnet as mx
 7 | import numpy as np
 8 | 
 9 | 
class GetTopProposalOperator(mx.operator.CustomOp):
    """Custom operator keeping, per image, the ``top_n`` highest-scoring proposals."""

    def __init__(self, top_n):
        super().__init__()
        self.top_n = top_n

    def forward(self, is_train, req, in_data, out_data, aux):
        """Sort each image's proposals by score (descending) and keep the first ``top_n``.

        ``in_data[0]`` holds the boxes and ``in_data[1]`` the scores; both are
        indexed by image along axis 0. Column 0 of the scores is the sort key.
        """
        all_bboxes, all_scores = in_data[0], in_data[1]
        keep = self.top_n

        kept_bboxes, kept_scores = [], []
        for image_idx in range(all_bboxes.shape[0]):
            order = mx.nd.argsort(all_scores[image_idx][:, 0], is_ascend=False)
            kept_bboxes.append(all_bboxes[image_idx][order][:keep])
            kept_scores.append(all_scores[image_idx][order][:keep])

        self.assign(out_data[0], req[0], mx.nd.stack(*kept_bboxes))
        self.assign(out_data[1], req[1], mx.nd.stack(*kept_scores))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Write zero gradients to both inputs."""
        for slot in (0, 1):
            self.assign(in_grad[slot], req[slot], 0)
45 | 
46 | 
@mx.operator.register('get_top_proposal')
class GetTopProposalProp(mx.operator.CustomOpProp):
    """Properties of the ``get_top_proposal`` custom operator."""

    def __init__(self, top_n):
        super().__init__(need_top_grad=False)
        # converted with int() before being stored
        self.top_n = int(top_n)

    def list_arguments(self):
        return ['bbox', 'score']

    def list_outputs(self):
        return ['bbox', 'score']

    def infer_shape(self, in_shape):
        """Input shapes pass through; outputs are (N, top_n, 4) and (N, top_n, 1)."""
        bbox_shape, score_shape = in_shape[0], in_shape[1]
        batch = bbox_shape[0]

        in_shapes = [bbox_shape, score_shape]
        out_shapes = [(batch, self.top_n, 4), (batch, self.top_n, 1)]
        return in_shapes, out_shapes

    def create_operator(self, ctx, shapes, dtypes):
        return GetTopProposalOperator(self.top_n)

    def declare_backward_dependency(self, out_grad, in_data, out_data):
        # backward declares no dependencies
        return []
75 | 


--------------------------------------------------------------------------------
/utils/contrib/edit_model_weight.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     This script allows you to edit the model weight from https://github.com/TuSimple/simpledet/blob/master/MODEL_ZOO.md
 4 |     for your own training. In this script, it assumes you don't change the layer of model, but only edit the content in 
 5 |     the layer that models of MODEL_ZOO have. For example, you just change the training classes but not add or delete a 
 6 |     unit of resnet.
 7 |     
 8 |     Example: Edit the weight from 80 + 1 classes to 3 + 1 classes training.
 9 |         - Train by your own configuration for one epoch, the configuration should have edited for 3 + 1 classes training.
10 |         - Edit the constant in this file.
11 |             - SIMPLEDET_WEIGHT_FOLDER   the path to the weight folder you download
12 |             - TRAINED_WEIGHT_FOLDER     the path to the weight folder you need the shape
13 |             - EDIT_KEY                  the key of layer which you want to edit the weight, you can show the key by 
14 |                                         print(arg_params_src), in this example, the key names are 
15 |                                         "bbox_cls_logit_weight", "bbox_cls_logit_bias"
16 |         - Run the code!
17 |     
18 |     Note: The newly generated model weight file will overwrite your originally downloaded weight file; if you don't want this,
19 |     you can edit the last line of the code.
20 |     
21 |     TODO: Before you run the code, you should train a model for one epoch and edit the code as instructions above.
22 | """
23 | 
24 | import mxnet as mx
25 | import numpy as np
26 | import os
27 | 
# TODO: Edit the path.
SIMPLEDET_WEIGHT_FOLDER = " "
TRAINED_WEIGHT_FOLDER = " "

# TODO: Edit the key names which you want to modify.
EDIT_KEY = ["bbox_cls_logit_weight", "bbox_cls_logit_bias"]


def change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst):
    """
    Overwrite the ``EDIT_KEY`` entries of ``arg_params_src`` with those of ``arg_params_dst``.

    :param arg_params_src: dict that is modified in place and returned
    :param arg_params_dst: dict the replacement values are read from
    :return: ``arg_params_src`` (the same object)
    :raises KeyError: if a key of ``EDIT_KEY`` is missing from ``arg_params_dst``
    """
    arg_params_src.update((name, arg_params_dst[name]) for name in EDIT_KEY)
    return arg_params_src
39 | 
if __name__ == "__main__":
    src_prefix = os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint")
    dst_prefix = os.path.join(TRAINED_WEIGHT_FOLDER, "checkpoint")

    # epoch 6 of the downloaded weights, epoch 1 of your own training run
    sym, arg_params_src, aux_params = mx.model.load_checkpoint(src_prefix, 6)
    _, arg_params_dst, _ = mx.model.load_checkpoint(dst_prefix, 1)

    # print(arg_params_src) to show the key name.
    # arg_params_src is the weight you want to change, downloaded from simpledet;
    # arg_params_dst is the weight that has the shape you need.
    arg_params = change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst)

    # saved as epoch 1 under the prefix of the downloaded weights
    mx.model.save_checkpoint(src_prefix, 1, sym, arg_params, aux_params)
52 | 


--------------------------------------------------------------------------------
/operator_py/cython/bbox.pyx:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017-present, Facebook, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | #
16 | # Based on:
17 | # --------------------------------------------------------
18 | # Fast R-CNN
19 | # Copyright (c) 2015 Microsoft
20 | # Licensed under The MIT License [see LICENSE for details]
21 | # Written by Sergey Karayev
22 | # --------------------------------------------------------
23 | 
24 | cimport cython
25 | import numpy as np
26 | cimport numpy as np
27 | 
# float32 is used both as the NumPy dtype of the result array (DTYPE) and as
# the C element type of the typed buffers (DTYPE_t); the two must stay in sync.
DTYPE = np.float32
ctypedef np.float32_t DTYPE_t
30 | 
# Bounds checking is switched off for speed: rows with fewer than four columns
# are NOT detected and would read out of bounds.
@cython.boundscheck(False)
def bbox_overlaps_cython(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise intersection-over-union between two sets of boxes.

    Columns 0/2 and 1/3 of each row are treated as the start/end coordinates
    along the two axes (presumably x1, y1, x2, y2 -- confirm with callers).
    Extents are computed as ``end - start + 1``, i.e. end coordinates are
    inclusive.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes;
        entries for pairs that do not intersect stay 0
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # Zero-initialised so that non-intersecting pairs need no explicit write.
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    # Only C-typed values and buffer accesses are used below, so the GIL is released.
    with nogil:
        for k in range(K):
            # Area of the k-th query box, hoisted out of the inner loop.
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
                (query_boxes[k, 3] - query_boxes[k, 1] + 1)
            )
            for n in range(N):
                # Intersection extent along the first axis (columns 0 and 2).
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0]) + 1
                )
                if iw > 0:
                    # Intersection extent along the second axis (columns 1 and 3).
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1]) + 1
                    )
                    if ih > 0:
                        # Union area = area(box n) + area(query k) - intersection.
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0] + 1) *
                            (boxes[n, 3] - boxes[n, 1] + 1) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps


--------------------------------------------------------------------------------
/models/FPN/assign_layer_fpn.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Assign Layer operator for FPN
 3 | author: Yi Jiang, Chenxia Han
 4 | """
 5 | 
 6 | import mxnet as mx
 7 | import numpy as np
 8 | 
 9 | 
class AssignLayerFPNOperator(mx.operator.CustomOp):
    """Split a batch of RoIs into one output per FPN stride.

    Each output has the same shape as the input. RoIs that are not assigned to
    the output's stride are replaced by zeros.
    """

    def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level):
        super().__init__()
        self.rcnn_stride = rcnn_stride
        self.roi_canonical_scale = roi_canonical_scale
        self.roi_canonical_level = roi_canonical_level

    def forward(self, is_train, req, in_data, out_data, aux):
        """Assign every RoI to a stride and write one masked copy per stride.

        ``in_data[0]`` is indexed as ``[:, :, 0..3]``, so it is a 3-D array
        whose last axis holds at least four box coordinates.
        """
        rois = in_data[0]
        strides = self.rcnn_stride

        # The allowed pyramid levels are bounded by the smallest/largest stride.
        lowest_lvl = np.log2(min(strides))
        highest_lvl = np.log2(max(strides))

        # RoI scale = sqrt(area), with the inclusive "+ 1" extent convention.
        extent_a = rois[:, :, 2] - rois[:, :, 0] + 1
        extent_b = rois[:, :, 3] - rois[:, :, 1] + 1
        roi_scale = mx.nd.sqrt(extent_a * extent_b)

        # level = floor(canonical_level + log2(scale / canonical_scale)),
        # clipped to the available levels; 1e-6 keeps log2 away from zero.
        raw_lvls = mx.nd.floor(
            self.roi_canonical_level
            + mx.nd.log2(roi_scale / self.roi_canonical_scale + 1e-6)
        )
        bounded_lvls = mx.nd.clip(raw_lvls, lowest_lvl, highest_lvl)
        # NOTE: uint8 can only represent power-of-two strides up to 128.
        assigned_stride = (2 ** bounded_lvls).astype('uint8')

        for out_idx, stride in enumerate(strides):
            blank = mx.nd.zeros_like(rois)
            # 1.0 where the RoI belongs to this stride, broadcast over the
            # coordinate axis so it can drive an element-wise select.
            mask = mx.nd.expand_dims(assigned_stride == stride, axis=2).astype('float32')
            mask = mx.nd.broadcast_like(mask, blank)
            selected = mx.nd.where(mask, rois, blank)

            self.assign(out_data[out_idx], req[out_idx], selected)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Write a zero gradient for the RoI input."""
        self.assign(in_grad[0], req[0], 0)
44 | 
45 | 
@mx.operator.register('assign_layer_fpn')
class AssignLayerFPNProp(mx.operator.CustomOpProp):
    """Operator properties for the ``assign_layer_fpn`` custom op."""

    def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level):
        super().__init__(need_top_grad=False)
        # NOTE(review): the stride collection arrives as a string and is
        # parsed with eval(); only pass trusted values for this attribute.
        self.rcnn_stride = eval(rcnn_stride)
        self.roi_canonical_scale = int(roi_canonical_scale)
        self.roi_canonical_level = int(roi_canonical_level)

    def list_arguments(self):
        """Return the name of the single input."""
        return ['rois']

    def list_outputs(self):
        """Return one output name per stride, e.g. ``rois_s4``."""
        return list(map('rois_s{}'.format, self.rcnn_stride))

    def infer_shape(self, in_shape):
        """Every output has exactly the shape of the RoI input."""
        roi_shape = in_shape[0]
        per_stride_shapes = [roi_shape for _ in self.rcnn_stride]
        return [roi_shape], per_stride_shapes

    def create_operator(self, ctx, shapes, dtypes):
        """Instantiate the operator with the parsed attributes."""
        return AssignLayerFPNOperator(
            self.rcnn_stride,
            self.roi_canonical_scale,
            self.roi_canonical_level,
        )

    def declare_backward_dependency(self, out_grad, in_data, out_data):
        """Backward needs none of the forward tensors or output gradients."""
        return []
74 | 


--------------------------------------------------------------------------------
/utils/contrib/data_to_coco.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     This script allows you to transfer your own data from your own data format to coco format.
 4 | 
 5 |     Attention: This is not the official format, it does not require licenses and other redundant info, but can generate
 6 |     coco-like dataset which can be accepted by Simpledet.
 7 | 
 8 |     TODO: You should reimplement the sections marked TODO in main() (class map, data loading, images and
 9 |     annotations); this file only describes the format of the dataset and the way to save it.
10 | """
11 | 
12 | import json
13 | import sys
14 | 
def main():
    """Write a COCO-style JSON skeleton built from a JSON-lines input file.

    Command line: ``python data_to_coco.py infile outfile``.

    ``infile`` is read as one JSON document per line (blank lines are skipped).
    ``outfile`` receives a dict with the keys ``licenses``, ``info``,
    ``categories``, ``images`` and ``annotations``. The image and annotation
    entries written here are placeholders; replace the TODO sections with
    loops over your own data.

    Exits with status 1 (after printing the usage line) when fewer than two
    positional arguments are given.
    """
    if len(sys.argv) < 3:
        print("Usage: python data_to_coco.py infile outfile")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)
    input_file = sys.argv[1]
    output_file = sys.argv[2]

    # The whole coco dataset
    dataset = {
        'licenses': [],
        'info': {},
        'categories': [],   # Required
        'images': [],       # Required
        'annotations': []   # Required
    }

    # TODO: class_map maps the class, which would be added into dataset['categories']
    class_map = {
        "box": 1,
        "can": 2,
        "bottle": 3
    }
    for class_name, class_id in class_map.items():
        dataset['categories'].append({
            'id': class_id,
            'name': class_name,
            'supercategory': 'supercategory_name'
        })

    # TODO: Load your own data
    self_data_list = []
    with open(input_file, 'r') as in_file:
        for line in in_file:
            # A trailing or separating blank line is not a JSON document.
            if line.strip():
                self_data_list.append(json.loads(line))

    # TODO: Dataset images info, normally you should implement an iter here to append the info
    dataset['images'].append({
        'coco_url': '',
        'date_captured': '',
        'file_name': '',    # Required (str)    image file name
        'flickr_url': '',
        'id': int(),        # Required (int)    id of image
        'license': '',
        'width': int(),     # Required (int)    width of image
        'height': int()     # Required (int)    height of image
    })

    # TODO: Dataset annotation info, normally you should implement an iter here to append the info
    dataset["annotations"].append({
        'area': int(),          # Required (int)    area of this annotation (not of the whole image)
        'bbox': [int()] * 4,    # Required (int)    one bbox of the image, COCO order [x, y, width, height]
        'category_id': int(),   # Required (int)    class id of this bbox
        'id': int(),            # Required (int)    annotation id, unique across the whole dataset
        'image_id': int(),      # Required (int)    image id of this bbox
        'iscrowd': 0,           # Optional, required only if you want to train for semantic segmentation
        'segmentation': []      # Optional, required only if you want to train for semantic segmentation
    })

    with open(output_file, 'w') as ofile:
        json.dump(dataset, ofile, sort_keys=True, indent=2)


if __name__ == '__main__':
    main()
79 | 


--------------------------------------------------------------------------------
/unittest/test_loader.py:
--------------------------------------------------------------------------------
 1 | import pickle as pkl
 2 | import unittest
 3 | import mxnet as mx
 4 | 
 5 | from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
 6 |     ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
 7 |     RenameRecord, AnchorTarget2D, AnchorLoader
 8 | from config import detection_config
 9 | 
10 | 
class TestLoader(unittest.TestCase):
    """Iteration tests for ``AnchorLoader`` on the micro COCO roidb fixture."""

    @staticmethod
    def _make_loader(batch_size, keep=None):
        """Build an ``AnchorLoader`` over the fixture roidb.

        Parameters
        ----------
        batch_size : int
            Batch size handed to the loader.
        keep : callable or None
            Optional predicate; only roidb records for which it returns a
            true value are given to the loader.
        """
        _, pKv, _, _, _, _, _, _, _, transform, data_name, label_name, _ = \
            detection_config.get_config(is_train=True)
        # Context manager so the fixture file handle is closed deterministically.
        with open("unittest/data/coco_micro_test.roidb", "rb") as roidb_file:
            roidbs = pkl.load(roidb_file, encoding="latin1")
        if keep is not None:
            roidbs = [roidb for roidb in roidbs if keep(roidb)]

        return AnchorLoader(
            roidb=roidbs,
            transform=transform,
            data_name=data_name,
            label_name=label_name,
            batch_size=batch_size,
            shuffle=True,
            num_thread=1,
            kv=mx.kvstore.create(pKv.kvstore)
        )

    def test_empty_v_loader(self):
        """Only records with h >= w: iteration must end with StopIteration."""
        loader = self._make_loader(1, keep=lambda roidb: roidb['h'] >= roidb['w'])
        with self.assertRaises(StopIteration):
            while True:
                loader.next()

    def test_empty_h_loader(self):
        """Only records with h < w: iteration must end with StopIteration."""
        loader = self._make_loader(1, keep=lambda roidb: roidb['h'] < roidb['w'])
        with self.assertRaises(StopIteration):
            while True:
                loader.next()

    def test_record_num(self):
        """Batches yielded times batch size must equal ``loader.total_record``."""
        batch_size = 4
        loader = self._make_loader(batch_size)

        num_batch = 0
        while True:
            try:
                loader.next()
                num_batch += 1
            except StopIteration:
                break
        self.assertEqual(batch_size * num_batch, loader.total_record)
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/axpy.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * Copyright (c) 2018 by Contributors
22 |  * \file axpy.cc
23 |  * \brief port from https://github.com/hujie-frank/SENet
24 |  * \author Yuntao Chen
25 | */
26 | #include "./axpy-inl.h"
27 | 
namespace mshadow {
// CPU overload of the Axpy forward launcher. There is no CPU implementation:
// calling it aborts with a fatal "NotImplemented" log message.
// NOTE(review): `out` is declared rank 1 here while x/y are rank 4 -- confirm
// against the call site in axpy-inl.h.
template <typename Dtype>
inline void AxpyForwardLauncher(const Tensor<cpu, 2, Dtype> &scale_data,
                                const Tensor<cpu, 4, Dtype> &x_data,
                                const Tensor<cpu, 4, Dtype> &y_data,
                                const Tensor<cpu, 1, Dtype> &out) {
    LOG(FATAL) << "NotImplemented";
}

// CPU overload of the Axpy backward launcher. Like the forward stub it only
// aborts with a fatal "NotImplemented" log message; none of the gradient
// tensors are written.
template <typename Dtype>
inline void AxpyBackwardLauncher(const Tensor<cpu, 2, Dtype> &scale_data,
                                 const Tensor<cpu, 4, Dtype> &x_data,
                                 const Tensor<cpu, 4, Dtype> &y_data,
                                 const Tensor<cpu, 2, Dtype> &scale_grad,
                                 const Tensor<cpu, 4, Dtype> &x_grad,
                                 const Tensor<cpu, 4, Dtype> &y_grad,
                                 const Tensor<cpu, 4, Dtype> &out_grad,
                                 Stream<cpu> *s) {
    LOG(FATAL) << "NotImplemented";
}
}  // namespace mshadow
49 | 
namespace mxnet {
namespace op {

// CPU specialization of the operator factory: instantiates AxpyOp<cpu, DType>
// for the element type selected by `dtype` via MSHADOW_REAL_TYPE_SWITCH.
template<>
Operator *CreateOp<cpu>(AxpyParam param, int dtype) {
  Operator* op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new AxpyOp<cpu, DType>(param);
  });
  return op;
}

// Creates the operator through DO_BIND_DISPATCH, using the type of the first
// input (in_type->at(0)) as the operator's dtype. `ctx` and `in_shape` are
// not referenced directly in this body.
Operator *AxpyProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                           std::vector<int> *in_type) const {
  DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
}

DMLC_REGISTER_PARAMETER(AxpyParam);

// Registers the operator under the name _contrib_Axpy with three inputs:
// scale, x and y.
MXNET_REGISTER_OP_PROPERTY(_contrib_Axpy, AxpyProp)
.describe(R"code(Accelerate Squeeze and Excitation Network)code" ADD_FILELINE)
.add_argument("scale", "NDArray-or-Symbol", "channel scaling factor")
.add_argument("x", "NDArray-or-Symbol", "resnet increase output")
.add_argument("y", "NDArray-or-Symbol", "resnet shortcut output")
.add_arguments(AxpyParam::__FIELDS__());
}  // namespace op
}  // namespace mxnet
77 | 


--------------------------------------------------------------------------------
/models/maskrcnn/README.md:
--------------------------------------------------------------------------------
 1 | ## Mask-RCNN
 2 | 
 3 | This repository implements [**Mask-RCNN**](https://arxiv.org/abs/1703.06870) in the SimpleDet framework.
 4 | Mask-RCNN is a simple and effective approach for object instance segmentation. By simply extending Faster-RCNN with a mask branch, Mask-RCNN can generate a high-quality segmentation mask for each instance. In the following, we will introduce how we build Mask-RCNN in the SimpleDet framework. Currently, we only provide FPN based Mask-RCNN.
 5 | 
 6 | ### Quick Start
 7 | ```bash
 8 | # train
 9 | python3 detection_train.py --config config/mask_r50v1_fpn_1x.py
10 | 
11 | # test
12 | python3 mask_test.py --config config/mask_r50v1_fpn_1x.py
13 | ```
14 | ### How we build Mask-RCNN
15 | #### Input
16 | First, we need mask label.
17 | 
18 | Instead of providing binary masks to the network, we adopt poly format in the current implementation. Since each instance may contain several parts, we adopt a list of lists ([[ax1, ay1, ax2, ay2, ...], [bx1, by1, bx2, by2, ...], ...]) to represent each instance following COCO. For simplicity, we refer to [ax1, ay1, ax2, ay2, ...] as a segm.
19 | 
20 | We implement these transforms for poly format mask label:
21 | - **PreprocessGtPoly**: convert each segm in an instance into an ndarray.
22 | - **EncodeGtPoly**: encode each instance into a fixed length format ([class_id, num_segms, len_segm1, len_segm2, segm1, segm2]).
23 | 
24 | For data augmentation, we extend several transforms from Faster-RCNN:
25 | - **Resize2DImageBboxMask**: based on **Resize2DImageBbox**
26 | - **Flip2DImageBboxMask**: based on **Flip2DImageBbox**
27 | - **Pad2DImageBboxMask**: based on **Pad2DImageBbox**
28 | 
29 | #### Operators
30 | Then, we extend proposal_target to get sampled mask target for mask branch training:
31 | - **proposal_mask_target**, decodes encoded gt poly into binary mask and samples a fixed amount of masks as mask target. For acceleration, we only provide mask target for fg roi. So the number of mask target is ```int(image_roi * fg_fraction)```. Currently we only support class specific mask target. So the shape of mask target is ```(batch_size, int(image_roi * fg_fraction), num_class (81 in COCO), mask_size, mask_size)```.
32 | 
33 | In order to test mask in an end-to-end manner, we reuse the code from detection_test.py and implement a bbox post processing operator:
34 | - **bbox_post_processing**, applies NMS to multi-class bboxes and gets the final bbox results.
35 | 
36 | For loss function, we implement sigmoid cross entropy:
37 | - **sigmoid_cross_entropy**, a general sigmoid cross entropy loss function.
38 | 
39 | #### Symbol
40 | - **MaskFasterRcnn**, detector for MaskRCNN
41 | - **MaskFPNRpnHead**, a new RpnHead inherited from FPNRpnHead, note that we slice the proposal sampled from proposal_mask_target since the mask target provided by this operator is only for fg roi.
42 | - **MaskFasterRcnnHead**, mask head for MaskRCNN
43 | - **MaskFasterRcnn4ConvHead**, a specific mask head with 4 convolutions.
44 | - **BboxPostProcessor**, a bbox post processor for end-to-end test.
45 | 
46 | ### How to build Mask-RCNN without FPN
47 | - Implement **MaskRpnHead** following **MaskFPNRpnHead**.
48 | - Implement your own MaskHead following **MaskFasterRcnn4ConvHead**
49 | - Write your own config following **mask_r50v1_fpn_1x.py** and **faster_r50v1c4_c5_512roi_1x.py**
50 | 
51 | 


--------------------------------------------------------------------------------
/doc/INSTALL.md:
--------------------------------------------------------------------------------
 1 | ## Setup with Docker
 2 | We provide pre-built docker images for both cuda9.0 and cuda10.0.
 3 | 
 4 | Maxwell, Pascal, Volta and Turing GPUs are supported.
 5 | 
 6 | For nvidia-driver >= 410.48, cuda10 image is recommended.
 7 | 
 8 | For nvidia-driver >= 384.81, cuda9 image is recommended.
 9 | 
10 | Aliyun beijing mirror is provided for users pulling from China.
11 | 
12 | ```bash
13 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda9 zsh
14 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda10 zsh
15 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda9 zsh
16 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda10 zsh
17 | ```
18 | 
19 | ## Setup with Singularity
20 | We recommend that users adopt Singularity as the default environment manager to minimize the configuration effort.
21 | Singularity is a virtual environment manager like virtualenv, but in the system-level.
22 | 
23 | #### Install Singularity >= 2.6
24 | ```bash
25 | # install dependency
26 | sudo apt update
27 | sudo apt install build-essential python libarchive-dev
28 | 
29 | # install singularity
30 | wget https://github.com/sylabs/singularity/releases/download/2.6.1/singularity-2.6.1.tar.gz
31 | tar xzfv singularity-2.6.1.tar.gz
32 | cd singularity-2.6.1
33 | ./configure --prefix=/usr/local
34 | make
35 | sudo make install
36 | ```
37 | 
38 | #### Download singularity image for SimpleDet
39 | ```bash
40 | wget https://simpledet-model.oss-cn-beijing.aliyuncs.com/simpledet.img
41 | ```
42 | 
43 | #### Invoke simpledet shell
44 | Here we need to map the working directory into singularity shell, note that **symlink to files outside the working directory will not work** since singularity has its own filesystem. Thus we recommend users to map the whole data storage into singularity by replacing $WORKDIR by something like `/data` or `/mnt/`.
45 | 
46 | ```bash
47 | sudo singularity shell --no-home --nv -s /usr/bin/zsh --bind $WORKDIR /path/to/simpledet.img
48 | ```
49 | 
50 | ## Setup from Scratch
51 | #### System Requirements
52 | - Ubuntu 16.04
53 | - Python >= 3.5
54 | 
55 | #### Install CUDA, cuDNN and NCCL
56 | 
57 | #### Install cocotools
58 | ```bash
59 | # Install a patched cocotools for python3
60 | pip3 install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI'
61 | ```
62 | 
63 | #### Install MXNet
64 | ```bash
65 | # Install dependency
66 | sudo apt-get update
67 | sudo apt-get install -y build-essential git
68 | sudo apt-get install -y libopenblas-dev
69 | ```
70 | 
71 | ```bash
72 | git clone --recursive https://github.com/apache/incubator-mxnet /tmp/mxnet && \
73 | git clone https://github.com/Tusimple/simpledet /tmp/simpledet && \
74 | git clone https://github.com/RogerChern/cocoapi /tmp/cocoapi && \
75 | cp -r /tmp/simpledet/operator_cxx/* /tmp/mxnet/src/operator && \
76 | mkdir -p /tmp/mxnet/src/coco_api && \
77 | cp -r /tmp/cocoapi/common /tmp/mxnet/src/coco_api && \
78 | cd /tmp/mxnet && \
79 | echo "USE_SIGNAL_HANDLER = 1" >> ./config.mk && \
80 | echo "USE_OPENCV = 0" >> ./config.mk && \
81 | echo "USE_MKLDNN = 0" >> ./config.mk && \
82 | echo "USE_BLAS = openblas" >> ./config.mk && \
83 | echo "USE_CUDA = 1" >> ./config.mk && \
84 | echo "USE_CUDA_PATH = /usr/local/cuda" >> ./config.mk && \
85 | echo "USE_CUDNN = 1" >> ./config.mk && \
86 | echo "USE_NCCL = 1" >> ./config.mk && \
87 | echo "USE_DIST_KVSTORE = 1" >> ./config.mk && \
88 | echo "CUDA_ARCH = -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70" >> ./config.mk && \
89 | rm /tmp/mxnet/src/operator/nn/group_norm* && \
90 | make -j$((`nproc`-1)) && \
91 | cd python && \
92 | python3 setup.py install && \
93 | rm -rf /tmp/mxnet /tmp/simpledet /tmp/cocoapi
94 | ```
95 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/sigmoid_cross_entropy.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Licensed to the Apache Software Foundation (ASF) under one
 3 |  * or more contributor license agreements.  See the NOTICE file
 4 |  * distributed with this work for additional information
 5 |  * regarding copyright ownership.  The ASF licenses this file
 6 |  * to you under the Apache License, Version 2.0 (the
 7 |  * "License"); you may not use this file except in compliance
 8 |  * with the License.  You may obtain a copy of the License at
 9 |  *
10 |  *   http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing,
13 |  * software distributed under the License is distributed on an
14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 |  * KIND, either express or implied.  See the License for the
16 |  * specific language governing permissions and limitations
17 |  * under the License.
18 |  */
19 | 
20 | /*!
21 |  * Copyright (c) 2018 by Contributors
22 |  * \file sigmoid_cross_entropy.cc
23 |  * \brief
24 |  * \author Yuntao Chen
25 | */
26 | 
27 | #include "./sigmoid_cross_entropy-inl.h"
28 | 
namespace mshadow {

// CPU overload of the sigmoid-cross-entropy forward kernel. There is no CPU
// implementation: calling it aborts with a fatal "NotImplemented" log message
// and none of the output/workspace tensors are written.
template<typename T>
inline void SigmoidCrossEntropyForward(const Tensor<cpu, 2, T> &data,
                                       const Tensor<cpu, 2, T> &label,
                                       Tensor<cpu, 2, T> &loss,
                                       Tensor<cpu, 1, T> &loss_sum,
                                       Tensor<cpu, 2, T> &count,
                                       Tensor<cpu, 1, T> &count_sum,
                                       Tensor<cpu, 1, T> &out,
                                       T scale) {
  LOG(FATAL) << "NotImplemented";
}

// CPU overload of the sigmoid-cross-entropy backward kernel; likewise only
// aborts with a fatal "NotImplemented" log message.
template<typename T>
inline void SigmoidCrossEntropyBackward(const Tensor<cpu, 2, T> &data,
                                        const Tensor<cpu, 2, T> &label,
                                        Tensor<cpu, 2, T> &d_data,
                                        Tensor<cpu, 2, T> &count,
                                        Tensor<cpu, 1, T> &count_sum,
                                        T scale) {
  LOG(FATAL) << "NotImplemented";
}

}  // namespace mshadow
54 | 
namespace mxnet {
namespace op {
// CPU specialization of the operator factory: instantiates
// SigmoidCrossEntropyOp<cpu, DType> for the element type selected by `dtype`
// via MSHADOW_REAL_TYPE_SWITCH.
template<>
Operator *CreateOp<cpu>(SigmoidCrossEntropyParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new SigmoidCrossEntropyOp<cpu, DType>(param);
  })
  return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
// The type of the first input ((*in_type)[0]) is used as the operator's dtype.
Operator *SigmoidCrossEntropyProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                     std::vector<int> *in_type) const {
  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(SigmoidCrossEntropyParam);

// Registers the operator under the name _contrib_SigmoidCrossEntropy with two
// inputs: data and label.
// NOTE(review): the description refers to both `grad_scale` and `scale` --
// confirm the actual parameter name in SigmoidCrossEntropyParam.
MXNET_REGISTER_OP_PROPERTY(_contrib_SigmoidCrossEntropy, SigmoidCrossEntropyProp)
.describe(R"DOC(
Compute sigmoid activations followed by averaged binary cross entropy loss. The
target values may be in {-1, 0, 1}, where -1 indicates that the corresponding
sample should be ignored and {0, 1} correspond to the binary classes 0 and 1. By
default the loss is divided by the number of targets > -1 and then multiplied by
the `grad_scale` op argument. The divisive normalization may be disable by setting
the op argument `normalize` to 0 (the multiplication by `scale` still takes
effect).
This op fuses sigmoid and cross entropy for numerical stability in both forward
and gradient computation.
)DOC" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input array.")
.add_argument("label", "NDArray-or-Symbol", "Ground truth label.")
.add_arguments(SigmoidCrossEntropyParam::__FIELDS__());


}  // namespace op
}  // namespace mxnet
93 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/global_average_pooling.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * Copyright (c) 2018 by Contributors
 22 |  * \file global_average_pooling.cc
 23 |  * \brief port from https://github.com/hujie-frank/SENet
 24 |  * \author Chenxia Han
 25 | */
 26 | #include "./global_average_pooling-inl.h"
 27 | 
 28 | namespace mshadow {
 29 | template<typename DType>
 30 | inline void GAPForward(const Tensor<cpu, 4, DType> &out,
 31 |                        const Tensor<cpu, 4, DType> &data) {
 32 |   // NOT_IMPLEMENTED
 33 |   return;
 34 | }
 35 | 
 36 | template<typename DType>
 37 | inline void GAPBackward(const Tensor<cpu, 4, DType> &in_grad,
 38 |                        const Tensor<cpu, 4, DType> &out_grad) {
 39 |   // NOT_IMPLEMENTED
 40 |   return;
 41 | }
 42 | }  // namespace mshadow
 43 | 
namespace mxnet {
namespace op {

// CPU specialization of the operator factory: instantiates GAPOp<cpu, DType>
// for the element type selected by `dtype` via MSHADOW_REAL_TYPE_SWITCH.
template<>
Operator *CreateOp<cpu>(GAPParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new GAPOp<cpu, DType>(param);
  });

  return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
// Type and shape inference are run first and CHECKed, so an inconsistent
// input aborts before the operator is created. The inferred out/aux vectors
// themselves are not used afterwards; the dtype is taken from (*in_type)[0].
Operator* GAPProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                     std::vector<int> *in_type) const {
  std::vector<TShape> out_shape, aux_shape;
  std::vector<int> out_type, aux_type;
  CHECK(InferType(in_type, &out_type, &aux_type));
  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(GAPParam);

// Registers the operator under the name _contrib_GAP with a single `data`
// input.
// NOTE(review): the description below talks about kernel/pad/stride and
// several pool types; it reads like text for a generic pooling operator --
// confirm it matches GAPParam.
MXNET_REGISTER_OP_PROPERTY(_contrib_GAP, GAPProp)
.describe(R"code(This operator is DEPRECATED.
Perform pooling on the input.

The shapes for 2-D pooling is

- **data**: *(batch_size, channel, height, width)*
- **out**: *(batch_size, num_filter, out_height, out_width)*, with::

    out_height = f(height, kernel[0], pad[0], stride[0])
    out_width = f(width, kernel[1], pad[1], stride[1])

The definition of *f* depends on ``pooling_convention``, which has two options:

- **valid** (default)::

    f(x, k, p, s) = floor((x+2*p-k)/s)+1

- **full**, which is compatible with Caffe::

    f(x, k, p, s) = ceil((x+2*p-k)/s)+1

But ``global_pool`` is set to be true, then do a global pooling, namely reset
``kernel=(height, width)``.

Three pooling options are supported by ``pool_type``:

- **avg**: average pooling
- **max**: max pooling
- **sum**: sum pooling

1-D pooling is special case of 2-D pooling with *weight=1* and
*kernel[1]=1*.

For 3-D pooling, an additional *depth* dimension is added before
*height*. Namely the input data will have shape *(batch_size, channel, depth,
height, width)*.

)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input data to the pooling operator.")
.add_arguments(GAPParam::__FIELDS__());

}  // namespace op
}  // namespace mxnet
113 | 


--------------------------------------------------------------------------------
/utils/generate_roidb.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import pickle as pkl
  4 | import numpy as np
  5 | from pycocotools.coco import COCO
  6 | 
  7 | 
# Maps a dataset split name (as passed via --dataset-split) to the image
# sub-directory used when building each record's ``image_url``. Several
# annotation splits share one image directory, e.g. ``minival2014`` and
# ``valminusminival2014`` both read images from ``val2014``.
dataset_split_mapping = {
    "train2014": "train2014",
    "val2014": "val2014",
    "valminusminival2014": "val2014",
    "minival2014": "val2014",
    "train2017": "train2017",
    "val2017": "val2017",
    "test-dev2017": "test2017",
    "train": "train"
}
 18 | 
 19 | 
 20 | def parse_args():
 21 |     parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database')
 22 |     parser.add_argument('--dataset', help='dataset name', type=str)
 23 |     parser.add_argument('--dataset-split', help='dataset split, e.g. train2017, minival2014', type=str)
 24 | 
 25 |     args = parser.parse_args()
 26 |     return args.dataset, args.dataset_split
 27 | 
 28 | 
 29 | def generate_groundtruth_database(dataset_name, dataset_split):
 30 |     annotation_type = 'image_info' if 'test' in dataset_split else 'instances'
 31 |     annotation_path = "/ws/data/opendata/%s/annotations/%s_%s.json" % (dataset_name, annotation_type, dataset_split)
 32 | 
 33 |     catid_offset = 0
 34 |     lable_version = -1
 35 |     if dataset_name == "coco":
 36 |         lable_version = 1
 37 |         catid_offset  = 0
 38 |     elif dataset_name == "cctsdb":
 39 |         lable_version = 2
 40 |         catid_offset  = 80
 41 |     else:
 42 |         lable_version = -1
 43 |     assert lable_version > 0
 44 | 
 45 |     dataset = COCO(annotation_path)
 46 |     img_ids = dataset.getImgIds()
 47 |     roidb = []
 48 |     for img_id in img_ids:
 49 |         img_anno = dataset.loadImgs(img_id)[0]
 50 |         im_filename = img_anno['file_name']
 51 |         im_w = img_anno['width']
 52 |         im_h = img_anno['height']
 53 | 
 54 |         ins_anno_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=False)
 55 |         trainid_to_datasetid = dict({i + 1: cid for i, cid in enumerate(dataset.getCatIds())})  # 0 for bg
 56 |         datasetid_to_trainid = dict({cid: tid for tid, cid in trainid_to_datasetid.items()})
 57 |         instances = dataset.loadAnns(ins_anno_ids)
 58 | 
 59 |         # sanitize bboxes
 60 |         valid_instances = []
 61 |         for inst in instances:
 62 |             x, y, box_w, box_h = inst['bbox']
 63 |             x1 = max(0, x)
 64 |             y1 = max(0, y)
 65 |             x2 = min(im_w - 1, x1 + max(0, box_w - 1))
 66 |             y2 = min(im_h - 1, y1 + max(0, box_h - 1))
 67 |             if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
 68 |                 inst['clean_bbox'] = [x1, y1, x2, y2]
 69 |                 valid_instances.append(inst)
 70 |         num_instance = len(valid_instances)
 71 | 
 72 |         gt_bbox = np.zeros((num_instance, 4), dtype=np.float32)
 73 |         gt_class = np.zeros((num_instance, ), dtype=np.int32)
 74 |         gt_poly = [None] * num_instance
 75 | 
 76 |         for i, inst in enumerate(valid_instances):
 77 |             cls = catid_offset + datasetid_to_trainid[inst['category_id']]
 78 |             gt_bbox[i, :] = inst['clean_bbox']
 79 |             gt_class[i] = cls
 80 |             gt_poly[i] = inst['segmentation']
 81 | 
 82 |         split = dataset_split_mapping[dataset_split]
 83 |         roi_rec = {
 84 |             'image_url': '/ws/data/opendata/%s/images/%s/%s' % (dataset_name, split, im_filename),
 85 |             'im_id': img_id,
 86 |             'h': im_h,
 87 |             'w': im_w,
 88 |             'gt_class': gt_class,
 89 |             'gt_bbox': gt_bbox,
 90 |             'gt_poly': gt_poly,
 91 |             'version': lable_version,
 92 |             'flipped': False}
 93 | 
 94 |         roidb.append(roi_rec)
 95 | 
 96 |     return roidb
 97 | 
 98 | 
 99 | if __name__ == "__main__":
100 |     d, dsplit = parse_args()
101 |     roidb = generate_groundtruth_database(d, dsplit)
102 |     os.makedirs("data/cache", exist_ok=True)
103 |     with open("data/cache/%s_%s.roidb" % (d, dsplit), "wb") as fout:
104 |         pkl.dump(roidb, fout)
105 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/group_norm.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 | 2. Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
15 |    and IDIAP Research Institute nor the names of its contributors may be
16 |    used to endorse or promote products derived from this software without
17 |    specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 | POSSIBILITY OF SUCH DAMAGE.
30 | */
31 | /*
32 |  * Licensed to the Apache Software Foundation (ASF) under one
33 |  * or more contributor license agreements.  See the NOTICE file
34 |  * distributed with this work for additional information
35 |  * regarding copyright ownership.  The ASF licenses this file
36 |  * to you under the Apache License, Version 2.0 (the
37 |  * "License"); you may not use this file except in compliance
38 |  * with the License.  You may obtain a copy of the License at
39 |  *
40 |  *   http://www.apache.org/licenses/LICENSE-2.0
41 |  *
42 |  * Unless required by applicable law or agreed to in writing,
43 |  * software distributed under the License is distributed on an
44 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
45 |  * KIND, either express or implied.  See the License for the
46 |  * specific language governing permissions and limitations
47 |  * under the License.
48 |  */
49 | 
50 | /*!
51 |  * \file group_norm.cc
52 |  * \author Yuntao Chen
53 | */
54 | 
55 | #include "./group_norm-inl.h"
56 | 
namespace mxnet {
namespace op {
// CPU specialization of CreateOp for GroupNorm. No CPU implementation is
// provided here: the call logs a fatal "not implemented." error. The
// `return NULL` that follows only matters if LOG(FATAL) returns.
template <>
Operator* CreateOp<cpu>(GroupNormParam param, int dtype) {
  LOG(FATAL) << "not implemented.";
  return NULL;
}

// Creates the GroupNorm operator for a bound executor by dispatching to
// CreateOp with param_ and the dtype of the first input. in_shape is not
// inspected here.
// DO_BIND_DISPATCH comes from operator_common.h
Operator* GroupNormProp::CreateOperatorEx(Context ctx,
                                          std::vector<TShape>* in_shape,
                                          std::vector<int>* in_type) const {
  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
}

// Registers the GroupNormParam fields with the parameter system.
DMLC_REGISTER_PARAMETER(GroupNormParam);

// Registers the operator under the name _contrib_GroupNorm with three inputs
// (data, gamma, beta) followed by every field declared by GroupNormParam.
MXNET_REGISTER_OP_PROPERTY(_contrib_GroupNorm, GroupNormProp)
.add_argument("data", "NDArray-or-Symbol",
              "An n-dimensional input array (n > 2) of the form [batch, "
              "channel, spatial_dim1, spatial_dim2, ...].")
.add_argument("gamma", "NDArray-or-Symbol",
              "A vector of length \'channel\', which multiplies the "
              "normalized input.")
.add_argument("beta", "NDArray-or-Symbol",
              "A vector of length \'channel\', which is added to the "
              "product of the normalized input and the weight.")
.add_arguments(GroupNormParam::__FIELDS__())
.describe(R"code(Group Normalization (GN) operation: https://arxiv.org/abs/1803.08494)code" ADD_FILELINE);
}  // namespace op
}  // namespace mxnet
88 | 


--------------------------------------------------------------------------------
/operator_py/nms.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from .cython.cpu_nms import greedy_nms, soft_nms
  3 | 
  4 | 
  5 | def cython_soft_nms_wrapper(thresh, sigma=0.5, score_thresh=0.001, method='linear'):
  6 |     methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
  7 |     assert method in methods, 'Unknown soft_nms method: {}'.format(method)
  8 |     def _nms(dets):
  9 |         dets, _ = soft_nms(
 10 |                     np.ascontiguousarray(dets, dtype=np.float32),
 11 |                     np.float32(sigma),
 12 |                     np.float32(thresh),
 13 |                     np.float32(score_thresh),
 14 |                     np.uint8(methods[method]))
 15 |         return dets
 16 |     return _nms
 17 | 
 18 | 
 19 | def py_nms_wrapper(thresh):
 20 |     def _nms(dets):
 21 |         return nms(dets, thresh)
 22 |     return _nms
 23 | 
 24 | 
 25 | def cpu_nms_wrapper(thresh):
 26 |     def _nms(dets):
 27 |         return greedy_nms(dets, thresh)[0]
 28 |     return _nms
 29 | 
 30 | 
 31 | def wnms_wrapper(thresh_lo, thresh_hi):
 32 |     def _nms(dets):
 33 |         return py_weighted_nms(dets, thresh_lo, thresh_hi)
 34 |     return _nms
 35 | 
 36 | 
 37 | def nms(dets, thresh):
 38 |     """
 39 |     greedily select boxes with high confidence and overlap with current maximum <= thresh
 40 |     rule out overlap >= thresh
 41 |     :param dets: [[x1, y1, x2, y2 score]]
 42 |     :param thresh: retain overlap < thresh
 43 |     :return: indexes to keep
 44 |     """
 45 |     x1 = dets[:, 0]
 46 |     y1 = dets[:, 1]
 47 |     x2 = dets[:, 2]
 48 |     y2 = dets[:, 3]
 49 |     scores = dets[:, 4]
 50 | 
 51 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 52 |     order = scores.argsort()[::-1]
 53 | 
 54 |     keep = []
 55 |     while order.size > 0:
 56 |         i = order[0]
 57 |         keep.append(i)
 58 |         xx1 = np.maximum(x1[i], x1[order[1:]])
 59 |         yy1 = np.maximum(y1[i], y1[order[1:]])
 60 |         xx2 = np.minimum(x2[i], x2[order[1:]])
 61 |         yy2 = np.minimum(y2[i], y2[order[1:]])
 62 | 
 63 |         w = np.maximum(0.0, xx2 - xx1 + 1)
 64 |         h = np.maximum(0.0, yy2 - yy1 + 1)
 65 |         inter = w * h
 66 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
 67 | 
 68 |         inds = np.where(ovr <= thresh)[0]
 69 |         order = order[inds + 1]
 70 | 
 71 |     return dets[keep, :]
 72 | 
 73 | 
 74 | def py_weighted_nms(dets, thresh_lo, thresh_hi):
 75 |     """
 76 |     voting boxes with confidence > thresh_hi
 77 |     keep boxes overlap <= thresh_lo
 78 |     rule out overlap > thresh_hi
 79 |     :param dets: [[x1, y1, x2, y2 score]]
 80 |     :param thresh_lo: retain overlap <= thresh_lo
 81 |     :param thresh_hi: vote overlap > thresh_hi
 82 |     :return: indexes to keep
 83 |     """
 84 |     x1 = dets[:, 0]
 85 |     y1 = dets[:, 1]
 86 |     x2 = dets[:, 2]
 87 |     y2 = dets[:, 3]
 88 |     scores = dets[:, 4]
 89 | 
 90 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 91 |     order = scores.argsort()[::-1]
 92 | 
 93 |     keep = []
 94 |     while order.size > 0:
 95 |         i = order[0]
 96 |         xx1 = np.maximum(x1[i], x1[order])
 97 |         yy1 = np.maximum(y1[i], y1[order])
 98 |         xx2 = np.minimum(x2[i], x2[order])
 99 |         yy2 = np.minimum(y2[i], y2[order])
100 | 
101 |         w = np.maximum(0.0, xx2 - xx1 + 1)
102 |         h = np.maximum(0.0, yy2 - yy1 + 1)
103 |         inter = w * h
104 |         ovr = inter / (areas[i] + areas[order] - inter)
105 | 
106 |         inds = np.where(ovr <= thresh_lo)[0]
107 |         inds_keep = np.where(ovr > thresh_hi)[0]
108 |         if len(inds_keep) == 0:
109 |             break
110 | 
111 |         order_keep = order[inds_keep]
112 | 
113 |         tmp=np.sum(scores[order_keep])
114 |         x1_avg = np.sum(scores[order_keep] * x1[order_keep]) / tmp
115 |         y1_avg = np.sum(scores[order_keep] * y1[order_keep]) / tmp
116 |         x2_avg = np.sum(scores[order_keep] * x2[order_keep]) / tmp
117 |         y2_avg = np.sum(scores[order_keep] * y2[order_keep]) / tmp
118 | 
119 |         keep.append([x1_avg, y1_avg, x2_avg, y2_avg, scores[i]])
120 |         order = order[inds]
121 |     return np.array(keep)
122 | 


--------------------------------------------------------------------------------
/models/tridentnet/README.md:
--------------------------------------------------------------------------------
 1 | ## Scale-Aware Trident Networks for Object Detection
 2 | 
 3 | Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang
 4 | 
 5 | <p align="center"> <img src="../../doc/image/trident_block.png" width="480"> </p>
 6 | 
 7 | ### Introduction
 8 | 
 9 | This repository implements [TridentNet](https://arxiv.org/abs/1901.01892) in the SimpleDet framework. 
10 | 
11 | Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. Then, we propose a scale-aware training scheme to specialize each branch by sampling object instances of proper scales for training. As a bonus, a fast approximation version of TridentNet could achieve significant improvements without any additional parameters and computational cost. On the COCO dataset, our TridentNet with ResNet-101 backbone achieves state-of-the-art single-model results by obtaining an mAP of 48.4.
12 | 
13 | #### Trident Blocks
14 | 
15 | - Dilated convolution for efficient scale enumeration
16 | - Weight sharing between convs for uniform representation
17 | 
18 | <p align="center"> <img src="../../doc/image/trident_block_details.png" width="480"> </p>
19 | 
 20 | The above figure shows how to convert bottleneck residual blocks to 3-branch Trident Blocks. The dilation rates of the three branches are set to 1, 2 and 3, respectively.
21 | 
22 | ### Use TridentNet
23 | 
 24 | Please set up SimpleDet following the [README](../../README.md) and [INSTALL](../../doc/INSTALL.md) guides, then use the TridentNet configuration files in the `config` folder.
25 | 
26 | ### Results on MS-COCO
27 | 
28 | |                             | Backbone   | Test data | mAP@[0.5:0.95] | Link |
29 | | --------------------------- | ---------- | --------- | :------------: | -----|
30 | | Faster R-CNN, 1x            | ResNet-101 | minival   |      37.6      |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/faster_r101v2c4_c5_256roi_1x.zip)|
31 | | TridentNet, 1x              | ResNet-101 | minival   |      40.6      |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_1x.zip)|
32 | | TridentNet, 1x, Fast Approx | ResNet-101 | minival   |      39.9      |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_fastapprox_1x.zip)|
33 | | TridentNet, 2x              | ResNet-101 | test-dev  |      42.8      |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_addminival_2x.zip)|
34 | | TridentNet*, 3x             | ResNet-101 | test-dev  |      48.4      |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.zip)|
35 | 
36 | Note: 
37 | 1. These models are not trained in SimpleDet. Re-training these models in SimpleDet gives a slightly better result.
38 | 2. TridentNet* - TridentNet = extended training + softNMS + multi-scale training/testing + syncBN + DCNv1.
39 | 
40 | ### Results on MS-COCO with stronger baselines
41 | All config files are available in [config/resnet_v1b](../../config/resnet_v1b).
42 | 
43 | |Model|Backbone|Head|Train Schedule|AP|AP50|AP75|APs|APm|APl|
44 | |-----|--------|----|--------------|--|----|----|---|---|---|
45 | |Faster|R50v1b-C4|C5-512ROI|2X|36.9|57.9|39.3|19.9|41.4|50.2|
46 | |Trident|R50v1b-C4|C5-128ROI|2X|39.6|60.9|42.9|22.5|44.5|53.9|
47 | |TridentFast|R50v1b-C4|C5-128ROI|2X|39.0|60.2|41.8|20.8|43.6|53.8|
48 | |Faster|R101v1b-C4|C5-512ROI|2X|40.5|61.2|43.8|22.5|44.8|55.4|
49 | |Trident|R101v1b-C4|C5-128ROI|2X|43.0|64.3|46.3|25.3|47.9|58.4|
50 | |TridentFast|R101v1b-C4|C5-128ROI|2X|42.5|63.7|46.0|23.3|46.7|59.3|
51 | |Faster|R152v1b-C4|C5-512ROI|2X|41.8|62.4|45.2|23.2|46.0|56.9|
52 | |Trident|R152v1b-C4|C5-128ROI|2X|44.4|65.4|48.3|26.4|49.4|59.6|
53 | |TridentFast|R152v1b-C4|C5-128ROI|2X|43.9|65.1|47.0|25.1|48.1|60.4|
54 | 
55 | ### Citing TridentNet
56 | 
57 | ```
58 | @article{li2019scale,
59 |   title={Scale-Aware Trident Networks for Object Detection},
60 |   author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang},
61 |   journal={ICCV 2019},
62 |   year={2019}
63 | }
64 | ```
65 | 


--------------------------------------------------------------------------------
/utils/callback.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import logging
  3 | import mxnet as mx
  4 | 
  5 | 
  6 | class Speedometer(object):
  7 |     def __init__(self, batch_size, frequent=50):
  8 |         self.batch_size = batch_size
  9 |         self.frequent = frequent
 10 |         self.init = False
 11 |         self.tic = 0
 12 |         self.last_count = 0
 13 | 
 14 |     def __call__(self, param):
 15 |         """Callback to Show speed."""
 16 |         count = param.nbatch
 17 |         if self.last_count > count:
 18 |             self.init = False
 19 |         self.last_count = count
 20 | 
 21 |         if self.init:
 22 |             if count % self.frequent == 0:
 23 |                 speed = self.frequent * self.batch_size / (time.time() - self.tic)
 24 |                 if param.eval_metric is not None:
 25 |                     name, value = param.eval_metric.get()
 26 |                     s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed)
 27 |                     for n, v in zip(name, value):
 28 |                         s += "%s=%f,\t" % (n, v)
 29 |                     logging.info(s)
 30 |                 else:
 31 |                     logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec",
 32 |                                  param.epoch, count, speed)
 33 |                 self.tic = time.time()
 34 |         else:
 35 |             self.init = True
 36 |             self.tic = time.time()
 37 | 
 38 | class DetailSpeedometer(object):
 39 |     def __init__(self, batch_size, frequent=50):
 40 |         self.batch_size = batch_size
 41 |         self.frequent = frequent
 42 |         self.init = False
 43 |         self.tic = 0
 44 |         self.last_count = 0
 45 | 
 46 |     def __call__(self, param):
 47 |         """Callback to Show speed."""
 48 |         count = param.nbatch
 49 |         rank = param.rank
 50 |         total_iter = param.total_iter
 51 | 
 52 |         if self.last_count > count:
 53 |             self.init = False
 54 |         self.last_count = count
 55 | 
 56 |         if self.init:
 57 |             if count % self.frequent == 0:
 58 |                 speed = self.frequent * self.batch_size / (time.time() - self.tic)
 59 |                 if param.eval_metric is not None:
 60 |                     name, value = param.eval_metric.get()
 61 |                     s = "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\t" \
 62 |                         "data:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec\tTrain-" % (
 63 |                         param.epoch, rank, count, total_iter,
 64 |                         param.cur_batch_time, param.avg_batch_time,
 65 |                         param.cur_kvstore_sync_time, param.avg_kvstore_sync_time,
 66 |                         param.cur_data_time, param.avg_data_time,
 67 |                         param.cur_iter_total_time, param.avg_iter_total_time,
 68 |                         speed)
 69 |                     for n, v in zip(name, value):
 70 |                         s += "%s=%f,\t" % (n, v)
 71 |                     logging.info(s)
 72 |                 else:
 73 |                     logging.info(
 74 |                         "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\tdata:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec",
 75 |                         param.epoch, rank, count, total_iter,
 76 |                         param.cur_batch_time, param.avg_batch_time,
 77 |                         param.cur_kvstore_sync_time, param.avg_kvstore_sync_time,
 78 |                         param.cur_data_time, param.avg_data_time,
 79 |                         param.cur_iter_total_time, param.avg_iter_total_time,
 80 |                         speed)
 81 | 
 82 |                 self.tic = time.time()
 83 |         else:
 84 |             self.init = True
 85 |             self.tic = time.time()
 86 | 
 87 | 
 88 | def do_checkpoint(prefix):
 89 |     def _callback(iter_no, sym, arg, aux):
 90 |         mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
 91 |     return _callback
 92 | 
 93 | 
 94 | def do_checkpoint_iter(prefix, checkpoint_iter):
 95 |     def _callback(param):
 96 |         if checkpoint_iter == param.locals["total_iter"]:
 97 |             arg_params, aux_params = param.locals["self"].get_params()
 98 |             save_dict = {('arg:%s' % k) : v.as_in_context(mx.cpu()) for k, v in arg_params.items()}
 99 |             save_dict.update({('aux:%s' % k) : v.as_in_context(mx.cpu()) for k, v in aux_params.items()})
100 |             param_name = '%s-iter-%s.params' % (prefix, checkpoint_iter)
101 |             mx.nd.save(param_name, save_dict)
102 |             logging.info('Saved checkpoint to \"%s\"', param_name)
103 |     return _callback
104 | 


--------------------------------------------------------------------------------
/models/dcn/builder.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import mxnext as X
  3 | from mxnext import conv, relu, add
  4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit
  5 | from symbol.builder import Backbone
  6 | 
  7 | 
  8 | def dcn_resnet_unit(input, name, filter, stride, dilate, proj, norm, **kwargs):
  9 |     conv1 = conv(input, name=name + "_conv1", filter=filter // 4)
 10 |     bn1 = norm(conv1, name=name + "_bn1")
 11 |     relu1 = relu(bn1, name=name + "_relu1")
 12 | 
 13 |     # conv2 filter router
 14 |     conv2_offset = conv(relu1, name=name + "_conv2_offset", filter=72, kernel=3, stride=stride, dilate=dilate)
 15 |     conv2 = mx.sym.contrib.DeformableConvolution(relu1, conv2_offset, kernel=(3, 3),
 16 |         stride=(stride, stride), dilate=(dilate, dilate), pad=(1, 1), num_filter=filter // 4,
 17 |         num_deformable_group=4, no_bias=True, name=name + "_conv2")
 18 |     bn2 = norm(conv2, name=name + "_bn2")
 19 |     relu2 = relu(bn2, name=name + "_relu2")
 20 | 
 21 |     conv3 = conv(relu2, name=name + "_conv3", filter=filter)
 22 |     bn3 = norm(conv3, name=name + "_bn3")
 23 | 
 24 |     if proj:
 25 |         shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride)
 26 |         shortcut = norm(shortcut, name=name + "_sc_bn")
 27 |     else:
 28 |         shortcut = input
 29 | 
 30 |     eltwise = add(bn3, shortcut, name=name + "_plus")
 31 | 
 32 |     return relu(eltwise, name=name + "_relu")
 33 | 
 34 | 
 35 | def hybrid_resnet_stage(data, name, num_block, num_special_block, special_res_unit, filter,
 36 |     stride, dilate, norm, **kwargs):
 37 |     s, d = stride, dilate
 38 | 
 39 |     for i in range(1, num_block + 1 - num_special_block):
 40 |         proj = True if i == 1 else False
 41 |         s = stride if i == 1 else 1
 42 |         d = dilate
 43 |         data = resnet_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm)
 44 | 
 45 |     for i in range(num_block + 1 - num_special_block, num_block + 1):
 46 |         proj = True if i == 1 else False
 47 |         s = stride if i == 1 else 1
 48 |         d = dilate
 49 |         data = special_res_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm, **kwargs)
 50 | 
 51 |     return data
 52 | 
 53 | 
 54 | def hybrid_resnet_c4_builder(special_resnet_unit):
 55 |     class ResNetC4(Backbone):
 56 |         def __init__(self, pBackbone):
 57 |             super().__init__(pBackbone)
 58 |             p = self.p
 59 | 
 60 |             import mxnext.backbone.resnet_v1b_helper as helper
 61 |             num_c2, num_c3, num_c4, _ = helper.depth_config[p.depth]
 62 | 
 63 |             data = X.var("data")
 64 |             if p.fp16:
 65 |                 data = data.astype("float16")
 66 |             c1 = helper.resnet_c1(data, p.normalizer)
 67 |             c2 = helper.resnet_c2(c1, num_c2, 1, 1, p.normalizer)
 68 |             c3 = hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1,
 69 |                 p.normalizer, params=p)
 70 |             c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1,
 71 |                 p.normalizer, params=p)
 72 | 
 73 |             self.symbol = c4
 74 | 
 75 |         def get_rpn_feature(self):
 76 |             return self.symbol
 77 | 
 78 |         def get_rcnn_feature(self):
 79 |             return self.symbol
 80 | 
 81 |     return ResNetC4
 82 | 
 83 | 
 84 | def hybrid_resnet_fpn_builder(special_resnet_unit):
 85 |     class ResNetFPN(Backbone):
 86 |         def __init__(self, pBackbone):
 87 |             super().__init__(pBackbone)
 88 |             p = self.p
 89 | 
 90 |             import mxnext.backbone.resnet_v1b_helper as helper
 91 |             num_c2, num_c3, num_c4, num_c5 = helper.depth_config[p.depth]
 92 | 
 93 |             data = X.var("data")
 94 |             if p.fp16:
 95 |                 data = data.astype("float16")
 96 |             c1 = helper.resnet_c1(data, p.normalizer)
 97 |             c2 = helper.resnet_c2(c1, num_c2, 1, 1, p.normalizer)
 98 |             c3 = hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1,
 99 |                 p.normalizer, params=p)
100 |             c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1,
101 |                 p.normalizer, params=p)
102 |             c5 = hybrid_resnet_stage(c4, "stage4", num_c5, p.num_c5_block or 0, special_resnet_unit, 2048, 2, 1,
103 |                 p.normalizer, params=p)
104 | 
105 |             self.symbol = (c2, c3, c4, c5)
106 | 
107 |         def get_rpn_feature(self):
108 |             return self.symbol
109 | 
110 |         def get_rcnn_feature(self):
111 |             return self.symbol
112 | 
113 |     return ResNetFPN
114 | 
115 | 
# C4 backbone class whose special units are deformable-conv residual units
# (dcn_resnet_unit).
DCNResNetC4 = hybrid_resnet_c4_builder(dcn_resnet_unit)
117 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/generate_anchor.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * \file generate_anchor.cc
 22 |  * \brief
 23 |  * \author Yanghao Li, Chenxia Han
 24 | */
 25 | 
 26 | #include "./generate_anchor-inl.h"
 27 | 
 28 | namespace mxnet {
 29 | namespace op {
 30 | 
 31 | template<typename xpu>
 32 | class GenAnchorOp : public Operator{
 33 |  public:
 34 |   explicit GenAnchorOp(GenAnchorParam param) {
 35 |     this->param_ = param;
 36 |   }
 37 | 
 38 |   virtual void Forward(const OpContext &ctx,
 39 |                        const std::vector<TBlob> &in_data,
 40 |                        const std::vector<OpReqType> &req,
 41 |                        const std::vector<TBlob> &out_data,
 42 |                        const std::vector<TBlob> &aux_states) {
 43 |     using namespace mshadow;
 44 |     using namespace mshadow::expr;
 45 |     CHECK_EQ(in_data.size(), 1);
 46 |     CHECK_EQ(out_data.size(), 1);
 47 |     CHECK_EQ(req.size(), 1);
 48 |     CHECK_EQ(req[gen_anchor::kOut], kWriteTo);
 49 | 
 50 |     Stream<xpu> *s = ctx.get_stream<xpu>();
 51 |     Tensor<xpu, 4> scores = in_data[gen_anchor::kClsProb].get<cpu, 4, float>(s);
 52 | 
 53 |     Tensor<cpu, 2> out = out_data[gen_anchor::kOut].get<cpu, 2, float>(s);
 54 | 
 55 |     std::vector<double> scales(param_.scales.begin(), param_.scales.end());
 56 |     std::vector<double> ratios(param_.ratios.begin(), param_.ratios.end());
 57 | 
 58 |     int num_anchors = scales.size() * ratios.size();
 59 |     int height = scores.size(2);
 60 |     int width = scores.size(3);
 61 | 
 62 |     // Generate anchors
 63 |     std::vector<double> base_anchor({
 64 |       0.0f, 0.0f, param_.feature_stride - 1.0f, param_.feature_stride - 1.0f
 65 |     });
 66 |     std::vector<double> anchors;
 67 |     gen_anchor_utils::GenerateAnchors(
 68 |       base_anchor, ratios, scales, anchors
 69 |     );
 70 | 
 71 |     // Enumerate all shifted anchors
 72 |     for (index_t i = 0; i < num_anchors; ++i) {
 73 |       for (index_t j = 0; j < height; ++j) {
 74 |         for (index_t k = 0; k < width; ++k) {
 75 |           index_t index = j * (width * num_anchors) + k * (num_anchors) + i;
 76 |           out[index][0] = static_cast<float>(anchors[i * 4 + 0] + k * param_.feature_stride);
 77 |           out[index][1] = static_cast<float>(anchors[i * 4 + 1] + j * param_.feature_stride);
 78 |           out[index][2] = static_cast<float>(anchors[i * 4 + 2] + k * param_.feature_stride);
 79 |           out[index][3] = static_cast<float>(anchors[i * 4 + 3] + j * param_.feature_stride);
 80 |         }
 81 |       }
 82 |     }
 83 |   }
 84 | 
 85 |   virtual void Backward(const OpContext &ctx,
 86 |                         const std::vector<TBlob> &out_grad,
 87 |                         const std::vector<TBlob> &in_data,
 88 |                         const std::vector<TBlob> &out_data,
 89 |                         const std::vector<OpReqType> &req,
 90 |                         const std::vector<TBlob> &in_grad,
 91 |                         const std::vector<TBlob> &aux_states) {
 92 |     using namespace mshadow;
 93 |     using namespace mshadow::expr;
 94 |     CHECK_EQ(in_grad.size(), 1);
 95 | 
 96 |     Stream<xpu> *s = ctx.get_stream<xpu>();
 97 |     Tensor<xpu, 4> gscores = in_grad[gen_anchor::kClsProb].get<xpu, 4, float>(s);
 98 | 
 99 |     // can not assume the grad would be zero
100 |     Assign(gscores, req[gen_anchor::kClsProb], 0);
101 |   }
102 | 
103 |  private:
104 |   GenAnchorParam param_;
105 | };  // class GenAnchorOp
106 | 
107 | template<>
108 | Operator *CreateOp<cpu>(GenAnchorParam param) {
109 |   return new GenAnchorOp<cpu>(param);
110 | }
111 | 
// Creates the operator instance for context `ctx`, forwarding param_ to
// CreateOp through DO_BIND_DISPATCH.
// NOTE(review): DO_BIND_DISPATCH is defined outside this file; presumably it
// picks the CreateOp specialisation matching ctx's device and returns it —
// confirm in operator_common.h.
Operator* GenAnchorProp::CreateOperator(Context ctx) const {
  DO_BIND_DISPATCH(CreateOp, param_);
}
115 | 
// Register the parameter struct, then expose GenAnchorProp under the
// operator name _contrib_GenAnchor. The operator takes one input,
// "cls_prob", followed by the fields declared in GenAnchorParam.
DMLC_REGISTER_PARAMETER(GenAnchorParam);

MXNET_REGISTER_OP_PROPERTY(_contrib_GenAnchor, GenAnchorProp)
.describe("Generate region anchors")
.add_argument("cls_prob", "NDArray-or-Symbol", "Probability of how likely proposal is object.")
.add_arguments(GenAnchorParam::__FIELDS__());
122 | 
123 | }  // namespace op
124 | }  // namespace mxnet
125 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/decodebbox-inl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * \file decodebbox-inl.h
 22 |  * \brief DecodeBBox Operator
 23 |  * \author Ziyang Zhou, Chenxia Han
 24 | */
 25 | #ifndef MXNET_OPERATOR_CONTRIB_DECODEBBOX_INL_H_
 26 | #define MXNET_OPERATOR_CONTRIB_DECODEBBOX_INL_H_
 27 | 
 28 | #include <dmlc/logging.h>
 29 | #include <dmlc/parameter.h>
 30 | #include <mxnet/operator.h>
 31 | #include <map>
 32 | #include <vector>
 33 | #include <string>
 34 | #include <utility>
 35 | #include <ctime>
 36 | #include <cstring>
 37 | #include <iostream>
 38 | #include "../operator_common.h"
 39 | #include "../mshadow_op.h"
 40 | 
 41 | namespace mxnet {
 42 | namespace op {
 43 | 
 44 | namespace decodebbox {
 45 | enum DecodeBBoxOpInputs {kRois, kBBoxPred, kImInfo};
 46 | enum DecodeBBoxOpOutputs {kOut};
 47 | }  // decodebbox
 48 | 
/*!
 * \brief Parameters of the DecodeBBox operator.
 */
struct DecodeBBoxParam : public dmlc::Parameter<DecodeBBoxParam> {
  // Bounding box mean, 4 values; defaults to {0, 0, 0, 0}.
  nnvm::Tuple<float> bbox_mean;
  // Bounding box std, 4 values; defaults to {0.1, 0.1, 0.2, 0.2}.
  nnvm::Tuple<float> bbox_std;
  // Whether class-agnostic boxes are used; defaults to true.
  bool class_agnostic;

  DMLC_DECLARE_PARAMETER(DecodeBBoxParam) {
    // `tmp` is scratch storage for the default tuples. Each Tuple is built
    // from tmp's contents at the point of the set_default call, so tmp can be
    // overwritten afterwards for the next field. Keep the statement order.
    float tmp[] = {0.f, 0.f, 0.f, 0.f};
    DMLC_DECLARE_FIELD(bbox_mean).set_default(nnvm::Tuple<float>(tmp, tmp+4)).describe("Bounding box mean");
    tmp[0] = 0.1f; tmp[1] = 0.1f; tmp[2] = 0.2f; tmp[3] = 0.2f;
    DMLC_DECLARE_FIELD(bbox_std).set_default(nnvm::Tuple<float>(tmp, tmp+4)).describe("Bounding box std");
    DMLC_DECLARE_FIELD(class_agnostic).set_default(true)
    .describe("Whether use class agnostic");
  }
};
 63 | 
// Factory for the DecodeBBox operator on device type `xpu`. Only declared
// here; the specialisations are defined outside this header.
template<typename xpu>
Operator *CreateOp(DecodeBBoxParam param);
 66 | 
 67 | #if DMLC_USE_CXX11
 68 | class DecodeBBoxProp : public OperatorProperty {
 69 |  public:
 70 |   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
 71 |     param_.Init(kwargs);
 72 |   }
 73 | 
 74 |   std::map<std::string, std::string> GetParams() const override {
 75 |     return param_.__DICT__();
 76 |   }
 77 | 
 78 |   bool InferShape(std::vector<TShape> *in_shape,
 79 |                   std::vector<TShape> *out_shape,
 80 |                   std::vector<TShape> *aux_shape) const override {
 81 |     using namespace mshadow;
 82 |     CHECK_EQ(in_shape->size(), 3) << "Input:[rois, bbox_pred, im_info]";
 83 |     const TShape &dshape = in_shape->at(decodebbox::kBBoxPred);
 84 | 
 85 |     const bool class_agnostic = param_.class_agnostic;
 86 |     TShape bbox_shape;
 87 |     if (class_agnostic) {
 88 |       const int nbatch = dshape[0];
 89 |       const int nrois = dshape[1];
 90 |       bbox_shape = Shape3(nbatch, nrois, 4);
 91 |     } else {
 92 |       bbox_shape = dshape;
 93 |     }
 94 | 
 95 |     out_shape->clear();
 96 |     aux_shape->clear();
 97 |     out_shape->push_back(bbox_shape);
 98 | 
 99 |     return true;
100 |   }
101 | 
102 |   OperatorProperty* Copy() const override {
103 |     auto ptr = new DecodeBBoxProp();
104 |     ptr->param_ = param_;
105 |     return ptr;
106 |   }
107 | 
108 |   std::string TypeString() const override {
109 |     return "_contrib_DecodeBBox";
110 |   }
111 | 
112 |   int NumOutputs() const override {
113 |     return 1;
114 |   }
115 | 
116 |   std::vector<std::string> ListArguments() const override {
117 |     return {"rois", "bbox_pred", "im_info"};
118 |   }
119 | 
120 |   std::vector<std::string> ListOutputs() const override {
121 |     return {"output"};
122 |   }
123 | 
124 |   std::vector<int> DeclareBackwardDependency(
125 |     const std::vector<int> &out_grad,
126 |     const std::vector<int> &in_data,
127 |     const std::vector<int> &out_data) const override {
128 |     return {};
129 |   }
130 | 
131 |   // Operator* CreateOperator(Context ctx) const override;
132 |   Operator *CreateOperator(Context ctx) const override {
133 |     LOG(FATAL) << "Not Implemented.";
134 |     return NULL;
135 |   }
136 | 
137 |   Operator *CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
138 |                              std::vector<int> *in_type) const override;
139 | 
140 |  private:
141 |   DecodeBBoxParam param_;
142 | };  // class DecodeBBoxProp
143 | 
144 | #endif  // DMLC_USE_CXX11
145 | }  // namespace op
146 | }  // namespace mxnet
147 | 
148 | #endif  //  MXNET_OPERATOR_CONTRIB_DECODEBBOX_INL_H_
149 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/nms-inl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * \file nms-inl.h
 22 |  * \brief NMS Operator
 23 |  * \author Yanghao Li
 24 | */
 25 | #ifndef MXNET_OPERATOR_CONTRIB_NMS_INL_H_
 26 | #define MXNET_OPERATOR_CONTRIB_NMS_INL_H_
 27 | 
 28 | #include <dmlc/logging.h>
 29 | #include <dmlc/parameter.h>
 30 | #include <mxnet/operator.h>
 31 | #include <map>
 32 | #include <vector>
 33 | #include <string>
 34 | #include <utility>
 35 | #include <ctime>
 36 | #include <cstring>
 37 | #include <iostream>
 38 | #include "../operator_common.h"
 39 | #include "../mshadow_op.h"
 40 | 
 41 | namespace mxnet {
 42 | namespace op {
 43 | 
 44 | namespace nms {
 45 | enum NMSOpInputs {kBBox};
 46 | enum NMSOpOutputs {kOut, kScore};
 47 | enum NMSForwardResource {kTempSpace};
 48 | }  // nms
 49 | 
/*!
 * \brief Parameters of the NMS operator.
 */
struct NMSParam : public dmlc::Parameter<NMSParam> {
  // Number of top scoring boxes kept before NMS; defaults to 6000.
  int rpn_pre_nms_top_n;
  // Number of top scoring boxes kept after NMS; defaults to 300.
  // Also the size of the second output dimension (see NMSProp::InferShape).
  int rpn_post_nms_top_n;
  // NMS threshold; defaults to 0.7.
  // NOTE(review): the describe text says "below which to suppress"; NMS
  // conventionally suppresses boxes whose overlap exceeds the threshold —
  // confirm against the kernel before relying on the wording.
  float threshold;
  // Whether the score output is visible to the user; defaults to false.
  bool output_score;
  // Whether the input rois are already sorted by confidence; defaults to false.
  bool already_sorted;
  // Workspace for NMS in MB; defaults to 256.
  uint64_t workspace;

  DMLC_DECLARE_PARAMETER(NMSParam) {
    DMLC_DECLARE_FIELD(rpn_pre_nms_top_n).set_default(6000)
    .describe("Number of top scoring boxes to keep before applying NMS to RPN proposals");
    DMLC_DECLARE_FIELD(rpn_post_nms_top_n).set_default(300)
    .describe("Number of top scoring boxes to keep after applying NMS to RPN proposals");
    DMLC_DECLARE_FIELD(threshold).set_default(0.7)
    .describe("NMS value, below which to suppress.");
    DMLC_DECLARE_FIELD(output_score).set_default(false)
    .describe("Add score to outputs");
    DMLC_DECLARE_FIELD(already_sorted).set_default(false)
    .describe("if input rois have been sorted by confidence");
    DMLC_DECLARE_FIELD(workspace).set_default(256)
    .describe("Workspace for NMS in MB, default to 256");
  }
};
 73 | 
// Factory for the NMS operator on device type `xpu`. Only declared here;
// the specialisations are defined outside this header.
template<typename xpu>
Operator *CreateOp(NMSParam param);
 76 | 
 77 | #if DMLC_USE_CXX11
 78 | class NMSProp : public OperatorProperty {
 79 |  public:
 80 |   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
 81 |     param_.Init(kwargs);
 82 |   }
 83 | 
 84 |   std::map<std::string, std::string> GetParams() const override {
 85 |     return param_.__DICT__();
 86 |   }
 87 | 
 88 |   bool InferShape(std::vector<TShape> *in_shape,
 89 |                   std::vector<TShape> *out_shape,
 90 |                   std::vector<TShape> *aux_shape) const override {
 91 |     using namespace mshadow;
 92 |     CHECK_EQ(in_shape->size(), 1) << "Input:[bbox]";
 93 |     const TShape &dshape = in_shape->at(nms::kBBox);
 94 |     if (dshape.ndim() == 0) return false;
 95 |     out_shape->clear();
 96 |     // output
 97 |     out_shape->push_back(Shape3(dshape[0], param_.rpn_post_nms_top_n, 4));
 98 |     // score
 99 |     out_shape->push_back(Shape3(dshape[0], param_.rpn_post_nms_top_n, 1));
100 |     return true;
101 |   }
102 | 
103 |   OperatorProperty* Copy() const override {
104 |     auto ptr = new NMSProp();
105 |     ptr->param_ = param_;
106 |     return ptr;
107 |   }
108 | 
109 |   std::string TypeString() const override {
110 |     return "_contrib_NMS";
111 |   }
112 | 
113 |   std::vector<ResourceRequest> ForwardResource(
114 |       const std::vector<TShape> &in_shape) const override {
115 |     return {ResourceRequest::kTempSpace};
116 |   }
117 | 
118 |   std::vector<int> DeclareBackwardDependency(
119 |     const std::vector<int> &out_grad,
120 |     const std::vector<int> &in_data,
121 |     const std::vector<int> &out_data) const override {
122 |     return {};
123 |   }
124 | 
125 |   int NumVisibleOutputs() const override {
126 |     if (param_.output_score) {
127 |       return 2;
128 |     } else {
129 |       return 1;
130 |     }
131 |   }
132 | 
133 |   int NumOutputs() const override {
134 |     return 2;
135 |   }
136 | 
137 |   std::vector<std::string> ListArguments() const override {
138 |     return {"rois"};
139 |   }
140 | 
141 |   std::vector<std::string> ListOutputs() const override {
142 |     return {"output", "score"};
143 |   }
144 | 
145 |   Operator* CreateOperator(Context ctx) const override;
146 | 
147 |  private:
148 |   NMSParam param_;
149 | };  // class NMSProp
150 | 
151 | #endif  // DMLC_USE_CXX11
152 | }  // namespace op
153 | }  // namespace mxnet
154 | 
155 | #endif  //  MXNET_OPERATOR_CONTRIB_NMS_INL_H_
156 | 


--------------------------------------------------------------------------------
/models/maskrcnn/bbox_post_processing.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import numpy as np
  3 | 
  4 | from operator_py.nms import py_nms_wrapper
  5 | 
  6 | def multiclass_nms(nms, cls_score, bbox_xyxy, min_det_score, max_det_per_image):
  7 |     # remove background
  8 |     cls_score = cls_score[:, 1:]
  9 |     # TODO: the output shape of class_agnostic box is [n, 4], while class_aware box is [n, 4 * (1 + class)]
 10 |     bbox_xyxy = bbox_xyxy[:, 4:] if bbox_xyxy.shape[1] != 4 else bbox_xyxy
 11 |     num_class = cls_score.shape[1]
 12 | 
 13 |     cls_det = [np.empty((0, 6), dtype=np.float32) for _ in range(num_class)] # [x1, y1, x2, y2, score, cls]
 14 | 
 15 |     for cid in range(num_class):
 16 |         score = cls_score[:, cid]
 17 |         if bbox_xyxy.shape[1] != 4:
 18 |             _bbox_xyxy = bbox_xyxy[:, cid * 4:(cid + 1) * 4]
 19 |         else:
 20 |             _bbox_xyxy = bbox_xyxy
 21 |         valid_inds = np.where(score > min_det_score)[0]
 22 |         box = _bbox_xyxy[valid_inds]
 23 |         score = score[valid_inds]
 24 |         det = np.concatenate((box, score.reshape(-1, 1)), axis=1).astype(np.float32)
 25 |         det = nms(det)
 26 |         cls = np.full((det.shape[0], 1), cid, dtype=np.float32)
 27 |         cls_det[cid] = np.hstack((det, cls))
 28 | 
 29 |     cls_det = np.vstack([det for det in cls_det])
 30 |     scores = cls_det[:, -2]
 31 |     top_index = np.argsort(scores)[::-1][:max_det_per_image]
 32 |     return cls_det[top_index]
 33 | 
 34 | 
 35 | class BboxPostProcessingOperator(mx.operator.CustomOp):
 36 |     def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr):
 37 |         super().__init__()
 38 |         self.max_det_per_image = max_det_per_image
 39 |         self.min_det_score = min_det_score
 40 |         self.nms_type = nms_type
 41 |         self.nms_thr = nms_thr
 42 | 
 43 |     def forward(self, is_train, req, in_data, out_data, aux):
 44 |         if self.nms_type == 'nms':
 45 |             nms = py_nms_wrapper(self.nms_thr)
 46 |         else:
 47 |             raise NotImplementedError
 48 | 
 49 |         cls_score = in_data[0].asnumpy()
 50 |         bbox_xyxy = in_data[1].asnumpy()
 51 | 
 52 |         cls_score_shape = cls_score.shape # (b, n, num_class_withbg)
 53 |         bbox_xyxy_shape = bbox_xyxy.shape # (b, n, 4) or (b, n, 4 * num_class_withbg)
 54 |         batch_image = cls_score_shape[0]
 55 |         num_bbox = cls_score_shape[1]
 56 |         num_class_withbg = cls_score_shape[2]
 57 | 
 58 |         post_score = np.zeros((batch_image, self.max_det_per_image, 1), dtype=np.float32)
 59 |         post_bbox_xyxy = np.zeros((batch_image, self.max_det_per_image, 4), dtype=np.float32)
 60 |         post_cls = np.full((batch_image, self.max_det_per_image, 1), -1, dtype=np.float32)
 61 | 
 62 |         for i, (per_image_cls_score, per_image_bbox_xyxy) in enumerate(zip(cls_score, bbox_xyxy)):
 63 |             cls_det = multiclass_nms(nms, per_image_cls_score, per_image_bbox_xyxy, \
 64 |                                      self.min_det_score, self.max_det_per_image)
 65 |             num_det = cls_det.shape[0]
 66 |             post_bbox_xyxy[i, :num_det] = cls_det[:, :4]
 67 |             post_score[i, :num_det] = cls_det[:, -2][:, np.newaxis] # convert to (n, 1)
 68 |             post_cls[i, :num_det] = cls_det[:, -1][:, np.newaxis] # convert to (n, 1)
 69 | 
 70 |         self.assign(out_data[0], req[0], post_score)
 71 |         self.assign(out_data[1], req[1], post_bbox_xyxy)
 72 |         self.assign(out_data[2], req[2], post_cls)
 73 | 
 74 |     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
 75 |         self.assign(in_grad[0], req[0], 0)
 76 |         self.assign(in_grad[1], req[1], 0)
 77 | 
 78 | 
 79 | @mx.operator.register("BboxPostProcessing")
 80 | class BboxPostProcessingProp(mx.operator.CustomOpProp):
 81 |     def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr):
 82 |         super().__init__(need_top_grad=False)
 83 |         self.max_det_per_image = int(max_det_per_image)
 84 |         self.min_det_score = float(min_det_score)
 85 |         self.nms_type = str(nms_type)
 86 |         self.nms_thr = float(nms_thr)
 87 | 
 88 |     def list_arguments(self):
 89 |         return ['cls_score', 'bbox_xyxy']
 90 | 
 91 |     def list_outputs(self):
 92 |         return ['post_score', 'post_bbox_xyxy', 'post_cls']
 93 | 
 94 |     def infer_shape(self, in_shape):
 95 |         cls_score_shape = in_shape[0] # (b, n, num_class_withbg)
 96 |         bbox_xyxy_shape = in_shape[1] # (b, n, 4) or (b, n, 4 * num_class_withbg)
 97 | 
 98 |         batch_image = cls_score_shape[0]
 99 | 
100 |         post_score_shape = (batch_image, self.max_det_per_image, 1)
101 |         post_bbox_xyxy_shape = (batch_image, self.max_det_per_image, 4)
102 |         post_cls_shape = (batch_image, self.max_det_per_image, 1)
103 | 
104 |         return [cls_score_shape, bbox_xyxy_shape], \
105 |                [post_score_shape, post_bbox_xyxy_shape, post_cls_shape]
106 | 
107 |     def create_operator(self, ctx, shapes, dtypes):
108 |         return BboxPostProcessingOperator(self.max_det_per_image, self.min_det_score, self.nms_type, self.nms_thr)
109 | 
110 |     def declare_backward_dependency(self, out_grad, in_data, out_data):
111 |         return []
112 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/global_average_pooling.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * Copyright (c) 2018 by Contributors
 22 |  * \file global_average_pooling.cu
 23 |  * \brief port from https://github.com/hujie-frank/SENet
 24 |  * \author Chenxia Han
 25 | */
 26 | #include <vector>
 27 | #include <algorithm>
 28 | #include "../mxnet_op.h"
 29 | #include "../../common/cuda_utils.h"
 30 | #include "./global_average_pooling-inl.h"
 31 | 
// Evaluates a CUDA runtime call and fails a CHECK_EQ, with the CUDA error
// string appended, when the result is not cudaSuccess.
#define GAP_CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
  } while (0)
// Loop header for use inside a kernel: each thread starts at its global
// thread index and advances by the total number of launched threads
// (blockDim.x * gridDim.x) until it reaches n.
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
      i < (n); \
      i += blockDim.x * gridDim.x)
 42 | 
 43 | constexpr int CAFFE_CUDA_NUM_THREADS = 512;
 44 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096;
 45 | 
 46 | inline int CAFFE_GET_BLOCKS(const int N) {
 47 |   return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS,
 48 |                   CAFFE_MAXIMUM_NUM_BLOCKS);
 49 | }
 50 | 
 51 | namespace mshadow {
 52 | namespace cuda {
 53 | 
// Averages one contiguous run of `spatial_dim` input values per block:
// block b reads bottom_data[b * spatial_dim, (b + 1) * spatial_dim) and
// writes the mean to top_data[b].
// The shared buffer holds CAFFE_CUDA_NUM_THREADS partial sums, so the kernel
// must not be launched with more threads per block than that. The halving
// reduction below only folds in every partial sum when blockDim.x is a power
// of two; the launch in this file uses CAFFE_CUDA_NUM_THREADS.
template <typename Dtype>
__global__ void GlobalAvePoolForwardKernel(const int spatial_dim, 
    const Dtype* bottom_data, Dtype* top_data) {
  __shared__ Dtype buffer[CAFFE_CUDA_NUM_THREADS];
  unsigned int tid = threadIdx.x;
  buffer[tid] = 0;
  __syncthreads();

  // Each thread accumulates a strided subset of the block's values.
  for (int j = tid; j < spatial_dim; j += blockDim.x) {
    buffer[tid] += bottom_data[blockIdx.x * spatial_dim + j];
  }
  __syncthreads();

  // Pairwise reduction in shared memory: the active half of the threads adds
  // the other half's partial sums; the barrier separates the rounds.
  for (int i = blockDim.x / 2; i > 0; i >>= 1) {
    if (tid < i) {
      buffer[threadIdx.x] += buffer[threadIdx.x + i];
    }
    __syncthreads();
  }

  // Thread 0 holds the block total; turn it into the mean.
  if (tid == 0) {
    top_data[blockIdx.x] = buffer[0] / spatial_dim;
  }
}
 78 | 
 79 | template<typename DType>
 80 | inline void GAPForward(const Tensor<gpu, 4, DType> &out,
 81 | 					   const Tensor<gpu, 4, DType> &data) {
 82 |   const DType *bottom_data = data.dptr_;
 83 |   DType *top_data = out.dptr_;
 84 |   const int nblocks = data.shape_.ProdShape(0, 2);
 85 |   const int spatial_dim = data.shape_.ProdShape(2, 4);
 86 |   cudaStream_t stream = Stream<gpu>::GetStream(out.stream_);
 87 |   GlobalAvePoolForwardKernel<DType> << <nblocks, CAFFE_CUDA_NUM_THREADS,
 88 | 	0, stream >> >(spatial_dim, bottom_data, top_data);
 89 |   GAP_CUDA_CHECK(cudaPeekAtLastError());
 90 | }
 91 | 
// Spreads each output gradient evenly over the inputs it was averaged from:
// for every index below nthreads, bottom_diff[index] receives
// top_diff[index / spatial_dim] / spatial_dim.
template <typename Dtype> 
__global__ void GlobalAvePoolBackwardKernel(const int nthreads, const int spatial_dim, 
    const Dtype* top_diff, Dtype* bottom_diff) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // index of the pooled output this input element contributed to
    const int n = index / spatial_dim;
    bottom_diff[index] = top_diff[n] / spatial_dim;
  }
}
100 | 
/*!
 * \brief Launch the global average pooling backward kernel.
 *
 * \param in_grad  gradient w.r.t. the input; every element is overwritten
 * \param out_grad gradient w.r.t. the pooled output
 */
template<typename DType>
inline void GAPBackward(const Tensor<gpu, 4, DType> &in_grad,
                        const Tensor<gpu, 4, DType> &out_grad) {
  const DType *top_diff = out_grad.dptr_;
  DType *bottom_diff = in_grad.dptr_;
  const int count = in_grad.shape_.Size();
  const int spatial_dim = in_grad.shape_.ProdShape(2, 4);
  // CAFFE_GET_BLOCKS(0) is 0, and a grid of zero blocks is an invalid launch
  // configuration; an empty gradient needs no work, so return early.
  if (count == 0) return;
  cudaStream_t stream = Stream<gpu>::GetStream(in_grad.stream_);
  GlobalAvePoolBackwardKernel<DType><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS,
      0, stream>>>(count, spatial_dim, top_diff, bottom_diff);
  GAP_CUDA_CHECK(cudaPeekAtLastError());
}
113 | 
114 | }  // namespace cuda
115 | 
// mshadow-level entry point for the forward pass; forwards to the CUDA
// implementation in namespace cuda.
template<typename DType>
inline void GAPForward(const Tensor<gpu, 4, DType> &out,
                       const Tensor<gpu, 4, DType> &data) {
  cuda::GAPForward<DType>(out, data);
}
121 | 
// mshadow-level entry point for the backward pass; forwards to the CUDA
// implementation in namespace cuda.
template<typename DType>
inline void GAPBackward(const Tensor<gpu, 4, DType> &in_grad,
                        const Tensor<gpu, 4, DType> &out_grad) {
  cuda::GAPBackward<DType>(in_grad, out_grad);
}
127 | 
128 | }  // namespace mshadow
129 | 
130 | namespace mxnet {
131 | namespace op {
// GPU specialisation of the operator factory: instantiates GAPOp for the
// element type selected by `dtype`. Ownership of the returned pointer
// passes to the caller.
template<>
Operator *CreateOp<gpu>(GAPParam param, int dtype) {
  Operator *gap_op = nullptr;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    gap_op = new GAPOp<gpu, DType>(param);
  });
  return gap_op;
}
140 | 
141 | }  // namespace op
142 | }  // namespace mxnet
143 | 
144 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/sync_batch_norm.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | /*!
 20 |  * Copyright (c) 2018 by Contributors
 21 |  * \file sync_batch_norm.cc
 22 |  * \brief Synchronized BatchNorm modified from BatchNormV1
 23 |  * \author Hang Zhang
 24 | */
 25 | 
 26 | #include "sync_batch_norm-inl.h"
 27 | #include <nnvm/op_attr_types.h>
 28 | 
 29 | namespace mxnet {
 30 | namespace op {
 31 | template<>
 32 | Operator *CreateOp<cpu>(SyncBatchNormParam param, int dtype) {
 33 |   return new SyncBatchNorm<cpu>(param);
 34 | }
 35 | 
 36 | // DO_BIND_DISPATCH comes from operator_common.h
 37 | Operator *SyncBatchNormProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
 38 |     std::vector<int> *in_type) const {
 39 |     std::vector<TShape> out_shape, aux_shape;
 40 |     std::vector<int> out_type, aux_type;
 41 |     CHECK(InferType(in_type, &out_type, &aux_type));
 42 |     CHECK(InferShape(in_shape, &out_shape, &aux_shape));
 43 |     DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
 44 | }
 45 | 
// Register the parameter struct, then expose SyncBatchNormProp under the
// operator name _contrib_SyncBatchNorm. The operator takes data, gamma, beta,
// moving_mean and moving_var, in that order, followed by the fields of
// SyncBatchNormParam. The raw string below is the operator's user-facing
// documentation and is therefore part of the runtime output.
DMLC_REGISTER_PARAMETER(SyncBatchNormParam);

MXNET_REGISTER_OP_PROPERTY(_contrib_SyncBatchNorm, SyncBatchNormProp)
.describe(R"code(Batch normalization.

Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
well as offset ``beta``.
Standard BN [1]_ implementation only normalize the data within each device.
SyncBN normalizes the input within the whole mini-batch.
We follow the sync-onece implmentation described in the paper [2]_ .

Assume the input has more than one dimension and we normalize along axis 1.
We first compute the mean and variance along this axis:

.. math::

  data\_mean[i] = mean(data[:,i,:,...]) \\
  data\_var[i] = var(data[:,i,:,...])

Then compute the normalized output, which has the same shape as input, as following:

.. math::

  out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

Both *mean* and *var* returns a scalar by treating the input as a vector.

Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
``data_var`` as well, which are needed for the backward pass.

Besides the inputs and the outputs, this operator accepts two auxiliary
states, ``moving_mean`` and ``moving_var``, which are *k*-length
vectors. They are global statistics for the whole dataset, which are updated
by::

  moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
  moving_var = moving_var * momentum + data_var * (1 - momentum)

If ``use_global_stats`` is set to be true, then ``moving_mean`` and
``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
the output. It is often used during inference.

Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
then set ``gamma`` to 1 and its gradient to 0.

Reference:
  .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating
  deep network training by reducing internal covariate shift." *ICML 2015*
  .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang,
  Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018*
)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input data to batch normalization")
.add_argument("gamma", "NDArray-or-Symbol", "gamma array")
.add_argument("beta", "NDArray-or-Symbol", "beta array")
.add_argument("moving_mean", "NDArray-or-Symbol", "running mean of input")
.add_argument("moving_var", "NDArray-or-Symbol", "running variance of input")
.add_arguments(SyncBatchNormParam::__FIELDS__());
104 | 
105 | NNVM_REGISTER_OP(_contrib_SyncBatchNorm)
106 | .set_attr<nnvm::FSetInputVarAttrOnCompose>("FSetInputVarAttrOnCompose",
107 |     [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) {
108 |       if (var->attrs.dict.find("__init__") != var->attrs.dict.end()) return;
109 |       if (index == 3) {
110 |         var->attrs.dict["__init__"] = "[\"zero\", {}]";
111 |       } else if (index == 4) {
112 |         var->attrs.dict["__init__"] = "[\"one\", {}]";
113 |       }
114 |     });
115 | 
116 | }  // namespace op
117 | }  // namespace mxnet
118 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/sync_inplace_activation_batch_norm.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | /*!
 20 |  * Copyright (c) 2018 by Contributors
 21 |  * \file sync_inplace_activation_batch_norm.cc
 22 |  * \brief Synchronized BatchNorm modified from BatchNormV1
 23 |  * \author Yuntao Chen
 24 | */
 25 | 
 26 | #include "sync_inplace_activation_batch_norm-inl.h"
 27 | #include <nnvm/op_attr_types.h>
 28 | 
 29 | namespace mxnet {
 30 | namespace op {
 31 | template<>
 32 | Operator *CreateOp<cpu>(SyncInplaceABNParam param, int dtype) {
 33 |   return new SyncInplaceABN<cpu>(param);
 34 | }
 35 | 
 36 | // DO_BIND_DISPATCH comes from operator_common.h
 37 | Operator *SyncInplaceABNProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
 38 |     std::vector<int> *in_type) const {
 39 |     std::vector<TShape> out_shape, aux_shape;
 40 |     std::vector<int> out_type, aux_type;
 41 |     CHECK(InferType(in_type, &out_type, &aux_type));
 42 |     CHECK(InferShape(in_shape, &out_shape, &aux_shape));
 43 |     DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
 44 | }
 45 | 
// Register the parameter struct, then expose SyncInplaceABNProp under the
// operator name _contrib_SyncInplaceABN. The operator takes data, gamma,
// beta, moving_mean and moving_var, in that order, followed by the fields of
// SyncInplaceABNParam. The raw string below is the operator's user-facing
// documentation and is therefore part of the runtime output.
// NOTE(review): the description only covers batch normalization and does not
// mention the activation implied by the operator name — confirm whether the
// text should be extended.
DMLC_REGISTER_PARAMETER(SyncInplaceABNParam);

MXNET_REGISTER_OP_PROPERTY(_contrib_SyncInplaceABN, SyncInplaceABNProp)
.describe(R"code(Batch normalization.

Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
well as offset ``beta``.
Standard BN [1]_ implementation only normalize the data within each device.
SyncBN normalizes the input within the whole mini-batch.
We follow the sync-onece implmentation described in the paper [2]_ .

Assume the input has more than one dimension and we normalize along axis 1.
We first compute the mean and variance along this axis:

.. math::

  data\_mean[i] = mean(data[:,i,:,...]) \\
  data\_var[i] = var(data[:,i,:,...])

Then compute the normalized output, which has the same shape as input, as following:

.. math::

  out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

Both *mean* and *var* returns a scalar by treating the input as a vector.

Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
``data_var`` as well, which are needed for the backward pass.

Besides the inputs and the outputs, this operator accepts two auxiliary
states, ``moving_mean`` and ``moving_var``, which are *k*-length
vectors. They are global statistics for the whole dataset, which are updated
by::

  moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
  moving_var = moving_var * momentum + data_var * (1 - momentum)

If ``use_global_stats`` is set to be true, then ``moving_mean`` and
``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
the output. It is often used during inference.

Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
then set ``gamma`` to 1 and its gradient to 0.

Reference:
  .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating
  deep network training by reducing internal covariate shift." *ICML 2015*
  .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang,
  Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018*
)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input data to batch normalization")
.add_argument("gamma", "NDArray-or-Symbol", "gamma array")
.add_argument("beta", "NDArray-or-Symbol", "beta array")
.add_argument("moving_mean", "NDArray-or-Symbol", "running mean of input")
.add_argument("moving_var", "NDArray-or-Symbol", "running variance of input")
.add_arguments(SyncInplaceABNParam::__FIELDS__());
104 | 
105 | NNVM_REGISTER_OP(_contrib_SyncInplaceABN)
106 | .set_attr<nnvm::FSetInputVarAttrOnCompose>("FSetInputVarAttrOnCompose",
107 |     [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) {
108 |       if (var->attrs.dict.find("__init__") != var->attrs.dict.end()) return;
109 |       if (index == 3) {
110 |         var->attrs.dict["__init__"] = "[\"zero\", {}]";
111 |       } else if (index == 4) {
112 |         var->attrs.dict["__init__"] = "[\"one\", {}]";
113 |       }
114 |     });
115 | 
116 | }  // namespace op
117 | }  // namespace mxnet
118 | 


--------------------------------------------------------------------------------
/models/tridentnet/input.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import mxnet as mx
  3 | 
  4 | from core.detection_input import DetectionAugmentation, AnchorTarget2D
  5 | from operator_py.cython.bbox import bbox_overlaps_cython
  6 | 
  7 | 
  8 | class ScaleAwareRange(DetectionAugmentation):
  9 |     def __init__(self, pScaleRange):
 10 |         super().__init__()
 11 |         self.p = pScaleRange
 12 | 
 13 |     def apply(self, input_record):
 14 |         p = self.p
 15 | 
 16 |         im_info = input_record['im_info']
 17 | 
 18 |        # input_record["valid_ranges_on_origin"] = p.cal_on_origin
 19 |         input_record["valid_ranges"] = np.array(p.valid_ranges, dtype=np.float32).reshape(-1, 2)
 20 |         if p.cal_on_origin:
 21 |             input_record["valid_ranges"] *= im_info[2]
 22 |         # replace -1 with max_size
 23 |         inds = np.where(input_record["valid_ranges"][:, 1] < 0)[0]
 24 |         input_record["valid_ranges"][inds, 1] = max(im_info[0], im_info[1])
 25 | 
 26 | 
 27 | class TridentAnchorTarget2D(AnchorTarget2D):
 28 |     """
 29 |     input: image_meta: tuple(h, w, scale)
 30 |            gt_bbox, ndarry(max_num_gt, 4)
 31 |     output: anchor_label, ndarray(num_branch, num_anchor * 2, h, w)
 32 |             anchor_bbox_target, ndarray(num_branch, num_anchor * 4, h, w)
 33 |             anchor_bbox_weight, ndarray(num_branch, num_anchor * 4, h, w)
 34 |             valid_ranges, ndarray(num_branch, 2)
 35 |     """
 36 | 
 37 |     def __init__(self, pAnchor):
 38 |         super().__init__(pAnchor)
 39 | 
 40 |     def _filter_anchor_by_scale_range(self, cls_label, valid_anchor, gt_bbox, valid_range, invalid_anchor_threshd):
 41 |         if len(gt_bbox) == 0:
 42 |             return
 43 |         gt_bbox_sizes = (gt_bbox[:, 2] - gt_bbox[:, 0] + 1.0) * (gt_bbox[:, 3] - gt_bbox[:, 1] + 1.0)
 44 |         invalid_gt_bbox_inds = np.where((gt_bbox_sizes < valid_range[0]**2) | (gt_bbox_sizes > valid_range[1]**2))[0]
 45 |         invalid_gt_bbox = gt_bbox[invalid_gt_bbox_inds]
 46 |         if len(invalid_gt_bbox) > 0:
 47 |             invalid_overlaps = bbox_overlaps_cython(
 48 |                 valid_anchor.astype(np.float32, copy=False), invalid_gt_bbox.astype(np.float32, copy=False))
 49 |             invalid_argmax_overlaps = invalid_overlaps.argmax(axis=1)
 50 |             invalid_max_overlaps = invalid_overlaps[np.arange(len(valid_anchor)), invalid_argmax_overlaps]
 51 | 
 52 |             # ignore anchors overlapped with invalid gt boxes
 53 |             disable_inds = np.where((invalid_max_overlaps > invalid_anchor_threshd))[0]
 54 |             cls_label[disable_inds] = -1
 55 | 
 56 |     def apply(self, input_record):
 57 |         p = self.p
 58 | 
 59 |         im_info = input_record["im_info"]
 60 |         gt_bbox = input_record["gt_bbox"]
 61 |         valid_ranges = input_record["valid_ranges"]
 62 |         assert isinstance(gt_bbox, np.ndarray)
 63 |         assert gt_bbox.dtype == np.float32
 64 |         assert gt_bbox.shape[1] == 5
 65 |         valid = np.where(gt_bbox[:, 0] != -1)[0]
 66 |         gt_bbox  = gt_bbox[valid]
 67 |         gt_class = gt_bbox[:, -1].copy()
 68 |         gt_bbox  = gt_bbox[:, :4].copy()
 69 | 
 70 |         h, w = im_info[:2]
 71 |         if h >= w:
 72 |             fh, fw = p.generate.long, p.generate.short
 73 |         else:
 74 |             fh, fw = p.generate.short, p.generate.long
 75 |         
 76 |         valid_cls_label    = None
 77 |         valid_anchor_label = None
 78 |         valid_index, valid_anchor = self._gather_valid_anchor(im_info)
 79 |         if p.generate.use_groupsoftmax:
 80 |             gt_class = p.gtclass2rpn(gt_class)
 81 |             valid_cls_label, valid_anchor_label = \
 82 |                 self._assign_label_to_anchor_group(valid_anchor, gt_bbox, gt_class,
 83 |                                                    p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr)
 84 |         else:
 85 |             valid_cls_label, valid_anchor_label = \
 86 |                 self._assign_label_to_anchor(valid_anchor, gt_bbox,
 87 |                                              p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr)
 88 | 
 89 |         cls_labels, reg_targets, reg_weights = [], [], []
 90 |         for valid_range in valid_ranges:
 91 |             cls_label = valid_cls_label.copy()
 92 |             self._filter_anchor_by_scale_range(cls_label, valid_anchor, gt_bbox,
 93 |                                                valid_range, p.trident.invalid_anchor_threshd)
 94 |             self._sample_anchor(cls_label, p.sample.image_anchor, p.sample.pos_fraction)
 95 |             reg_target, reg_weight = self._cal_anchor_target(cls_label, valid_anchor, gt_bbox, valid_anchor_label)
 96 |             cls_label, reg_target, reg_weight = \
 97 |                 self._scatter_valid_anchor(valid_index, cls_label, reg_target, reg_weight)
 98 | 
 99 |             cls_labels.append(cls_label.reshape((fh, fw, -1)).transpose(2, 0, 1).reshape(-1))
100 |             reg_targets.append(reg_target.reshape((fh, fw, -1)).transpose(2, 0, 1))
101 |             reg_weights.append(reg_weight.reshape((fh, fw, -1)).transpose(2, 0, 1))
102 | 
103 |         input_record["rpn_cls_label"] = np.stack(cls_labels)
104 |         input_record["rpn_reg_target"] = np.stack(reg_targets)
105 |         input_record["rpn_reg_weight"] = np.stack(reg_weights)
106 | 
107 |         return input_record["rpn_cls_label"], \
108 |                input_record["rpn_reg_target"], \
109 |                input_record["rpn_reg_weight"]
110 | 
111 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/fixed_divisor.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved.
  3 | 
  4 | Redistribution and use in source and binary forms, with or without
  5 | modification, are permitted provided that the following conditions are met:
  6 | 
  7 | 1. Redistributions of source code must retain the above copyright
  8 |    notice, this list of conditions and the following disclaimer.
  9 | 
 10 | 2. Redistributions in binary form must reproduce the above copyright
 11 |    notice, this list of conditions and the following disclaimer in the
 12 |    documentation and/or other materials provided with the distribution.
 13 | 
 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
 15 |    and IDIAP Research Institute nor the names of its contributors may be
 16 |    used to endorse or promote products derived from this software without
 17 |    specific prior written permission.
 18 | 
 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 29 | POSSIBILITY OF SUCH DAMAGE.
 30 | */
 31 | #ifndef CAFFE2_UTILS_FIXED_DIVISOR_H_
 32 | #define CAFFE2_UTILS_FIXED_DIVISOR_H_
 33 | 
 34 | #include <stdint.h>
 35 | 
 36 | #include <cstdint>
 37 | #include <cstdio>
 38 | #include <cstdlib>
 39 | 
 40 | #if defined(__CUDA_ARCH__) || defined(__HIP_ARCH__)
 41 | #define FIXED_DIVISOR_DECL inline __host__ __device__
 42 | #else
 43 | #define FIXED_DIVISOR_DECL inline
 44 | #endif
 45 | 
 46 | 
 47 | // Utility class for quickly calculating quotients and remainders for
 48 | // a known integer divisor
 49 | template <typename T>
 50 | class FixedDivisor {};
 51 | 
 52 | // Works for any positive divisor, 1 to INT_MAX. One 64-bit
 53 | // multiplication and one 64-bit shift is used to calculate the
 54 | // result.
 55 | template <>
 56 | class FixedDivisor<std::int32_t> {
 57 |  public:
 58 |   FixedDivisor() = default;
 59 | 
 60 |   explicit FixedDivisor(const std::int32_t d) : d_(d) {
 61 |     CalcSignedMagic();
 62 |   }
 63 | 
 64 |   FIXED_DIVISOR_DECL std::int32_t d() const {
 65 |     return d_;
 66 |   }
 67 | 
 68 |   FIXED_DIVISOR_DECL std::uint64_t magic() const {
 69 |     return magic_;
 70 |   }
 71 | 
 72 |   FIXED_DIVISOR_DECL int shift() const {
 73 |     return shift_;
 74 |   }
 75 | 
 76 |   /// Calculates `q = n / d`.
 77 |   FIXED_DIVISOR_DECL std::int32_t Div(const std::int32_t n) const {
 78 |     // In lieu of a mulhi instruction being available, perform the
 79 |     // work in uint64
 80 |     return (int32_t)((magic_ * (uint64_t)n) >> shift_);
 81 |   }
 82 | 
 83 |   /// Calculates `r = n % d`.
 84 |   FIXED_DIVISOR_DECL std::int32_t Mod(const std::int32_t n) const {
 85 |     return n - d_ * Div(n);
 86 |   }
 87 | 
 88 |   /// Calculates `q = n / d` and `r = n % d` together.
 89 |   FIXED_DIVISOR_DECL void
 90 |   DivMod(const std::int32_t n, std::int32_t* q, int32_t* r) const {
 91 |     *q = Div(n);
 92 |     *r = n - d_ * *q;
 93 |   }
 94 | 
 95 |  private:
 96 |   // Calculates magic multiplicative value and shift amount for calculating `q =
 97 |   // n / d` for signed 32-bit integers.
 98 |   // Implementation taken from Hacker's Delight section 10.
 99 |   void CalcSignedMagic() {
100 |     if (d_ == 1) {
101 |       magic_ = UINT64_C(0x1) << 32;
102 |       shift_ = 32;
103 |       return;
104 |     }
105 | 
106 |     const std::uint32_t two31 = UINT32_C(0x80000000);
107 |     const std::uint32_t ad = std::abs(d_);
108 |     const std::uint32_t t = two31 + ((uint32_t)d_ >> 31);
109 |     const std::uint32_t anc = t - 1 - t % ad; // Absolute value of nc.
110 |     std::uint32_t p = 31; // Init. p.
111 |     std::uint32_t q1 = two31 / anc; // Init. q1 = 2**p/|nc|.
112 |     std::uint32_t r1 = two31 - q1 * anc; // Init. r1 = rem(2**p, |nc|).
113 |     std::uint32_t q2 = two31 / ad; // Init. q2 = 2**p/|d|.
114 |     std::uint32_t r2 = two31 - q2 * ad; // Init. r2 = rem(2**p, |d|).
115 |     std::uint32_t delta = 0;
116 |     do {
117 |       ++p;
118 |       q1 <<= 1; // Update q1 = 2**p/|nc|.
119 |       r1 <<= 1; // Update r1 = rem(2**p, |nc|).
120 |       if (r1 >= anc) { // (Must be an unsigned
121 |         ++q1; // comparison here).
122 |         r1 -= anc;
123 |       }
124 |       q2 <<= 1; // Update q2 = 2**p/|d|.
125 |       r2 <<= 1; // Update r2 = rem(2**p, |d|).
126 |       if (r2 >= ad) { // (Must be an unsigned
127 |         ++q2; // comparison here).
128 |         r2 -= ad;
129 |       }
130 |       delta = ad - r2;
131 |     } while (q1 < delta || (q1 == delta && r1 == 0));
132 |     std::int32_t magic = q2 + 1;
133 |     if (d_ < 0) {
134 |       magic = -magic;
135 |     }
136 |     shift_ = p;
137 |     magic_ = (std::uint64_t)(std::uint32_t)magic;
138 |   }
139 | 
140 |   std::int32_t d_ = 1;
141 |   std::uint64_t magic_;
142 |   int shift_;
143 | };
144 | 
145 | 
146 | #endif // CAFFE2_UTILS_FIXED_DIVISOR_H_


--------------------------------------------------------------------------------
/utils/graph_optimize.py:
--------------------------------------------------------------------------------
  1 | # Licensed to the Apache Software Foundation (ASF) under one
  2 | # or more contributor license agreements.  See the NOTICE file
  3 | # distributed with this work for additional information
  4 | # regarding copyright ownership.  The ASF licenses this file
  5 | # to you under the Apache License, Version 2.0 (the
  6 | # "License"); you may not use this file except in compliance
  7 | # with the License.  You may obtain a copy of the License at
  8 | #
  9 | #   http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing,
 12 | # software distributed under the License is distributed on an
 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14 | # KIND, either express or implied.  See the License for the
 15 | # specific language governing permissions and limitations
 16 | # under the License.
 17 | 
 18 | import json
 19 | import logging
 20 | import mxnet as mx
 21 | 
 22 | 
 23 | def merge_bn(symbol, args, auxs, symbol_only=False):
 24 |     """
 25 |     Adapted from https://github.com/dmlc/tvm/blob/master/python/tvm/relay/frontend/mxnet.py
 26 |     Instead of translating nnvm graph into TVM relay graph, we adapt the script to translate
 27 |     it back to mxnet graph.
 28 |     """
 29 |     assert symbol is not None
 30 |     jgraph = json.loads(symbol.tojson())
 31 |     jnodes = jgraph["nodes"]
 32 |     node_map = {}
 33 |     node_op_map = {}
 34 | 
 35 |     for nid, node in enumerate(jnodes):
 36 |         # edges are [which_node, which_output, type(? not sure)]
 37 |         # mx.symbol has an attribute of __getitem__. sym[1] gives the second output
 38 |         children = [node_map[e[0]][e[1]] for e in node["inputs"]]
 39 |         attrs = node.get("attrs", {})
 40 |         node_name = node["name"]
 41 |         op_name = node["op"]
 42 |         if op_name == "null":
 43 |             attrs = dict({k:v for k, v in attrs.items() if k.startswith("__")})
 44 |             node_map[nid] = mx.sym.var(node_name, **attrs)
 45 |             node_op_map[nid] = ["Variable"]
 46 |         elif op_name == "BatchNorm":
 47 |             e = node["inputs"][0]
 48 |             _, gamma, beta, mmean, mvar = children
 49 |             gamma_name, beta_name, mmean_name, mvar_name = gamma.name, beta.name, mmean.name, mvar.name
 50 |             assert "gamma" in gamma_name
 51 |             assert "beta" in beta_name
 52 |             assert "moving_mean" in mmean_name
 53 |             assert "moving_var" in mvar_name
 54 |             eps = float(attrs["eps"])
 55 |             if attrs["use_global_stats"] == "True" and node_op_map[e[0]][e[1]] == "Convolution":
 56 |                 if not symbol_only:
 57 |                     if (mmean_name) not in auxs:
 58 |                         logging.info("Can not find {}, skipping".format(node_name + "_moving_mean"))
 59 |                     else:
 60 |                         logging.info("Merging {}".format(node_name))
 61 |                         # modify beta before gamma since gamma is not depend on beta
 62 |                         args[beta_name] -= args[gamma_name] * auxs[mmean_name] / mx.nd.sqrt(eps + auxs[mvar_name])
 63 |                         args[gamma_name] /= mx.nd.sqrt(eps + auxs[mvar_name])
 64 |                         # expand for broadcasting
 65 |                         if args[gamma_name].ndim == 1:
 66 |                             args[gamma_name] = args[gamma_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1)
 67 |                             args[beta_name] = args[beta_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1)
 68 |                             auxs[mmean_name] = auxs[mmean_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1)
 69 |                             auxs[mvar_name] = auxs[mvar_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1)
 70 |                         # set mmean and mvar to identity to avoid fusing more than once in weight sharing
 71 |                         auxs[mmean_name][:] = 0.0
 72 |                         auxs[mvar_name][:] = 1.0
 73 |                         # copy shared gamma and beta for each BN
 74 |                         args[node_name + "_gamma"] = args[gamma_name]
 75 |                         args[node_name + "_beta"] = args[beta_name]
 76 |                 # BroadcastScale is needed
 77 |                 gamma = mx.sym.var(node_name + "_gamma", shape=args[node_name + "_gamma"].shape)
 78 |                 beta = mx.sym.var(node_name + "_beta", shape=args[node_name + "_beta"].shape)
 79 |                 res = mx.sym.broadcast_add(mx.sym.contrib.BroadcastScale(data=children[0], scaler=gamma), beta)
 80 |             else:
 81 |                 res = mx.sym.BatchNorm(*children, **attrs, name=node_name)
 82 |             node_map[nid] = res
 83 |             node_op_map[nid] = ["BatchNorm"]
 84 |         else:
 85 |             if op_name.startswith("_contrib_"):
 86 |                 op_name = op_name.replace("_contrib_", "")
 87 |                 operator = eval("mx.sym.contrib." + op_name)
 88 |             elif op_name.startswith("_"):
 89 |                 operator = eval("mx.sym._internal." + op_name)
 90 |             else:
 91 |                 operator = eval("mx.sym." + op_name)
 92 |             res = operator(*children, **attrs, name=node_name)
 93 |             node_map[nid] = res
 94 |             node_op_map[nid] = [op_name]
 95 | 
 96 |     outputs = [node_map[e[0]][e[1]] for e in jgraph["heads"]]
 97 |     outputs = outputs[0] if len(outputs) == 1 else mx.sym.Group(outputs)
 98 |     return outputs, args, auxs
 99 | 
if __name__ == "__main__":
    # Manual check: rewrite a saved checkpoint's symbol (graph only, no
    # parameters) and print the resulting JSON.
    checkpoint_symbol = mx.sym.load("experiments/faster_r50v1_2fc_1x/checkpoint.json")
    merged_symbol = merge_bn(checkpoint_symbol, None, None, True)[0]
    print(merged_symbol.tojson())
104 | 


--------------------------------------------------------------------------------
/operator_py/cython/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
// Evaluates a CUDA runtime call and prints its error string to stdout when
// the result is not cudaSuccess. Execution continues afterwards; the error is
// not propagated to the caller.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division rounded up (for non-negative m and positive n). Each
// argument is expanded twice, so do not pass expressions with side effects.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in one unsigned long long mask word; used both as the
// thread-block size and as the tile width of the suppression mask.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
 80 | void _set_device(int device_id) {
 81 |   int current_device;
 82 |   CUDA_CHECK(cudaGetDevice(&current_device));
 83 |   if (current_device == device_id) {
 84 |     return;
 85 |   }
 86 |   // The call to cudaSetDevice must come before any calls to Get, which
 87 |   // may perform initialization using the GPU.
 88 |   CUDA_CHECK(cudaSetDevice(device_id));
 89 | }
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/generate_proposal-inl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * \file generate_proposal-inl.h
 22 |  * \brief GenerateProposal Operator
 23 |  * \author Piotr Teterwak, Bing Xu, Jian Guo, Pengfei Chen, Yuntao Chen, Yanghao Li
 24 | */
 25 | #ifndef MXNET_OPERATOR_CONTRIB_GENERATE_PROPOSAL_INL_H_
 26 | #define MXNET_OPERATOR_CONTRIB_GENERATE_PROPOSAL_INL_H_
 27 | 
 28 | #include <dmlc/logging.h>
 29 | #include <dmlc/parameter.h>
 30 | #include <mxnet/operator.h>
 31 | #include <map>
 32 | #include <vector>
 33 | #include <string>
 34 | #include <utility>
 35 | #include <ctime>
 36 | #include <cstring>
 37 | #include <iostream>
 38 | #include "../operator_common.h"
 39 | #include "../mshadow_op.h"
 40 | 
 41 | namespace mxnet {
 42 | namespace op {
 43 | 
// Index constants for the GenProposal operator.
namespace gen_proposal {
// Input positions, in the same order as GenProposalProp::ListArguments().
enum GenProposalOpInputs {kClsProb, kBBoxPred, kImInfo, kAnchor};
// Output positions.
// NOTE(review): kScore is declared, but GenProposalProp reports a single
// output - confirm whether it is still used.
enum GenProposalOpOutputs {kOut, kScore};
// Resource positions; presumably indexes the temp space requested by
// GenProposalProp::ForwardResource() - confirm in the operator body.
enum GenProposalForwardResource {kTempSpace};
}  // gen_proposal
 49 | 
 50 | struct GenProposalParam : public dmlc::Parameter<GenProposalParam> {
 51 |   int rpn_pre_nms_top_n;
 52 |   int rpn_min_size;
 53 |   int feature_stride;
 54 |   bool iou_loss;
 55 |   uint64_t workspace;
 56 |   int num_class;
 57 | 
 58 |   DMLC_DECLARE_PARAMETER(GenProposalParam) {
 59 |     DMLC_DECLARE_FIELD(rpn_pre_nms_top_n).set_default(6000)
 60 |     .describe("Number of top scoring boxes to keep after applying NMS to RPN proposals");
 61 |     DMLC_DECLARE_FIELD(rpn_min_size).set_default(16)
 62 |     .describe("Minimum height or width in proposal");
 63 |     DMLC_DECLARE_FIELD(feature_stride).set_default(16)
 64 |     .describe("The size of the receptive field each unit in the convolution layer of the rpn,"
 65 |               "for example the product of all stride's prior to this layer.");
 66 |     DMLC_DECLARE_FIELD(iou_loss).set_default(false)
 67 |     .describe("Usage of IoU Loss");
 68 |     DMLC_DECLARE_FIELD(workspace).set_default(256)
 69 |     .describe("Workspace for proposal in MB, default to 256");
 70 |     DMLC_DECLARE_FIELD(num_class).set_default(2)
 71 |     .describe("The number of classes");
 72 |   }
 73 | };
 74 | 
// Factory for the device-specific operator. Only declared here; the
// specializations for each `xpu` are defined outside this header.
template<typename xpu>
Operator *CreateOp(GenProposalParam param);
 77 | 
 78 | #if DMLC_USE_CXX11
 79 | class GenProposalProp : public OperatorProperty {
 80 |  public:
 81 |   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
 82 |     param_.Init(kwargs);
 83 |   }
 84 | 
 85 |   std::map<std::string, std::string> GetParams() const override {
 86 |     return param_.__DICT__();
 87 |   }
 88 | 
 89 |   bool InferShape(std::vector<TShape> *in_shape,
 90 |                   std::vector<TShape> *out_shape,
 91 |                   std::vector<TShape> *aux_shape) const override {
 92 |     using namespace mshadow;
 93 |     CHECK_EQ(in_shape->size(), 4) << "Input:[cls_prob, bbox_pred, im_info, anchors]";
 94 |     const TShape &dshape = in_shape->at(gen_proposal::kClsProb);
 95 |     if (dshape.ndim() == 0) return false;
 96 |     Shape<4> bbox_pred_shape;
 97 |     bbox_pred_shape = Shape4(dshape[0], dshape[1] / param_.num_class * 4, dshape[2], dshape[3]);
 98 |     SHAPE_ASSIGN_CHECK(*in_shape, gen_proposal::kBBoxPred,
 99 |                        bbox_pred_shape);
100 |     Shape<2> im_info_shape;
101 |     im_info_shape = Shape2(dshape[0], 3);
102 |     SHAPE_ASSIGN_CHECK(*in_shape, gen_proposal::kImInfo, im_info_shape);
103 |     Shape<2> anchors_shape;
104 |     anchors_shape = Shape2(dshape[2] * dshape[3] * dshape[1] / 2,  4);
105 |     SHAPE_ASSIGN_CHECK(*in_shape, gen_proposal::kAnchor, anchors_shape);
106 |     out_shape->clear();
107 |     // output
108 |     out_shape->push_back(Shape3(dshape[0], param_.rpn_pre_nms_top_n, 5));
109 |     return true;
110 |   }
111 | 
112 |   OperatorProperty* Copy() const override {
113 |     auto ptr = new GenProposalProp();
114 |     ptr->param_ = param_;
115 |     return ptr;
116 |   }
117 | 
118 |   std::string TypeString() const override {
119 |     return "_contrib_GenProposal";
120 |   }
121 | 
122 |   std::vector<ResourceRequest> ForwardResource(
123 |       const std::vector<TShape> &in_shape) const override {
124 |     return {ResourceRequest::kTempSpace};
125 |   }
126 | 
127 |   std::vector<int> DeclareBackwardDependency(
128 |     const std::vector<int> &out_grad,
129 |     const std::vector<int> &in_data,
130 |     const std::vector<int> &out_data) const override {
131 |     return {};
132 |   }
133 | 
  // The operator produces exactly one output (see ListOutputs).
  int NumOutputs() const override {
    return 1;
  }
137 | 
  // Names of the four inputs, in the order expected by InferShape.
  std::vector<std::string> ListArguments() const override {
    return {"cls_prob", "bbox_pred", "im_info", "anchors"};
  }
141 | 
  // Name of the single output.
  std::vector<std::string> ListOutputs() const override {
    return {"output"};
  }
145 | 
146 |   Operator* CreateOperator(Context ctx) const override;
147 | 
148 |  private:
149 |   GenProposalParam param_;
150 | };  // class GenProposalProp
151 | 
152 | #endif  // DMLC_USE_CXX11
153 | }  // namespace op
154 | }  // namespace mxnet
155 | 
156 | #endif  //  MXNET_OPERATOR_CONTRIB_GENERATE_PROPOSAL_INL_H_
157 | 


--------------------------------------------------------------------------------
/core/detection_metric.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import numpy as np
  3 | 
  4 | 
  5 | class LossWithIgnore(mx.metric.EvalMetric):
  6 |     def __init__(self, name, output_names, label_names, ignore_label=-1):
  7 |         super().__init__(name, output_names, label_names)
  8 |         self.ignore_label = ignore_label
  9 | 
 10 |     def update(self, labels, preds):
 11 |         raise NotImplementedError
 12 | 
 13 | 
 14 | class FgLossWithIgnore(LossWithIgnore):
 15 |     def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1):
 16 |         super().__init__(name, output_names, label_names, ignore_label)
 17 |         self.bg_label = bg_label
 18 | 
 19 |     def update(self, labels, preds):
 20 |         raise NotImplementedError
 21 | 
 22 | 
 23 | class AccWithIgnore(LossWithIgnore):
 24 |     def __init__(self, name, output_names, label_names, ignore_label=-1):
 25 |         super().__init__(name, output_names, label_names, ignore_label)
 26 | 
 27 |     def update(self, labels, preds):
 28 |         if len(preds) == 1 and len(labels) == 1:
 29 |             pred = preds[0]
 30 |             label = labels[0]
 31 |         elif len(preds) == 2:
 32 |             pred = preds[0]
 33 |             label = preds[1]
 34 |         else:
 35 |             raise Exception(
 36 |                 "unknown loss output: len(preds): {}, len(labels): {}".format(
 37 |                     len(preds), len(labels)
 38 |                 )
 39 |             )
 40 | 
 41 |         pred_label = mx.ndarray.argmax_channel(pred).astype('int32').asnumpy().reshape(-1)
 42 |         label = label.astype('int32').asnumpy().reshape(-1)
 43 | 
 44 |         keep_inds = np.where(label != self.ignore_label)[0]
 45 |         pred_label = pred_label[keep_inds]
 46 |         label = label[keep_inds]
 47 | 
 48 |         self.sum_metric += np.sum(pred_label == label)
 49 |         self.num_inst += len(pred_label)
 50 | 
 51 | 
 52 | class FgAccWithIgnore(FgLossWithIgnore):
 53 |     def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1):
 54 |         super().__init__(name, output_names, label_names, bg_label, ignore_label)
 55 | 
 56 |     def update(self, labels, preds):
 57 |         pred = preds[0]
 58 |         label = labels[0]
 59 | 
 60 |         pred_label = mx.ndarray.argmax_channel(pred).astype('int32').asnumpy().reshape(-1)
 61 |         label = label.astype('int32').asnumpy().reshape(-1)
 62 | 
 63 |         keep_inds = np.where((label != self.bg_label) & (label != self.ignore_label))[0]
 64 |         pred_label = pred_label[keep_inds]
 65 |         label = label[keep_inds]
 66 | 
 67 |         self.sum_metric += np.sum(pred_label == label)
 68 |         self.num_inst += len(pred_label)
 69 | 
 70 | 
 71 | class CeWithIgnore(LossWithIgnore):
 72 |     def __init__(self, name, output_names, label_names, ignore_label=-1):
 73 |         super().__init__(name, output_names, label_names, ignore_label)
 74 | 
 75 |     def update(self, labels, preds):
 76 |         pred = preds[0]
 77 |         label = labels[0]
 78 | 
 79 |         label = label.astype('int32').asnumpy().reshape(-1)
 80 |         pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1))
 81 |         pred = pred.reshape((label.shape[0], -1))  # -1 x c
 82 | 
 83 |         keep_inds = np.where(label != self.ignore_label)[0]
 84 |         label = label[keep_inds]
 85 |         prob = pred[keep_inds, label]
 86 | 
 87 |         prob += 1e-14
 88 |         ce_loss = -1 * np.log(prob)
 89 |         ce_loss = np.sum(ce_loss)
 90 |         self.sum_metric += ce_loss
 91 |         self.num_inst += label.shape[0]
 92 | 
 93 | 
 94 | class FgCeWithIgnore(FgLossWithIgnore):
 95 |     def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1):
 96 |         super().__init__(name, output_names, label_names, bg_label, ignore_label)
 97 | 
 98 |     def update(self, labels, preds):
 99 |         pred = preds[0]
100 |         label = labels[0]
101 | 
102 |         label = label.astype('int32').asnumpy().reshape(-1)
103 |         pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1))
104 |         pred = pred.reshape((label.shape[0], -1))  # -1 x c
105 | 
106 |         keep_inds = np.where((label != self.ignore_label) & (label != self.bg_label))[0]
107 |         label = label[keep_inds]
108 |         prob = pred[keep_inds, label]
109 | 
110 |         prob += 1e-14
111 |         ce_loss = -1 * np.log(prob)
112 |         ce_loss = np.sum(ce_loss)
113 |         self.sum_metric += ce_loss
114 |         self.num_inst += label.shape[0]
115 | 
116 | 
class L1(FgLossWithIgnore):
    """Sum of the predicted loss values, normalised by the number of foreground labels."""

    def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1):
        super().__init__(name, output_names, label_names, bg_label, ignore_label)

    def update(self, labels, preds):
        """Add one batch to the metric.

        The label is taken from ``labels[0]`` when exactly one prediction and
        one label are given, and from ``preds[1]`` when two predictions are
        given. Any other combination raises ``Exception``.
        """
        num_preds, num_labels = len(preds), len(labels)
        if num_preds == 1 and num_labels == 1:
            loss_values = preds[0].asnumpy()
            target = labels[0].asnumpy()
        elif num_preds == 2:
            loss_values = preds[0].asnumpy()
            target = preds[1].asnumpy()
        else:
            raise Exception(
                "unknown loss output: len(preds): {}, len(labels): {}".format(
                    num_preds, num_labels
                )
            )

        target = target.reshape(-1)
        # Only labels that are neither background nor ignored count as instances.
        is_foreground = (target != self.bg_label) & (target != self.ignore_label)
        foreground_count = len(np.where(is_foreground)[0])

        self.sum_metric += np.sum(loss_values)
        self.num_inst += foreground_count
140 | 
141 | 
class SigmoidCrossEntropy(mx.metric.EvalMetric):
    """Mean sigmoid cross-entropy, averaged over update calls."""

    def __init__(self, name, output_names, label_names):
        super().__init__(name, output_names, label_names)

    def update(self, labels, preds):
        """Add the mean loss of one batch to the metric.

        ``preds[0]`` holds the logits and ``preds[1]`` the targets; ``labels``
        is not used. The loss is evaluated as
        ``relu(x) - x * z + log1p(exp(-abs(x)))``.
        """
        logit = preds[0].reshape(-1)
        target = preds[1].reshape(-1)
        loss = mx.nd.relu(logit) - logit * target + mx.nd.log1p(mx.nd.exp(-mx.nd.abs(logit)))

        # Accumulate a scalar rather than a 1-element array so sum_metric keeps
        # the same scalar form as the other metrics in this module.
        self.sum_metric += loss.mean().asscalar()
        self.num_inst += 1


--------------------------------------------------------------------------------
/operator_py/cython/setup.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | 
  8 | import os
  9 | from os.path import join as pjoin
 10 | from setuptools import setup
 11 | from distutils.extension import Extension
 12 | from Cython.Distutils import build_ext
 13 | import numpy as np
 14 | 
 15 | 
 16 | def find_in_path(name, path):
 17 |     "Find a file in a search path"
 18 |     # Adapted fom
 19 |     # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
 20 |     for dir in path.split(os.pathsep):
 21 |         binpath = pjoin(dir, name)
 22 |         if os.path.exists(binpath):
 23 |             return os.path.abspath(binpath)
 24 |     return None
 25 | 
 26 | 
 27 | def locate_cuda():
 28 |     """Locate the CUDA environment on the system
 29 | 
 30 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
 31 |     and values giving the absolute path to each directory.
 32 | 
 33 |     Starts by looking for the CUDAHOME env variable. If not found, everything
 34 |     is based on finding 'nvcc' in the PATH.
 35 |     """
 36 | 
 37 |     # first check if the CUDAHOME env variable is in use
 38 |     if 'CUDAHOME' in os.environ:
 39 |         home = os.environ['CUDAHOME']
 40 |         nvcc = pjoin(home, 'bin', 'nvcc')
 41 |     else:
 42 |         # otherwise, search the PATH for NVCC
 43 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
 44 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
 45 |         if nvcc is None:
 46 |             raise EnvironmentError('The nvcc binary could not be '
 47 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
 48 |         home = os.path.dirname(os.path.dirname(nvcc))
 49 | 
 50 |     cudaconfig = {'home':home, 'nvcc':nvcc,
 51 |                   'include': pjoin(home, 'include'),
 52 |                   'lib64': pjoin(home, 'lib64')}
 53 |     for k, v in cudaconfig.items():
 54 |         if not os.path.exists(v):
 55 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
 56 | 
 57 |     return cudaconfig
# Resolved once at import time; locate_cuda() raises EnvironmentError when the
# CUDA toolchain cannot be found, so this script cannot run without it.
CUDA = locate_cuda()


# Obtain the numpy include directory.  This logic works across numpy versions.
try:
    numpy_include = np.get_include()
except AttributeError:
    # Fallback for numpy builds that do not provide get_include().
    numpy_include = np.get_numpy_include()
 66 | 
 67 | 
 68 | def customize_compiler_for_nvcc(self):
 69 |     """inject deep into distutils to customize how the dispatch
 70 |     to gcc/nvcc works.
 71 | 
 72 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
 73 |     injected in, and still have the right customizations (i.e.
 74 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
 75 |     the OO route, I have this. Note, it's kindof like a wierd functional
 76 |     subclassing going on."""
 77 | 
 78 |     # tell the compiler it can processes .cu
 79 |     self.src_extensions.append('.cu')
 80 | 
 81 |     # save references to the default compiler_so and _comple methods
 82 |     default_compiler_so = self.compiler_so
 83 |     super = self._compile
 84 | 
 85 |     # now redefine the _compile method. This gets executed for each
 86 |     # object but distutils doesn't have the ability to change compilers
 87 |     # based on source extension: we add it.
 88 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
 89 |         if os.path.splitext(src)[1] == '.cu':
 90 |             # use the cuda for .cu files
 91 |             self.set_executable('compiler_so', CUDA['nvcc'])
 92 |             # use only a subset of the extra_postargs, which are 1-1 translated
 93 |             # from the extra_compile_args in the Extension class
 94 |             postargs = extra_postargs['nvcc']
 95 |         else:
 96 |             postargs = extra_postargs['gcc']
 97 | 
 98 |         super(obj, src, ext, cc_args, postargs, pp_opts)
 99 |         # reset the default compiler_so, which we might have changed for cuda
100 |         self.compiler_so = default_compiler_so
101 | 
102 |     # inject our redefined _compile method into the class
103 |     self._compile = _compile
104 | 
105 | 
106 | # run the customize_compiler
class custom_build_ext(build_ext):
    """build_ext command that patches the compiler for nvcc before building."""

    def build_extensions(self):
        # The compiler must be patched before any extension is compiled.
        customize_compiler_for_nvcc(self.compiler)
        super().build_extensions()
111 | 
112 | 
# Extensions to build. extra_compile_args is a dict keyed by compiler name
# ('gcc' / 'nvcc'); the patched _compile picks the matching entry per source.
ext_modules = [
    Extension(
        "bbox",
        ["bbox.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    Extension(
        "cpu_nms",
        ["cpu_nms.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs = [numpy_include]
    ),
    Extension('gpu_nms',
        ['nms_kernel.cu', 'gpu_nms.pyx'],
        library_dirs=[CUDA['lib64']],
        libraries=['cudart'],
        language='c++',
        runtime_library_dirs=[CUDA['lib64']],
        # This syntax is specific to this build system: certain compiler args
        # are used only with nvcc and not with gcc. The implementation of this
        # trick is in customize_compiler_for_nvcc() above.
        extra_compile_args={'gcc': ["-Wno-unused-function"],
                            'nvcc': ['-arch=sm_35',
                                     '--ptxas-options=-v',
                                     '-c',
                                     '--compiler-options',
                                     "'-fPIC'"]},
        include_dirs = [numpy_include, CUDA['include']]
    ),
]

setup(
    name='frcnn_cython',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
151 | 


--------------------------------------------------------------------------------
/models/efficientnet/builder.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import mxnext as X
  3 | from mxnext import dwconv, conv, relu6, add, global_avg_pool, sigmoid, to_fp16, to_fp32
  4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit
  5 | from symbol.builder import Backbone
  6 | 
  7 | 
  8 | def _make_divisible(dividend, divisor):
  9 |     if dividend % divisor == 0:
 10 |         return dividend
 11 |     else:
 12 |         return (dividend // divisor + 1) * divisor
 13 | 
 14 | round32 = lambda dividend: _make_divisible(dividend, 32)
 15 | 
 16 | 
 17 | def se(input, prefix, f_down, f_up):
 18 |     with mx.name.Prefix(prefix + "_"):
 19 |         gap = mx.sym.mean(input, axis=-1, keepdims=True)
 20 |         gap = mx.sym.mean(gap, axis=-2, keepdims=True)
 21 |         fc1 = conv(gap, name="fc1", filter=f_down)
 22 |         fc1 = relu6(fc1, name="fc1_relu")
 23 |         fc2 = conv(fc1, name="fc2", filter=f_up)
 24 |         att = sigmoid(fc2, name="sigmoid")
 25 |         input = mx.sym.broadcast_mul(input, att, name="mul")
 26 | 
 27 |     return input
 28 | 
 29 | 
 30 | def convnormrelu(input, prefix, kernel, f_in, f_out, stride, proj, norm, **kwargs):
 31 |     with mx.name.Prefix(prefix + "_"):
 32 |         conv1 = conv(input, name="conv1", filter=f_out, kernel=kernel, stride=stride, no_bias=False)
 33 |         bn1 = norm(conv1, name="bn1")
 34 |         relu1 = relu6(bn1, name="relu1")
 35 |     return relu1
 36 | 
 37 | 
 38 | def mbconv(input, prefix, kernel, f_in, f_out, stride, proj, bottleneck_ratio, norm, **kwargs):
 39 |     with mx.name.Prefix(prefix + "_"):
 40 |         if bottleneck_ratio != 1:
 41 |             conv1 = conv(input, name="conv1", filter=f_in * bottleneck_ratio, no_bias=False)
 42 |             bn1 = norm(conv1, name="bn1")
 43 |             relu1 = relu6(bn1, name="relu1")
 44 |         else:
 45 |             relu1 = input
 46 | 
 47 |         conv2 = dwconv(relu1, name="conv2", filter=f_in * bottleneck_ratio,
 48 |             kernel=kernel, stride=stride, no_bias=False)
 49 |         bn2 = norm(conv2, name="bn2")
 50 |         relu2 = relu6(bn2, name="relu2")
 51 |         relu2 = se(relu2, prefix=prefix + "_se2", f_down=f_in//4, f_up=f_in * bottleneck_ratio)
 52 | 
 53 |         conv3 = conv(relu2, name="conv3", filter=f_out, no_bias=False)
 54 |         bn3 = norm(conv3, name="bn3")
 55 | 
 56 |         if proj:
 57 |             return bn3
 58 |         else:
 59 |             return bn3 + input
 60 | 
 61 | 
 62 | mbc1 = lambda input, prefix, kernel, f_in, f_out, stride, proj, norm, **kwargs: \
 63 |     mbconv(input, prefix, kernel, f_in, f_out, stride, proj, 1, norm, **kwargs)
 64 | mbc6 = lambda input, prefix, kernel, f_in, f_out, stride, proj, norm, **kwargs: \
 65 |     mbconv(input, prefix, kernel, f_in, f_out, stride, proj, 6, norm, **kwargs)
 66 | 
 67 | 
 68 | def efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs):
 69 |     stages = []
 70 |     for i, (u, fo, fi, s, k, c) in enumerate(zip(us, fos, fis, ss, ks, cs), start=1):
 71 |         for j in range(1, u + 1):
 72 |             s = s if j == 1 else 1
 73 |             proj = True if j == 1 else False
 74 |             fi = fi if j == 1 else fo
 75 |             data = c(data, prefix="stage%s_unit%s" % (i, j), f_in=fi, f_out=fo,
 76 |                 kernel=k, stride=s, proj=proj, norm=norm)
 77 |         stages.append(data)
 78 |     return stages
 79 | 
 80 | 
 81 | def efficientnet_b4(data, norm, **kwargs):
 82 |     # 1.5 GFLOPs
 83 |     us = [1, 2, 4, 4, 6, 6, 8, 2, 1]
 84 |     fos = [48, 24, 32, 56, 112, 160, 272, 448, 1792]
 85 |     fis = [0] + fos[:-1]
 86 |     ss = [2, 1, 2, 2, 2, 1, 2, 1, 1]
 87 |     ks = [3, 3, 3, 5, 3, 5, 5, 3, 1]
 88 |     cs = [convnormrelu, mbc1, mbc6, mbc6, mbc6, mbc6, mbc6, mbc6, convnormrelu]
 89 |     return efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs)
 90 | 
 91 | 
 92 | def efficientnet_b5(data, norm, **kwargs):
 93 |     # 2.3 GFLOPs
 94 |     us = [1, 3, 5, 5, 7, 7, 9, 3, 1]
 95 |     fos = [48, 24, 40, 64, 128, 172, 304, 512, 2048]
 96 |     fis = [0] + fos[:-1]
 97 |     ss = [2, 1, 2, 2, 2, 1, 2, 1, 1]
 98 |     ks = [3, 3, 3, 5, 3, 5, 5, 3, 1]
 99 |     # ks = [3, 5, 5, 5, 5, 5, 5, 5, 1]
100 |     cs = [convnormrelu, mbc1, mbc6, mbc6, mbc6, mbc6, mbc6, mbc6, convnormrelu]
101 |     return efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs)
102 | 
103 | 
def efficientnet_b6(data, norm, **kwargs):
    """Build the B6 configuration and return its nine stage outputs.

    Extra keyword arguments are accepted but not used.
    """
    # 3.3 GFLOPs
    out_filters = [56, 32, 40, 72, 144, 200, 344, 576, 2304]
    return efficientnet_helper(
        data,
        norm,
        us=[1, 3, 6, 6, 8, 8, 11, 3, 1],
        fos=out_filters,
        # Each stage consumes the previous stage's output; the first entry is
        # a placeholder because the stem builder ignores f_in.
        fis=[0] + out_filters[:-1],
        ss=[2, 1, 2, 2, 2, 1, 2, 1, 1],
        ks=[3, 3, 3, 5, 3, 5, 5, 3, 1],
        cs=[convnormrelu, mbc1] + [mbc6] * 6 + [convnormrelu],
    )
113 | 
114 | 
def efficientnet_b7(data, norm, **kwargs):
    """Build the B7 configuration and return its nine stage outputs.

    Extra keyword arguments are accepted but not used.
    """
    # 5.1 GFLOPs
    out_filters = [64, 32, 48, 80, 160, 224, 384, 640, 2560]
    return efficientnet_helper(
        data,
        norm,
        us=[1, 4, 7, 7, 10, 10, 13, 4, 1],
        fos=out_filters,
        # Each stage consumes the previous stage's output; the first entry is
        # a placeholder because the stem builder ignores f_in.
        fis=[0] + out_filters[:-1],
        ss=[2, 1, 2, 2, 2, 1, 2, 1, 1],
        ks=[3, 3, 3, 5, 3, 5, 5, 3, 1],
        cs=[convnormrelu, mbc1] + [mbc6] * 6 + [convnormrelu],
    )
124 | 
125 | 
def efficientnet_fpn_builder(efficientnet):
    """Return a Backbone subclass that exposes four stages of ``efficientnet``.

    ``efficientnet`` is a builder called as ``efficientnet(data, normalizer,
    params=p)`` that returns a sequence of stage outputs; entries 2, 3, 5 and
    8 of that sequence become the backbone features.
    """
    feature_stage_indices = (2, 3, 5, 8)

    class EfficientNetFPN(Backbone):
        def __init__(self, pBackbone):
            super().__init__(pBackbone)
            params = self.p
            net_input = X.var("data")
            if params.fp16:
                net_input = net_input.astype("float16")
            stage_outputs = efficientnet(net_input, params.normalizer, params=params)
            self.symbol = tuple(stage_outputs[i] for i in feature_stage_indices)

        def get_rpn_feature(self):
            # RPN and RCNN heads share the same feature tuple.
            return self.symbol

        def get_rcnn_feature(self):
            return self.symbol

    return EfficientNetFPN
143 | 
144 | 
# One backbone class per EfficientNet variant, each produced by
# efficientnet_fpn_builder from the matching configuration function.
EfficientNetB4FPN = efficientnet_fpn_builder(efficientnet_b4)
EfficientNetB5FPN = efficientnet_fpn_builder(efficientnet_b5)
EfficientNetB6FPN = efficientnet_fpn_builder(efficientnet_b6)
EfficientNetB7FPN = efficientnet_fpn_builder(efficientnet_b7)
149 | 
150 | 
if __name__ == "__main__":
    # Manual check: build the B4 network and print a summary of its last
    # stage for a single 3x224x224 input.
    data = X.var("data")
    norm = X.normalizer_factory()
    *_, last = efficientnet_b4(data, norm)
    mx.viz.print_summary(last, shape={"data": (1, 3, 224, 224)})
156 | 


--------------------------------------------------------------------------------
/models/FPN/input.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | from __future__ import print_function
  3 | 
  4 | import numpy as np
  5 | import copy
  6 | 
  7 | from core.detection_input import AnchorTarget2D
  8 | 
  9 | 
 10 | class PyramidAnchorTarget2DBase(AnchorTarget2D):
 11 |     """
 12 |     input: image_meta: tuple(h, w, scale)
 13 |            gt_bbox, ndarry(max_num_gt, 4)
 14 |     output: anchor_label, ndarray(num_anchor * h * w)
 15 |             anchor_bbox_target, ndarray(num_anchor * h * w, 4)
 16 |             anchor_bbox_weight, ndarray(num_anchor * h * w, 4)
 17 |     """
 18 | 
 19 |     def apply(self, input_record):
 20 |         p = self.p
 21 | 
 22 |         im_info = input_record["im_info"]
 23 |         gt_bbox = input_record["gt_bbox"]
 24 |         assert isinstance(gt_bbox, np.ndarray)
 25 |         assert gt_bbox.dtype == np.float32
 26 |         valid = np.where(gt_bbox[:, 0] != -1)[0]
 27 |         gt_bbox = gt_bbox[valid]
 28 | 
 29 |         if gt_bbox.shape[1] == 5:
 30 |             gt_bbox = gt_bbox[:, :4]
 31 | 
 32 |         valid_index, valid_anchor = self._gather_valid_anchor(im_info)
 33 |         cls_label, anchor_label = \
 34 |             self._assign_label_to_anchor(valid_anchor, gt_bbox,
 35 |                                          p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr)
 36 |         self._sample_anchor(cls_label, p.sample.image_anchor, p.sample.pos_fraction)
 37 |         reg_target, reg_weight = self._cal_anchor_target(cls_label, valid_anchor, gt_bbox, anchor_label)
 38 |         cls_label, reg_target, reg_weight = \
 39 |             self._scatter_valid_anchor(valid_index, cls_label, reg_target, reg_weight)
 40 | 
 41 |         """
 42 |         cls_label: (all_anchor,)
 43 |         reg_target: (all_anchor, 4)
 44 |         reg_weight: (all_anchor, 4)
 45 |         """
 46 |         input_record["rpn_cls_label"] = cls_label
 47 |         input_record["rpn_reg_target"] = reg_target
 48 |         input_record["rpn_reg_weight"] = reg_weight
 49 | 
 50 |         return input_record["rpn_cls_label"], \
 51 |                input_record["rpn_reg_target"], \
 52 |                input_record["rpn_reg_weight"]
 53 | 
 54 | 
 55 | class PyramidAnchorTarget2D(PyramidAnchorTarget2DBase):
 56 |     """
 57 |     input: image_meta: tuple(h, w, scale)
 58 |            gt_bbox, ndarry(max_num_gt, 4)
 59 |     output: anchor_label, ndarray(num_anchor * h * w)
 60 |             anchor_bbox_target, ndarray(num_anchor * 4, h * w)
 61 |             anchor_bbox_weight, ndarray(num_anchor * 4, h * w)
 62 |     """
 63 | 
 64 |     def __init__(self, pAnchor):
 65 |         super().__init__(pAnchor)
 66 | 
 67 |         self.pyramid_levels = len(self.p.generate.stride)
 68 |         self.p_list = [copy.deepcopy(self.p) for _ in range(self.pyramid_levels)]
 69 | 
 70 |         pyramid_stride = self.p.generate.stride
 71 |         pyramid_short = self.p.generate.short
 72 |         pyramid_long = self.p.generate.long
 73 | 
 74 |         for i in range(self.pyramid_levels):
 75 |             self.p_list[i].generate.stride = pyramid_stride[i]
 76 |             self.p_list[i].generate.short = pyramid_short[i]
 77 |             self.p_list[i].generate.long = pyramid_long[i]
 78 | 
 79 |         # generate anchors for multi-leval feature map
 80 |         self.anchor_target_2d_list = [PyramidAnchorTarget2DBase(p) for p in self.p_list]
 81 |         self.anchor_target_2d = PyramidAnchorTarget2DBase(self.p_list[0])
 82 | 
 83 |         self.anchor_target_2d.v_all_anchor = self.v_all_anchor
 84 |         self.anchor_target_2d.h_all_anchor = self.h_all_anchor
 85 | 
 86 |     @property
 87 |     def v_all_anchor(self):
 88 |         anchors_list = [anchor_target_2d.v_all_anchor for anchor_target_2d in self.anchor_target_2d_list]
 89 |         anchors = np.concatenate(anchors_list)
 90 |         return anchors
 91 | 
 92 |     @property
 93 |     def h_all_anchor(self):
 94 |         anchors_list = [anchor_target_2d.h_all_anchor for anchor_target_2d in self.anchor_target_2d_list]
 95 |         anchors = np.concatenate(anchors_list)
 96 |         return anchors
 97 | 
 98 |     def apply(self, input_record):
 99 | 
100 |         anchor_size = [0] + [x.h_all_anchor.shape[0] for x in self.anchor_target_2d_list]
101 |         anchor_size = np.cumsum(anchor_size)
102 |         cls_label, reg_target, reg_weight = \
103 |             self.anchor_target_2d.apply(input_record)
104 | 
105 |         im_info = input_record["im_info"]
106 |         h, w = im_info[:2]
107 | 
108 |         cls_label_list = []
109 |         reg_target_list = []
110 |         reg_weight_list = []
111 |         for i in range(self.pyramid_levels):
112 |             p = self.anchor_target_2d_list[i].p
113 | 
114 |             cls_label_level = cls_label[anchor_size[i]:anchor_size[i + 1]]
115 |             reg_target_level = reg_target[anchor_size[i]:anchor_size[i + 1]]
116 |             reg_weight_level = reg_weight[anchor_size[i]:anchor_size[i + 1]]
117 |             """
118 |             label: (h * w * A) -> (A * h * w)
119 |             bbox_target: (h * w * A, 4) -> (A * 4, h * w)
120 |             bbox_weight: (h * w * A, 4) -> (A * 4, h * w)
121 |             """
122 |             if h >= w:
123 |                 fh, fw = p.generate.long, p.generate.short
124 |             else:
125 |                 fh, fw = p.generate.short, p.generate.long
126 |             cls_label_level = cls_label_level.reshape((fh, fw, -1)).transpose(2, 0, 1)
127 |             reg_target_level = reg_target_level.reshape((fh, fw, -1)).transpose(2, 0, 1)
128 |             reg_weight_level = reg_weight_level.reshape((fh, fw, -1)).transpose(2, 0, 1)
129 | 
130 |             cls_label_level = cls_label_level.reshape(-1, fh * fw)
131 |             reg_target_level = reg_target_level.reshape(-1, fh * fw)
132 |             reg_weight_level = reg_weight_level.reshape(-1, fh * fw)
133 | 
134 |             cls_label_list.append(cls_label_level)
135 |             reg_target_list.append(reg_target_level)
136 |             reg_weight_list.append(reg_weight_level)
137 | 
138 |         cls_label = np.concatenate(cls_label_list, axis=1).reshape(-1)
139 |         reg_target = np.concatenate(reg_target_list, axis=1)
140 |         reg_weight = np.concatenate(reg_weight_list, axis=1)
141 | 
142 |         input_record["rpn_cls_label"] = cls_label
143 |         input_record["rpn_reg_target"] = reg_target
144 |         input_record["rpn_reg_weight"] = reg_weight
145 | 
146 |         return input_record["rpn_cls_label"], \
147 |                input_record["rpn_reg_target"], \
148 |                input_record["rpn_reg_weight"]
149 | 


--------------------------------------------------------------------------------
/detection_img.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from core.detection_module import DetModule
  4 | from core.detection_input import Loader
  5 | from utils.load_model import load_checkpoint
  6 | from operator_py.nms import py_nms_wrapper
  7 | from utils import callback
  8 | from mxnet.base import _as_list
  9 | 
 10 | from six.moves import reduce
 11 | from six.moves.queue import Queue
 12 | from threading import Thread
 13 | import argparse
 14 | import importlib
 15 | import mxnet as mx
 16 | import numpy as np
 17 | import six.moves.cPickle as pkl
 18 | import time
 19 | import json
 20 | import cv2
 21 | 
 22 | 
 23 | def parse_args():
 24 |     parser = argparse.ArgumentParser(description='Test Detection')
 25 |     # general
 26 |     parser.add_argument('--config', help='config file path', type=str)
 27 |     parser.add_argument('--gpu_id', help='gpu_id', type=int, default=0)
 28 |     parser.add_argument('--epoch', help='load params epoch', type=int, default=0)
 29 |     parser.add_argument('--thr', help='detection threshold', type=float, default=0.80)
 30 |     parser.add_argument('--path', help='images path to detect', type=str)
 31 |     args = parser.parse_args()
 32 | 
 33 |     config = importlib.import_module(args.config.replace('.py', '').replace('/', '.'))
 34 |     return args, config
 35 | 
 36 | if __name__ == "__main__":
 37 |     # os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
 38 | 
 39 |     args, config = parse_args()
 40 | 
 41 |     pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
 42 |     transform, data_name, label_name, metric_list = config.get_config(is_train=False)
 43 | 
 44 |     nms = py_nms_wrapper(pTest.nms.thr)
 45 |     sym = pModel.test_symbol
 46 |     pshort = 800
 47 |     plong  = 2000
 48 | 
 49 |     arg_params, aux_params = load_checkpoint(pTest.model.prefix, args.epoch)
 50 |     mod = DetModule(sym, data_names=["data", "im_info", "im_id", "rec_id"], context=mx.gpu(args.gpu_id))
 51 |     provide_data = [("data", (1, 3, pshort, plong)), ("im_info", (1, 3)), ("im_id", (1,)), ("rec_id", (1,))]
 52 |     mod.bind(data_shapes=provide_data, for_training=False)
 53 |     mod.set_params(arg_params, aux_params, allow_extra=False)
 54 | 
 55 |     image_list = []
 56 |     if os.path.isfile(args.path):
 57 |         if ".txt" in args.path:
 58 |             list_file = open(args.path, 'r')
 59 |             list_lines = list_file.readlines()
 60 |             list_file.close()
 61 |             (fpath, fname) = os.path.split(args.path)
 62 |             for aline in list_lines:
 63 |                 uints = aline.split(' ')
 64 |                 imgpath = os.path.join(fpath, uints[0])
 65 |                 image_list.append(imgpath)
 66 |         else:
 67 |             image_list.append(args.path)
 68 |     else:
 69 |         for fname in os.listdir(args.path):
 70 |             fpath = os.path.join(args.path, fname)
 71 |             if os.path.isfile(fpath):
 72 |                 image_list.append(fpath)
 73 |     
 74 |     for imgpath in image_list:
 75 |         img   = cv2.imread(imgpath, cv2.IMREAD_COLOR)
 76 |         image = img[:, :, ::-1]
 77 |         short = image.shape[0]
 78 |         long  = image.shape[1]
 79 |         scale = min(pshort / short, plong / long)
 80 |         image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
 81 |         # exactly as opencv
 82 |         h, w = image.shape[:2]
 83 |         im_info = (h, w, scale)
 84 |         # shape = (plong, pshort, 3) if h >= w else (pshort, plong, 3)
 85 |         shape = (pshort, plong, 3)
 86 |         padded_image = np.zeros(shape, dtype=np.float32)
 87 |         padded_image[:h, :w] = image
 88 |         padded_image = padded_image.transpose((2, 0, 1))
 89 |         img_array = []
 90 |         img_array.append(padded_image)
 91 |         iminfo_array = []
 92 |         iminfo_array.append(im_info)
 93 |         im_id = mx.nd.array([1])
 94 |         rec_id = mx.nd.array([1])
 95 |         data = [mx.nd.array(img_array)]
 96 |         data.append(mx.nd.array(iminfo_array))
 97 |         data.append(im_id)
 98 |         data.append(rec_id)
 99 |         mbatch = mx.io.DataBatch(data=data, provide_data=provide_data)
100 |     
101 |         start_t = time.time()
102 |         mod.forward(mbatch, is_train=False)
103 |         outs = [x.asnumpy() for x in mod.get_outputs()]
104 |         im_info   = outs[2]       # h_raw, w_raw, scale
105 |         cls_score = outs[3]
106 |         bbox_xyxy = outs[4]
107 |         if cls_score.ndim == 3:
108 |             cls_score = cls_score[0]
109 |             bbox_xyxy = bbox_xyxy[0]
110 |         bbox_xyxy = bbox_xyxy / scale       # scale to original image scale
111 |         cls_score = cls_score[:, 1:]        # remove background score
112 |         # TODO: the output shape of class_agnostic box is [n, 4], while class_aware box is [n, 4 * (1 + class)]
113 |         bbox_xyxy = bbox_xyxy[:, 4:] if bbox_xyxy.shape[1] != 4 else bbox_xyxy
114 | 
115 |         final_dets = {}
116 |         for cid in range(cls_score.shape[1]):
117 |             score = cls_score[:, cid]
118 |             if bbox_xyxy.shape[1] != 4:
119 |                 cls_box = bbox_xyxy[:, cid * 4:(cid + 1) * 4]
120 |             else:
121 |                 cls_box = bbox_xyxy
122 |             valid_inds = np.where(score > args.thr)[0]
123 |             box   = cls_box[valid_inds]
124 |             score = score[valid_inds]
125 |             det = np.concatenate((box, score.reshape(-1, 1)), axis=1).astype(np.float32)
126 |             final_dets[cid] = nms(det)
127 |         end_t = time.time()
128 |         print("detection use: %.3f seconds." % (end_t - start_t))
129 | 
130 |         for cid in final_dets:
131 |             det = final_dets[cid]
132 |             if det.shape[0] == 0:
133 |                 continue
134 |             scores = det[:, -1]
135 |             x1 = det[:, 0]
136 |             y1 = det[:, 1]
137 |             x2 = det[:, 2]
138 |             y2 = det[:, 3]
139 |             for k in range(det.shape[0]):
140 |                 bbox  = [float(x1[k]), float(y1[k]), float(x2[k]), float(y2[k])]
141 |                 score =  float(scores[k])
142 |                 cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
143 |                 # cv2.putText(img, "{}:{:.2}".format(str(cid), score), (int(bbox[0]), int(bbox[1] - 10)), 4, 0.6, (0, 0, 255))
144 |         (filepath, filename) = os.path.split(imgpath)
145 |         cv2.imwrite(filename, img)
146 |     exit()
147 | 
148 | 
149 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/group_norm_helper.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved.
  3 | 
  4 | Redistribution and use in source and binary forms, with or without
  5 | modification, are permitted provided that the following conditions are met:
  6 | 
  7 | 1. Redistributions of source code must retain the above copyright
  8 |    notice, this list of conditions and the following disclaimer.
  9 | 
 10 | 2. Redistributions in binary form must reproduce the above copyright
 11 |    notice, this list of conditions and the following disclaimer in the
 12 |    documentation and/or other materials provided with the distribution.
 13 | 
 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
 15 |    and IDIAP Research Institute nor the names of its contributors may be
 16 |    used to endorse or promote products derived from this software without
 17 |    specific prior written permission.
 18 | 
 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 29 | POSSIBILITY OF SUCH DAMAGE.
 30 | */
 31 | #include <algorithm>
 32 | #include <vector>
 33 | #include <cub/block/block_reduce.cuh>
 34 | #include "../../common/cuda_utils.h"
 35 | 
  36 | // Grid-stride loop: i starts at the thread's global index and advances by the total thread count.
  37 | #define CUDA_1D_KERNEL_LOOP(i, n)                                 \
  38 |   for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
  39 |        i += blockDim.x * gridDim.x)
 40 | 
 41 | constexpr int CAFFE_CUDA_NUM_THREADS = 512;
 42 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096;
 43 | 
 44 | inline int CAFFE_GET_BLOCKS(const int N) {
 45 |   return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS,
 46 |                   CAFFE_MAXIMUM_NUM_BLOCKS);
 47 | }
  48 | // Plain aggregate wrapping a built-in array of N values of type T (no methods).
  49 | template <typename T, int N>
  50 | struct SimpleArray {
  51 |   T data[N];
  52 | };
  53 | // cub::BlockReduce for DType with the block size fixed to CAFFE_CUDA_NUM_THREADS.
  54 | template <typename DType>
  55 | using BlockReduce = cub::BlockReduce<DType, CAFFE_CUDA_NUM_THREADS>;
 56 | 
 57 | 
 58 | constexpr int kCUDATensorMaxDims = 8;
 59 | 
 60 | #define DISPATCH_FUNCTION_BY_VALUE_WITH_TYPE_1(val, Func, T, ...) \
 61 |   do {                                                            \
 62 |     CHECK_LT(val, kCUDATensorMaxDims);                    \
 63 |     switch (val) {                                                \
 64 |       case 1: {                                                   \
 65 |         Func<T, 1>(__VA_ARGS__);                                  \
 66 |         break;                                                    \
 67 |       }                                                           \
 68 |       case 2: {                                                   \
 69 |         Func<T, 2>(__VA_ARGS__);                                  \
 70 |         break;                                                    \
 71 |       }                                                           \
 72 |       case 3: {                                                   \
 73 |         Func<T, 3>(__VA_ARGS__);                                  \
 74 |         break;                                                    \
 75 |       }                                                           \
 76 |       case 4: {                                                   \
 77 |         Func<T, 4>(__VA_ARGS__);                                  \
 78 |         break;                                                    \
 79 |       }                                                           \
 80 |       case 5: {                                                   \
 81 |         Func<T, 5>(__VA_ARGS__);                                  \
 82 |         break;                                                    \
 83 |       }                                                           \
 84 |       case 6: {                                                   \
 85 |         Func<T, 6>(__VA_ARGS__);                                  \
 86 |         break;                                                    \
 87 |       }                                                           \
 88 |       case 7: {                                                   \
 89 |         Func<T, 7>(__VA_ARGS__);                                  \
 90 |         break;                                                    \
 91 |       }                                                           \
 92 |       case 8: {                                                   \
 93 |         Func<T, 8>(__VA_ARGS__);                                  \
 94 |         break;                                                    \
 95 |       }                                                           \
 96 |       default: { break; }                                         \
 97 |     }                                                             \
 98 |   } while (false)
 99 | 
 100 | // Declaration only (defined elsewhere). Presumably fills transpose_axes with a permutation that moves reduce_axes last — TODO confirm.
 101 | void ComputeTransposeAxesForReduceOp(
 102 |     const int num_dims,
 103 |     const int num_reduce_axes,
 104 |     const int* reduce_axes,
 105 |     int* transpose_axes);
 106 | 
 107 | // Declaration only. Presumably writes the strides of dims, permuted by axes, into strides — TODO confirm.
 108 | void ComputeTransposedStrides(
 109 |     const int ndim,
 110 |     const int* dims,
 111 |     const int* axes,
 112 |     int* strides);
 113 | 
 114 | // Declaration only. Presumably reports whether reducing X_dims to Y_dims is row-wise and sets rows/cols — TODO confirm.
 115 | bool IsRowwiseReduce(
 116 |     const int ndim,
 117 |     const int* X_dims,
 118 |     const int* Y_dims,
 119 |     int* rows,
 120 |     int* cols);
 121 | 
 122 | // Declaration only. Same signature as IsRowwiseReduce; presumably the column-wise counterpart — TODO confirm.
 123 | bool IsColwiseReduce(
 124 |     const int ndim,
 125 |     const int* X_dims,
 126 |     const int* Y_dims,
 127 |     int* rows,
 128 |     int* cols);
 129 | 
 130 | // Declaration only. Presumably writes alpha into the N elements of X on the given stream — TODO confirm.
 131 | template <typename T>
 132 | void Set(const size_t N, const T alpha, T* X, cudaStream_t context);
 133 | 
 134 | // Declaration only. Presumably computes mean and variance of X over the listed axes — TODO confirm.
 135 | template <typename T>
 136 | void Moments(
 137 |     const int num_dims,
 138 |     const int* dims,
 139 |     const int num_axes,
 140 |     const int* axes,
 141 |     const T* X,
 142 |     T* mean,
 143 |     T* variance,
 144 |     cudaStream_t context);
 145 | // Declaration only. Presumably inv_std[i] = 1 / sqrt(var[i] + epsilon) for i < N — TODO confirm.
 146 | template <typename T>
 147 | void InvStd(
 148 |     const int N,
 149 |     const T epsilon,
 150 |     const T* var,
 151 |     T* inv_std,
 152 |     cudaStream_t context);


--------------------------------------------------------------------------------
/operator_cxx/contrib/sigmoid_cross_entropy.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * Copyright (c) 2018 by Contributors
 22 |  * \file sigmoid_cross_entropy.cu
 23 |  * \brief
 24 |  * \author Yuntao Chen
 25 | */
 26 | 
 27 | #include "./sigmoid_cross_entropy-inl.h"
  28 | // Grid-stride loop over [0, n); token-for-token the same macro as in group_norm_helper.h.
  29 | #define CUDA_1D_KERNEL_LOOP(i, n)                               \
  30 | for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
  31 |      i += blockDim.x * gridDim.x)
 32 | 
 33 | constexpr int CAFFE_CUDA_NUM_THREADS = 512;
 34 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096;
 35 | 
 36 | inline int CAFFE_GET_BLOCKS(const int N) {
 37 |   return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS,
 38 |                   CAFFE_MAXIMUM_NUM_BLOCKS);
 39 | }
 40 | 
 41 | namespace mshadow {
 42 | namespace cuda {
 43 | 
 44 | template<typename T>
 45 | __global__ void SigmoidCrossEntropyLossKernel(
 46 |     const int n,
 47 |     const T* logits,
 48 |     const T* targets,
 49 |     T* losses,
 50 |     T* counts) {
 51 |   CUDA_1D_KERNEL_LOOP(index, n) {
 52 |     if (targets[index] == -1) {
 53 |       losses[index] = 0.;
 54 |       counts[index] = 0.;
 55 |     } else {
 56 |       losses[index] =
 57 |           -1. * logits[index] * (targets[index] - (logits[index] >= 0)) +
 58 |           logf(
 59 |               1 +
 60 |               expf(logits[index] - 2 * logits[index] * (logits[index] >= 0)));
 61 |       counts[index] = 1.;
 62 |     }
 63 |   }
 64 | }
 65 | 
 66 | template<typename T>
 67 | __global__ void SigmoidCrossEntropyLossGradientKernel(
 68 |     const int n,
 69 |     const T* logits,
 70 |     const T* targets,
 71 |     T* d_logits,
 72 |     T* counts) {
 73 |   CUDA_1D_KERNEL_LOOP(index, n) {
 74 |     if (targets[index] == -1) {
 75 |       d_logits[index] = 0.;
 76 |       counts[index] = 0.;
 77 |     } else {
 78 |       d_logits[index] = 1. / (1. + expf(-logits[index])) - targets[index];
 79 |       counts[index] = 1.;
 80 |     }
 81 |   }
 82 | }
 83 | 
 84 | template<typename T>
 85 | inline void SigmoidCrossEntropyForward(const Tensor<gpu, 2, T> &data,
 86 |                                        const Tensor<gpu, 2, T> &label,
 87 |                                        Tensor<gpu, 2, T> &loss,
 88 |                                        Tensor<gpu, 1, T> &loss_sum,
 89 |                                        Tensor<gpu, 2, T> &count,
 90 |                                        Tensor<gpu, 1, T> &count_sum,
 91 |                                        Tensor<gpu, 1, T> &out,
 92 |                                        T scale) {
 93 |   using namespace mshadow::expr;
 94 |   SigmoidCrossEntropyLossKernel<<<CAFFE_GET_BLOCKS(data.shape_.Size()), CAFFE_CUDA_NUM_THREADS, 0>>>(
 95 |     data.shape_.Size(), data.dptr_, label.dptr_, loss.dptr_, count.dptr_);
 96 |   loss_sum = sumall_except_dim<0>(loss);
 97 |   count_sum = sumall_except_dim<0>(count);
 98 |   count_sum += static_cast<T>(1e-5);
 99 |   out = loss_sum / count_sum;
100 |   int count_num = (count.size(0) * count.size(1));
101 |   //out /= static_cast<T>(count_num);
102 |   // mx.metric.Loss will take care of this
103 |   // out *= scale; 
104 | }
105 | 
106 | template<typename T>
107 | inline void SigmoidCrossEntropyBackward(const Tensor<gpu, 2, T> &data,
108 |                                         const Tensor<gpu, 2, T> &label,
109 |                                         Tensor<gpu, 2, T> &d_data,
110 |                                         Tensor<gpu, 2, T> &count,
111 |                                         Tensor<gpu, 1, T> &count_sum,
112 |                                         T scale) {
113 |   using namespace mshadow::expr;
114 |   SigmoidCrossEntropyLossGradientKernel<<<CAFFE_GET_BLOCKS(data.shape_.Size()), CAFFE_CUDA_NUM_THREADS, 0>>>(
115 |     data.shape_.Size(), data.dptr_, label.dptr_, d_data.dptr_, count.dptr_);
116 |   count_sum = sumall_except_dim<0>(count);
117 |   count_sum += static_cast<T>(1e-5);
118 |   d_data /= broadcast<0>(count_sum, d_data.shape_);
119 |   int count_num = (count.size(0) * count.size(1));
120 |   //d_data /= static_cast<T>(count_num);
121 |   d_data *= scale;
122 | }
123 | 
124 | } // namespace cuda
 125 | // mshadow-level overload for gpu tensors; forwards every argument to cuda::SigmoidCrossEntropyForward.
 126 | template<typename T>
 127 | inline void SigmoidCrossEntropyForward(const Tensor<gpu, 2, T> &data,
 128 |                                        const Tensor<gpu, 2, T> &label,
 129 |                                        Tensor<gpu, 2, T> &loss,
 130 |                                        Tensor<gpu, 1, T> &loss_sum,
 131 |                                        Tensor<gpu, 2, T> &count,
 132 |                                        Tensor<gpu, 1, T> &count_sum,
 133 |                                        Tensor<gpu, 1, T> &out,
 134 |                                        T scale) {
 135 |   cuda::SigmoidCrossEntropyForward(data, label, loss, loss_sum, count, count_sum, out, scale);
 136 | }
 137 | // mshadow-level overload for gpu tensors; forwards every argument to cuda::SigmoidCrossEntropyBackward.
 138 | template<typename T>
 139 | inline void SigmoidCrossEntropyBackward(const Tensor<gpu, 2, T> &data,
 140 |                                         const Tensor<gpu, 2, T> &label,
 141 |                                         Tensor<gpu, 2, T> &d_data,
 142 |                                         Tensor<gpu, 2, T> &count,
 143 |                                         Tensor<gpu, 1, T> &count_sum,
 144 |                                         T scale) {
 145 |   cuda::SigmoidCrossEntropyBackward(data, label, d_data, count, count_sum, scale);
 146 | }
147 | 
148 | } // namespace mshadow
149 | 
150 | namespace mxnet {
151 | namespace op {
152 | 
153 | template<>
154 | Operator *CreateOp<gpu>(SigmoidCrossEntropyParam param, int dtype) {
155 |   Operator *op = NULL;
156 |   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
157 |     op = new SigmoidCrossEntropyOp<gpu, DType>(param);
158 |   })
159 |   return op;
160 | }
161 | 
162 | }  // namespace op
163 | }  // namespace mxnet
164 | 
165 | 


--------------------------------------------------------------------------------
/operator_cxx/contrib/generate_anchor-inl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Licensed to the Apache Software Foundation (ASF) under one
  3 |  * or more contributor license agreements.  See the NOTICE file
  4 |  * distributed with this work for additional information
  5 |  * regarding copyright ownership.  The ASF licenses this file
  6 |  * to you under the Apache License, Version 2.0 (the
  7 |  * "License"); you may not use this file except in compliance
  8 |  * with the License.  You may obtain a copy of the License at
  9 |  *
 10 |  *   http://www.apache.org/licenses/LICENSE-2.0
 11 |  *
 12 |  * Unless required by applicable law or agreed to in writing,
 13 |  * software distributed under the License is distributed on an
 14 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15 |  * KIND, either express or implied.  See the License for the
 16 |  * specific language governing permissions and limitations
 17 |  * under the License.
 18 |  */
 19 | 
 20 | /*!
 21 |  * \file generate_anchor-inl.h
 22 |  * \brief GenerateAnchor Operator
 23 |  * \author Yanghao Li, Chenxia Han
 24 | */
 25 | #ifndef MXNET_OPERATOR_CONTRIB_GENERATE_ANCHOR_INL_H_
 26 | #define MXNET_OPERATOR_CONTRIB_GENERATE_ANCHOR_INL_H_
 27 | 
 28 | #include <dmlc/logging.h>
 29 | #include <dmlc/parameter.h>
 30 | #include <mxnet/operator.h>
 31 | #include <map>
 32 | #include <vector>
 33 | #include <string>
 34 | #include <utility>
 35 | #include <ctime>
 36 | #include <cstring>
 37 | #include <iostream>
 38 | #include "../operator_common.h"
 39 | #include "../mshadow_op.h"
 40 | 
 41 | namespace mxnet {
 42 | namespace op {
  43 | // Index enums for the operator's inputs and outputs; this header only references kClsProb.
  44 | namespace gen_anchor {
  45 | enum GenAnchorOpInputs {kClsProb, kAnchor};
  46 | enum GenAnchorOpOutputs {kOut};
  47 | }  // gen_anchor
  48 | // Operator parameters: anchor scales, aspect ratios and the feature stride, with their defaults.
  49 | struct GenAnchorParam : public dmlc::Parameter<GenAnchorParam> {
  50 |   // use double to keep consistency with python implementation
  51 |   nnvm::Tuple<double> scales;
  52 |   nnvm::Tuple<double> ratios;
  53 |   int feature_stride;
  54 | 
  55 |   DMLC_DECLARE_PARAMETER(GenAnchorParam) {
  56 |     DMLC_DECLARE_FIELD(scales).set_default(nnvm::Tuple<double>({4.0f, 8.0f, 16.0f, 32.0f}))
  57 |     .describe("Used to generate anchor windows by enumerating scales");
  58 |     DMLC_DECLARE_FIELD(ratios).set_default(nnvm::Tuple<double>({0.5f, 1.0f, 2.0f}))
  59 |     .describe("Used to generate anchor windows by enumerating ratios");
  60 |     DMLC_DECLARE_FIELD(feature_stride).set_default(16)
  61 |     .describe("The size of the receptive field each unit in the convolution layer of the rpn,"
  62 |               "for example the product of all stride's prior to this layer.");
  63 |   }
  64 | };
  65 | // Per-device operator factory; only declared here, no definition appears in this header.
  66 | template<typename xpu>
  67 | Operator *CreateOp(GenAnchorParam param);
 68 | 
 69 | #if DMLC_USE_CXX11
 70 | class GenAnchorProp : public OperatorProperty {
 71 |  public:
 72 |   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
 73 |     param_.Init(kwargs);
 74 |   }
 75 | 
 76 |   std::map<std::string, std::string> GetParams() const override {
 77 |     return param_.__DICT__();
 78 |   }
 79 | 
 80 |   bool InferShape(std::vector<TShape> *in_shape,
 81 |                   std::vector<TShape> *out_shape,
 82 |                   std::vector<TShape> *aux_shape) const override {
 83 |     using namespace mshadow;
 84 |     CHECK_EQ(in_shape->size(), 1) << "Input:[cls_prob]";
 85 |     const TShape &dshape = in_shape->at(gen_anchor::kClsProb);
 86 |     if (dshape.ndim() == 0) return false;
 87 |     int num_anchors = param_.scales.ndim() * param_.ratios.ndim();
 88 |     out_shape->clear();
 89 |     // output
 90 |     out_shape->push_back(Shape2(dshape[2] * dshape[3] * num_anchors,  4));
 91 |     return true;
 92 |   }
 93 | 
 94 |   OperatorProperty* Copy() const override {
 95 |     auto ptr = new GenAnchorProp();
 96 |     ptr->param_ = param_;
 97 |     return ptr;
 98 |   }
 99 | 
100 |   std::string TypeString() const override {
101 |     return "_contrib_GenAnchor";
102 |   }
103 | 
104 |   std::vector<int> DeclareBackwardDependency(
105 |     const std::vector<int> &out_grad,
106 |     const std::vector<int> &in_data,
107 |     const std::vector<int> &out_data) const override {
108 |     return {};
109 |   }
110 | 
111 |   int NumOutputs() const override {
112 |     return 1;
113 |   }
114 | 
115 |   std::vector<std::string> ListArguments() const override {
116 |     return {"cls_prob"};
117 |   }
118 | 
119 |   std::vector<std::string> ListOutputs() const override {
120 |     return {"output"};
121 |   }
122 | 
123 |   Operator* CreateOperator(Context ctx) const override;
124 | 
125 |  private:
126 |   GenAnchorParam param_;
127 | };  // class GenAnchorProp
128 | 
129 | #endif  // DMLC_USE_CXX11
130 | }  // namespace op
131 | }  // namespace mxnet
132 | 
133 | //========================
134 | // Anchor Generation Utils
135 | //========================
136 | namespace mxnet {
137 | namespace op {
138 | namespace gen_anchor_utils {
139 | 
140 | template <typename DType>
141 | inline void _MakeAnchor(DType w,
142 |                         DType h,
143 |                         DType x_ctr,
144 |                         DType y_ctr,
145 |                         std::vector<DType>& out_anchors) {
146 |   out_anchors.push_back(x_ctr - 0.5f * (w - 1.0f));
147 |   out_anchors.push_back(y_ctr - 0.5f * (h - 1.0f));
148 |   out_anchors.push_back(x_ctr + 0.5f * (w - 1.0f));
149 |   out_anchors.push_back(y_ctr + 0.5f * (h - 1.0f));
150 | }
151 | 
152 | template <typename DType>
153 | inline void _Transform(DType scale,
154 |                        DType ratio,
155 |                        const std::vector<DType>& base_anchor,
156 |                        std::vector<DType>& out_anchors) {
157 |   // use double in intermedia computation for consistency with numpy
158 |   DType w = base_anchor[2] - base_anchor[0] + 1.0f;
159 |   DType h = base_anchor[3] - base_anchor[1] + 1.0f;
160 |   DType x_ctr = base_anchor[0] + 0.5 * (w - 1.0f);
161 |   DType y_ctr = base_anchor[1] + 0.5 * (h - 1.0f);
162 |   DType size = w * h;
163 |   DType size_ratios = size / ratio;
164 |   DType new_w = std::rint(std::sqrt(size_ratios)) * scale;
165 |   DType new_h = std::rint((new_w / scale * ratio)) * scale;
166 | 
167 |   _MakeAnchor(new_w, new_h, x_ctr, y_ctr, out_anchors);
168 | }
169 | 
170 | // out_anchors must have shape (n, 4), where n is ratios.size() * scales.size()
171 | template <typename DType>
172 | inline void GenerateAnchors(const std::vector<DType>& base_anchor,
173 |                             const std::vector<DType>& ratios,
174 |                             const std::vector<DType>& scales,
175 |                             std::vector<DType>& out_anchors) {
176 |   for (size_t j = 0; j < ratios.size(); ++j) {
177 |     for (size_t k = 0; k < scales.size(); ++k) {
178 |       _Transform(scales[k], ratios[j], base_anchor, out_anchors);
179 |     }
180 |   }
181 | }
182 | 
 183 | }  // namespace gen_anchor_utils
184 | }  // namespace op
185 | }  // namespace mxnet
186 | 
187 | #endif  //  MXNET_OPERATOR_CONTRIB_GENERATE_ANCHOR_INL_H_
188 | 


--------------------------------------------------------------------------------