├── config ├── __init__.py └── NASFPN │ └── __init__.py ├── core ├── __init__.py └── detection_metric.py ├── models ├── __init__.py ├── NASFPN │ ├── __init__.py │ └── README.md ├── maskrcnn │ ├── __init__.py │ ├── metric.py │ ├── process_output.py │ ├── utils.py │ ├── README.md │ └── bbox_post_processing.py ├── retinanet │ ├── __init__.py │ ├── metric.py │ └── README.md ├── cascade_rcnn │ ├── __init__.py │ └── README.md ├── tridentnet │ ├── __init__.py │ ├── README.md │ └── input.py ├── efficientnet │ ├── README.md │ └── builder.py ├── FPN │ ├── README.md │ ├── get_top_proposal.py │ ├── assign_layer_fpn.py │ └── input.py └── dcn │ └── builder.py ├── symbol └── __init__.py ├── utils ├── __init__.py ├── logger.py ├── load_model.py ├── patch_config.py ├── contrib │ ├── edit_model_weight.py │ └── data_to_coco.py ├── generate_roidb.py ├── callback.py └── graph_optimize.py ├── operator_py ├── __init__.py ├── cython │ ├── __init__.py │ ├── .gitignore │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── bbox.pyx │ ├── nms_kernel.cu │ └── setup.py └── nms.py ├── doc ├── image │ ├── demo83.jpg │ ├── loss1.png │ ├── loss2.png │ ├── diagram.png │ ├── groupsoftmax.png │ ├── trident_block.png │ └── trident_block_details.png ├── DISTRIBUTED.md └── INSTALL.md ├── Makefile ├── scripts ├── dist_worker.sh ├── train_hpc.sh ├── terminate.sh └── launch.sh ├── operator_cxx └── contrib │ ├── bbox_norm.cu │ ├── sync_batch_norm.cu │ ├── focal_loss.cu │ ├── broadcast_scale.cu │ ├── sync_inplace_activation_batch_norm.cu │ ├── focal_loss.cc │ ├── bbox_norm.cc │ ├── broadcast_scale.cc │ ├── axpy.cc │ ├── sigmoid_cross_entropy.cc │ ├── global_average_pooling.cc │ ├── group_norm.cc │ ├── generate_anchor.cc │ ├── decodebbox-inl.h │ ├── nms-inl.h │ ├── global_average_pooling.cu │ ├── sync_batch_norm.cc │ ├── sync_inplace_activation_batch_norm.cc │ ├── fixed_divisor.h │ ├── generate_proposal-inl.h │ ├── group_norm_helper.h │ ├── sigmoid_cross_entropy.cu │ └── generate_anchor-inl.h ├── .gitignore ├── detection_infer_speed.py ├── unittest └── test_loader.py └── detection_img.py /config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /symbol/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/NASFPN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/NASFPN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/maskrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/cascade_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/tridentnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/cython/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /doc/image/demo83.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/demo83.jpg -------------------------------------------------------------------------------- /doc/image/loss1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/loss1.png -------------------------------------------------------------------------------- /doc/image/loss2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/loss2.png -------------------------------------------------------------------------------- /doc/image/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/diagram.png -------------------------------------------------------------------------------- /doc/image/groupsoftmax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/groupsoftmax.png -------------------------------------------------------------------------------- /doc/image/trident_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/trident_block.png -------------------------------------------------------------------------------- /doc/image/trident_block_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengzhengxin/groupsoftmax-simpledet/HEAD/doc/image/trident_block_details.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd operator_py/cython/; python3 setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd operator_py/cython/; rm *.so *.c *.cpp; cd ../../ 5 | -------------------------------------------------------------------------------- /operator_py/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /models/maskrcnn/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | class SigmoidCELossMetric(mx.metric.EvalMetric): 5 | def __init__(self, name, output_names, label_names): 6 | super().__init__(name, output_names, label_names) 7 | 8 | def update(self, labels, preds): 9 | self.sum_metric += preds[0].mean().asscalar() 10 | self.num_inst += 1 -------------------------------------------------------------------------------- /scripts/dist_worker.sh: -------------------------------------------------------------------------------- 1 | root_dir=$1 2 | singularity_image=$2 3 | conffile=$3 4 | 5 | if test $(which singularity); then 6 | singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "python -u detection_train.py --config ${conffile}" 7 | else 8 | singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "python -u detection_train.py" 9 | fi 10 | -------------------------------------------------------------------------------- /scripts/train_hpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gpucount=8 4 | num_node=2 5 | num_servers=${num_node} 6 | root_dir="/path/to/simpledet" 7 | sync_dir="/tmp/simpledet_sync" 8 | 9 | hostfile=hostfile.txt 10 | conffile=faster_r50v2c4_c5_256roi_1x 11 | singularity_image=simpledet.img 12 | 13 | export DMLC_INTERFACE=eth0 14 | python -u ../../launcher/tools/launch.py \ 15 | -n ${num_node} \ 16 | --num-servers ${num_servers} \ 17 | --sync-dst-dir ${sync_dir} \ 18 | --launcher ssh -H ${hostfile} \ 19 | scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \ 20 | 2>&1 | tee -a ${root_dir}/log/${conffile}.log 21 | -------------------------------------------------------------------------------- /models/maskrcnn/process_output.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pycocotools import mask as mask_util 3 | 4 | from models.maskrcnn.utils import segm_results 5 | 6 | def process_output(all_outputs, roidb): 7 | for output_record in all_outputs: 8 | rec_id = int(output_record['rec_id']) 9 | bbox_xyxy = output_record['bbox_xyxy'] 10 | cls_score = output_record['cls_score'] 11 | cls = output_record['cls'] 12 | mask = output_record['mask'] 13 | 14 | im_h = roidb[rec_id]["h"] 15 | im_w = roidb[rec_id]["w"] 16 | segm = segm_results(bbox_xyxy, cls, mask, im_h, im_w) 17 | output_record['segm'] = segm 18 | del output_record['mask'] 19 | return all_outputs 20 | -------------------------------------------------------------------------------- /scripts/terminate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "usage: $0 comma_separated_worker_hostnames" 5 | exit -1 6 | fi 7 | 8 | hosts=$1 9 | 10 | # extract worker and check reachablity 11 | IFS=, read -r -a host_array <<< $hosts 12 | for host in ${host_array[@]}; do 13 | # check reachability 14 | echo "check reachability of $host" 15 | ssh -q $host exit 16 | if [ $? -ne 0 ]; then 17 | echo "$host is not reachable" 18 | exit -1 19 | fi 20 | 21 | # check availablity (retreat if remote host is in use) 22 | echo "check availability of $host" 23 | for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 24 | x="${x//[$'\t\r\n ']}" # remove trailing whitespace 25 | if [ $x -gt 5 ]; then 26 | echo "$host has gpu utilization of $x%"; 27 | fi; 28 | done 29 | 30 | # cleanup potentially dead python process (march since we checked it) 31 | ssh $host ps aux | grep python 32 | echo -e "\n" 33 | echo "Terminate tasks on $host in 5s" 34 | sleep 5 35 | ssh -q $host pkill python 36 | done 37 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from pytz import utc, timezone 4 | 5 | 6 | def config_logger(path): 7 | def custom_time(*args): 8 | utc_dt = utc.localize(datetime.utcnow()) 9 | my_tz = timezone("Asia/Shanghai") 10 | converted = utc_dt.astimezone(my_tz) 11 | return converted.timetuple() 12 | 13 | logging.basicConfig() 14 | logging.getLogger().handlers.pop() 15 | 16 | fmt = '%(asctime)s %(message)s' 17 | date_fmt = '%m-%d %H:%M:%S' 18 | formatter = logging.Formatter(fmt=fmt, datefmt=date_fmt) 19 | formatter.converter = custom_time 20 | 21 | logging.getLogger().setLevel(logging.INFO) 22 | 23 | log_file_save_name = path 24 | file_handler = logging.FileHandler(filename=log_file_save_name, mode='w') 25 | file_handler.setLevel(logging.INFO) 26 | file_handler.setFormatter(formatter) 27 | logging.getLogger().addHandler(file_handler) 28 | 29 | console = logging.StreamHandler() 30 | console.setLevel(logging.INFO) 31 | console.setFormatter(formatter) 32 | logging.getLogger().addHandler(console) -------------------------------------------------------------------------------- /operator_py/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /doc/DISTRIBUTED.md: -------------------------------------------------------------------------------- 1 | #### Requirement 2 | Here we only provide a guide to launch distributed training with singularity, please make sure your singularity works by checking [INSTALL.md](./doc/INSTALL.md) 3 | 4 | #### Setup 5 | 1. obtain the mxnet launcher and place it in the parent directory of the simpledet working directory 6 | ```bash 7 | git clone https://github.com/RogerChern/mxnet-dist-lancher.git lancher 8 | ``` 9 | 10 | 2. mv `data`, `pretrain_model`, `experiments` outside of simpledet and symink them back. 11 | This step is to avoid unnecessary `rsync` of large binary files in the working directory during launching. 12 | 13 | 3. after step 1 and 2, your directory should be as following 14 | ``` 15 | lancher/ 16 | simpledet/ 17 | data -> /path/to/data 18 | pretrain_model -> /path/to/pretain_model 19 | experiments -> /path/to/experiments 20 | ... 21 | ``` 22 | 23 | 4. make a hostfile containing hostnames of all nodes, these nodes would be accessed from our launch node by ssh without password 24 | simpledet/hostfile.txt 25 | ``` 26 | node1 27 | node2 28 | ``` 29 | 30 | 5. change the singulariy mounting point in `scripts/dist_worker.sh` 31 | 32 | 6. change working directories in `scritps/train_hpc.sh` 33 | 34 | 7. launch distributed training with scripts 35 | ```bash 36 | bash scritps/train_hpc.sh 37 | ``` 38 | -------------------------------------------------------------------------------- /models/cascade_rcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Cascade R-CNN 2 | 3 | This repository implements [**Cascade R-CNN**](https://arxiv.org/abs/1712.00726) in the **SimpleDet** framework. Cascade R-CNN is a multi-stage object detector, aiming to reduce the overfitting problem by resampling of progressively improved hypotheses. 4 | 5 | ### How we build Cascade R-CNN 6 | 7 | #### Input 8 | 9 | Cascade R-CNN can share the origin Faster R-CNN input, so there is no need to implement an extra one. 10 | 11 | #### Symbol 12 | 13 | - ```CascadeRcnn```: detector with three ```R-CNN``` stages 14 | - ```CascadeBbox2fcHead```: header for ```R-CNN``` stages. Note that it is also required to generate proposal for next ```R-CNN``` stages, thus we add ```get_all_proposal``` to decode boxes predicted in this stage and ```get_sampled_proposal``` to generate ```bbox_target```. 15 | 16 | #### Config 17 | 18 | - ```BboxParam```, ```BboxParam2nd```, ```BboxParam3rd```: config for ```R-CNN``` stages, ```mean``` and ```std``` in ```regress_target``` aim to decode boxes predicted in this stage, and those in ```bbox_target``` is prepared to generate ```bbox_target``` for next ```R-CNN``` stage. Note that we add ```stage``` field to specify the weight used by ```R-CNN```, as in **test phase** ```bbox_head_1st``` and ```bbox_head_2nd``` forward twice with different input feature. 19 | 20 | -------------------------------------------------------------------------------- /models/efficientnet/README.md: -------------------------------------------------------------------------------- 1 | ## EfficientNet for object detection 2 | This repository implements [**EfficientNet**](https://arxiv.org/abs/1905.11946) in the SimpleDet framework. Efficient B5 achives the same mAP with **~1/10 FLOPs** compared with ResNet-50. 3 | 4 | ### Qucik Start 5 | ```bash 6 | # train faster r-cnn with efficientnet fpn backbone 7 | python3 detection_train.py --config config/efficientnet/efficientnet_b5_fpn_bn_scratch_400_6x.py 8 | ``` 9 | 10 | ### Results and Models 11 | All AP results are reported on minival of the [COCO dataset](http://cocodataset.org). 12 | 13 | |Model|InputSize|Backbone|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP|Link| 14 | |-----|-----|--------|--------------|---|---------|----|---------|-----------|---------------|----| 15 | |Faster|400x600|B5-FPN|36 epoch(6X)|8X 1080Ti|8|yes|-|75 img/s|37.2|[model](http://simpledet.alarge.space:1234/?/efficientnet_b5_fpn_bn_scratch_400_6x.zip)| 16 | |Faster|400x600|B5-FPN|54 epoch(9X)|8X 1080Ti|8|yes|-|75 img/s|37.9|-| 17 | |Faster|400x600|B5-FPN|72 epoch(12X)|8X 1080Ti|8|yes|-|75 img/s|38.3|-| 18 | 19 | ### Reference 20 | ``` 21 | @inproceedings{tan2019, 22 | title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks}, 23 | author={Tan, Mingxing and Le, Quoc V}, 24 | booktitle={ICML}, 25 | year={2019} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /operator_cxx/contrib/bbox_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file bbox_norm.cu 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./bbox_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BBoxNormParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BBoxNormOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_batch_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Hang Zhang 24 | */ 25 | 26 | #include "sync_batch_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | template<> 31 | Operator *CreateOp(SyncBatchNormParam param, int dtype) { 32 | return new SyncBatchNorm(param); 33 | } 34 | 35 | } // namespace op 36 | } // namespace mxnet 37 | 38 | -------------------------------------------------------------------------------- /operator_cxx/contrib/focal_loss.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file focal_loss.cu 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./focal_loss-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(FocalLossParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new FocalLossOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/broadcast_scale.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file broadcast_scale.cu 22 | * \brief 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "./broadcast_scale-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BroadcastScaleParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BroadcastScaleOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_inplace_activation_batch_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_inplace_activation_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "sync_inplace_activation_batch_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | template<> 31 | Operator *CreateOp(SyncInplaceABNParam param, int dtype) { 32 | return new SyncInplaceABN(param); 33 | } 34 | 35 | } // namespace op 36 | } // namespace mxnet 37 | 38 | -------------------------------------------------------------------------------- /models/retinanet/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | 5 | class FGAccMetric(mx.metric.EvalMetric): 6 | def __init__(self, name, output_names, label_names, threshold=0): 7 | super().__init__(name, output_names, label_names) 8 | self.thr = threshold 9 | 10 | def update(self, labels, preds): 11 | if len(preds) == 1 and len(labels) == 1: 12 | pred = preds[0] 13 | label = labels[0] 14 | elif len(preds) == 2: 15 | pred = preds[0] 16 | label = preds[1] 17 | else: 18 | raise Exception( 19 | "unknown loss output: len(preds): {}, len(labels): {}".format( 20 | len(preds), len(labels) 21 | ) 22 | ) 23 | 24 | label = label.asnumpy().astype('int32') 25 | keep_inds = np.where(label >= 1) 26 | 27 | # treat as foreground if score larger than threshold 28 | # select class with maximum score as prediction 29 | pred_score = pred.max(axis=-1) 30 | pred_label = pred.argmax(axis=-1) + 1 31 | if self.thr != 0: 32 | pred_label *= pred_score > self.thr 33 | 34 | pred_label = pred_label.asnumpy().astype('int32') 35 | 36 | pred_label = pred_label[keep_inds] 37 | label = label[keep_inds] 38 | 39 | self.sum_metric += np.sum(pred_label.flat == label.flat) 40 | self.num_inst += len(pred_label.flat) 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ project files 2 | .idea 3 | *.iml 4 | out 5 | gen 6 | 7 | ### Vim template 8 | [._]*.s[a-w][a-z] 9 | [._]s[a-w][a-z] 10 | *.un~ 11 | Session.vim 12 | .netrwhist 13 | *~ 14 | 15 | ### IPythonNotebook template 16 | # Temporary data 17 | .ipynb_checkpoints/ 18 | 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib/ 38 | lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *,cover 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | *.ipynb 80 | *.params 81 | *.json 82 | .vscode/ 83 | 84 | model 85 | model/ 86 | visimg 87 | mxnet/ 88 | mxnext/ 89 | data 90 | experiments 91 | pretrain_model 92 | !data/cache/coco_micro_test.roidb 93 | -------------------------------------------------------------------------------- /utils/load_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import glob 4 | 5 | 6 | def get_latest_ckpt_epoch(prefix): 7 | """ 8 | Get latest checkpoint epoch by prefix 9 | """ 10 | def get_checkpoint_epoch(prefix): 11 | return int(prefix[prefix.rfind('.params')-4:prefix.rfind('.params')]) 12 | 13 | checkpoints = glob.glob(prefix + '*.params') 14 | assert len(checkpoints), 'can not find params startswith {}'.prefix 15 | return max([get_checkpoint_epoch(x) for x in checkpoints]) 16 | 17 | 18 | def load_checkpoint(prefix, epoch): 19 | """ 20 | Load model checkpoint from file. 21 | :param prefix: Prefix of model name. 22 | :param epoch: Epoch number of model we would like to load. 23 | :return: (arg_params, aux_params) 24 | arg_params : dict of str to NDArray 25 | Model parameter, dict of name to NDArray of net's weights. 26 | aux_params : dict of str to NDArray 27 | Model parameter, dict of name to NDArray of net's auxiliary states. 28 | """ 29 | print('load %s-%04d.params' % (prefix, epoch)) 30 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 31 | arg_params = {} 32 | aux_params = {} 33 | for k, v in save_dict.items(): 34 | tp, name = k.split(':', 1) 35 | if tp == 'arg': 36 | arg_params[name] = v 37 | if tp == 'aux': 38 | aux_params[name] = v 39 | return arg_params, aux_params 40 | 41 | 42 | def convert_context(params, ctx): 43 | """ 44 | :param params: dict of str to NDArray 45 | :param ctx: the context to convert to 46 | :return: dict of str of NDArray with context ctx 47 | """ 48 | new_params = dict() 49 | for k, v in params.items(): 50 | new_params[k] = v.as_in_context(ctx) 51 | return new_params 52 | 53 | -------------------------------------------------------------------------------- /models/retinanet/README.md: -------------------------------------------------------------------------------- 1 | ## RetinaNet 2 | 3 | This repository implements [**RetinaNet**](https://arxiv.org/abs/1708.02002) in the **SimpleDet** framework. RetinaNet is state-of-the-art single stage detector, preventing the vast number of easy negatives from overwhelming the detector with focal loss. 4 | 5 | ### How we build RetinaNet 6 | 7 | #### Input 8 | 9 | The pyramid label parts of **RetinaNet** is similar with **Feature Pyramid Network**, you can refer to [FPN README](../FPN/README.md) . In addition, the label assignment method is different compared with **Faster R-CNN**, thus we overrides ```_assign_label_to_anchor``` and ```apply``` of ```AnchorTarget2D```, named ```PyramidAnchorTarget2DBase```, to obtain class-aware labels and avoid sampling RoIs. 10 | 11 | #### Operators 12 | 13 | - **bbox_norm**, passes data in forward, and normalizes gradient by number of positive samples in backward 14 | - **focal_loss**, acts same as Sigmoid in forward, and return focal loss gradient in backward 15 | - **decode_retina**, reuses the code from [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test_retinanet.py) to decode boxes and scores. Note that ```min_det_score``` is moved to ```RpnParam.proposal``` as it requires different threshold for results from **P7** level. 16 | 17 | #### Symbol 18 | 19 | - ``` RetinaNet```, detector only with RPN 20 | - ```RetinaNetHead```, classification and regression header with sharing weights 21 | - ```RetinaNetNeck```, top-down pathway for **FPN** in **RetinaNet** 22 | 23 | #### Config 24 | 25 | - ```min_det_score``` in ```TestParam``` is set to 0 to remove those appended boxes with zero scores 26 | - To avoid sharing parameter of the same field from config in pyramid levels, i.e. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass ```AnchorTarget2DParam``` instance rather than class for anchor generation. 27 | -------------------------------------------------------------------------------- /utils/patch_config.py: -------------------------------------------------------------------------------- 1 | import types 2 | import inspect 3 | 4 | 5 | class NoThrowBase: 6 | def __getattr__(self, item): 7 | return None 8 | 9 | 10 | class NoThrowMeta(type): 11 | def __getattr__(self, item): 12 | return None 13 | 14 | 15 | def patch_config_as_nothrow(instance): 16 | if "NoThrow" in [instance.__name__, instance.__class__.__name__]: 17 | return instance 18 | 19 | if type(instance) == type: 20 | instance = types.new_class(instance.__name__ + "NoThrow", (instance, ), dict(metaclass=NoThrowMeta)) 21 | for (k, v) in inspect.getmembers(instance): 22 | if not k.startswith("__") and type(v) == type: 23 | type.__setattr__(instance, k, patch_config_as_nothrow(v)) 24 | else: 25 | for (k, v) in inspect.getmembers(instance.__class__): 26 | if not k.startswith("__") and type(v) == type: 27 | type.__setattr__(instance.__class__, k, patch_config_as_nothrow(v)) 28 | instance.__class__ = type(instance.__class__.__name__ + "NoThrow", (instance.__class__, NoThrowBase), {}) 29 | 30 | return instance 31 | 32 | 33 | if __name__ == "__main__": 34 | class A: 35 | a = 1 36 | 37 | A = patch_config_as_nothrow(A) 38 | assert A.non_exist is None 39 | assert A.a == 1 40 | 41 | class B: 42 | b = 1 43 | class B1: 44 | b1 = 2 45 | 46 | B = patch_config_as_nothrow(B) 47 | assert B.non_exist is None 48 | assert B.B1.non_exist is None 49 | assert B.b == 1 50 | assert B.B1.b1 == 2 51 | 52 | class B: 53 | b = 1 54 | class B1: 55 | b1 = 2 56 | def b1f(): 57 | return 3 58 | 59 | b = B() 60 | b = patch_config_as_nothrow(b) 61 | assert b.non_exist is None 62 | assert b.B1.non_exist is None 63 | assert b.b == 1 64 | assert b.B1.b1 == 2 65 | assert b.B1.b1f() == 3 66 | -------------------------------------------------------------------------------- /scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 2 ]; then 4 | echo "usage: $0 config_path comma_separated_worker_hostnames" 5 | exit -1 6 | fi 7 | 8 | conffile=$1 9 | hosts=$2 10 | 11 | # extract worker and check reachablity 12 | IFS=, read -r -a host_array <<< $hosts 13 | for host in ${host_array[@]}; do 14 | # check reachability 15 | echo "check reachability of $host" 16 | ssh -q $host exit 17 | if [ $? -ne 0 ]; then 18 | echo "$host is not reachable" 19 | exit -1 20 | fi 21 | 22 | # check availablity (retreat if remote host is in use) 23 | echo "check availability of $host" 24 | for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 25 | x="${x//[$'\t\r\n ']}" # remove trailing whitespace 26 | if [ $x -gt 10 ]; then 27 | echo "$host has gpu utilization of $x%"; 28 | exit -1 29 | fi; 30 | done 31 | 32 | # cleanup potentially dead python process (march since we checked it) 33 | ssh -q $host pkill python 34 | done 35 | 36 | gpucount=8 37 | num_node=${#host_array[@]} 38 | num_servers=${num_node} 39 | root_dir="/mnt/tscpfs/yuntao.chen/simpledet/simpledet_open" 40 | sync_dir="/tmp/simpledet_sync" 41 | singularity_image=/mnt/tscpfs/yuntao.chen/simpledet.img 42 | 43 | # check existence of config file 44 | if [ ! -f ${conffile} ]; then 45 | echo "${conffile} does not exsit" 46 | exit -1 47 | fi 48 | 49 | # dump hosts in a hostfile for launch.py 50 | IFS=, 51 | output="" 52 | for id in $hosts 53 | do output+="${id}\n" 54 | done 55 | unset IFS 56 | echo -e ${output::-2} > scripts/hosts.txt 57 | sleep 1 58 | 59 | logfile=${conffile#config/} 60 | logfile=${logfile%.py} 61 | 62 | export DMLC_INTERFACE=eth0 63 | python -u /mnt/tscpfs/yuntao.chen/dist-mxnet/tools/launch.py \ 64 | -n ${num_node} \ 65 | -s ${num_servers} \ 66 | --launcher ssh \ 67 | -H scripts/hosts.txt \ 68 | scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \ 69 | 2>&1 | tee -a ${root_dir}/log/${logfile}.log 70 | -------------------------------------------------------------------------------- /operator_cxx/contrib/focal_loss.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file focal_loss.cc 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./focal_loss-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(FocalLossParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new FocalLossOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *FocalLossProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(FocalLossParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_FocalLoss, FocalLossProp) 52 | .describe("Focal loss for dense object detection") 53 | .add_argument("data", "NDArray-or-Symbol", "Data") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(FocalLossParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /operator_cxx/contrib/bbox_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file bbox_norm.cc 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./bbox_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BBoxNormParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BBoxNormOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *BBoxNormProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(BBoxNormParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BBoxNorm, BBoxNormProp) 52 | .describe("Normalize those boxes with positive label") 53 | .add_argument("data", "NDArray-or-Symbol", "Data to normalize") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(BBoxNormParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /operator_cxx/contrib/broadcast_scale.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file broadcast_scale.cc 22 | * \brief 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "./broadcast_scale-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BroadcastScaleParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BroadcastScaleOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *BroadcastScaleProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(BroadcastScaleParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BroadcastScale, BroadcastScaleProp) 52 | .describe("Broadcast_scale to enable in-place scaling of tensor") 53 | .add_argument("data", "NDArray-or-Symbol", "Data") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(BroadcastScaleParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /models/FPN/README.md: -------------------------------------------------------------------------------- 1 | ## Feature Pyramid Networks for Object Detection 2 | 3 | Here we introduce how is [**Feature Pyramid Network**](https://arxiv.org/abs/1612.03144) built in **simpledet** framework. The following sections explain detail implementation. 4 | 5 | #### AnchorTarget 6 | 7 | Since **FPN** uses **Feature Pyramid** as backbone, we cannot use ```AnchorTarget2D``` directly, which only generates anchor target for single stride declared in ```RpnParam```. Instead, we implement ```PyramidAnchorTarget2D``` to create a list of ```AnchorTarget2D```, each generating anchor target for single pyramid stride, then collect them together. More specifically, we create instances for each pyramid stride to generate anchor. To collect anchors from different pyramid levels, we overrides ```v_all_anchor``` and ```h_all_anchor``` property, which returns the concatenation of anchors from different levels, then assign to primary instances. Also, we override ```apply``` function to obtain label, sample anchor, target and weight from primary instances, then split and concat them in a certain axis. 8 | 9 | #### Operators 10 | 11 | - **get_top_proposal**, since **FPN** has mutli-scale proposals, we should concat the multi-scale proposals together and get the topK proposals for roi-pooling or roi-align 12 | - **assign_layer_fpn**, **FPN** assign the proposals to target levels(P2, P3, P4, P5) according to the areas, so we use this Operator to assign feature levels for proposals 13 | 14 | 15 | #### Symbol 16 | 17 | - ``` Detector```, detector is the same as FasterRcnn 18 | - ```FPNNeck```, top-down pathway for **Feature Pyramid Network** 19 | - ```FPNRpnHead```, classification and regression header with sharing weights for FPN-RPN 20 | - ```FPNRoiAlign```, we use this module to get the proposal feature for the proposals of different levels respectively, then add the feature from different level proposals together for next rcnn head 21 | 22 | #### Config 23 | 24 | - ```TestParam``` is the same as the setting in [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md) 25 | - To avoid sharing parameter of the same field from config in pyramid levels, i.e. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass ```AnchorTarget2DParam``` instance rather than class for anchor generation. 26 | -------------------------------------------------------------------------------- /models/NASFPN/README.md: -------------------------------------------------------------------------------- 1 | ## NAS-FPN 2 | 3 | This repository implements [**NAS-FPN**](https://arxiv.org/abs/1904.07392) in the SimpleDet framework. 4 | 5 | ### Qucik Start 6 | ```bash 7 | # train baseline retinanet following the setting of NAS-FPN 8 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_fpn_640_1@256_25epoch.py 9 | 10 | # train NAS-FPN 11 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_640_7@256_25epoch.py 12 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1024_7@256_25epoch.py 13 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1280_7@384_25epoch.py 14 | 15 | # train hand-crafted neck 16 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_tdbu_1280_3@384_25epoch.py 17 | ``` 18 | 19 | ### Results and Models 20 | All AP results are reported on test-dev of the [COCO dataset](http://cocodataset.org). 21 | 22 | |Model|InputSize|Backbone|Neck|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP(Mask AP)|Link| 23 | |-----|-----|--------|----|--------------|---|---------|----|---------|-----------|---------------|----| 24 | |RetinaNet|640|R50v1b-FPN|1@256|25 epoch|8X 1080Ti|8|yes|6.6G|85 img/s|37.4|[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/retina_r50v1b_fpn_640640_25epoch.zip)| 25 | |NAS-FPN|640|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|8|yes|7.8G|66 img/s|40.1|[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/retina_r50v1b_nasfpn_640640_25epoch.zip)| 26 | |NAS-FPN|1024|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|4|yes|9.1G|17 img/s|44.2|[model](http://simpledet.alarge.space:1234/?/retina_r50v1b_nasfpn_1024_7%40256_25epoch.zip)| 27 | |NAS-FPN|1280|R50v1b-FPN|7@384|25 epoch|8X 1080Ti|2|yes|8.9G|10 img/s|45.3|[model](http://simpledet.alarge.space:1234/?/retina_r50v1b_nasfpn_1280_7%40384_25epoch.zip)| 28 | |TD-BU*|1280|R50v1b-FPN|3@384|25 epoch|8X 1080Ti|3|yes|10.5G|12 img/s|44.7|[model](http://simpledet.alarge.space:1234/?/retina_r50v1b_tdbu_1280_3%40384_25epoch.zip)| 29 | 30 | \* Short for TopDown-BottomUp neck which is highly symmetric proposed by Zehao. 31 | ### Reference 32 | ``` 33 | @inproceedings{ghiasi2019fpn, 34 | title={NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection}, 35 | author={Ghiasi, Golnaz and Lin, Tsung-Yi and Pang, Ruoming and Le, Quoc V}, 36 | booktitle={CVPR}, 37 | year={2019} 38 | } 39 | ``` 40 | -------------------------------------------------------------------------------- /detection_infer_speed.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import importlib 3 | import time 4 | 5 | from core.detection_module import DetModule 6 | 7 | import mxnet as mx 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Test detector inference speed') 12 | # general 13 | parser.add_argument('--config', help='config file path', type=str, required=True) 14 | parser.add_argument('--shape', help='specify input 2d image shape', metavar=('SHORT', 'LONG'), type=int, nargs=2, required=True) 15 | parser.add_argument('--gpu', help='GPU index', type=int, default=0) 16 | parser.add_argument('--count', help='number of runs, final result will be averaged', type=int, default=100) 17 | args = parser.parse_args() 18 | 19 | config = importlib.import_module(args.config.replace('.py', '').replace('/', '.')) 20 | return config, args.gpu, args.shape, args.count 21 | 22 | 23 | if __name__ == "__main__": 24 | config, gpu, shape, count = parse_args() 25 | 26 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 27 | transform, data_name, label_name, metric_list = config.get_config(is_train=False) 28 | 29 | sym = pModel.test_symbol 30 | sym.save(pTest.model.prefix + "_infer_speed.json") 31 | 32 | # create dummy data batch 33 | data = mx.nd.ones(shape=[1, 3] + shape) 34 | im_info = mx.nd.array([x / 2.0 for x in shape] + [2.0]).reshape(1, 3) 35 | im_id = mx.nd.array([1]) 36 | rec_id = mx.nd.array([1]) 37 | data_names = ["data", "im_info", "im_id", "rec_id"] 38 | data_shape = [[1, 3] + shape, [1, 3], [1], [1]] 39 | data_shape = [(name, shape) for name, shape in zip(data_names, data_shape)] 40 | data_batch = mx.io.DataBatch(data=[data, im_info, im_id, rec_id]) 41 | 42 | ctx = mx.gpu(gpu) 43 | mod = DetModule(sym, data_names=data_names, context=ctx) 44 | mod.bind(data_shapes=data_shape, for_training=False) 45 | mod.set_params({}, {}, True) 46 | 47 | # let AUTOTUNE run for once 48 | mod.forward(data_batch, is_train=False) 49 | for output in mod.get_outputs(): 50 | output.wait_to_read() 51 | 52 | tic = time.time() 53 | for _ in range(count): 54 | mod.forward(data_batch, is_train=False) 55 | for output in mod.get_outputs(): 56 | output.wait_to_read() 57 | toc = time.time() 58 | 59 | print((toc - tic) / count * 1000) 60 | 61 | -------------------------------------------------------------------------------- /models/maskrcnn/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from pycocotools import mask as mask_util 5 | 6 | 7 | def expand_boxes(boxes, scale): 8 | """Expand an array of boxes by a given scale.""" 9 | w_half = (boxes[:, 2] - boxes[:, 0]) * .5 10 | h_half = (boxes[:, 3] - boxes[:, 1]) * .5 11 | x_c = (boxes[:, 2] + boxes[:, 0]) * .5 12 | y_c = (boxes[:, 3] + boxes[:, 1]) * .5 13 | 14 | w_half *= scale 15 | h_half *= scale 16 | 17 | boxes_exp = np.zeros(boxes.shape) 18 | boxes_exp[:, 0] = x_c - w_half 19 | boxes_exp[:, 2] = x_c + w_half 20 | boxes_exp[:, 1] = y_c - h_half 21 | boxes_exp[:, 3] = y_c + h_half 22 | 23 | return boxes_exp 24 | 25 | 26 | def segm_results(bbox_xyxy, cls, masks, im_h, im_w): 27 | # Modify from Detectron 28 | # To work around an issue with cv2.resize (it seems to automatically pad 29 | # with repeated border values), we manually zero-pad the masks by 1 pixel 30 | # prior to resizing back to the original image resolution. This prevents 31 | # "top hat" artifacts. We therefore need to expand the reference boxes by an 32 | # appropriate factor. 33 | segms = [] 34 | M = masks.shape[-1] 35 | scale = (M + 2.0) / M 36 | ref_boxes = expand_boxes(bbox_xyxy, scale) 37 | ref_boxes = ref_boxes.astype(np.int32) 38 | padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) 39 | 40 | for ref_box_i, mask_i, cls_i in zip(ref_boxes, masks, cls): 41 | padded_mask[1:-1, 1:-1] = mask_i[cls_i, :, :] 42 | 43 | w = ref_box_i[2] - ref_box_i[0] + 1 44 | h = ref_box_i[3] - ref_box_i[1] + 1 45 | w = np.maximum(w, 1) 46 | h = np.maximum(h, 1) 47 | 48 | mask = cv2.resize(padded_mask, (w, h)) 49 | mask = np.array(mask > 0.5, dtype=np.uint8) 50 | im_mask = np.zeros((im_h, im_w), dtype=np.uint8) 51 | 52 | x_0 = max(ref_box_i[0], 0) 53 | x_1 = min(ref_box_i[2] + 1, im_w) 54 | y_0 = max(ref_box_i[1], 0) 55 | y_1 = min(ref_box_i[3] + 1, im_h) 56 | 57 | im_mask[y_0:y_1, x_0:x_1] = mask[ 58 | (y_0 - ref_box_i[1]):(y_1 - ref_box_i[1]), 59 | (x_0 - ref_box_i[0]):(x_1 - ref_box_i[0]) 60 | ] 61 | 62 | # Get RLE encoding used by the COCO evaluation API 63 | rle = mask_util.encode( 64 | np.array(im_mask[:, :, np.newaxis], order='F') 65 | )[0] 66 | segms.append(rle) 67 | segms = np.array(segms) 68 | return segms -------------------------------------------------------------------------------- /models/FPN/get_top_proposal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collect top proposals across all levels for FPN 3 | author: Yi Jiang, Chenxia Han 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class GetTopProposalOperator(mx.operator.CustomOp): 11 | def __init__(self, top_n): 12 | super().__init__() 13 | self.top_n = top_n 14 | 15 | def forward(self, is_train, req, in_data, out_data, aux): 16 | bboxes = in_data[0] 17 | scores = in_data[1] 18 | 19 | num_image = bboxes.shape[0] 20 | top_n = self.top_n 21 | top_bboxes = [] 22 | top_scores = [] 23 | 24 | for i in range(num_image): 25 | image_bboxes = bboxes[i] 26 | image_scores = scores[i] 27 | argsort_ind = mx.nd.argsort(image_scores[:,0], is_ascend=False) 28 | image_bboxes = image_bboxes[argsort_ind] 29 | image_bboxes = image_bboxes[:top_n] 30 | image_scores = image_scores[argsort_ind] 31 | image_scores = image_scores[:top_n] 32 | 33 | top_bboxes.append(image_bboxes) 34 | top_scores.append(image_scores) 35 | 36 | top_bboxes = mx.nd.stack(*top_bboxes) 37 | top_scores = mx.nd.stack(*top_scores) 38 | 39 | self.assign(out_data[0], req[0], top_bboxes) 40 | self.assign(out_data[1], req[1], top_scores) 41 | 42 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 43 | self.assign(in_grad[0], req[0], 0) 44 | self.assign(in_grad[1], req[1], 0) 45 | 46 | 47 | @mx.operator.register('get_top_proposal') 48 | class GetTopProposalProp(mx.operator.CustomOpProp): 49 | def __init__(self, top_n): 50 | super().__init__(need_top_grad=False) 51 | self.top_n = int(top_n) 52 | 53 | def list_arguments(self): 54 | return ['bbox', 'score'] 55 | 56 | def list_outputs(self): 57 | return ['bbox', 'score'] 58 | 59 | def infer_shape(self, in_shape): 60 | bbox_shape = in_shape[0] 61 | score_shape = in_shape[1] 62 | num_image = bbox_shape[0] 63 | 64 | top_bbox_shape = (num_image, self.top_n, 4) 65 | top_score_shape = (num_image, self.top_n, 1) 66 | 67 | return [bbox_shape, score_shape], \ 68 | [top_bbox_shape, top_score_shape] 69 | 70 | def create_operator(self, ctx, shapes, dtypes): 71 | return GetTopProposalOperator(self.top_n) 72 | 73 | def declare_backward_dependency(self, out_grad, in_data, out_data): 74 | return [] 75 | -------------------------------------------------------------------------------- /utils/contrib/edit_model_weight.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script allows you to edit the model weight from https://github.com/TuSimple/simpledet/blob/master/MODEL_ZOO.md 4 | for your own training. In this script, it assumes you don't change the layer of model, but only edit the content in 5 | the layer that models of MODEL_ZOO have. For example, you just change the training classes but not add or delete a 6 | unit of resnet. 7 | 8 | Example: Edit the weight from 80 + 1 classes to 3 + 1 classes training. 9 | - Train by your own configuration for one epoch, the configuration should have edited for 3 + 1 classes training. 10 | - Edit the constant in this file. 11 | - SIMPLEDET_WEIGHT_FOLDER the path to the weight folder you download 12 | - TRAINED_WEIGHT_FOLDER the path to the weight folder you need the shape 13 | - EDIT_KEY the key of layer which you want to edit the weight, you can show the key by 14 | print(arg_params_src), in this example, the key names are 15 | "bbox_cls_logit_weight", "bbox_cls_logit_bias" 16 | - Run the code! 17 | 18 | Note: The new generated model weight file will cover your original downloaded weight file, if you don't want like this, 19 | you can edit the last line of the code. 20 | 21 | TODO: Before you run the code, you should train a model for one epoch and edit the code as instructions above. 22 | """ 23 | 24 | import mxnet as mx 25 | import numpy as np 26 | import os 27 | 28 | # TODO: Edit the path. 29 | SIMPLEDET_WEIGHT_FOLDER = " " 30 | TRAINED_WEIGHT_FOLDER = " " 31 | 32 | # TODO: Edit the key names which you want to modify. 33 | EDIT_KEY = ["bbox_cls_logit_weight", "bbox_cls_logit_bias"] 34 | 35 | def change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst): 36 | for key in EDIT_KEY: 37 | arg_params_src[key] = arg_params_dst[key] 38 | return arg_params_src 39 | 40 | if __name__ == "__main__": 41 | sym, arg_params_src, aux_params = \ 42 | mx.model.load_checkpoint(os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint"), 6) 43 | _, arg_params_dst, _ = \ 44 | mx.model.load_checkpoint(os.path.join(TRAINED_WEIGHT_FOLDER, "checkpoint"), 1) 45 | 46 | # print(arg_params_src) to show the key name. 47 | # arg_params_src means the weight you want to change which is downloaded from simpledet, 48 | # arg_params_src means the weight you need the shape. 49 | arg_params = change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst) 50 | 51 | mx.model.save_checkpoint(os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint"), 1, sym, arg_params, aux_params) 52 | -------------------------------------------------------------------------------- /operator_py/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps_cython( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps -------------------------------------------------------------------------------- /models/FPN/assign_layer_fpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Assign Layer operator for FPN 3 | author: Yi Jiang, Chenxia Han 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class AssignLayerFPNOperator(mx.operator.CustomOp): 11 | def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level): 12 | super().__init__() 13 | self.rcnn_stride = rcnn_stride 14 | self.roi_canonical_scale = roi_canonical_scale 15 | self.roi_canonical_level = roi_canonical_level 16 | 17 | def forward(self, is_train, req, in_data, out_data, aux): 18 | all_rois = in_data[0] 19 | 20 | rcnn_stride = self.rcnn_stride 21 | scale0 = self.roi_canonical_scale 22 | lvl0 = self.roi_canonical_level 23 | k_min = np.log2(min(rcnn_stride)) 24 | k_max = np.log2(max(rcnn_stride)) 25 | 26 | rois_area = (all_rois[:, :, 2] - all_rois[:, :, 0] + 1) \ 27 | * (all_rois[:, :, 3] - all_rois[:, :, 1] + 1) 28 | 29 | scale = mx.nd.sqrt(rois_area) 30 | target_lvls = mx.nd.floor(lvl0 + mx.nd.log2(scale / scale0 + 1e-6)) 31 | target_lvls = mx.nd.clip(target_lvls, k_min, k_max) 32 | target_stride = (2 ** target_lvls).astype('uint8') 33 | 34 | for i, s in enumerate(rcnn_stride): 35 | lvl_rois = mx.nd.zeros_like(all_rois) 36 | lvl_inds = mx.nd.expand_dims(target_stride == s, axis=2).astype('float32') 37 | lvl_inds = mx.nd.broadcast_like(lvl_inds, lvl_rois) 38 | lvl_rois = mx.nd.where(lvl_inds, all_rois, lvl_rois) 39 | 40 | self.assign(out_data[i], req[i], lvl_rois) 41 | 42 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 43 | self.assign(in_grad[0], req[0], 0) 44 | 45 | 46 | @mx.operator.register('assign_layer_fpn') 47 | class AssignLayerFPNProp(mx.operator.CustomOpProp): 48 | def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level): 49 | super().__init__(need_top_grad=False) 50 | self.rcnn_stride = eval(rcnn_stride) 51 | self.roi_canonical_scale = int(roi_canonical_scale) 52 | self.roi_canonical_level = int(roi_canonical_level) 53 | 54 | def list_arguments(self): 55 | return ['rois'] 56 | 57 | def list_outputs(self): 58 | rois_list = ['rois_s{}'.format(s) for s in self.rcnn_stride] 59 | return rois_list 60 | 61 | def infer_shape(self, in_shape): 62 | rpn_rois_shape = in_shape[0] 63 | 64 | output_rois_shape = [rpn_rois_shape] * len(self.rcnn_stride) 65 | 66 | return [rpn_rois_shape], output_rois_shape 67 | 68 | def create_operator(self, ctx, shapes, dtypes): 69 | return AssignLayerFPNOperator(self.rcnn_stride, self.roi_canonical_scale, 70 | self.roi_canonical_level) 71 | 72 | def declare_backward_dependency(self, out_grad, in_data, out_data): 73 | return [] 74 | -------------------------------------------------------------------------------- /utils/contrib/data_to_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script allows you to transfer your own data from your own data format to coco format. 4 | 5 | Attention: This is not the official format, it does not require licenses and other redundant info, but can generate 6 | coco-like dataset which can be accepted by Simpledet. 7 | 8 | TODO: You should reimplement the code from line 31 to the end, this file only describe the format of dataset 9 | and the way to save it. 10 | """ 11 | 12 | import json 13 | import sys 14 | 15 | def main(): 16 | if len(sys.argv) < 3: 17 | print("Usage: python data_to_coco.py infile outfile") 18 | exit(1) 19 | input_file = sys.argv[1] 20 | output_file = sys.argv[2] 21 | 22 | # The whole coco dataset 23 | dataset = { 24 | 'licenses': [], 25 | 'info': {}, 26 | 'categories': [], # Required 27 | 'images': [], # Required 28 | 'annotations': [] # Required 29 | } 30 | 31 | # TODO: class_map maps the class, which would be added into dataset['categories'] 32 | class_map = { 33 | "box": 1, 34 | "can": 2, 35 | "bottle": 3 36 | } 37 | for class_name, class_id in class_map.items(): 38 | dataset['categories'].append({ 39 | 'id': class_id, 40 | 'name': class_name, 41 | 'supercategory': 'supercategory_name' 42 | }) 43 | 44 | # TODO: Load your own data 45 | self_data_list = [] 46 | with open(input_file, 'r') as in_file: 47 | for line in in_file: 48 | self_data_list.append(json.loads(line)) 49 | 50 | # TODO: Dataset images info, normally you should implement an iter here to append the info 51 | dataset['images'].append({ 52 | 'coco_url': '', 53 | 'date_captured': '', 54 | 'file_name': '', # Required (str) image file name 55 | 'flickr_url': '', 56 | 'id': int(), # Required (int) id of image 57 | 'license': '', 58 | 'width': int(), # Required (int) width of image 59 | 'height': int() # Required (int) height of image 60 | }) 61 | 62 | # TODO: Dataset annotation info, normally you should implement an iter here to append the info 63 | dataset["annotations"].append({ 64 | 'area': int(), # Required (int) image area 65 | 'bbox': [int()] * 4, # Required (int) one of the image bboxes 66 | 'category_id': int(), # Required (int) class id of this bbox 67 | 'id': int(), # Required (int) bbox id in this image 68 | 'image_id': int(), # Required (int) image id of this bbox 69 | 'iscrowd': 0, # Optional, required only if you want to train for semantic segmentation 70 | 'segmentation': [] # Optional, required only if you want to train for semantic segmentation 71 | }) 72 | 73 | with open(output_file, 'w') as ofile: 74 | json.dump(dataset, ofile, sort_keys=True, indent=2) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /unittest/test_loader.py: -------------------------------------------------------------------------------- 1 | import pickle as pkl 2 | import unittest 3 | import mxnet as mx 4 | 5 | from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ 6 | ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ 7 | RenameRecord, AnchorTarget2D, AnchorLoader 8 | from config import detection_config 9 | 10 | 11 | class TestLoader(unittest.TestCase): 12 | 13 | def test_empty_v_loader(self): 14 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 15 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 16 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 17 | all_v_roidbs = [roidb for roidb in roidbs if roidb['h'] >= roidb['w']] 18 | 19 | loader = AnchorLoader( 20 | roidb=all_v_roidbs, 21 | transform=transform, 22 | data_name=data_name, 23 | label_name=label_name, 24 | batch_size=1, 25 | shuffle=True, 26 | num_thread=1, 27 | kv=mx.kvstore.create(pKv.kvstore) 28 | ) 29 | with self.assertRaises(StopIteration): 30 | while True: 31 | data_batch = loader.next() 32 | 33 | def test_empty_h_loader(self): 34 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 35 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 36 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 37 | all_h_roidbs = [roidb for roidb in roidbs if roidb['h'] < roidb['w']] 38 | 39 | loader = AnchorLoader( 40 | roidb=all_h_roidbs, 41 | transform=transform, 42 | data_name=data_name, 43 | label_name=label_name, 44 | batch_size=1, 45 | shuffle=True, 46 | num_thread=1, 47 | kv=mx.kvstore.create(pKv.kvstore) 48 | ) 49 | with self.assertRaises(StopIteration): 50 | while True: 51 | data_batch = loader.next() 52 | 53 | def test_record_num(self): 54 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 55 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 56 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 57 | batch_size = 4 58 | 59 | loader = AnchorLoader( 60 | roidb=roidbs, 61 | transform=transform, 62 | data_name=data_name, 63 | label_name=label_name, 64 | batch_size=batch_size, 65 | shuffle=True, 66 | num_thread=1, 67 | kv=mx.kvstore.create(pKv.kvstore) 68 | ) 69 | 70 | num_batch = 0 71 | while True: 72 | try: 73 | data_batch = loader.next() 74 | num_batch += 1 75 | except StopIteration: 76 | break 77 | self.assertEqual(batch_size * num_batch, loader.total_record) 78 | 79 | 80 | -------------------------------------------------------------------------------- /operator_cxx/contrib/axpy.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file axpy.cc 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Yuntao Chen 25 | */ 26 | #include "./axpy-inl.h" 27 | 28 | namespace mshadow { 29 | template 30 | inline void AxpyForwardLauncher(const Tensor &scale_data, 31 | const Tensor &x_data, 32 | const Tensor &y_data, 33 | const Tensor &out) { 34 | LOG(FATAL) << "NotImplemented"; 35 | } 36 | 37 | template 38 | inline void AxpyBackwardLauncher(const Tensor &scale_data, 39 | const Tensor &x_data, 40 | const Tensor &y_data, 41 | const Tensor &scale_grad, 42 | const Tensor &x_grad, 43 | const Tensor &y_grad, 44 | const Tensor &out_grad, 45 | Stream *s) { 46 | LOG(FATAL) << "NotImplemented"; 47 | } 48 | } // namespace mshadow 49 | 50 | namespace mxnet { 51 | namespace op { 52 | 53 | template<> 54 | Operator *CreateOp(AxpyParam param, int dtype) { 55 | Operator* op = NULL; 56 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 57 | op = new AxpyOp(param); 58 | }); 59 | return op; 60 | } 61 | 62 | Operator *AxpyProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 63 | std::vector *in_type) const { 64 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(AxpyParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_Axpy, AxpyProp) 70 | .describe(R"code(Accelerate Squeeze and Excitation Network)code" ADD_FILELINE) 71 | .add_argument("scale", "NDArray-or-Symbol", "channel scaling factor") 72 | .add_argument("x", "NDArray-or-Symbol", "resnet increase output") 73 | .add_argument("y", "NDArray-or-Symbol", "resnet shortcut output") 74 | .add_arguments(AxpyParam::__FIELDS__()); 75 | } // namespace op 76 | } // namespace mxnet 77 | -------------------------------------------------------------------------------- /models/maskrcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Mask-RCNN 2 | 3 | This repository implements [**Mask-RCNN**](https://arxiv.org/abs/1703.06870) in the SimpleDet framework. 4 | Mask-RCNN is a simple and effective approach for object instance segmentation. By simply extending Faster-RCNN with a mask branch, Mask-RCNN can generate a high-quality segmentation mask for each instance. In the following, we will introduce how we build Mask-RCNN in the SimpleDet framework. Currently, we only provide FPN based Mask-RCNN. 5 | 6 | ### Qucik Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/mask_r50v1_fpn_1x.py 10 | 11 | # test 12 | python3 mask_test.py --config config/mask_r50v1_fpn_1x.py 13 | ``` 14 | ### How we build Mask-RCNN 15 | #### Input 16 | First, we need mask label. 17 | 18 | Instead of providing binary masks to the network, we adopt poly format in the current implementation. Since each instance may contain several parts, we adopt a list of lists ([[ax1, ay1, ax2, ay2,...], [bx1, by1, bx2, by2,...], ...) to represent each instance following COCO. For simplicity, we note [ax1, ay1, ax2, ay2, ...] as a segm. 19 | 20 | We implement these transforms for poly format mask label: 21 | - **PreprocessGtPoly**: convert each segm in a instance into ndarray. 22 | - **EncodeGtPoly**: encode each instance into a fixed length format ([class_id, num_segms, len_segm1, len_segm2, segm1, segm2]). 23 | 24 | For data augmentation, we extend several transfroms from Faster-RCNN: 25 | - **Resize2DImageBboxMask**: based on **Resize2DImageBbox** 26 | - **Flip2DImageBboxMask**: based on **Flip2DImageBbox** 27 | - **Pad2DImageBboxMask**: based on **Pad2DImageBbox** 28 | 29 | #### Operators 30 | Then, we extend proposal_target to get sampled mask target for mask branch training: 31 | - **proposal_mask_target**, decodes encoded gt poly into binary mask and samples a fixed amount of masks as mask target. For acceleration, we only provide mask target for fg roi. So the number of mask target is ```int(image_roi * fg_fraction)```. Currently we only support class specific mask target. So the shape of mask target is ```(batch_size, int(image_roi * fg_fraction), num_class (81 in COCO), mask_size, mask_size)```. 32 | 33 | In order to test mask in an end-to-end manner, we reuses the code from detection_test.py and implement a bbox post processing operator: 34 | - **bbox_post_processing**, adopts NMS for multi-class bbox and get final bbox results. 35 | 36 | For loss function, we implement sigmoid cross entropy: 37 | - **sigmoid_cross_entropy**, a general sigmoid cross entropy loss function. 38 | 39 | #### Symbol 40 | - **MaskFasterRcnn**, detector for MaskRCNN 41 | - **MaskFPNRpnHead**, a new RpnHead inherited from FPNRpnHead, note that we slice the proposal sampled from proposal_mask_target since the mask target provided by this operator is only for fg roi. 42 | - **MaskFasterRcnnHead**, mask head for MaskRCNN 43 | - **MaskFasterRcnn4ConvHead**, a specific mask head with 4 convolutions. 44 | - **BboxPostProcessor**, a bbox post processor for end-to-end test. 45 | 46 | ### How to build Mask-RCNN without FPN 47 | - Implement **MaskRpnHead** following **MaskFPNRpnHead**. 48 | - Implement your own MaskHead following **MaskFasterRcnn4ConvHead** 49 | - Write your own config following **mask_r50v1_fpn_1x.py** and **faster_r50v1c4_c5_512roi_1x.py** 50 | 51 | -------------------------------------------------------------------------------- /doc/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Setup with Docker 2 | We provide pre-built docker images for both cuda9.0 and cuda10.0. 3 | 4 | Maxwell, Pascal, Volta and Turing GPUs are supported. 5 | 6 | For nvidia-driver >= 410.48, cuda10 image is recommended. 7 | 8 | For nvidia-driver >= 384.81, cuda9 image is recommended. 9 | 10 | Aliyun beijing mirror is provided for users pulling from China. 11 | 12 | ```bash 13 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda9 zsh 14 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda10 zsh 15 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda9 zsh 16 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda10 zsh 17 | ``` 18 | 19 | ## Setup with Singularity 20 | We recommend the users to adopt singualrity as the default environment manager to minimize the efforts of configuration. 21 | Singularity is a virtual environment manager like virtualenv, but in the system-level. 22 | 23 | #### Install Singularity >= 2.6 24 | ```bash 25 | # install dependency 26 | sudo apt update 27 | sudo apt install build-essential python libarchive-dev 28 | 29 | # install singularity 30 | wget https://github.com/sylabs/singularity/releases/download/2.6.1/singularity-2.6.1.tar.gz 31 | tar xzfv singularity-2.6.1.tar.gz 32 | cd singularity-2.6.1 33 | ./configure --prefix=/usr/local 34 | make 35 | sudo make install 36 | ``` 37 | 38 | #### Download singularity image for SimpleDet 39 | ```bash 40 | wget https://simpledet-model.oss-cn-beijing.aliyuncs.com/simpledet.img 41 | ``` 42 | 43 | #### Invoke simpledet shell 44 | Here we need to map the working directory into singularity shell, note that **symlink to files outside the working directory will not work** since singularity has its own filesystem. Thus we recommend users to map the whole data storage into singularity by replacing $WORKDIR by something like `/data` or `/mnt/`. 45 | 46 | ```bash 47 | sudo singularity shell --no-home --nv -s /usr/bin/zsh --bind $WORKDIR /path/to/simpledet.img 48 | ``` 49 | 50 | ## Setup from Scratch 51 | #### System Requirements 52 | - Ubuntu 16.04 53 | - Python >= 3.5 54 | 55 | #### Install CUDA, cuDNN and NCCL 56 | 57 | #### Install cocotools 58 | ```bash 59 | # Install a patched cocotools for python3 60 | pip3 install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' 61 | ``` 62 | 63 | #### Install MXNet 64 | ```bash 65 | # Install dependency 66 | sudo apt-get update 67 | sudo apt-get install -y build-essential git 68 | sudo apt-get install -y libopenblas-dev 69 | ``` 70 | 71 | ```bash 72 | git clone --recursive https://github.com/apache/incubator-mxnet /tmp/mxnet && \ 73 | git clone https://github.com/Tusimple/simpledet /tmp/simpledet && \ 74 | git clone https://github.com/RogerChern/cocoapi /tmp/cocoapi && \ 75 | cp -r /tmp/simpledet/operator_cxx/* /tmp/mxnet/src/operator && \ 76 | mkdir -p /tmp/mxnet/src/coco_api && \ 77 | cp -r /tmp/cocoapi/common /tmp/mxnet/src/coco_api && \ 78 | cd /tmp/mxnet && \ 79 | echo "USE_SIGNAL_HANDLER = 1" >> ./config.mk && \ 80 | echo "USE_OPENCV = 0" >> ./config.mk && \ 81 | echo "USE_MKLDNN = 0" >> ./config.mk && \ 82 | echo "USE_BLAS = openblas" >> ./config.mk && \ 83 | echo "USE_CUDA = 1" >> ./config.mk && \ 84 | echo "USE_CUDA_PATH = /usr/local/cuda" >> ./config.mk && \ 85 | echo "USE_CUDNN = 1" >> ./config.mk && \ 86 | echo "USE_NCCL = 1" >> ./config.mk && \ 87 | echo "USE_DIST_KVSTORE = 1" >> ./config.mk && \ 88 | echo "CUDA_ARCH = -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70" >> ./config.mk && \ 89 | rm /tmp/mxnet/src/operator/nn/group_norm* && \ 90 | make -j$((`nproc`-1)) && \ 91 | cd python && \ 92 | python3 setup.py install && \ 93 | rm -rf /tmp/mxnet /tmp/simpledet /tmp/cocoapi 94 | ``` 95 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sigmoid_cross_entropy.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file sigmoid_cross_entropy.cc 23 | * \brief 24 | * \author Yuntao Chen 25 | */ 26 | 27 | #include "./sigmoid_cross_entropy-inl.h" 28 | 29 | namespace mshadow { 30 | 31 | template 32 | inline void SigmoidCrossEntropyForward(const Tensor &data, 33 | const Tensor &label, 34 | Tensor &loss, 35 | Tensor &loss_sum, 36 | Tensor &count, 37 | Tensor &count_sum, 38 | Tensor &out, 39 | T scale) { 40 | LOG(FATAL) << "NotImplemented"; 41 | } 42 | 43 | template 44 | inline void SigmoidCrossEntropyBackward(const Tensor &data, 45 | const Tensor &label, 46 | Tensor &d_data, 47 | Tensor &count, 48 | Tensor &count_sum, 49 | T scale) { 50 | LOG(FATAL) << "NotImplemented"; 51 | } 52 | 53 | } 54 | 55 | namespace mxnet { 56 | namespace op { 57 | template<> 58 | Operator *CreateOp(SigmoidCrossEntropyParam param, int dtype) { 59 | Operator *op = NULL; 60 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 61 | op = new SigmoidCrossEntropyOp(param); 62 | }) 63 | return op; 64 | } 65 | 66 | // DO_BIND_DISPATCH comes from operator_common.h 67 | Operator *SigmoidCrossEntropyProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 68 | std::vector *in_type) const { 69 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 70 | } 71 | 72 | DMLC_REGISTER_PARAMETER(SigmoidCrossEntropyParam); 73 | 74 | MXNET_REGISTER_OP_PROPERTY(_contrib_SigmoidCrossEntropy, SigmoidCrossEntropyProp) 75 | .describe(R"DOC( 76 | Compute sigmoid activations followed by averaged binary cross entropy loss. The 77 | target values may be in {-1, 0, 1}, where -1 indicates that the corresponding 78 | sample should be ignored and {0, 1} correspond to the binary classes 0 and 1. By 79 | default the loss is divided by the number of targets > -1 and then multiplied by 80 | the `grad_scale` op argument. The divisive normalization may be disable by setting 81 | the op argument `normalize` to 0 (the multiplication by `scale` still takes 82 | effect). 83 | This op fuses sigmoid and cross entropy for numerical stability in both forward 84 | and gradient computation. 85 | )DOC" ADD_FILELINE) 86 | .add_argument("data", "NDArray-or-Symbol", "Input array.") 87 | .add_argument("label", "NDArray-or-Symbol", "Ground truth label.") 88 | .add_arguments(SigmoidCrossEntropyParam::__FIELDS__()); 89 | 90 | 91 | } // namespace op 92 | } // namespace mxnet 93 | -------------------------------------------------------------------------------- /operator_cxx/contrib/global_average_pooling.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file global_average_pooling.cc 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Chenxia Han 25 | */ 26 | #include "./global_average_pooling-inl.h" 27 | 28 | namespace mshadow { 29 | template 30 | inline void GAPForward(const Tensor &out, 31 | const Tensor &data) { 32 | // NOT_IMPLEMENTED 33 | return; 34 | } 35 | 36 | template 37 | inline void GAPBackward(const Tensor &in_grad, 38 | const Tensor &out_grad) { 39 | // NOT_IMPLEMENTED 40 | return; 41 | } 42 | } // namespace mshadow 43 | 44 | namespace mxnet { 45 | namespace op { 46 | 47 | template<> 48 | Operator *CreateOp(GAPParam param, int dtype) { 49 | Operator *op = NULL; 50 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 51 | op = new GAPOp(param); 52 | }); 53 | 54 | return op; 55 | } 56 | 57 | // DO_BIND_DISPATCH comes from operator_common.h 58 | Operator* GAPProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 59 | std::vector *in_type) const { 60 | std::vector out_shape, aux_shape; 61 | std::vector out_type, aux_type; 62 | CHECK(InferType(in_type, &out_type, &aux_type)); 63 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 64 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(GAPParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_GAP, GAPProp) 70 | .describe(R"code(This operator is DEPRECATED. 71 | Perform pooling on the input. 72 | 73 | The shapes for 2-D pooling is 74 | 75 | - **data**: *(batch_size, channel, height, width)* 76 | - **out**: *(batch_size, num_filter, out_height, out_width)*, with:: 77 | 78 | out_height = f(height, kernel[0], pad[0], stride[0]) 79 | out_width = f(width, kernel[1], pad[1], stride[1]) 80 | 81 | The definition of *f* depends on ``pooling_convention``, which has two options: 82 | 83 | - **valid** (default):: 84 | 85 | f(x, k, p, s) = floor((x+2*p-k)/s)+1 86 | 87 | - **full**, which is compatible with Caffe:: 88 | 89 | f(x, k, p, s) = ceil((x+2*p-k)/s)+1 90 | 91 | But ``global_pool`` is set to be true, then do a global pooling, namely reset 92 | ``kernel=(height, width)``. 93 | 94 | Three pooling options are supported by ``pool_type``: 95 | 96 | - **avg**: average pooling 97 | - **max**: max pooling 98 | - **sum**: sum pooling 99 | 100 | 1-D pooling is special case of 2-D pooling with *weight=1* and 101 | *kernel[1]=1*. 102 | 103 | For 3-D pooling, an additional *depth* dimension is added before 104 | *height*. Namely the input data will have shape *(batch_size, channel, depth, 105 | height, width)*. 106 | 107 | )code" ADD_FILELINE) 108 | .add_argument("data", "NDArray-or-Symbol", "Input data to the pooling operator.") 109 | .add_arguments(GAPParam::__FIELDS__()); 110 | 111 | } // namespace op 112 | } // namespace mxnet 113 | -------------------------------------------------------------------------------- /utils/generate_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle as pkl 4 | import numpy as np 5 | from pycocotools.coco import COCO 6 | 7 | 8 | dataset_split_mapping = { 9 | "train2014": "train2014", 10 | "val2014": "val2014", 11 | "valminusminival2014": "val2014", 12 | "minival2014": "val2014", 13 | "train2017": "train2017", 14 | "val2017": "val2017", 15 | "test-dev2017": "test2017", 16 | "train": "train" 17 | } 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database') 22 | parser.add_argument('--dataset', help='dataset name', type=str) 23 | parser.add_argument('--dataset-split', help='dataset split, e.g. train2017, minival2014', type=str) 24 | 25 | args = parser.parse_args() 26 | return args.dataset, args.dataset_split 27 | 28 | 29 | def generate_groundtruth_database(dataset_name, dataset_split): 30 | annotation_type = 'image_info' if 'test' in dataset_split else 'instances' 31 | annotation_path = "/ws/data/opendata/%s/annotations/%s_%s.json" % (dataset_name, annotation_type, dataset_split) 32 | 33 | catid_offset = 0 34 | lable_version = -1 35 | if dataset_name == "coco": 36 | lable_version = 1 37 | catid_offset = 0 38 | elif dataset_name == "cctsdb": 39 | lable_version = 2 40 | catid_offset = 80 41 | else: 42 | lable_version = -1 43 | assert lable_version > 0 44 | 45 | dataset = COCO(annotation_path) 46 | img_ids = dataset.getImgIds() 47 | roidb = [] 48 | for img_id in img_ids: 49 | img_anno = dataset.loadImgs(img_id)[0] 50 | im_filename = img_anno['file_name'] 51 | im_w = img_anno['width'] 52 | im_h = img_anno['height'] 53 | 54 | ins_anno_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=False) 55 | trainid_to_datasetid = dict({i + 1: cid for i, cid in enumerate(dataset.getCatIds())}) # 0 for bg 56 | datasetid_to_trainid = dict({cid: tid for tid, cid in trainid_to_datasetid.items()}) 57 | instances = dataset.loadAnns(ins_anno_ids) 58 | 59 | # sanitize bboxes 60 | valid_instances = [] 61 | for inst in instances: 62 | x, y, box_w, box_h = inst['bbox'] 63 | x1 = max(0, x) 64 | y1 = max(0, y) 65 | x2 = min(im_w - 1, x1 + max(0, box_w - 1)) 66 | y2 = min(im_h - 1, y1 + max(0, box_h - 1)) 67 | if inst['area'] > 0 and x2 >= x1 and y2 >= y1: 68 | inst['clean_bbox'] = [x1, y1, x2, y2] 69 | valid_instances.append(inst) 70 | num_instance = len(valid_instances) 71 | 72 | gt_bbox = np.zeros((num_instance, 4), dtype=np.float32) 73 | gt_class = np.zeros((num_instance, ), dtype=np.int32) 74 | gt_poly = [None] * num_instance 75 | 76 | for i, inst in enumerate(valid_instances): 77 | cls = catid_offset + datasetid_to_trainid[inst['category_id']] 78 | gt_bbox[i, :] = inst['clean_bbox'] 79 | gt_class[i] = cls 80 | gt_poly[i] = inst['segmentation'] 81 | 82 | split = dataset_split_mapping[dataset_split] 83 | roi_rec = { 84 | 'image_url': '/ws/data/opendata/%s/images/%s/%s' % (dataset_name, split, im_filename), 85 | 'im_id': img_id, 86 | 'h': im_h, 87 | 'w': im_w, 88 | 'gt_class': gt_class, 89 | 'gt_bbox': gt_bbox, 90 | 'gt_poly': gt_poly, 91 | 'version': lable_version, 92 | 'flipped': False} 93 | 94 | roidb.append(roi_rec) 95 | 96 | return roidb 97 | 98 | 99 | if __name__ == "__main__": 100 | d, dsplit = parse_args() 101 | roidb = generate_groundtruth_database(d, dsplit) 102 | os.makedirs("data/cache", exist_ok=True) 103 | with open("data/cache/%s_%s.roidb" % (d, dsplit), "wb") as fout: 104 | pkl.dump(roidb, fout) 105 | -------------------------------------------------------------------------------- /operator_cxx/contrib/group_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 15 | and IDIAP Research Institute nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | /* 32 | * Licensed to the Apache Software Foundation (ASF) under one 33 | * or more contributor license agreements. See the NOTICE file 34 | * distributed with this work for additional information 35 | * regarding copyright ownership. The ASF licenses this file 36 | * to you under the Apache License, Version 2.0 (the 37 | * "License"); you may not use this file except in compliance 38 | * with the License. You may obtain a copy of the License at 39 | * 40 | * http://www.apache.org/licenses/LICENSE-2.0 41 | * 42 | * Unless required by applicable law or agreed to in writing, 43 | * software distributed under the License is distributed on an 44 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 45 | * KIND, either express or implied. See the License for the 46 | * specific language governing permissions and limitations 47 | * under the License. 48 | */ 49 | 50 | /*! 51 | * \file group_norm.cc 52 | * \author Yuntao Chen 53 | */ 54 | 55 | #include "./group_norm-inl.h" 56 | 57 | namespace mxnet { 58 | namespace op { 59 | template <> 60 | Operator* CreateOp(GroupNormParam param, int dtype) { 61 | LOG(FATAL) << "not implemented."; 62 | return NULL; 63 | } 64 | 65 | // DO_BIND_DISPATCH comes from operator_common.h 66 | Operator* GroupNormProp::CreateOperatorEx(Context ctx, 67 | std::vector* in_shape, 68 | std::vector* in_type) const { 69 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 70 | } 71 | 72 | DMLC_REGISTER_PARAMETER(GroupNormParam); 73 | 74 | MXNET_REGISTER_OP_PROPERTY(_contrib_GroupNorm, GroupNormProp) 75 | .add_argument("data", "NDArray-or-Symbol", 76 | "An n-dimensional input array (n > 2) of the form [batch, " 77 | "channel, spatial_dim1, spatial_dim2, ...].") 78 | .add_argument("gamma", "NDArray-or-Symbol", 79 | "A vector of length \'channel\', which multiplies the " 80 | "normalized input.") 81 | .add_argument("beta", "NDArray-or-Symbol", 82 | "A vector of length \'channel\', which is added to the " 83 | "product of the normalized input and the weight.") 84 | .add_arguments(GroupNormParam::__FIELDS__()) 85 | .describe(R"code(Group Normalization (GN) operation: https://arxiv.org/abs/1803.08494)code" ADD_FILELINE); 86 | } // namespace op 87 | } // namespace mxnet 88 | -------------------------------------------------------------------------------- /operator_py/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .cython.cpu_nms import greedy_nms, soft_nms 3 | 4 | 5 | def cython_soft_nms_wrapper(thresh, sigma=0.5, score_thresh=0.001, method='linear'): 6 | methods = {'hard': 0, 'linear': 1, 'gaussian': 2} 7 | assert method in methods, 'Unknown soft_nms method: {}'.format(method) 8 | def _nms(dets): 9 | dets, _ = soft_nms( 10 | np.ascontiguousarray(dets, dtype=np.float32), 11 | np.float32(sigma), 12 | np.float32(thresh), 13 | np.float32(score_thresh), 14 | np.uint8(methods[method])) 15 | return dets 16 | return _nms 17 | 18 | 19 | def py_nms_wrapper(thresh): 20 | def _nms(dets): 21 | return nms(dets, thresh) 22 | return _nms 23 | 24 | 25 | def cpu_nms_wrapper(thresh): 26 | def _nms(dets): 27 | return greedy_nms(dets, thresh)[0] 28 | return _nms 29 | 30 | 31 | def wnms_wrapper(thresh_lo, thresh_hi): 32 | def _nms(dets): 33 | return py_weighted_nms(dets, thresh_lo, thresh_hi) 34 | return _nms 35 | 36 | 37 | def nms(dets, thresh): 38 | """ 39 | greedily select boxes with high confidence and overlap with current maximum <= thresh 40 | rule out overlap >= thresh 41 | :param dets: [[x1, y1, x2, y2 score]] 42 | :param thresh: retain overlap < thresh 43 | :return: indexes to keep 44 | """ 45 | x1 = dets[:, 0] 46 | y1 = dets[:, 1] 47 | x2 = dets[:, 2] 48 | y2 = dets[:, 3] 49 | scores = dets[:, 4] 50 | 51 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 52 | order = scores.argsort()[::-1] 53 | 54 | keep = [] 55 | while order.size > 0: 56 | i = order[0] 57 | keep.append(i) 58 | xx1 = np.maximum(x1[i], x1[order[1:]]) 59 | yy1 = np.maximum(y1[i], y1[order[1:]]) 60 | xx2 = np.minimum(x2[i], x2[order[1:]]) 61 | yy2 = np.minimum(y2[i], y2[order[1:]]) 62 | 63 | w = np.maximum(0.0, xx2 - xx1 + 1) 64 | h = np.maximum(0.0, yy2 - yy1 + 1) 65 | inter = w * h 66 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 67 | 68 | inds = np.where(ovr <= thresh)[0] 69 | order = order[inds + 1] 70 | 71 | return dets[keep, :] 72 | 73 | 74 | def py_weighted_nms(dets, thresh_lo, thresh_hi): 75 | """ 76 | voting boxes with confidence > thresh_hi 77 | keep boxes overlap <= thresh_lo 78 | rule out overlap > thresh_hi 79 | :param dets: [[x1, y1, x2, y2 score]] 80 | :param thresh_lo: retain overlap <= thresh_lo 81 | :param thresh_hi: vote overlap > thresh_hi 82 | :return: indexes to keep 83 | """ 84 | x1 = dets[:, 0] 85 | y1 = dets[:, 1] 86 | x2 = dets[:, 2] 87 | y2 = dets[:, 3] 88 | scores = dets[:, 4] 89 | 90 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | order = scores.argsort()[::-1] 92 | 93 | keep = [] 94 | while order.size > 0: 95 | i = order[0] 96 | xx1 = np.maximum(x1[i], x1[order]) 97 | yy1 = np.maximum(y1[i], y1[order]) 98 | xx2 = np.minimum(x2[i], x2[order]) 99 | yy2 = np.minimum(y2[i], y2[order]) 100 | 101 | w = np.maximum(0.0, xx2 - xx1 + 1) 102 | h = np.maximum(0.0, yy2 - yy1 + 1) 103 | inter = w * h 104 | ovr = inter / (areas[i] + areas[order] - inter) 105 | 106 | inds = np.where(ovr <= thresh_lo)[0] 107 | inds_keep = np.where(ovr > thresh_hi)[0] 108 | if len(inds_keep) == 0: 109 | break 110 | 111 | order_keep = order[inds_keep] 112 | 113 | tmp=np.sum(scores[order_keep]) 114 | x1_avg = np.sum(scores[order_keep] * x1[order_keep]) / tmp 115 | y1_avg = np.sum(scores[order_keep] * y1[order_keep]) / tmp 116 | x2_avg = np.sum(scores[order_keep] * x2[order_keep]) / tmp 117 | y2_avg = np.sum(scores[order_keep] * y2[order_keep]) / tmp 118 | 119 | keep.append([x1_avg, y1_avg, x2_avg, y2_avg, scores[i]]) 120 | order = order[inds] 121 | return np.array(keep) 122 | -------------------------------------------------------------------------------- /models/tridentnet/README.md: -------------------------------------------------------------------------------- 1 | ## Scale-Aware Trident Networks for Object Detection 2 | 3 | Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang 4 | 5 |

6 | 7 | ### Introduction 8 | 9 | This repository implements [TridentNet](https://arxiv.org/abs/1901.01892) in the SimpleDet framework. 10 | 11 | Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. Then, we propose a scale-aware training scheme to specialize each branch by sampling object instances of proper scales for training. As a bonus, a fast approximation version of TridentNet could achieve significant improvements without any additional parameters and computational cost. On the COCO dataset, our TridentNet with ResNet-101 backbone achieves state-of-the-art single-model results by obtaining an mAP of 48.4. 12 | 13 | #### Trident Blocks 14 | 15 | - Dilated convolution for efficient scale enumeration 16 | - Weight sharing between convs for uniform representation 17 | 18 |

19 | 20 | The above figure shows how to convert bottleneck residual blocks to 3-branch Trident Blocks. The dilation rate of three branches are set as 1, 2 and 3, respectively. 21 | 22 | ### Use TridentNet 23 | 24 | Please setup SimpleDet following [README](../../README.md) and [INSTALL](../../doc/INSTALL.md) and use the TridentNet configuration files in the `config` folder. 25 | 26 | ### Results on MS-COCO 27 | 28 | | | Backbone | Test data | mAP@[0.5:0.95] | Link | 29 | | --------------------------- | ---------- | --------- | :------------: | -----| 30 | | Faster R-CNN, 1x | ResNet-101 | minival | 37.6 |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/faster_r101v2c4_c5_256roi_1x.zip)| 31 | | TridentNet, 1x | ResNet-101 | minival | 40.6 |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_1x.zip)| 32 | | TridentNet, 1x, Fast Approx | ResNet-101 | minival | 39.9 |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_fastapprox_1x.zip)| 33 | | TridentNet, 2x | ResNet-101 | test-dev | 42.8 |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_addminival_2x.zip)| 34 | | TridentNet*, 3x | ResNet-101 | test-dev | 48.4 |[model](https://simpledet-model.oss-cn-beijing.aliyuncs.com/tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.zip)| 35 | 36 | Note: 37 | 1. These models are not trained in SimpleDet. Re-training these models in SimpleDet gives a slightly better result. 38 | 2. TridentNet* - TridentNet = extended training + softNMS + multi-scale training/testing + syncBN + DCNv1. 39 | 40 | ### Results on MS-COCO with stronger baselines 41 | All config files are available in [config/resnet_v1b](../../config/resnet_v1b). 42 | 43 | |Model|Backbone|Head|Train Schedule|AP|AP50|AP75|APs|APm|APl| 44 | |-----|--------|----|--------------|--|----|----|---|---|---| 45 | |Faster|R50v1b-C4|C5-512ROI|2X|36.9|57.9|39.3|19.9|41.4|50.2| 46 | |Trident|R50v1b-C4|C5-128ROI|2X|39.6|60.9|42.9|22.5|44.5|53.9| 47 | |TridentFast|R50v1b-C4|C5-128ROI|2X|39.0|60.2|41.8|20.8|43.6|53.8| 48 | |Faster|R101v1b-C4|C5-512ROI|2X|40.5|61.2|43.8|22.5|44.8|55.4| 49 | |Trident|R101v1b-C4|C5-128ROI|2X|43.0|64.3|46.3|25.3|47.9|58.4| 50 | |TridentFast|R101v1b-C4|C5-128ROI|2X|42.5|63.7|46.0|23.3|46.7|59.3| 51 | |Faster|R152v1b-C4|C5-512ROI|2X|41.8|62.4|45.2|23.2|46.0|56.9| 52 | |Trident|R152v1b-C4|C5-128ROI|2X|44.4|65.4|48.3|26.4|49.4|59.6| 53 | |TridentFast|R152v1b-C4|C5-128ROI|2X|43.9|65.1|47.0|25.1|48.1|60.4| 54 | 55 | ### Citing TridentNet 56 | 57 | ``` 58 | @article{li2019scale, 59 | title={Scale-Aware Trident Networks for Object Detection}, 60 | author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, 61 | journal={ICCV 2019}, 62 | year={2019} 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /utils/callback.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import mxnet as mx 4 | 5 | 6 | class Speedometer(object): 7 | def __init__(self, batch_size, frequent=50): 8 | self.batch_size = batch_size 9 | self.frequent = frequent 10 | self.init = False 11 | self.tic = 0 12 | self.last_count = 0 13 | 14 | def __call__(self, param): 15 | """Callback to Show speed.""" 16 | count = param.nbatch 17 | if self.last_count > count: 18 | self.init = False 19 | self.last_count = count 20 | 21 | if self.init: 22 | if count % self.frequent == 0: 23 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 24 | if param.eval_metric is not None: 25 | name, value = param.eval_metric.get() 26 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 27 | for n, v in zip(name, value): 28 | s += "%s=%f,\t" % (n, v) 29 | logging.info(s) 30 | else: 31 | logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec", 32 | param.epoch, count, speed) 33 | self.tic = time.time() 34 | else: 35 | self.init = True 36 | self.tic = time.time() 37 | 38 | class DetailSpeedometer(object): 39 | def __init__(self, batch_size, frequent=50): 40 | self.batch_size = batch_size 41 | self.frequent = frequent 42 | self.init = False 43 | self.tic = 0 44 | self.last_count = 0 45 | 46 | def __call__(self, param): 47 | """Callback to Show speed.""" 48 | count = param.nbatch 49 | rank = param.rank 50 | total_iter = param.total_iter 51 | 52 | if self.last_count > count: 53 | self.init = False 54 | self.last_count = count 55 | 56 | if self.init: 57 | if count % self.frequent == 0: 58 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 59 | if param.eval_metric is not None: 60 | name, value = param.eval_metric.get() 61 | s = "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\t" \ 62 | "data:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec\tTrain-" % ( 63 | param.epoch, rank, count, total_iter, 64 | param.cur_batch_time, param.avg_batch_time, 65 | param.cur_kvstore_sync_time, param.avg_kvstore_sync_time, 66 | param.cur_data_time, param.avg_data_time, 67 | param.cur_iter_total_time, param.avg_iter_total_time, 68 | speed) 69 | for n, v in zip(name, value): 70 | s += "%s=%f,\t" % (n, v) 71 | logging.info(s) 72 | else: 73 | logging.info( 74 | "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\tdata:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec", 75 | param.epoch, rank, count, total_iter, 76 | param.cur_batch_time, param.avg_batch_time, 77 | param.cur_kvstore_sync_time, param.avg_kvstore_sync_time, 78 | param.cur_data_time, param.avg_data_time, 79 | param.cur_iter_total_time, param.avg_iter_total_time, 80 | speed) 81 | 82 | self.tic = time.time() 83 | else: 84 | self.init = True 85 | self.tic = time.time() 86 | 87 | 88 | def do_checkpoint(prefix): 89 | def _callback(iter_no, sym, arg, aux): 90 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 91 | return _callback 92 | 93 | 94 | def do_checkpoint_iter(prefix, checkpoint_iter): 95 | def _callback(param): 96 | if checkpoint_iter == param.locals["total_iter"]: 97 | arg_params, aux_params = param.locals["self"].get_params() 98 | save_dict = {('arg:%s' % k) : v.as_in_context(mx.cpu()) for k, v in arg_params.items()} 99 | save_dict.update({('aux:%s' % k) : v.as_in_context(mx.cpu()) for k, v in aux_params.items()}) 100 | param_name = '%s-iter-%s.params' % (prefix, checkpoint_iter) 101 | mx.nd.save(param_name, save_dict) 102 | logging.info('Saved checkpoint to \"%s\"', param_name) 103 | return _callback 104 | -------------------------------------------------------------------------------- /models/dcn/builder.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnext as X 3 | from mxnext import conv, relu, add 4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit 5 | from symbol.builder import Backbone 6 | 7 | 8 | def dcn_resnet_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 9 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 10 | bn1 = norm(conv1, name=name + "_bn1") 11 | relu1 = relu(bn1, name=name + "_relu1") 12 | 13 | # conv2 filter router 14 | conv2_offset = conv(relu1, name=name + "_conv2_offset", filter=72, kernel=3, stride=stride, dilate=dilate) 15 | conv2 = mx.sym.contrib.DeformableConvolution(relu1, conv2_offset, kernel=(3, 3), 16 | stride=(stride, stride), dilate=(dilate, dilate), pad=(1, 1), num_filter=filter // 4, 17 | num_deformable_group=4, no_bias=True, name=name + "_conv2") 18 | bn2 = norm(conv2, name=name + "_bn2") 19 | relu2 = relu(bn2, name=name + "_relu2") 20 | 21 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 22 | bn3 = norm(conv3, name=name + "_bn3") 23 | 24 | if proj: 25 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 26 | shortcut = norm(shortcut, name=name + "_sc_bn") 27 | else: 28 | shortcut = input 29 | 30 | eltwise = add(bn3, shortcut, name=name + "_plus") 31 | 32 | return relu(eltwise, name=name + "_relu") 33 | 34 | 35 | def hybrid_resnet_stage(data, name, num_block, num_special_block, special_res_unit, filter, 36 | stride, dilate, norm, **kwargs): 37 | s, d = stride, dilate 38 | 39 | for i in range(1, num_block + 1 - num_special_block): 40 | proj = True if i == 1 else False 41 | s = stride if i == 1 else 1 42 | d = dilate 43 | data = resnet_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm) 44 | 45 | for i in range(num_block + 1 - num_special_block, num_block + 1): 46 | proj = True if i == 1 else False 47 | s = stride if i == 1 else 1 48 | d = dilate 49 | data = special_res_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm, **kwargs) 50 | 51 | return data 52 | 53 | 54 | def hybrid_resnet_c4_builder(special_resnet_unit): 55 | class ResNetC4(Backbone): 56 | def __init__(self, pBackbone): 57 | super().__init__(pBackbone) 58 | p = self.p 59 | 60 | import mxnext.backbone.resnet_v1b_helper as helper 61 | num_c2, num_c3, num_c4, _ = helper.depth_config[p.depth] 62 | 63 | data = X.var("data") 64 | if p.fp16: 65 | data = data.astype("float16") 66 | c1 = helper.resnet_c1(data, p.normalizer) 67 | c2 = helper.resnet_c2(c1, num_c2, 1, 1, p.normalizer) 68 | c3 = hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1, 69 | p.normalizer, params=p) 70 | c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1, 71 | p.normalizer, params=p) 72 | 73 | self.symbol = c4 74 | 75 | def get_rpn_feature(self): 76 | return self.symbol 77 | 78 | def get_rcnn_feature(self): 79 | return self.symbol 80 | 81 | return ResNetC4 82 | 83 | 84 | def hybrid_resnet_fpn_builder(special_resnet_unit): 85 | class ResNetFPN(Backbone): 86 | def __init__(self, pBackbone): 87 | super().__init__(pBackbone) 88 | p = self.p 89 | 90 | import mxnext.backbone.resnet_v1b_helper as helper 91 | num_c2, num_c3, num_c4, num_c5 = helper.depth_config[p.depth] 92 | 93 | data = X.var("data") 94 | if p.fp16: 95 | data = data.astype("float16") 96 | c1 = helper.resnet_c1(data, p.normalizer) 97 | c2 = helper.resnet_c2(c1, num_c2, 1, 1, p.normalizer) 98 | c3 = hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1, 99 | p.normalizer, params=p) 100 | c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1, 101 | p.normalizer, params=p) 102 | c5 = hybrid_resnet_stage(c4, "stage4", num_c5, p.num_c5_block or 0, special_resnet_unit, 2048, 2, 1, 103 | p.normalizer, params=p) 104 | 105 | self.symbol = (c2, c3, c4, c5) 106 | 107 | def get_rpn_feature(self): 108 | return self.symbol 109 | 110 | def get_rcnn_feature(self): 111 | return self.symbol 112 | 113 | return ResNetFPN 114 | 115 | 116 | DCNResNetC4 = hybrid_resnet_c4_builder(dcn_resnet_unit) 117 | -------------------------------------------------------------------------------- /operator_cxx/contrib/generate_anchor.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file generate_anchor.cc 22 | * \brief 23 | * \author Yanghao Li, Chenxia Han 24 | */ 25 | 26 | #include "./generate_anchor-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template 32 | class GenAnchorOp : public Operator{ 33 | public: 34 | explicit GenAnchorOp(GenAnchorParam param) { 35 | this->param_ = param; 36 | } 37 | 38 | virtual void Forward(const OpContext &ctx, 39 | const std::vector &in_data, 40 | const std::vector &req, 41 | const std::vector &out_data, 42 | const std::vector &aux_states) { 43 | using namespace mshadow; 44 | using namespace mshadow::expr; 45 | CHECK_EQ(in_data.size(), 1); 46 | CHECK_EQ(out_data.size(), 1); 47 | CHECK_EQ(req.size(), 1); 48 | CHECK_EQ(req[gen_anchor::kOut], kWriteTo); 49 | 50 | Stream *s = ctx.get_stream(); 51 | Tensor scores = in_data[gen_anchor::kClsProb].get(s); 52 | 53 | Tensor out = out_data[gen_anchor::kOut].get(s); 54 | 55 | std::vector scales(param_.scales.begin(), param_.scales.end()); 56 | std::vector ratios(param_.ratios.begin(), param_.ratios.end()); 57 | 58 | int num_anchors = scales.size() * ratios.size(); 59 | int height = scores.size(2); 60 | int width = scores.size(3); 61 | 62 | // Generate anchors 63 | std::vector base_anchor({ 64 | 0.0f, 0.0f, param_.feature_stride - 1.0f, param_.feature_stride - 1.0f 65 | }); 66 | std::vector anchors; 67 | gen_anchor_utils::GenerateAnchors( 68 | base_anchor, ratios, scales, anchors 69 | ); 70 | 71 | // Enumerate all shifted anchors 72 | for (index_t i = 0; i < num_anchors; ++i) { 73 | for (index_t j = 0; j < height; ++j) { 74 | for (index_t k = 0; k < width; ++k) { 75 | index_t index = j * (width * num_anchors) + k * (num_anchors) + i; 76 | out[index][0] = static_cast(anchors[i * 4 + 0] + k * param_.feature_stride); 77 | out[index][1] = static_cast(anchors[i * 4 + 1] + j * param_.feature_stride); 78 | out[index][2] = static_cast(anchors[i * 4 + 2] + k * param_.feature_stride); 79 | out[index][3] = static_cast(anchors[i * 4 + 3] + j * param_.feature_stride); 80 | } 81 | } 82 | } 83 | } 84 | 85 | virtual void Backward(const OpContext &ctx, 86 | const std::vector &out_grad, 87 | const std::vector &in_data, 88 | const std::vector &out_data, 89 | const std::vector &req, 90 | const std::vector &in_grad, 91 | const std::vector &aux_states) { 92 | using namespace mshadow; 93 | using namespace mshadow::expr; 94 | CHECK_EQ(in_grad.size(), 1); 95 | 96 | Stream *s = ctx.get_stream(); 97 | Tensor gscores = in_grad[gen_anchor::kClsProb].get(s); 98 | 99 | // can not assume the grad would be zero 100 | Assign(gscores, req[gen_anchor::kClsProb], 0); 101 | } 102 | 103 | private: 104 | GenAnchorParam param_; 105 | }; // class GenAnchorOp 106 | 107 | template<> 108 | Operator *CreateOp(GenAnchorParam param) { 109 | return new GenAnchorOp(param); 110 | } 111 | 112 | Operator* GenAnchorProp::CreateOperator(Context ctx) const { 113 | DO_BIND_DISPATCH(CreateOp, param_); 114 | } 115 | 116 | DMLC_REGISTER_PARAMETER(GenAnchorParam); 117 | 118 | MXNET_REGISTER_OP_PROPERTY(_contrib_GenAnchor, GenAnchorProp) 119 | .describe("Generate region anchors") 120 | .add_argument("cls_prob", "NDArray-or-Symbol", "Probability of how likely proposal is object.") 121 | .add_arguments(GenAnchorParam::__FIELDS__()); 122 | 123 | } // namespace op 124 | } // namespace mxnet 125 | -------------------------------------------------------------------------------- /operator_cxx/contrib/decodebbox-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file decodebbox-inl.h 22 | * \brief DecodeBBox Operator 23 | * \author Ziyang Zhou, Chenxia Han 24 | */ 25 | #ifndef MXNET_OPERATOR_CONTRIB_DECODEBBOX_INL_H_ 26 | #define MXNET_OPERATOR_CONTRIB_DECODEBBOX_INL_H_ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "../operator_common.h" 39 | #include "../mshadow_op.h" 40 | 41 | namespace mxnet { 42 | namespace op { 43 | 44 | namespace decodebbox { 45 | enum DecodeBBoxOpInputs {kRois, kBBoxPred, kImInfo}; 46 | enum DecodeBBoxOpOutputs {kOut}; 47 | } // decodebbox 48 | 49 | struct DecodeBBoxParam : public dmlc::Parameter { 50 | nnvm::Tuple bbox_mean; 51 | nnvm::Tuple bbox_std; 52 | bool class_agnostic; 53 | 54 | DMLC_DECLARE_PARAMETER(DecodeBBoxParam) { 55 | float tmp[] = {0.f, 0.f, 0.f, 0.f}; 56 | DMLC_DECLARE_FIELD(bbox_mean).set_default(nnvm::Tuple(tmp, tmp+4)).describe("Bounding box mean"); 57 | tmp[0] = 0.1f; tmp[1] = 0.1f; tmp[2] = 0.2f; tmp[3] = 0.2f; 58 | DMLC_DECLARE_FIELD(bbox_std).set_default(nnvm::Tuple(tmp, tmp+4)).describe("Bounding box std"); 59 | DMLC_DECLARE_FIELD(class_agnostic).set_default(true) 60 | .describe("Whether use class agnostic"); 61 | } 62 | }; 63 | 64 | template 65 | Operator *CreateOp(DecodeBBoxParam param); 66 | 67 | #if DMLC_USE_CXX11 68 | class DecodeBBoxProp : public OperatorProperty { 69 | public: 70 | void Init(const std::vector >& kwargs) override { 71 | param_.Init(kwargs); 72 | } 73 | 74 | std::map GetParams() const override { 75 | return param_.__DICT__(); 76 | } 77 | 78 | bool InferShape(std::vector *in_shape, 79 | std::vector *out_shape, 80 | std::vector *aux_shape) const override { 81 | using namespace mshadow; 82 | CHECK_EQ(in_shape->size(), 3) << "Input:[rois, bbox_pred, im_info]"; 83 | const TShape &dshape = in_shape->at(decodebbox::kBBoxPred); 84 | 85 | const bool class_agnostic = param_.class_agnostic; 86 | TShape bbox_shape; 87 | if (class_agnostic) { 88 | const int nbatch = dshape[0]; 89 | const int nrois = dshape[1]; 90 | bbox_shape = Shape3(nbatch, nrois, 4); 91 | } else { 92 | bbox_shape = dshape; 93 | } 94 | 95 | out_shape->clear(); 96 | aux_shape->clear(); 97 | out_shape->push_back(bbox_shape); 98 | 99 | return true; 100 | } 101 | 102 | OperatorProperty* Copy() const override { 103 | auto ptr = new DecodeBBoxProp(); 104 | ptr->param_ = param_; 105 | return ptr; 106 | } 107 | 108 | std::string TypeString() const override { 109 | return "_contrib_DecodeBBox"; 110 | } 111 | 112 | int NumOutputs() const override { 113 | return 1; 114 | } 115 | 116 | std::vector ListArguments() const override { 117 | return {"rois", "bbox_pred", "im_info"}; 118 | } 119 | 120 | std::vector ListOutputs() const override { 121 | return {"output"}; 122 | } 123 | 124 | std::vector DeclareBackwardDependency( 125 | const std::vector &out_grad, 126 | const std::vector &in_data, 127 | const std::vector &out_data) const override { 128 | return {}; 129 | } 130 | 131 | // Operator* CreateOperator(Context ctx) const override; 132 | Operator *CreateOperator(Context ctx) const override { 133 | LOG(FATAL) << "Not Implemented."; 134 | return NULL; 135 | } 136 | 137 | Operator *CreateOperatorEx(Context ctx, std::vector *in_shape, 138 | std::vector *in_type) const override; 139 | 140 | private: 141 | DecodeBBoxParam param_; 142 | }; // class DecodeBBoxProp 143 | 144 | #endif // DMLC_USE_CXX11 145 | } // namespace op 146 | } // namespace mxnet 147 | 148 | #endif // MXNET_OPERATOR_CONTRIB_DECODEBBOX_INL_H_ 149 | -------------------------------------------------------------------------------- /operator_cxx/contrib/nms-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file nms-inl.h 22 | * \brief NMS Operator 23 | * \author Yanghao Li 24 | */ 25 | #ifndef MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 26 | #define MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "../operator_common.h" 39 | #include "../mshadow_op.h" 40 | 41 | namespace mxnet { 42 | namespace op { 43 | 44 | namespace nms { 45 | enum NMSOpInputs {kBBox}; 46 | enum NMSOpOutputs {kOut, kScore}; 47 | enum NMSForwardResource {kTempSpace}; 48 | } // nms 49 | 50 | struct NMSParam : public dmlc::Parameter { 51 | int rpn_pre_nms_top_n; 52 | int rpn_post_nms_top_n; 53 | float threshold; 54 | bool output_score; 55 | bool already_sorted; 56 | uint64_t workspace; 57 | 58 | DMLC_DECLARE_PARAMETER(NMSParam) { 59 | DMLC_DECLARE_FIELD(rpn_pre_nms_top_n).set_default(6000) 60 | .describe("Number of top scoring boxes to keep before applying NMS to RPN proposals"); 61 | DMLC_DECLARE_FIELD(rpn_post_nms_top_n).set_default(300) 62 | .describe("Number of top scoring boxes to keep after applying NMS to RPN proposals"); 63 | DMLC_DECLARE_FIELD(threshold).set_default(0.7) 64 | .describe("NMS value, below which to suppress."); 65 | DMLC_DECLARE_FIELD(output_score).set_default(false) 66 | .describe("Add score to outputs"); 67 | DMLC_DECLARE_FIELD(already_sorted).set_default(false) 68 | .describe("if input rois have been sorted by confidence"); 69 | DMLC_DECLARE_FIELD(workspace).set_default(256) 70 | .describe("Workspace for NMS in MB, default to 256"); 71 | } 72 | }; 73 | 74 | template 75 | Operator *CreateOp(NMSParam param); 76 | 77 | #if DMLC_USE_CXX11 78 | class NMSProp : public OperatorProperty { 79 | public: 80 | void Init(const std::vector >& kwargs) override { 81 | param_.Init(kwargs); 82 | } 83 | 84 | std::map GetParams() const override { 85 | return param_.__DICT__(); 86 | } 87 | 88 | bool InferShape(std::vector *in_shape, 89 | std::vector *out_shape, 90 | std::vector *aux_shape) const override { 91 | using namespace mshadow; 92 | CHECK_EQ(in_shape->size(), 1) << "Input:[bbox]"; 93 | const TShape &dshape = in_shape->at(nms::kBBox); 94 | if (dshape.ndim() == 0) return false; 95 | out_shape->clear(); 96 | // output 97 | out_shape->push_back(Shape3(dshape[0], param_.rpn_post_nms_top_n, 4)); 98 | // score 99 | out_shape->push_back(Shape3(dshape[0], param_.rpn_post_nms_top_n, 1)); 100 | return true; 101 | } 102 | 103 | OperatorProperty* Copy() const override { 104 | auto ptr = new NMSProp(); 105 | ptr->param_ = param_; 106 | return ptr; 107 | } 108 | 109 | std::string TypeString() const override { 110 | return "_contrib_NMS"; 111 | } 112 | 113 | std::vector ForwardResource( 114 | const std::vector &in_shape) const override { 115 | return {ResourceRequest::kTempSpace}; 116 | } 117 | 118 | std::vector DeclareBackwardDependency( 119 | const std::vector &out_grad, 120 | const std::vector &in_data, 121 | const std::vector &out_data) const override { 122 | return {}; 123 | } 124 | 125 | int NumVisibleOutputs() const override { 126 | if (param_.output_score) { 127 | return 2; 128 | } else { 129 | return 1; 130 | } 131 | } 132 | 133 | int NumOutputs() const override { 134 | return 2; 135 | } 136 | 137 | std::vector ListArguments() const override { 138 | return {"rois"}; 139 | } 140 | 141 | std::vector ListOutputs() const override { 142 | return {"output", "score"}; 143 | } 144 | 145 | Operator* CreateOperator(Context ctx) const override; 146 | 147 | private: 148 | NMSParam param_; 149 | }; // class NMSProp 150 | 151 | #endif // DMLC_USE_CXX11 152 | } // namespace op 153 | } // namespace mxnet 154 | 155 | #endif // MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 156 | -------------------------------------------------------------------------------- /models/maskrcnn/bbox_post_processing.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | from operator_py.nms import py_nms_wrapper 5 | 6 | def multiclass_nms(nms, cls_score, bbox_xyxy, min_det_score, max_det_per_image): 7 | # remove background 8 | cls_score = cls_score[:, 1:] 9 | # TODO: the output shape of class_agnostic box is [n, 4], while class_aware box is [n, 4 * (1 + class)] 10 | bbox_xyxy = bbox_xyxy[:, 4:] if bbox_xyxy.shape[1] != 4 else bbox_xyxy 11 | num_class = cls_score.shape[1] 12 | 13 | cls_det = [np.empty((0, 6), dtype=np.float32) for _ in range(num_class)] # [x1, y1, x2, y2, score, cls] 14 | 15 | for cid in range(num_class): 16 | score = cls_score[:, cid] 17 | if bbox_xyxy.shape[1] != 4: 18 | _bbox_xyxy = bbox_xyxy[:, cid * 4:(cid + 1) * 4] 19 | else: 20 | _bbox_xyxy = bbox_xyxy 21 | valid_inds = np.where(score > min_det_score)[0] 22 | box = _bbox_xyxy[valid_inds] 23 | score = score[valid_inds] 24 | det = np.concatenate((box, score.reshape(-1, 1)), axis=1).astype(np.float32) 25 | det = nms(det) 26 | cls = np.full((det.shape[0], 1), cid, dtype=np.float32) 27 | cls_det[cid] = np.hstack((det, cls)) 28 | 29 | cls_det = np.vstack([det for det in cls_det]) 30 | scores = cls_det[:, -2] 31 | top_index = np.argsort(scores)[::-1][:max_det_per_image] 32 | return cls_det[top_index] 33 | 34 | 35 | class BboxPostProcessingOperator(mx.operator.CustomOp): 36 | def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr): 37 | super().__init__() 38 | self.max_det_per_image = max_det_per_image 39 | self.min_det_score = min_det_score 40 | self.nms_type = nms_type 41 | self.nms_thr = nms_thr 42 | 43 | def forward(self, is_train, req, in_data, out_data, aux): 44 | if self.nms_type == 'nms': 45 | nms = py_nms_wrapper(self.nms_thr) 46 | else: 47 | raise NotImplementedError 48 | 49 | cls_score = in_data[0].asnumpy() 50 | bbox_xyxy = in_data[1].asnumpy() 51 | 52 | cls_score_shape = cls_score.shape # (b, n, num_class_withbg) 53 | bbox_xyxy_shape = bbox_xyxy.shape # (b, n, 4) or (b, n, 4 * num_class_withbg) 54 | batch_image = cls_score_shape[0] 55 | num_bbox = cls_score_shape[1] 56 | num_class_withbg = cls_score_shape[2] 57 | 58 | post_score = np.zeros((batch_image, self.max_det_per_image, 1), dtype=np.float32) 59 | post_bbox_xyxy = np.zeros((batch_image, self.max_det_per_image, 4), dtype=np.float32) 60 | post_cls = np.full((batch_image, self.max_det_per_image, 1), -1, dtype=np.float32) 61 | 62 | for i, (per_image_cls_score, per_image_bbox_xyxy) in enumerate(zip(cls_score, bbox_xyxy)): 63 | cls_det = multiclass_nms(nms, per_image_cls_score, per_image_bbox_xyxy, \ 64 | self.min_det_score, self.max_det_per_image) 65 | num_det = cls_det.shape[0] 66 | post_bbox_xyxy[i, :num_det] = cls_det[:, :4] 67 | post_score[i, :num_det] = cls_det[:, -2][:, np.newaxis] # convert to (n, 1) 68 | post_cls[i, :num_det] = cls_det[:, -1][:, np.newaxis] # convert to (n, 1) 69 | 70 | self.assign(out_data[0], req[0], post_score) 71 | self.assign(out_data[1], req[1], post_bbox_xyxy) 72 | self.assign(out_data[2], req[2], post_cls) 73 | 74 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 75 | self.assign(in_grad[0], req[0], 0) 76 | self.assign(in_grad[1], req[1], 0) 77 | 78 | 79 | @mx.operator.register("BboxPostProcessing") 80 | class BboxPostProcessingProp(mx.operator.CustomOpProp): 81 | def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr): 82 | super().__init__(need_top_grad=False) 83 | self.max_det_per_image = int(max_det_per_image) 84 | self.min_det_score = float(min_det_score) 85 | self.nms_type = str(nms_type) 86 | self.nms_thr = float(nms_thr) 87 | 88 | def list_arguments(self): 89 | return ['cls_score', 'bbox_xyxy'] 90 | 91 | def list_outputs(self): 92 | return ['post_score', 'post_bbox_xyxy', 'post_cls'] 93 | 94 | def infer_shape(self, in_shape): 95 | cls_score_shape = in_shape[0] # (b, n, num_class_withbg) 96 | bbox_xyxy_shape = in_shape[1] # (b, n, 4) or (b, n, 4 * num_class_withbg) 97 | 98 | batch_image = cls_score_shape[0] 99 | 100 | post_score_shape = (batch_image, self.max_det_per_image, 1) 101 | post_bbox_xyxy_shape = (batch_image, self.max_det_per_image, 4) 102 | post_cls_shape = (batch_image, self.max_det_per_image, 1) 103 | 104 | return [cls_score_shape, bbox_xyxy_shape], \ 105 | [post_score_shape, post_bbox_xyxy_shape, post_cls_shape] 106 | 107 | def create_operator(self, ctx, shapes, dtypes): 108 | return BboxPostProcessingOperator(self.max_det_per_image, self.min_det_score, self.nms_type, self.nms_thr) 109 | 110 | def declare_backward_dependency(self, out_grad, in_data, out_data): 111 | return [] 112 | -------------------------------------------------------------------------------- /operator_cxx/contrib/global_average_pooling.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file global_average_pooling.cu 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Chenxia Han 25 | */ 26 | #include 27 | #include 28 | #include "../mxnet_op.h" 29 | #include "../../common/cuda_utils.h" 30 | #include "./global_average_pooling-inl.h" 31 | 32 | #define GAP_CUDA_CHECK(condition) \ 33 | /* Code block avoids redefinition of cudaError_t error */ \ 34 | do { \ 35 | cudaError_t error = condition; \ 36 | CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 37 | } while (0) 38 | #define CUDA_KERNEL_LOOP(i, n) \ 39 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 40 | i < (n); \ 41 | i += blockDim.x * gridDim.x) 42 | 43 | constexpr int CAFFE_CUDA_NUM_THREADS = 512; 44 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096; 45 | 46 | inline int CAFFE_GET_BLOCKS(const int N) { 47 | return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS, 48 | CAFFE_MAXIMUM_NUM_BLOCKS); 49 | } 50 | 51 | namespace mshadow { 52 | namespace cuda { 53 | 54 | template 55 | __global__ void GlobalAvePoolForwardKernel(const int spatial_dim, 56 | const Dtype* bottom_data, Dtype* top_data) { 57 | __shared__ Dtype buffer[CAFFE_CUDA_NUM_THREADS]; 58 | unsigned int tid = threadIdx.x; 59 | buffer[tid] = 0; 60 | __syncthreads(); 61 | 62 | for (int j = tid; j < spatial_dim; j += blockDim.x) { 63 | buffer[tid] += bottom_data[blockIdx.x * spatial_dim + j]; 64 | } 65 | __syncthreads(); 66 | 67 | for (int i = blockDim.x / 2; i > 0; i >>= 1) { 68 | if (tid < i) { 69 | buffer[threadIdx.x] += buffer[threadIdx.x + i]; 70 | } 71 | __syncthreads(); 72 | } 73 | 74 | if (tid == 0) { 75 | top_data[blockIdx.x] = buffer[0] / spatial_dim; 76 | } 77 | } 78 | 79 | template 80 | inline void GAPForward(const Tensor &out, 81 | const Tensor &data) { 82 | const DType *bottom_data = data.dptr_; 83 | DType *top_data = out.dptr_; 84 | const int nblocks = data.shape_.ProdShape(0, 2); 85 | const int spatial_dim = data.shape_.ProdShape(2, 4); 86 | cudaStream_t stream = Stream::GetStream(out.stream_); 87 | GlobalAvePoolForwardKernel << > >(spatial_dim, bottom_data, top_data); 89 | GAP_CUDA_CHECK(cudaPeekAtLastError()); 90 | } 91 | 92 | template 93 | __global__ void GlobalAvePoolBackwardKernel(const int nthreads, const int spatial_dim, 94 | const Dtype* top_diff, Dtype* bottom_diff) { 95 | CUDA_KERNEL_LOOP(index, nthreads) { 96 | const int n = index / spatial_dim; 97 | bottom_diff[index] = top_diff[n] / spatial_dim; 98 | } 99 | } 100 | 101 | template 102 | inline void GAPBackward(const Tensor &in_grad, 103 | const Tensor &out_grad) { 104 | const DType *top_diff = out_grad.dptr_; 105 | DType *bottom_diff = in_grad.dptr_; 106 | const int count = in_grad.shape_.Size(); 107 | const int spatial_dim = in_grad.shape_.ProdShape(2, 4); 108 | cudaStream_t stream = Stream::GetStream(in_grad.stream_); 109 | GlobalAvePoolBackwardKernel << > >(count, spatial_dim, top_diff, bottom_diff); 111 | GAP_CUDA_CHECK(cudaPeekAtLastError()); 112 | } 113 | 114 | } // namespace cuda 115 | 116 | template 117 | inline void GAPForward(const Tensor &out, 118 | const Tensor &data) { 119 | cuda::GAPForward(out, data); 120 | } 121 | 122 | template 123 | inline void GAPBackward(const Tensor &in_grad, 124 | const Tensor &out_grad) { 125 | cuda::GAPBackward(in_grad, out_grad); 126 | } 127 | 128 | } // namespace mshadow 129 | 130 | namespace mxnet { 131 | namespace op { 132 | template<> 133 | Operator *CreateOp(GAPParam param, int dtype) { 134 | Operator *op = NULL; 135 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 136 | op = new GAPOp(param); 137 | }); 138 | return op; 139 | } 140 | 141 | } // namespace op 142 | } // namespace mxnet 143 | 144 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_batch_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Hang Zhang 24 | */ 25 | 26 | #include "sync_batch_norm-inl.h" 27 | #include 28 | 29 | namespace mxnet { 30 | namespace op { 31 | template<> 32 | Operator *CreateOp(SyncBatchNormParam param, int dtype) { 33 | return new SyncBatchNorm(param); 34 | } 35 | 36 | // DO_BIND_DISPATCH comes from operator_common.h 37 | Operator *SyncBatchNormProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 38 | std::vector *in_type) const { 39 | std::vector out_shape, aux_shape; 40 | std::vector out_type, aux_type; 41 | CHECK(InferType(in_type, &out_type, &aux_type)); 42 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 43 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 44 | } 45 | 46 | DMLC_REGISTER_PARAMETER(SyncBatchNormParam); 47 | 48 | MXNET_REGISTER_OP_PROPERTY(_contrib_SyncBatchNorm, SyncBatchNormProp) 49 | .describe(R"code(Batch normalization. 50 | 51 | Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as 52 | well as offset ``beta``. 53 | Standard BN [1]_ implementation only normalize the data within each device. 54 | SyncBN normalizes the input within the whole mini-batch. 55 | We follow the sync-onece implmentation described in the paper [2]_ . 56 | 57 | Assume the input has more than one dimension and we normalize along axis 1. 58 | We first compute the mean and variance along this axis: 59 | 60 | .. math:: 61 | 62 | data\_mean[i] = mean(data[:,i,:,...]) \\ 63 | data\_var[i] = var(data[:,i,:,...]) 64 | 65 | Then compute the normalized output, which has the same shape as input, as following: 66 | 67 | .. math:: 68 | 69 | out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i] 70 | 71 | Both *mean* and *var* returns a scalar by treating the input as a vector. 72 | 73 | Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta`` 74 | have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and 75 | ``data_var`` as well, which are needed for the backward pass. 76 | 77 | Besides the inputs and the outputs, this operator accepts two auxiliary 78 | states, ``moving_mean`` and ``moving_var``, which are *k*-length 79 | vectors. They are global statistics for the whole dataset, which are updated 80 | by:: 81 | 82 | moving_mean = moving_mean * momentum + data_mean * (1 - momentum) 83 | moving_var = moving_var * momentum + data_var * (1 - momentum) 84 | 85 | If ``use_global_stats`` is set to be true, then ``moving_mean`` and 86 | ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute 87 | the output. It is often used during inference. 88 | 89 | Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true, 90 | then set ``gamma`` to 1 and its gradient to 0. 91 | 92 | Reference: 93 | .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating 94 | deep network training by reducing internal covariate shift." *ICML 2015* 95 | .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, 96 | Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018* 97 | )code" ADD_FILELINE) 98 | .add_argument("data", "NDArray-or-Symbol", "Input data to batch normalization") 99 | .add_argument("gamma", "NDArray-or-Symbol", "gamma array") 100 | .add_argument("beta", "NDArray-or-Symbol", "beta array") 101 | .add_argument("moving_mean", "NDArray-or-Symbol", "running mean of input") 102 | .add_argument("moving_var", "NDArray-or-Symbol", "running variance of input") 103 | .add_arguments(SyncBatchNormParam::__FIELDS__()); 104 | 105 | NNVM_REGISTER_OP(_contrib_SyncBatchNorm) 106 | .set_attr("FSetInputVarAttrOnCompose", 107 | [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) { 108 | if (var->attrs.dict.find("__init__") != var->attrs.dict.end()) return; 109 | if (index == 3) { 110 | var->attrs.dict["__init__"] = "[\"zero\", {}]"; 111 | } else if (index == 4) { 112 | var->attrs.dict["__init__"] = "[\"one\", {}]"; 113 | } 114 | }); 115 | 116 | } // namespace op 117 | } // namespace mxnet 118 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_inplace_activation_batch_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_inplace_activation_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "sync_inplace_activation_batch_norm-inl.h" 27 | #include 28 | 29 | namespace mxnet { 30 | namespace op { 31 | template<> 32 | Operator *CreateOp(SyncInplaceABNParam param, int dtype) { 33 | return new SyncInplaceABN(param); 34 | } 35 | 36 | // DO_BIND_DISPATCH comes from operator_common.h 37 | Operator *SyncInplaceABNProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 38 | std::vector *in_type) const { 39 | std::vector out_shape, aux_shape; 40 | std::vector out_type, aux_type; 41 | CHECK(InferType(in_type, &out_type, &aux_type)); 42 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 43 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 44 | } 45 | 46 | DMLC_REGISTER_PARAMETER(SyncInplaceABNParam); 47 | 48 | MXNET_REGISTER_OP_PROPERTY(_contrib_SyncInplaceABN, SyncInplaceABNProp) 49 | .describe(R"code(Batch normalization. 50 | 51 | Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as 52 | well as offset ``beta``. 53 | Standard BN [1]_ implementation only normalize the data within each device. 54 | SyncBN normalizes the input within the whole mini-batch. 55 | We follow the sync-onece implmentation described in the paper [2]_ . 56 | 57 | Assume the input has more than one dimension and we normalize along axis 1. 58 | We first compute the mean and variance along this axis: 59 | 60 | .. math:: 61 | 62 | data\_mean[i] = mean(data[:,i,:,...]) \\ 63 | data\_var[i] = var(data[:,i,:,...]) 64 | 65 | Then compute the normalized output, which has the same shape as input, as following: 66 | 67 | .. math:: 68 | 69 | out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i] 70 | 71 | Both *mean* and *var* returns a scalar by treating the input as a vector. 72 | 73 | Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta`` 74 | have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and 75 | ``data_var`` as well, which are needed for the backward pass. 76 | 77 | Besides the inputs and the outputs, this operator accepts two auxiliary 78 | states, ``moving_mean`` and ``moving_var``, which are *k*-length 79 | vectors. They are global statistics for the whole dataset, which are updated 80 | by:: 81 | 82 | moving_mean = moving_mean * momentum + data_mean * (1 - momentum) 83 | moving_var = moving_var * momentum + data_var * (1 - momentum) 84 | 85 | If ``use_global_stats`` is set to be true, then ``moving_mean`` and 86 | ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute 87 | the output. It is often used during inference. 88 | 89 | Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true, 90 | then set ``gamma`` to 1 and its gradient to 0. 91 | 92 | Reference: 93 | .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating 94 | deep network training by reducing internal covariate shift." *ICML 2015* 95 | .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, 96 | Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018* 97 | )code" ADD_FILELINE) 98 | .add_argument("data", "NDArray-or-Symbol", "Input data to batch normalization") 99 | .add_argument("gamma", "NDArray-or-Symbol", "gamma array") 100 | .add_argument("beta", "NDArray-or-Symbol", "beta array") 101 | .add_argument("moving_mean", "NDArray-or-Symbol", "running mean of input") 102 | .add_argument("moving_var", "NDArray-or-Symbol", "running variance of input") 103 | .add_arguments(SyncInplaceABNParam::__FIELDS__()); 104 | 105 | NNVM_REGISTER_OP(_contrib_SyncInplaceABN) 106 | .set_attr("FSetInputVarAttrOnCompose", 107 | [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) { 108 | if (var->attrs.dict.find("__init__") != var->attrs.dict.end()) return; 109 | if (index == 3) { 110 | var->attrs.dict["__init__"] = "[\"zero\", {}]"; 111 | } else if (index == 4) { 112 | var->attrs.dict["__init__"] = "[\"one\", {}]"; 113 | } 114 | }); 115 | 116 | } // namespace op 117 | } // namespace mxnet 118 | -------------------------------------------------------------------------------- /models/tridentnet/input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_input import DetectionAugmentation, AnchorTarget2D 5 | from operator_py.cython.bbox import bbox_overlaps_cython 6 | 7 | 8 | class ScaleAwareRange(DetectionAugmentation): 9 | def __init__(self, pScaleRange): 10 | super().__init__() 11 | self.p = pScaleRange 12 | 13 | def apply(self, input_record): 14 | p = self.p 15 | 16 | im_info = input_record['im_info'] 17 | 18 | # input_record["valid_ranges_on_origin"] = p.cal_on_origin 19 | input_record["valid_ranges"] = np.array(p.valid_ranges, dtype=np.float32).reshape(-1, 2) 20 | if p.cal_on_origin: 21 | input_record["valid_ranges"] *= im_info[2] 22 | # replace -1 with max_size 23 | inds = np.where(input_record["valid_ranges"][:, 1] < 0)[0] 24 | input_record["valid_ranges"][inds, 1] = max(im_info[0], im_info[1]) 25 | 26 | 27 | class TridentAnchorTarget2D(AnchorTarget2D): 28 | """ 29 | input: image_meta: tuple(h, w, scale) 30 | gt_bbox, ndarry(max_num_gt, 4) 31 | output: anchor_label, ndarray(num_branch, num_anchor * 2, h, w) 32 | anchor_bbox_target, ndarray(num_branch, num_anchor * 4, h, w) 33 | anchor_bbox_weight, ndarray(num_branch, num_anchor * 4, h, w) 34 | valid_ranges, ndarray(num_branch, 2) 35 | """ 36 | 37 | def __init__(self, pAnchor): 38 | super().__init__(pAnchor) 39 | 40 | def _filter_anchor_by_scale_range(self, cls_label, valid_anchor, gt_bbox, valid_range, invalid_anchor_threshd): 41 | if len(gt_bbox) == 0: 42 | return 43 | gt_bbox_sizes = (gt_bbox[:, 2] - gt_bbox[:, 0] + 1.0) * (gt_bbox[:, 3] - gt_bbox[:, 1] + 1.0) 44 | invalid_gt_bbox_inds = np.where((gt_bbox_sizes < valid_range[0]**2) | (gt_bbox_sizes > valid_range[1]**2))[0] 45 | invalid_gt_bbox = gt_bbox[invalid_gt_bbox_inds] 46 | if len(invalid_gt_bbox) > 0: 47 | invalid_overlaps = bbox_overlaps_cython( 48 | valid_anchor.astype(np.float32, copy=False), invalid_gt_bbox.astype(np.float32, copy=False)) 49 | invalid_argmax_overlaps = invalid_overlaps.argmax(axis=1) 50 | invalid_max_overlaps = invalid_overlaps[np.arange(len(valid_anchor)), invalid_argmax_overlaps] 51 | 52 | # ignore anchors overlapped with invalid gt boxes 53 | disable_inds = np.where((invalid_max_overlaps > invalid_anchor_threshd))[0] 54 | cls_label[disable_inds] = -1 55 | 56 | def apply(self, input_record): 57 | p = self.p 58 | 59 | im_info = input_record["im_info"] 60 | gt_bbox = input_record["gt_bbox"] 61 | valid_ranges = input_record["valid_ranges"] 62 | assert isinstance(gt_bbox, np.ndarray) 63 | assert gt_bbox.dtype == np.float32 64 | assert gt_bbox.shape[1] == 5 65 | valid = np.where(gt_bbox[:, 0] != -1)[0] 66 | gt_bbox = gt_bbox[valid] 67 | gt_class = gt_bbox[:, -1].copy() 68 | gt_bbox = gt_bbox[:, :4].copy() 69 | 70 | h, w = im_info[:2] 71 | if h >= w: 72 | fh, fw = p.generate.long, p.generate.short 73 | else: 74 | fh, fw = p.generate.short, p.generate.long 75 | 76 | valid_cls_label = None 77 | valid_anchor_label = None 78 | valid_index, valid_anchor = self._gather_valid_anchor(im_info) 79 | if p.generate.use_groupsoftmax: 80 | gt_class = p.gtclass2rpn(gt_class) 81 | valid_cls_label, valid_anchor_label = \ 82 | self._assign_label_to_anchor_group(valid_anchor, gt_bbox, gt_class, 83 | p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr) 84 | else: 85 | valid_cls_label, valid_anchor_label = \ 86 | self._assign_label_to_anchor(valid_anchor, gt_bbox, 87 | p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr) 88 | 89 | cls_labels, reg_targets, reg_weights = [], [], [] 90 | for valid_range in valid_ranges: 91 | cls_label = valid_cls_label.copy() 92 | self._filter_anchor_by_scale_range(cls_label, valid_anchor, gt_bbox, 93 | valid_range, p.trident.invalid_anchor_threshd) 94 | self._sample_anchor(cls_label, p.sample.image_anchor, p.sample.pos_fraction) 95 | reg_target, reg_weight = self._cal_anchor_target(cls_label, valid_anchor, gt_bbox, valid_anchor_label) 96 | cls_label, reg_target, reg_weight = \ 97 | self._scatter_valid_anchor(valid_index, cls_label, reg_target, reg_weight) 98 | 99 | cls_labels.append(cls_label.reshape((fh, fw, -1)).transpose(2, 0, 1).reshape(-1)) 100 | reg_targets.append(reg_target.reshape((fh, fw, -1)).transpose(2, 0, 1)) 101 | reg_weights.append(reg_weight.reshape((fh, fw, -1)).transpose(2, 0, 1)) 102 | 103 | input_record["rpn_cls_label"] = np.stack(cls_labels) 104 | input_record["rpn_reg_target"] = np.stack(reg_targets) 105 | input_record["rpn_reg_weight"] = np.stack(reg_weights) 106 | 107 | return input_record["rpn_cls_label"], \ 108 | input_record["rpn_reg_target"], \ 109 | input_record["rpn_reg_weight"] 110 | 111 | -------------------------------------------------------------------------------- /operator_cxx/contrib/fixed_divisor.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 15 | and IDIAP Research Institute nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | #ifndef CAFFE2_UTILS_FIXED_DIVISOR_H_ 32 | #define CAFFE2_UTILS_FIXED_DIVISOR_H_ 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | #include 39 | 40 | #if defined(__CUDA_ARCH__) || defined(__HIP_ARCH__) 41 | #define FIXED_DIVISOR_DECL inline __host__ __device__ 42 | #else 43 | #define FIXED_DIVISOR_DECL inline 44 | #endif 45 | 46 | 47 | // Utility class for quickly calculating quotients and remainders for 48 | // a known integer divisor 49 | template 50 | class FixedDivisor {}; 51 | 52 | // Works for any positive divisor, 1 to INT_MAX. One 64-bit 53 | // multiplication and one 64-bit shift is used to calculate the 54 | // result. 55 | template <> 56 | class FixedDivisor { 57 | public: 58 | FixedDivisor() = default; 59 | 60 | explicit FixedDivisor(const std::int32_t d) : d_(d) { 61 | CalcSignedMagic(); 62 | } 63 | 64 | FIXED_DIVISOR_DECL std::int32_t d() const { 65 | return d_; 66 | } 67 | 68 | FIXED_DIVISOR_DECL std::uint64_t magic() const { 69 | return magic_; 70 | } 71 | 72 | FIXED_DIVISOR_DECL int shift() const { 73 | return shift_; 74 | } 75 | 76 | /// Calculates `q = n / d`. 77 | FIXED_DIVISOR_DECL std::int32_t Div(const std::int32_t n) const { 78 | // In lieu of a mulhi instruction being available, perform the 79 | // work in uint64 80 | return (int32_t)((magic_ * (uint64_t)n) >> shift_); 81 | } 82 | 83 | /// Calculates `r = n % d`. 84 | FIXED_DIVISOR_DECL std::int32_t Mod(const std::int32_t n) const { 85 | return n - d_ * Div(n); 86 | } 87 | 88 | /// Calculates `q = n / d` and `r = n % d` together. 89 | FIXED_DIVISOR_DECL void 90 | DivMod(const std::int32_t n, std::int32_t* q, int32_t* r) const { 91 | *q = Div(n); 92 | *r = n - d_ * *q; 93 | } 94 | 95 | private: 96 | // Calculates magic multiplicative value and shift amount for calculating `q = 97 | // n / d` for signed 32-bit integers. 98 | // Implementation taken from Hacker's Delight section 10. 99 | void CalcSignedMagic() { 100 | if (d_ == 1) { 101 | magic_ = UINT64_C(0x1) << 32; 102 | shift_ = 32; 103 | return; 104 | } 105 | 106 | const std::uint32_t two31 = UINT32_C(0x80000000); 107 | const std::uint32_t ad = std::abs(d_); 108 | const std::uint32_t t = two31 + ((uint32_t)d_ >> 31); 109 | const std::uint32_t anc = t - 1 - t % ad; // Absolute value of nc. 110 | std::uint32_t p = 31; // Init. p. 111 | std::uint32_t q1 = two31 / anc; // Init. q1 = 2**p/|nc|. 112 | std::uint32_t r1 = two31 - q1 * anc; // Init. r1 = rem(2**p, |nc|). 113 | std::uint32_t q2 = two31 / ad; // Init. q2 = 2**p/|d|. 114 | std::uint32_t r2 = two31 - q2 * ad; // Init. r2 = rem(2**p, |d|). 115 | std::uint32_t delta = 0; 116 | do { 117 | ++p; 118 | q1 <<= 1; // Update q1 = 2**p/|nc|. 119 | r1 <<= 1; // Update r1 = rem(2**p, |nc|). 120 | if (r1 >= anc) { // (Must be an unsigned 121 | ++q1; // comparison here). 122 | r1 -= anc; 123 | } 124 | q2 <<= 1; // Update q2 = 2**p/|d|. 125 | r2 <<= 1; // Update r2 = rem(2**p, |d|). 126 | if (r2 >= ad) { // (Must be an unsigned 127 | ++q2; // comparison here). 128 | r2 -= ad; 129 | } 130 | delta = ad - r2; 131 | } while (q1 < delta || (q1 == delta && r1 == 0)); 132 | std::int32_t magic = q2 + 1; 133 | if (d_ < 0) { 134 | magic = -magic; 135 | } 136 | shift_ = p; 137 | magic_ = (std::uint64_t)(std::uint32_t)magic; 138 | } 139 | 140 | std::int32_t d_ = 1; 141 | std::uint64_t magic_; 142 | int shift_; 143 | }; 144 | 145 | 146 | #endif // CAFFE2_UTILS_FIXED_DIVISOR_H_ -------------------------------------------------------------------------------- /utils/graph_optimize.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | import json 19 | import logging 20 | import mxnet as mx 21 | 22 | 23 | def merge_bn(symbol, args, auxs, symbol_only=False): 24 | """ 25 | Adapted from https://github.com/dmlc/tvm/blob/master/python/tvm/relay/frontend/mxnet.py 26 | Instead of translating nnvm graph into TVM relay graph, we adapt the script to translate 27 | it back to mxnet graph. 28 | """ 29 | assert symbol is not None 30 | jgraph = json.loads(symbol.tojson()) 31 | jnodes = jgraph["nodes"] 32 | node_map = {} 33 | node_op_map = {} 34 | 35 | for nid, node in enumerate(jnodes): 36 | # edges are [which_node, which_output, type(? not sure)] 37 | # mx.symbol has an attribute of __getitem__. sym[1] gives the second output 38 | children = [node_map[e[0]][e[1]] for e in node["inputs"]] 39 | attrs = node.get("attrs", {}) 40 | node_name = node["name"] 41 | op_name = node["op"] 42 | if op_name == "null": 43 | attrs = dict({k:v for k, v in attrs.items() if k.startswith("__")}) 44 | node_map[nid] = mx.sym.var(node_name, **attrs) 45 | node_op_map[nid] = ["Variable"] 46 | elif op_name == "BatchNorm": 47 | e = node["inputs"][0] 48 | _, gamma, beta, mmean, mvar = children 49 | gamma_name, beta_name, mmean_name, mvar_name = gamma.name, beta.name, mmean.name, mvar.name 50 | assert "gamma" in gamma_name 51 | assert "beta" in beta_name 52 | assert "moving_mean" in mmean_name 53 | assert "moving_var" in mvar_name 54 | eps = float(attrs["eps"]) 55 | if attrs["use_global_stats"] == "True" and node_op_map[e[0]][e[1]] == "Convolution": 56 | if not symbol_only: 57 | if (mmean_name) not in auxs: 58 | logging.info("Can not find {}, skipping".format(node_name + "_moving_mean")) 59 | else: 60 | logging.info("Merging {}".format(node_name)) 61 | # modify beta before gamma since gamma is not depend on beta 62 | args[beta_name] -= args[gamma_name] * auxs[mmean_name] / mx.nd.sqrt(eps + auxs[mvar_name]) 63 | args[gamma_name] /= mx.nd.sqrt(eps + auxs[mvar_name]) 64 | # expand for broadcasting 65 | if args[gamma_name].ndim == 1: 66 | args[gamma_name] = args[gamma_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1) 67 | args[beta_name] = args[beta_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1) 68 | auxs[mmean_name] = auxs[mmean_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1) 69 | auxs[mvar_name] = auxs[mvar_name].expand_dims(axis=0).expand_dims(axis=-1).expand_dims(axis=-1) 70 | # set mmean and mvar to identity to avoid fusing more than once in weight sharing 71 | auxs[mmean_name][:] = 0.0 72 | auxs[mvar_name][:] = 1.0 73 | # copy shared gamma and beta for each BN 74 | args[node_name + "_gamma"] = args[gamma_name] 75 | args[node_name + "_beta"] = args[beta_name] 76 | # BroadcastScale is needed 77 | gamma = mx.sym.var(node_name + "_gamma", shape=args[node_name + "_gamma"].shape) 78 | beta = mx.sym.var(node_name + "_beta", shape=args[node_name + "_beta"].shape) 79 | res = mx.sym.broadcast_add(mx.sym.contrib.BroadcastScale(data=children[0], scaler=gamma), beta) 80 | else: 81 | res = mx.sym.BatchNorm(*children, **attrs, name=node_name) 82 | node_map[nid] = res 83 | node_op_map[nid] = ["BatchNorm"] 84 | else: 85 | if op_name.startswith("_contrib_"): 86 | op_name = op_name.replace("_contrib_", "") 87 | operator = eval("mx.sym.contrib." + op_name) 88 | elif op_name.startswith("_"): 89 | operator = eval("mx.sym._internal." + op_name) 90 | else: 91 | operator = eval("mx.sym." + op_name) 92 | res = operator(*children, **attrs, name=node_name) 93 | node_map[nid] = res 94 | node_op_map[nid] = [op_name] 95 | 96 | outputs = [node_map[e[0]][e[1]] for e in jgraph["heads"]] 97 | outputs = outputs[0] if len(outputs) == 1 else mx.sym.Group(outputs) 98 | return outputs, args, auxs 99 | 100 | if __name__ == "__main__": 101 | sym = mx.sym.load("experiments/faster_r50v1_2fc_1x/checkpoint.json") 102 | sym1, _, _ = merge_bn(sym, None, None, True) 103 | print(sym1.tojson()) 104 | -------------------------------------------------------------------------------- /operator_py/cython/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /operator_cxx/contrib/generate_proposal-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file generate_proposal-inl.h 22 | * \brief GenerateProposal Operator 23 | * \author Piotr Teterwak, Bing Xu, Jian Guo, Pengfei Chen, Yuntao Chen, Yanghao Li 24 | */ 25 | #ifndef MXNET_OPERATOR_CONTRIB_GENERATE_PROPOSAL_INL_H_ 26 | #define MXNET_OPERATOR_CONTRIB_GENERATE_PROPOSAL_INL_H_ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "../operator_common.h" 39 | #include "../mshadow_op.h" 40 | 41 | namespace mxnet { 42 | namespace op { 43 | 44 | namespace gen_proposal { 45 | enum GenProposalOpInputs {kClsProb, kBBoxPred, kImInfo, kAnchor}; 46 | enum GenProposalOpOutputs {kOut, kScore}; 47 | enum GenProposalForwardResource {kTempSpace}; 48 | } // gen_proposal 49 | 50 | struct GenProposalParam : public dmlc::Parameter { 51 | int rpn_pre_nms_top_n; 52 | int rpn_min_size; 53 | int feature_stride; 54 | bool iou_loss; 55 | uint64_t workspace; 56 | int num_class; 57 | 58 | DMLC_DECLARE_PARAMETER(GenProposalParam) { 59 | DMLC_DECLARE_FIELD(rpn_pre_nms_top_n).set_default(6000) 60 | .describe("Number of top scoring boxes to keep after applying NMS to RPN proposals"); 61 | DMLC_DECLARE_FIELD(rpn_min_size).set_default(16) 62 | .describe("Minimum height or width in proposal"); 63 | DMLC_DECLARE_FIELD(feature_stride).set_default(16) 64 | .describe("The size of the receptive field each unit in the convolution layer of the rpn," 65 | "for example the product of all stride's prior to this layer."); 66 | DMLC_DECLARE_FIELD(iou_loss).set_default(false) 67 | .describe("Usage of IoU Loss"); 68 | DMLC_DECLARE_FIELD(workspace).set_default(256) 69 | .describe("Workspace for proposal in MB, default to 256"); 70 | DMLC_DECLARE_FIELD(num_class).set_default(2) 71 | .describe("The number of classes"); 72 | } 73 | }; 74 | 75 | template 76 | Operator *CreateOp(GenProposalParam param); 77 | 78 | #if DMLC_USE_CXX11 79 | class GenProposalProp : public OperatorProperty { 80 | public: 81 | void Init(const std::vector >& kwargs) override { 82 | param_.Init(kwargs); 83 | } 84 | 85 | std::map GetParams() const override { 86 | return param_.__DICT__(); 87 | } 88 | 89 | bool InferShape(std::vector *in_shape, 90 | std::vector *out_shape, 91 | std::vector *aux_shape) const override { 92 | using namespace mshadow; 93 | CHECK_EQ(in_shape->size(), 4) << "Input:[cls_prob, bbox_pred, im_info, anchors]"; 94 | const TShape &dshape = in_shape->at(gen_proposal::kClsProb); 95 | if (dshape.ndim() == 0) return false; 96 | Shape<4> bbox_pred_shape; 97 | bbox_pred_shape = Shape4(dshape[0], dshape[1] / param_.num_class * 4, dshape[2], dshape[3]); 98 | SHAPE_ASSIGN_CHECK(*in_shape, gen_proposal::kBBoxPred, 99 | bbox_pred_shape); 100 | Shape<2> im_info_shape; 101 | im_info_shape = Shape2(dshape[0], 3); 102 | SHAPE_ASSIGN_CHECK(*in_shape, gen_proposal::kImInfo, im_info_shape); 103 | Shape<2> anchors_shape; 104 | anchors_shape = Shape2(dshape[2] * dshape[3] * dshape[1] / 2, 4); 105 | SHAPE_ASSIGN_CHECK(*in_shape, gen_proposal::kAnchor, anchors_shape); 106 | out_shape->clear(); 107 | // output 108 | out_shape->push_back(Shape3(dshape[0], param_.rpn_pre_nms_top_n, 5)); 109 | return true; 110 | } 111 | 112 | OperatorProperty* Copy() const override { 113 | auto ptr = new GenProposalProp(); 114 | ptr->param_ = param_; 115 | return ptr; 116 | } 117 | 118 | std::string TypeString() const override { 119 | return "_contrib_GenProposal"; 120 | } 121 | 122 | std::vector ForwardResource( 123 | const std::vector &in_shape) const override { 124 | return {ResourceRequest::kTempSpace}; 125 | } 126 | 127 | std::vector DeclareBackwardDependency( 128 | const std::vector &out_grad, 129 | const std::vector &in_data, 130 | const std::vector &out_data) const override { 131 | return {}; 132 | } 133 | 134 | int NumOutputs() const override { 135 | return 1; 136 | } 137 | 138 | std::vector ListArguments() const override { 139 | return {"cls_prob", "bbox_pred", "im_info", "anchors"}; 140 | } 141 | 142 | std::vector ListOutputs() const override { 143 | return {"output"}; 144 | } 145 | 146 | Operator* CreateOperator(Context ctx) const override; 147 | 148 | private: 149 | GenProposalParam param_; 150 | }; // class GenProposalProp 151 | 152 | #endif // DMLC_USE_CXX11 153 | } // namespace op 154 | } // namespace mxnet 155 | 156 | #endif // MXNET_OPERATOR_CONTRIB_GENERATE_PROPOSAL_INL_H_ 157 | -------------------------------------------------------------------------------- /core/detection_metric.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | 5 | class LossWithIgnore(mx.metric.EvalMetric): 6 | def __init__(self, name, output_names, label_names, ignore_label=-1): 7 | super().__init__(name, output_names, label_names) 8 | self.ignore_label = ignore_label 9 | 10 | def update(self, labels, preds): 11 | raise NotImplementedError 12 | 13 | 14 | class FgLossWithIgnore(LossWithIgnore): 15 | def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1): 16 | super().__init__(name, output_names, label_names, ignore_label) 17 | self.bg_label = bg_label 18 | 19 | def update(self, labels, preds): 20 | raise NotImplementedError 21 | 22 | 23 | class AccWithIgnore(LossWithIgnore): 24 | def __init__(self, name, output_names, label_names, ignore_label=-1): 25 | super().__init__(name, output_names, label_names, ignore_label) 26 | 27 | def update(self, labels, preds): 28 | if len(preds) == 1 and len(labels) == 1: 29 | pred = preds[0] 30 | label = labels[0] 31 | elif len(preds) == 2: 32 | pred = preds[0] 33 | label = preds[1] 34 | else: 35 | raise Exception( 36 | "unknown loss output: len(preds): {}, len(labels): {}".format( 37 | len(preds), len(labels) 38 | ) 39 | ) 40 | 41 | pred_label = mx.ndarray.argmax_channel(pred).astype('int32').asnumpy().reshape(-1) 42 | label = label.astype('int32').asnumpy().reshape(-1) 43 | 44 | keep_inds = np.where(label != self.ignore_label)[0] 45 | pred_label = pred_label[keep_inds] 46 | label = label[keep_inds] 47 | 48 | self.sum_metric += np.sum(pred_label == label) 49 | self.num_inst += len(pred_label) 50 | 51 | 52 | class FgAccWithIgnore(FgLossWithIgnore): 53 | def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1): 54 | super().__init__(name, output_names, label_names, bg_label, ignore_label) 55 | 56 | def update(self, labels, preds): 57 | pred = preds[0] 58 | label = labels[0] 59 | 60 | pred_label = mx.ndarray.argmax_channel(pred).astype('int32').asnumpy().reshape(-1) 61 | label = label.astype('int32').asnumpy().reshape(-1) 62 | 63 | keep_inds = np.where((label != self.bg_label) & (label != self.ignore_label))[0] 64 | pred_label = pred_label[keep_inds] 65 | label = label[keep_inds] 66 | 67 | self.sum_metric += np.sum(pred_label == label) 68 | self.num_inst += len(pred_label) 69 | 70 | 71 | class CeWithIgnore(LossWithIgnore): 72 | def __init__(self, name, output_names, label_names, ignore_label=-1): 73 | super().__init__(name, output_names, label_names, ignore_label) 74 | 75 | def update(self, labels, preds): 76 | pred = preds[0] 77 | label = labels[0] 78 | 79 | label = label.astype('int32').asnumpy().reshape(-1) 80 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 81 | pred = pred.reshape((label.shape[0], -1)) # -1 x c 82 | 83 | keep_inds = np.where(label != self.ignore_label)[0] 84 | label = label[keep_inds] 85 | prob = pred[keep_inds, label] 86 | 87 | prob += 1e-14 88 | ce_loss = -1 * np.log(prob) 89 | ce_loss = np.sum(ce_loss) 90 | self.sum_metric += ce_loss 91 | self.num_inst += label.shape[0] 92 | 93 | 94 | class FgCeWithIgnore(FgLossWithIgnore): 95 | def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1): 96 | super().__init__(name, output_names, label_names, bg_label, ignore_label) 97 | 98 | def update(self, labels, preds): 99 | pred = preds[0] 100 | label = labels[0] 101 | 102 | label = label.astype('int32').asnumpy().reshape(-1) 103 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 104 | pred = pred.reshape((label.shape[0], -1)) # -1 x c 105 | 106 | keep_inds = np.where((label != self.ignore_label) & (label != self.bg_label))[0] 107 | label = label[keep_inds] 108 | prob = pred[keep_inds, label] 109 | 110 | prob += 1e-14 111 | ce_loss = -1 * np.log(prob) 112 | ce_loss = np.sum(ce_loss) 113 | self.sum_metric += ce_loss 114 | self.num_inst += label.shape[0] 115 | 116 | 117 | class L1(FgLossWithIgnore): 118 | def __init__(self, name, output_names, label_names, bg_label=0, ignore_label=-1): 119 | super().__init__(name, output_names, label_names, bg_label, ignore_label) 120 | 121 | def update(self, labels, preds): 122 | if len(preds) == 1 and len(labels) == 1: 123 | pred = preds[0].asnumpy() 124 | label = labels[0].asnumpy() 125 | elif len(preds) == 2: 126 | pred = preds[0].asnumpy() 127 | label = preds[1].asnumpy() 128 | else: 129 | raise Exception( 130 | "unknown loss output: len(preds): {}, len(labels): {}".format( 131 | len(preds), len(labels) 132 | ) 133 | ) 134 | 135 | label = label.reshape(-1) 136 | num_inst = len(np.where((label != self.bg_label) & (label != self.ignore_label))[0]) 137 | 138 | self.sum_metric += np.sum(pred) 139 | self.num_inst += num_inst 140 | 141 | 142 | class SigmoidCrossEntropy(mx.metric.EvalMetric): 143 | def __init__(self, name, output_names, label_names): 144 | super().__init__(name, output_names, label_names) 145 | 146 | def update(self, labels, preds): 147 | x = preds[0].reshape(-1) # logit 148 | z = preds[1].reshape(-1) # label 149 | l = mx.nd.relu(x) - x * z + mx.nd.log1p(mx.nd.exp(-mx.nd.abs(x))) 150 | l = l.mean().asnumpy() 151 | 152 | self.num_inst += 1 153 | self.sum_metric += l -------------------------------------------------------------------------------- /operator_py/cython/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH. 35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.items(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | CUDA = locate_cuda() 59 | 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | 72 | If you subclass UnixCCompiler, it's not trivial to get your subclass 73 | injected in, and still have the right customizations (i.e. 74 | distutils.sysconfig.customize_compiler) run on it. So instead of going 75 | the OO route, I have this. Note, it's kindof like a wierd functional 76 | subclassing going on.""" 77 | 78 | # tell the compiler it can processes .cu 79 | self.src_extensions.append('.cu') 80 | 81 | # save references to the default compiler_so and _comple methods 82 | default_compiler_so = self.compiler_so 83 | super = self._compile 84 | 85 | # now redefine the _compile method. This gets executed for each 86 | # object but distutils doesn't have the ability to change compilers 87 | # based on source extension: we add it. 88 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 89 | if os.path.splitext(src)[1] == '.cu': 90 | # use the cuda for .cu files 91 | self.set_executable('compiler_so', CUDA['nvcc']) 92 | # use only a subset of the extra_postargs, which are 1-1 translated 93 | # from the extra_compile_args in the Extension class 94 | postargs = extra_postargs['nvcc'] 95 | else: 96 | postargs = extra_postargs['gcc'] 97 | 98 | super(obj, src, ext, cc_args, postargs, pp_opts) 99 | # reset the default compiler_so, which we might have changed for cuda 100 | self.compiler_so = default_compiler_so 101 | 102 | # inject our redefined _compile method into the class 103 | self._compile = _compile 104 | 105 | 106 | # run the customize_compiler 107 | class custom_build_ext(build_ext): 108 | def build_extensions(self): 109 | customize_compiler_for_nvcc(self.compiler) 110 | build_ext.build_extensions(self) 111 | 112 | 113 | ext_modules = [ 114 | Extension( 115 | "bbox", 116 | ["bbox.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs=[numpy_include] 119 | ), 120 | Extension( 121 | "cpu_nms", 122 | ["cpu_nms.pyx"], 123 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | include_dirs = [numpy_include] 125 | ), 126 | Extension('gpu_nms', 127 | ['nms_kernel.cu', 'gpu_nms.pyx'], 128 | library_dirs=[CUDA['lib64']], 129 | libraries=['cudart'], 130 | language='c++', 131 | runtime_library_dirs=[CUDA['lib64']], 132 | # this syntax is specific to this build system 133 | # we're only going to use certain compiler args with nvcc and not with 134 | # gcc the implementation of this trick is in customize_compiler() below 135 | extra_compile_args={'gcc': ["-Wno-unused-function"], 136 | 'nvcc': ['-arch=sm_35', 137 | '--ptxas-options=-v', 138 | '-c', 139 | '--compiler-options', 140 | "'-fPIC'"]}, 141 | include_dirs = [numpy_include, CUDA['include']] 142 | ), 143 | ] 144 | 145 | setup( 146 | name='frcnn_cython', 147 | ext_modules=ext_modules, 148 | # inject our custom trigger 149 | cmdclass={'build_ext': custom_build_ext}, 150 | ) 151 | -------------------------------------------------------------------------------- /models/efficientnet/builder.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnext as X 3 | from mxnext import dwconv, conv, relu6, add, global_avg_pool, sigmoid, to_fp16, to_fp32 4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit 5 | from symbol.builder import Backbone 6 | 7 | 8 | def _make_divisible(dividend, divisor): 9 | if dividend % divisor == 0: 10 | return dividend 11 | else: 12 | return (dividend // divisor + 1) * divisor 13 | 14 | round32 = lambda dividend: _make_divisible(dividend, 32) 15 | 16 | 17 | def se(input, prefix, f_down, f_up): 18 | with mx.name.Prefix(prefix + "_"): 19 | gap = mx.sym.mean(input, axis=-1, keepdims=True) 20 | gap = mx.sym.mean(gap, axis=-2, keepdims=True) 21 | fc1 = conv(gap, name="fc1", filter=f_down) 22 | fc1 = relu6(fc1, name="fc1_relu") 23 | fc2 = conv(fc1, name="fc2", filter=f_up) 24 | att = sigmoid(fc2, name="sigmoid") 25 | input = mx.sym.broadcast_mul(input, att, name="mul") 26 | 27 | return input 28 | 29 | 30 | def convnormrelu(input, prefix, kernel, f_in, f_out, stride, proj, norm, **kwargs): 31 | with mx.name.Prefix(prefix + "_"): 32 | conv1 = conv(input, name="conv1", filter=f_out, kernel=kernel, stride=stride, no_bias=False) 33 | bn1 = norm(conv1, name="bn1") 34 | relu1 = relu6(bn1, name="relu1") 35 | return relu1 36 | 37 | 38 | def mbconv(input, prefix, kernel, f_in, f_out, stride, proj, bottleneck_ratio, norm, **kwargs): 39 | with mx.name.Prefix(prefix + "_"): 40 | if bottleneck_ratio != 1: 41 | conv1 = conv(input, name="conv1", filter=f_in * bottleneck_ratio, no_bias=False) 42 | bn1 = norm(conv1, name="bn1") 43 | relu1 = relu6(bn1, name="relu1") 44 | else: 45 | relu1 = input 46 | 47 | conv2 = dwconv(relu1, name="conv2", filter=f_in * bottleneck_ratio, 48 | kernel=kernel, stride=stride, no_bias=False) 49 | bn2 = norm(conv2, name="bn2") 50 | relu2 = relu6(bn2, name="relu2") 51 | relu2 = se(relu2, prefix=prefix + "_se2", f_down=f_in//4, f_up=f_in * bottleneck_ratio) 52 | 53 | conv3 = conv(relu2, name="conv3", filter=f_out, no_bias=False) 54 | bn3 = norm(conv3, name="bn3") 55 | 56 | if proj: 57 | return bn3 58 | else: 59 | return bn3 + input 60 | 61 | 62 | mbc1 = lambda input, prefix, kernel, f_in, f_out, stride, proj, norm, **kwargs: \ 63 | mbconv(input, prefix, kernel, f_in, f_out, stride, proj, 1, norm, **kwargs) 64 | mbc6 = lambda input, prefix, kernel, f_in, f_out, stride, proj, norm, **kwargs: \ 65 | mbconv(input, prefix, kernel, f_in, f_out, stride, proj, 6, norm, **kwargs) 66 | 67 | 68 | def efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs): 69 | stages = [] 70 | for i, (u, fo, fi, s, k, c) in enumerate(zip(us, fos, fis, ss, ks, cs), start=1): 71 | for j in range(1, u + 1): 72 | s = s if j == 1 else 1 73 | proj = True if j == 1 else False 74 | fi = fi if j == 1 else fo 75 | data = c(data, prefix="stage%s_unit%s" % (i, j), f_in=fi, f_out=fo, 76 | kernel=k, stride=s, proj=proj, norm=norm) 77 | stages.append(data) 78 | return stages 79 | 80 | 81 | def efficientnet_b4(data, norm, **kwargs): 82 | # 1.5 GFLOPs 83 | us = [1, 2, 4, 4, 6, 6, 8, 2, 1] 84 | fos = [48, 24, 32, 56, 112, 160, 272, 448, 1792] 85 | fis = [0] + fos[:-1] 86 | ss = [2, 1, 2, 2, 2, 1, 2, 1, 1] 87 | ks = [3, 3, 3, 5, 3, 5, 5, 3, 1] 88 | cs = [convnormrelu, mbc1, mbc6, mbc6, mbc6, mbc6, mbc6, mbc6, convnormrelu] 89 | return efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs) 90 | 91 | 92 | def efficientnet_b5(data, norm, **kwargs): 93 | # 2.3 GFLOPs 94 | us = [1, 3, 5, 5, 7, 7, 9, 3, 1] 95 | fos = [48, 24, 40, 64, 128, 172, 304, 512, 2048] 96 | fis = [0] + fos[:-1] 97 | ss = [2, 1, 2, 2, 2, 1, 2, 1, 1] 98 | ks = [3, 3, 3, 5, 3, 5, 5, 3, 1] 99 | # ks = [3, 5, 5, 5, 5, 5, 5, 5, 1] 100 | cs = [convnormrelu, mbc1, mbc6, mbc6, mbc6, mbc6, mbc6, mbc6, convnormrelu] 101 | return efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs) 102 | 103 | 104 | def efficientnet_b6(data, norm, **kwargs): 105 | # 3.3 GFLOPs 106 | us = [1, 3, 6, 6, 8, 8, 11, 3, 1] 107 | fos = [56, 32, 40, 72, 144, 200, 344, 576, 2304] 108 | fis = [0] + fos[:-1] 109 | ss = [2, 1, 2, 2, 2, 1, 2, 1, 1] 110 | ks = [3, 3, 3, 5, 3, 5, 5, 3, 1] 111 | cs = [convnormrelu, mbc1, mbc6, mbc6, mbc6, mbc6, mbc6, mbc6, convnormrelu] 112 | return efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs) 113 | 114 | 115 | def efficientnet_b7(data, norm, **kwargs): 116 | # 5.1 GFLOPs 117 | us = [1, 4, 7, 7, 10, 10, 13, 4, 1] 118 | fos = [64, 32, 48, 80, 160, 224, 384, 640, 2560] 119 | fis = [0] + fos[:-1] 120 | ss = [2, 1, 2, 2, 2, 1, 2, 1, 1] 121 | ks = [3, 3, 3, 5, 3, 5, 5, 3, 1] 122 | cs = [convnormrelu, mbc1, mbc6, mbc6, mbc6, mbc6, mbc6, mbc6, convnormrelu] 123 | return efficientnet_helper(data, norm, us, fos, fis, ss, ks, cs) 124 | 125 | 126 | def efficientnet_fpn_builder(efficientnet): 127 | class EfficientNetFPN(Backbone): 128 | def __init__(self, pBackbone): 129 | super().__init__(pBackbone) 130 | p = self.p 131 | data = X.var("data") 132 | if p.fp16: 133 | data = data.astype("float16") 134 | stages = efficientnet(data, p.normalizer, params=p) 135 | self.symbol = (stages[2], stages[3], stages[5], stages[8]) 136 | 137 | def get_rpn_feature(self): 138 | return self.symbol 139 | 140 | def get_rcnn_feature(self): 141 | return self.symbol 142 | return EfficientNetFPN 143 | 144 | 145 | EfficientNetB4FPN = efficientnet_fpn_builder(efficientnet_b4) 146 | EfficientNetB5FPN = efficientnet_fpn_builder(efficientnet_b5) 147 | EfficientNetB6FPN = efficientnet_fpn_builder(efficientnet_b6) 148 | EfficientNetB7FPN = efficientnet_fpn_builder(efficientnet_b7) 149 | 150 | 151 | if __name__ == "__main__": 152 | data = X.var("data") 153 | norm = X.normalizer_factory() 154 | *_, last = efficientnet_b4(data, norm) 155 | mx.viz.print_summary(last, shape={"data": (1, 3, 224, 224)}) 156 | -------------------------------------------------------------------------------- /models/FPN/input.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import copy 6 | 7 | from core.detection_input import AnchorTarget2D 8 | 9 | 10 | class PyramidAnchorTarget2DBase(AnchorTarget2D): 11 | """ 12 | input: image_meta: tuple(h, w, scale) 13 | gt_bbox, ndarry(max_num_gt, 4) 14 | output: anchor_label, ndarray(num_anchor * h * w) 15 | anchor_bbox_target, ndarray(num_anchor * h * w, 4) 16 | anchor_bbox_weight, ndarray(num_anchor * h * w, 4) 17 | """ 18 | 19 | def apply(self, input_record): 20 | p = self.p 21 | 22 | im_info = input_record["im_info"] 23 | gt_bbox = input_record["gt_bbox"] 24 | assert isinstance(gt_bbox, np.ndarray) 25 | assert gt_bbox.dtype == np.float32 26 | valid = np.where(gt_bbox[:, 0] != -1)[0] 27 | gt_bbox = gt_bbox[valid] 28 | 29 | if gt_bbox.shape[1] == 5: 30 | gt_bbox = gt_bbox[:, :4] 31 | 32 | valid_index, valid_anchor = self._gather_valid_anchor(im_info) 33 | cls_label, anchor_label = \ 34 | self._assign_label_to_anchor(valid_anchor, gt_bbox, 35 | p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr) 36 | self._sample_anchor(cls_label, p.sample.image_anchor, p.sample.pos_fraction) 37 | reg_target, reg_weight = self._cal_anchor_target(cls_label, valid_anchor, gt_bbox, anchor_label) 38 | cls_label, reg_target, reg_weight = \ 39 | self._scatter_valid_anchor(valid_index, cls_label, reg_target, reg_weight) 40 | 41 | """ 42 | cls_label: (all_anchor,) 43 | reg_target: (all_anchor, 4) 44 | reg_weight: (all_anchor, 4) 45 | """ 46 | input_record["rpn_cls_label"] = cls_label 47 | input_record["rpn_reg_target"] = reg_target 48 | input_record["rpn_reg_weight"] = reg_weight 49 | 50 | return input_record["rpn_cls_label"], \ 51 | input_record["rpn_reg_target"], \ 52 | input_record["rpn_reg_weight"] 53 | 54 | 55 | class PyramidAnchorTarget2D(PyramidAnchorTarget2DBase): 56 | """ 57 | input: image_meta: tuple(h, w, scale) 58 | gt_bbox, ndarry(max_num_gt, 4) 59 | output: anchor_label, ndarray(num_anchor * h * w) 60 | anchor_bbox_target, ndarray(num_anchor * 4, h * w) 61 | anchor_bbox_weight, ndarray(num_anchor * 4, h * w) 62 | """ 63 | 64 | def __init__(self, pAnchor): 65 | super().__init__(pAnchor) 66 | 67 | self.pyramid_levels = len(self.p.generate.stride) 68 | self.p_list = [copy.deepcopy(self.p) for _ in range(self.pyramid_levels)] 69 | 70 | pyramid_stride = self.p.generate.stride 71 | pyramid_short = self.p.generate.short 72 | pyramid_long = self.p.generate.long 73 | 74 | for i in range(self.pyramid_levels): 75 | self.p_list[i].generate.stride = pyramid_stride[i] 76 | self.p_list[i].generate.short = pyramid_short[i] 77 | self.p_list[i].generate.long = pyramid_long[i] 78 | 79 | # generate anchors for multi-leval feature map 80 | self.anchor_target_2d_list = [PyramidAnchorTarget2DBase(p) for p in self.p_list] 81 | self.anchor_target_2d = PyramidAnchorTarget2DBase(self.p_list[0]) 82 | 83 | self.anchor_target_2d.v_all_anchor = self.v_all_anchor 84 | self.anchor_target_2d.h_all_anchor = self.h_all_anchor 85 | 86 | @property 87 | def v_all_anchor(self): 88 | anchors_list = [anchor_target_2d.v_all_anchor for anchor_target_2d in self.anchor_target_2d_list] 89 | anchors = np.concatenate(anchors_list) 90 | return anchors 91 | 92 | @property 93 | def h_all_anchor(self): 94 | anchors_list = [anchor_target_2d.h_all_anchor for anchor_target_2d in self.anchor_target_2d_list] 95 | anchors = np.concatenate(anchors_list) 96 | return anchors 97 | 98 | def apply(self, input_record): 99 | 100 | anchor_size = [0] + [x.h_all_anchor.shape[0] for x in self.anchor_target_2d_list] 101 | anchor_size = np.cumsum(anchor_size) 102 | cls_label, reg_target, reg_weight = \ 103 | self.anchor_target_2d.apply(input_record) 104 | 105 | im_info = input_record["im_info"] 106 | h, w = im_info[:2] 107 | 108 | cls_label_list = [] 109 | reg_target_list = [] 110 | reg_weight_list = [] 111 | for i in range(self.pyramid_levels): 112 | p = self.anchor_target_2d_list[i].p 113 | 114 | cls_label_level = cls_label[anchor_size[i]:anchor_size[i + 1]] 115 | reg_target_level = reg_target[anchor_size[i]:anchor_size[i + 1]] 116 | reg_weight_level = reg_weight[anchor_size[i]:anchor_size[i + 1]] 117 | """ 118 | label: (h * w * A) -> (A * h * w) 119 | bbox_target: (h * w * A, 4) -> (A * 4, h * w) 120 | bbox_weight: (h * w * A, 4) -> (A * 4, h * w) 121 | """ 122 | if h >= w: 123 | fh, fw = p.generate.long, p.generate.short 124 | else: 125 | fh, fw = p.generate.short, p.generate.long 126 | cls_label_level = cls_label_level.reshape((fh, fw, -1)).transpose(2, 0, 1) 127 | reg_target_level = reg_target_level.reshape((fh, fw, -1)).transpose(2, 0, 1) 128 | reg_weight_level = reg_weight_level.reshape((fh, fw, -1)).transpose(2, 0, 1) 129 | 130 | cls_label_level = cls_label_level.reshape(-1, fh * fw) 131 | reg_target_level = reg_target_level.reshape(-1, fh * fw) 132 | reg_weight_level = reg_weight_level.reshape(-1, fh * fw) 133 | 134 | cls_label_list.append(cls_label_level) 135 | reg_target_list.append(reg_target_level) 136 | reg_weight_list.append(reg_weight_level) 137 | 138 | cls_label = np.concatenate(cls_label_list, axis=1).reshape(-1) 139 | reg_target = np.concatenate(reg_target_list, axis=1) 140 | reg_weight = np.concatenate(reg_weight_list, axis=1) 141 | 142 | input_record["rpn_cls_label"] = cls_label 143 | input_record["rpn_reg_target"] = reg_target 144 | input_record["rpn_reg_weight"] = reg_weight 145 | 146 | return input_record["rpn_cls_label"], \ 147 | input_record["rpn_reg_target"], \ 148 | input_record["rpn_reg_weight"] 149 | -------------------------------------------------------------------------------- /detection_img.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from core.detection_module import DetModule 4 | from core.detection_input import Loader 5 | from utils.load_model import load_checkpoint 6 | from operator_py.nms import py_nms_wrapper 7 | from utils import callback 8 | from mxnet.base import _as_list 9 | 10 | from six.moves import reduce 11 | from six.moves.queue import Queue 12 | from threading import Thread 13 | import argparse 14 | import importlib 15 | import mxnet as mx 16 | import numpy as np 17 | import six.moves.cPickle as pkl 18 | import time 19 | import json 20 | import cv2 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(description='Test Detection') 25 | # general 26 | parser.add_argument('--config', help='config file path', type=str) 27 | parser.add_argument('--gpu_id', help='gpu_id', type=int, default=0) 28 | parser.add_argument('--epoch', help='load params epoch', type=int, default=0) 29 | parser.add_argument('--thr', help='detection threshold', type=float, default=0.80) 30 | parser.add_argument('--path', help='images path to detect', type=str) 31 | args = parser.parse_args() 32 | 33 | config = importlib.import_module(args.config.replace('.py', '').replace('/', '.')) 34 | return args, config 35 | 36 | if __name__ == "__main__": 37 | # os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0" 38 | 39 | args, config = parse_args() 40 | 41 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 42 | transform, data_name, label_name, metric_list = config.get_config(is_train=False) 43 | 44 | nms = py_nms_wrapper(pTest.nms.thr) 45 | sym = pModel.test_symbol 46 | pshort = 800 47 | plong = 2000 48 | 49 | arg_params, aux_params = load_checkpoint(pTest.model.prefix, args.epoch) 50 | mod = DetModule(sym, data_names=["data", "im_info", "im_id", "rec_id"], context=mx.gpu(args.gpu_id)) 51 | provide_data = [("data", (1, 3, pshort, plong)), ("im_info", (1, 3)), ("im_id", (1,)), ("rec_id", (1,))] 52 | mod.bind(data_shapes=provide_data, for_training=False) 53 | mod.set_params(arg_params, aux_params, allow_extra=False) 54 | 55 | image_list = [] 56 | if os.path.isfile(args.path): 57 | if ".txt" in args.path: 58 | list_file = open(args.path, 'r') 59 | list_lines = list_file.readlines() 60 | list_file.close() 61 | (fpath, fname) = os.path.split(args.path) 62 | for aline in list_lines: 63 | uints = aline.split(' ') 64 | imgpath = os.path.join(fpath, uints[0]) 65 | image_list.append(imgpath) 66 | else: 67 | image_list.append(args.path) 68 | else: 69 | for fname in os.listdir(args.path): 70 | fpath = os.path.join(args.path, fname) 71 | if os.path.isfile(fpath): 72 | image_list.append(fpath) 73 | 74 | for imgpath in image_list: 75 | img = cv2.imread(imgpath, cv2.IMREAD_COLOR) 76 | image = img[:, :, ::-1] 77 | short = image.shape[0] 78 | long = image.shape[1] 79 | scale = min(pshort / short, plong / long) 80 | image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR) 81 | # exactly as opencv 82 | h, w = image.shape[:2] 83 | im_info = (h, w, scale) 84 | # shape = (plong, pshort, 3) if h >= w else (pshort, plong, 3) 85 | shape = (pshort, plong, 3) 86 | padded_image = np.zeros(shape, dtype=np.float32) 87 | padded_image[:h, :w] = image 88 | padded_image = padded_image.transpose((2, 0, 1)) 89 | img_array = [] 90 | img_array.append(padded_image) 91 | iminfo_array = [] 92 | iminfo_array.append(im_info) 93 | im_id = mx.nd.array([1]) 94 | rec_id = mx.nd.array([1]) 95 | data = [mx.nd.array(img_array)] 96 | data.append(mx.nd.array(iminfo_array)) 97 | data.append(im_id) 98 | data.append(rec_id) 99 | mbatch = mx.io.DataBatch(data=data, provide_data=provide_data) 100 | 101 | start_t = time.time() 102 | mod.forward(mbatch, is_train=False) 103 | outs = [x.asnumpy() for x in mod.get_outputs()] 104 | im_info = outs[2] # h_raw, w_raw, scale 105 | cls_score = outs[3] 106 | bbox_xyxy = outs[4] 107 | if cls_score.ndim == 3: 108 | cls_score = cls_score[0] 109 | bbox_xyxy = bbox_xyxy[0] 110 | bbox_xyxy = bbox_xyxy / scale # scale to original image scale 111 | cls_score = cls_score[:, 1:] # remove background score 112 | # TODO: the output shape of class_agnostic box is [n, 4], while class_aware box is [n, 4 * (1 + class)] 113 | bbox_xyxy = bbox_xyxy[:, 4:] if bbox_xyxy.shape[1] != 4 else bbox_xyxy 114 | 115 | final_dets = {} 116 | for cid in range(cls_score.shape[1]): 117 | score = cls_score[:, cid] 118 | if bbox_xyxy.shape[1] != 4: 119 | cls_box = bbox_xyxy[:, cid * 4:(cid + 1) * 4] 120 | else: 121 | cls_box = bbox_xyxy 122 | valid_inds = np.where(score > args.thr)[0] 123 | box = cls_box[valid_inds] 124 | score = score[valid_inds] 125 | det = np.concatenate((box, score.reshape(-1, 1)), axis=1).astype(np.float32) 126 | final_dets[cid] = nms(det) 127 | end_t = time.time() 128 | print("detection use: %.3f seconds." % (end_t - start_t)) 129 | 130 | for cid in final_dets: 131 | det = final_dets[cid] 132 | if det.shape[0] == 0: 133 | continue 134 | scores = det[:, -1] 135 | x1 = det[:, 0] 136 | y1 = det[:, 1] 137 | x2 = det[:, 2] 138 | y2 = det[:, 3] 139 | for k in range(det.shape[0]): 140 | bbox = [float(x1[k]), float(y1[k]), float(x2[k]), float(y2[k])] 141 | score = float(scores[k]) 142 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) 143 | # cv2.putText(img, "{}:{:.2}".format(str(cid), score), (int(bbox[0]), int(bbox[1] - 10)), 4, 0.6, (0, 0, 255)) 144 | (filepath, filename) = os.path.split(imgpath) 145 | cv2.imwrite(filename, img) 146 | exit() 147 | 148 | 149 | -------------------------------------------------------------------------------- /operator_cxx/contrib/group_norm_helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 15 | and IDIAP Research Institute nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | #include 32 | #include 33 | #include 34 | #include "../../common/cuda_utils.h" 35 | 36 | 37 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 38 | for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ 39 | i += blockDim.x * gridDim.x) 40 | 41 | constexpr int CAFFE_CUDA_NUM_THREADS = 512; 42 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096; 43 | 44 | inline int CAFFE_GET_BLOCKS(const int N) { 45 | return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS, 46 | CAFFE_MAXIMUM_NUM_BLOCKS); 47 | } 48 | 49 | template 50 | struct SimpleArray { 51 | T data[N]; 52 | }; 53 | 54 | template 55 | using BlockReduce = cub::BlockReduce; 56 | 57 | 58 | constexpr int kCUDATensorMaxDims = 8; 59 | 60 | #define DISPATCH_FUNCTION_BY_VALUE_WITH_TYPE_1(val, Func, T, ...) \ 61 | do { \ 62 | CHECK_LT(val, kCUDATensorMaxDims); \ 63 | switch (val) { \ 64 | case 1: { \ 65 | Func(__VA_ARGS__); \ 66 | break; \ 67 | } \ 68 | case 2: { \ 69 | Func(__VA_ARGS__); \ 70 | break; \ 71 | } \ 72 | case 3: { \ 73 | Func(__VA_ARGS__); \ 74 | break; \ 75 | } \ 76 | case 4: { \ 77 | Func(__VA_ARGS__); \ 78 | break; \ 79 | } \ 80 | case 5: { \ 81 | Func(__VA_ARGS__); \ 82 | break; \ 83 | } \ 84 | case 6: { \ 85 | Func(__VA_ARGS__); \ 86 | break; \ 87 | } \ 88 | case 7: { \ 89 | Func(__VA_ARGS__); \ 90 | break; \ 91 | } \ 92 | case 8: { \ 93 | Func(__VA_ARGS__); \ 94 | break; \ 95 | } \ 96 | default: { break; } \ 97 | } \ 98 | } while (false) 99 | 100 | 101 | void ComputeTransposeAxesForReduceOp( 102 | const int num_dims, 103 | const int num_reduce_axes, 104 | const int* reduce_axes, 105 | int* transpose_axes); 106 | 107 | 108 | void ComputeTransposedStrides( 109 | const int ndim, 110 | const int* dims, 111 | const int* axes, 112 | int* strides); 113 | 114 | 115 | bool IsRowwiseReduce( 116 | const int ndim, 117 | const int* X_dims, 118 | const int* Y_dims, 119 | int* rows, 120 | int* cols); 121 | 122 | 123 | bool IsColwiseReduce( 124 | const int ndim, 125 | const int* X_dims, 126 | const int* Y_dims, 127 | int* rows, 128 | int* cols); 129 | 130 | 131 | template 132 | void Set(const size_t N, const T alpha, T* X, cudaStream_t context); 133 | 134 | 135 | template 136 | void Moments( 137 | const int num_dims, 138 | const int* dims, 139 | const int num_axes, 140 | const int* axes, 141 | const T* X, 142 | T* mean, 143 | T* variance, 144 | cudaStream_t context); 145 | 146 | template 147 | void InvStd( 148 | const int N, 149 | const T epsilon, 150 | const T* var, 151 | T* inv_std, 152 | cudaStream_t context); -------------------------------------------------------------------------------- /operator_cxx/contrib/sigmoid_cross_entropy.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file sigmoid_cross_entropy.cu 23 | * \brief 24 | * \author Yuntao Chen 25 | */ 26 | 27 | #include "./sigmoid_cross_entropy-inl.h" 28 | 29 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 30 | for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ 31 | i += blockDim.x * gridDim.x) 32 | 33 | constexpr int CAFFE_CUDA_NUM_THREADS = 512; 34 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096; 35 | 36 | inline int CAFFE_GET_BLOCKS(const int N) { 37 | return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS, 38 | CAFFE_MAXIMUM_NUM_BLOCKS); 39 | } 40 | 41 | namespace mshadow { 42 | namespace cuda { 43 | 44 | template 45 | __global__ void SigmoidCrossEntropyLossKernel( 46 | const int n, 47 | const T* logits, 48 | const T* targets, 49 | T* losses, 50 | T* counts) { 51 | CUDA_1D_KERNEL_LOOP(index, n) { 52 | if (targets[index] == -1) { 53 | losses[index] = 0.; 54 | counts[index] = 0.; 55 | } else { 56 | losses[index] = 57 | -1. * logits[index] * (targets[index] - (logits[index] >= 0)) + 58 | logf( 59 | 1 + 60 | expf(logits[index] - 2 * logits[index] * (logits[index] >= 0))); 61 | counts[index] = 1.; 62 | } 63 | } 64 | } 65 | 66 | template 67 | __global__ void SigmoidCrossEntropyLossGradientKernel( 68 | const int n, 69 | const T* logits, 70 | const T* targets, 71 | T* d_logits, 72 | T* counts) { 73 | CUDA_1D_KERNEL_LOOP(index, n) { 74 | if (targets[index] == -1) { 75 | d_logits[index] = 0.; 76 | counts[index] = 0.; 77 | } else { 78 | d_logits[index] = 1. / (1. + expf(-logits[index])) - targets[index]; 79 | counts[index] = 1.; 80 | } 81 | } 82 | } 83 | 84 | template 85 | inline void SigmoidCrossEntropyForward(const Tensor &data, 86 | const Tensor &label, 87 | Tensor &loss, 88 | Tensor &loss_sum, 89 | Tensor &count, 90 | Tensor &count_sum, 91 | Tensor &out, 92 | T scale) { 93 | using namespace mshadow::expr; 94 | SigmoidCrossEntropyLossKernel<<>>( 95 | data.shape_.Size(), data.dptr_, label.dptr_, loss.dptr_, count.dptr_); 96 | loss_sum = sumall_except_dim<0>(loss); 97 | count_sum = sumall_except_dim<0>(count); 98 | count_sum += static_cast(1e-5); 99 | out = loss_sum / count_sum; 100 | int count_num = (count.size(0) * count.size(1)); 101 | //out /= static_cast(count_num); 102 | // mx.metric.Loss will take care of this 103 | // out *= scale; 104 | } 105 | 106 | template 107 | inline void SigmoidCrossEntropyBackward(const Tensor &data, 108 | const Tensor &label, 109 | Tensor &d_data, 110 | Tensor &count, 111 | Tensor &count_sum, 112 | T scale) { 113 | using namespace mshadow::expr; 114 | SigmoidCrossEntropyLossGradientKernel<<>>( 115 | data.shape_.Size(), data.dptr_, label.dptr_, d_data.dptr_, count.dptr_); 116 | count_sum = sumall_except_dim<0>(count); 117 | count_sum += static_cast(1e-5); 118 | d_data /= broadcast<0>(count_sum, d_data.shape_); 119 | int count_num = (count.size(0) * count.size(1)); 120 | //d_data /= static_cast(count_num); 121 | d_data *= scale; 122 | } 123 | 124 | } // namespace cuda 125 | 126 | template 127 | inline void SigmoidCrossEntropyForward(const Tensor &data, 128 | const Tensor &label, 129 | Tensor &loss, 130 | Tensor &loss_sum, 131 | Tensor &count, 132 | Tensor &count_sum, 133 | Tensor &out, 134 | T scale) { 135 | cuda::SigmoidCrossEntropyForward(data, label, loss, loss_sum, count, count_sum, out, scale); 136 | } 137 | 138 | template 139 | inline void SigmoidCrossEntropyBackward(const Tensor &data, 140 | const Tensor &label, 141 | Tensor &d_data, 142 | Tensor &count, 143 | Tensor &count_sum, 144 | T scale) { 145 | cuda::SigmoidCrossEntropyBackward(data, label, d_data, count, count_sum, scale); 146 | } 147 | 148 | } // namespace mshadow 149 | 150 | namespace mxnet { 151 | namespace op { 152 | 153 | template<> 154 | Operator *CreateOp(SigmoidCrossEntropyParam param, int dtype) { 155 | Operator *op = NULL; 156 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 157 | op = new SigmoidCrossEntropyOp(param); 158 | }) 159 | return op; 160 | } 161 | 162 | } // namespace op 163 | } // namespace mxnet 164 | 165 | -------------------------------------------------------------------------------- /operator_cxx/contrib/generate_anchor-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file generate_anchor-inl.h 22 | * \brief GenerateAnchor Operator 23 | * \author Yanghao Li, Chenxia Han 24 | */ 25 | #ifndef MXNET_OPERATOR_CONTRIB_GENERATE_ANCHOR_INL_H_ 26 | #define MXNET_OPERATOR_CONTRIB_GENERATE_ANCHOR_INL_H_ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "../operator_common.h" 39 | #include "../mshadow_op.h" 40 | 41 | namespace mxnet { 42 | namespace op { 43 | 44 | namespace gen_anchor { 45 | enum GenAnchorOpInputs {kClsProb, kAnchor}; 46 | enum GenAnchorOpOutputs {kOut}; 47 | } // gen_anchor 48 | 49 | struct GenAnchorParam : public dmlc::Parameter { 50 | // use double to keep consistency with python implementation 51 | nnvm::Tuple scales; 52 | nnvm::Tuple ratios; 53 | int feature_stride; 54 | 55 | DMLC_DECLARE_PARAMETER(GenAnchorParam) { 56 | DMLC_DECLARE_FIELD(scales).set_default(nnvm::Tuple({4.0f, 8.0f, 16.0f, 32.0f})) 57 | .describe("Used to generate anchor windows by enumerating scales"); 58 | DMLC_DECLARE_FIELD(ratios).set_default(nnvm::Tuple({0.5f, 1.0f, 2.0f})) 59 | .describe("Used to generate anchor windows by enumerating ratios"); 60 | DMLC_DECLARE_FIELD(feature_stride).set_default(16) 61 | .describe("The size of the receptive field each unit in the convolution layer of the rpn," 62 | "for example the product of all stride's prior to this layer."); 63 | } 64 | }; 65 | 66 | template 67 | Operator *CreateOp(GenAnchorParam param); 68 | 69 | #if DMLC_USE_CXX11 70 | class GenAnchorProp : public OperatorProperty { 71 | public: 72 | void Init(const std::vector >& kwargs) override { 73 | param_.Init(kwargs); 74 | } 75 | 76 | std::map GetParams() const override { 77 | return param_.__DICT__(); 78 | } 79 | 80 | bool InferShape(std::vector *in_shape, 81 | std::vector *out_shape, 82 | std::vector *aux_shape) const override { 83 | using namespace mshadow; 84 | CHECK_EQ(in_shape->size(), 1) << "Input:[cls_prob]"; 85 | const TShape &dshape = in_shape->at(gen_anchor::kClsProb); 86 | if (dshape.ndim() == 0) return false; 87 | int num_anchors = param_.scales.ndim() * param_.ratios.ndim(); 88 | out_shape->clear(); 89 | // output 90 | out_shape->push_back(Shape2(dshape[2] * dshape[3] * num_anchors, 4)); 91 | return true; 92 | } 93 | 94 | OperatorProperty* Copy() const override { 95 | auto ptr = new GenAnchorProp(); 96 | ptr->param_ = param_; 97 | return ptr; 98 | } 99 | 100 | std::string TypeString() const override { 101 | return "_contrib_GenAnchor"; 102 | } 103 | 104 | std::vector DeclareBackwardDependency( 105 | const std::vector &out_grad, 106 | const std::vector &in_data, 107 | const std::vector &out_data) const override { 108 | return {}; 109 | } 110 | 111 | int NumOutputs() const override { 112 | return 1; 113 | } 114 | 115 | std::vector ListArguments() const override { 116 | return {"cls_prob"}; 117 | } 118 | 119 | std::vector ListOutputs() const override { 120 | return {"output"}; 121 | } 122 | 123 | Operator* CreateOperator(Context ctx) const override; 124 | 125 | private: 126 | GenAnchorParam param_; 127 | }; // class GenAnchorProp 128 | 129 | #endif // DMLC_USE_CXX11 130 | } // namespace op 131 | } // namespace mxnet 132 | 133 | //======================== 134 | // Anchor Generation Utils 135 | //======================== 136 | namespace mxnet { 137 | namespace op { 138 | namespace gen_anchor_utils { 139 | 140 | template 141 | inline void _MakeAnchor(DType w, 142 | DType h, 143 | DType x_ctr, 144 | DType y_ctr, 145 | std::vector& out_anchors) { 146 | out_anchors.push_back(x_ctr - 0.5f * (w - 1.0f)); 147 | out_anchors.push_back(y_ctr - 0.5f * (h - 1.0f)); 148 | out_anchors.push_back(x_ctr + 0.5f * (w - 1.0f)); 149 | out_anchors.push_back(y_ctr + 0.5f * (h - 1.0f)); 150 | } 151 | 152 | template 153 | inline void _Transform(DType scale, 154 | DType ratio, 155 | const std::vector& base_anchor, 156 | std::vector& out_anchors) { 157 | // use double in intermedia computation for consistency with numpy 158 | DType w = base_anchor[2] - base_anchor[0] + 1.0f; 159 | DType h = base_anchor[3] - base_anchor[1] + 1.0f; 160 | DType x_ctr = base_anchor[0] + 0.5 * (w - 1.0f); 161 | DType y_ctr = base_anchor[1] + 0.5 * (h - 1.0f); 162 | DType size = w * h; 163 | DType size_ratios = size / ratio; 164 | DType new_w = std::rint(std::sqrt(size_ratios)) * scale; 165 | DType new_h = std::rint((new_w / scale * ratio)) * scale; 166 | 167 | _MakeAnchor(new_w, new_h, x_ctr, y_ctr, out_anchors); 168 | } 169 | 170 | // out_anchors must have shape (n, 4), where n is ratios.size() * scales.size() 171 | template 172 | inline void GenerateAnchors(const std::vector& base_anchor, 173 | const std::vector& ratios, 174 | const std::vector& scales, 175 | std::vector& out_anchors) { 176 | for (size_t j = 0; j < ratios.size(); ++j) { 177 | for (size_t k = 0; k < scales.size(); ++k) { 178 | _Transform(scales[k], ratios[j], base_anchor, out_anchors); 179 | } 180 | } 181 | } 182 | 183 | } // namespace anchor_utils 184 | } // namespace op 185 | } // namespace mxnet 186 | 187 | #endif // MXNET_OPERATOR_CONTRIB_GENERATE_ANCHOR_INL_H_ 188 | --------------------------------------------------------------------------------