├── config ├── __init__.py ├── NASFPN │ └── __init__.py └── int8 │ └── README.md ├── core └── __init__.py ├── models ├── __init__.py ├── FPG │ ├── __init__.py │ └── README.md ├── KD │ ├── __init__.py │ ├── README.md │ ├── utils.py │ └── builder.py ├── NASFPN │ ├── __init__.py │ └── README.md ├── maskrcnn │ ├── __init__.py │ ├── metric.py │ ├── process_output.py │ ├── utils.py │ ├── detector.py │ ├── README.md │ └── bbox_post_processing.py ├── msrcnn │ ├── __init__.py │ ├── metric.py │ ├── README.md │ ├── maskiou_compute.py │ └── detector.py ├── retinanet │ ├── __init__.py │ ├── metric.py │ └── README.md ├── cascade_rcnn │ ├── __init__.py │ └── README.md ├── tridentnet │ ├── __init__.py │ ├── README.md │ └── input.py ├── selsa │ └── README.md ├── TSD │ ├── metric.py │ ├── readme.md │ └── faster_rcnn.py ├── efficientnet │ └── README.md ├── FreeAnchor │ └── README.md ├── sepc │ ├── README.md │ ├── readme.md │ └── sepc_dconv.py ├── crowdhuman │ ├── README.md │ └── softmax_entropy_op.py ├── RepPoints │ └── README.md ├── FCOS │ └── metric.py ├── FPN │ ├── README.md │ ├── get_top_proposal.py │ └── assign_layer_fpn.py ├── se │ └── builder.py └── dcn │ └── builder.py ├── symbol ├── __init__.py └── detector.py ├── utils ├── __init__.py ├── json_to_roidb.py ├── download_pretrain.py ├── logger.py ├── roidb_to_coco.py ├── load_model.py ├── patch_config.py ├── deprecated.py ├── contrib │ ├── edit_model_weight.py │ └── data_to_coco.py ├── create_voc_roidb.py ├── create_coco_roidb.py ├── create_crowdhuman_roidb.py └── callback.py ├── operator_py ├── __init__.py └── cython │ ├── __init__.py │ ├── .gitignore │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── bbox.pyx │ └── bbox_self.pyx ├── doc ├── image │ ├── detector.png │ ├── diagram.png │ ├── diagram_v2.png │ ├── trident_block.png │ ├── tensorboard_screenshot.png │ └── trident_block_details.png ├── DISTRIBUTED.md ├── TENSORBOARD.md ├── BUILD_WHEEL.md ├── DATASET.md ├── INSTALL.md └── FRAMEWOKR_OVERVIEW.md ├── Makefile ├── docker ├── README.md └── Dockerfile ├── scripts ├── dist_worker.sh ├── train_hpc.sh ├── terminate.sh ├── launch.sh └── setup.sh ├── .github └── ISSUE_TEMPLATE │ └── bug_report.md ├── operator_cxx └── contrib │ ├── bbox_norm.cu │ ├── sync_batch_norm.cu │ ├── focal_loss.cu │ ├── broadcast_scale.cu │ ├── sync_inplace_activation_batch_norm.cu │ ├── quantization_int8.cu │ ├── focal_loss.cc │ ├── bbox_norm.cc │ ├── broadcast_scale.cc │ ├── quantization_int8.cc │ ├── axpy.cc │ ├── sigmoid_cross_entropy.cc │ ├── global_average_pooling.cc │ ├── group_norm.cc │ ├── generate_anchor.cc │ ├── nms-inl.h │ └── global_average_pooling.cu ├── .gitignore ├── unittest └── test_loader.py └── detection_infer_speed.py /config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /symbol/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /models/FPG/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/KD/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/NASFPN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/NASFPN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/maskrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/msrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/cascade_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/tridentnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/cython/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /doc/image/detector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/detector.png -------------------------------------------------------------------------------- /doc/image/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/diagram.png -------------------------------------------------------------------------------- /doc/image/diagram_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/diagram_v2.png -------------------------------------------------------------------------------- /doc/image/trident_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/trident_block.png -------------------------------------------------------------------------------- 
/doc/image/tensorboard_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/tensorboard_screenshot.png -------------------------------------------------------------------------------- /doc/image/trident_block_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/trident_block_details.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd operator_py/cython/; python3 setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd operator_py/cython/; rm *.so *.c *.cpp; cd ../../ 5 | -------------------------------------------------------------------------------- /operator_py/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | ## Build 3 | ``` 4 | cd docker 5 | # build for cuda11.1 cudnn8 6 | docker build --network=host --build-arg OS_VERSION=16.04 --build-arg CUDA_VERSION=11.1 --build-arg CUDNN_VERSION=8 --tag simpledet . 7 | ``` 8 | 9 | ## Launch 10 | ``` 11 | docker run -it --gpus all simpledet zsh 12 | ``` 13 | -------------------------------------------------------------------------------- /models/maskrcnn/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_metric import EvalMetricWithSummary 5 | 6 | 7 | class SigmoidCELossMetric(EvalMetricWithSummary): 8 | def __init__(self, name, output_names, label_names, **kwargs): 9 | super().__init__(name, output_names, label_names, **kwargs) 10 | 11 | def update(self, labels, preds): 12 | self.sum_metric += preds[0].mean().asscalar() 13 | self.num_inst += 1 -------------------------------------------------------------------------------- /scripts/dist_worker.sh: -------------------------------------------------------------------------------- 1 | root_dir=$1 2 | singularity_image=$2 3 | conffile=$3 4 | 5 | if test $(which singularity); then 6 | singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "MXNET_UPDATE_ON_KVSTORE=0 MXNET_OPTIMIZER_AGGREGATION_SIZE=20 python -u detection_train.py --config ${conffile}" 7 | else 8 | singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "python -u detection_train.py" 9 | fi 10 | -------------------------------------------------------------------------------- /models/selsa/README.md: -------------------------------------------------------------------------------- 1 | ## Sequence Level Semantics Aggregation for Video Object Detection 2 | 3 | Haiping Wu, Yuntao Chen, Naiyan Wang, Zhaoxiang Zhang 4 | 5 | The project is currently released at https://github.com/happywu/Sequence-Level-Semantics-Aggregation 6 | ### Citing SELSA 7 | ``` 8 | @article{wu2019selsa, 9 | title={Sequence Level Semantics Aggregation for Video Object Detection}, 10 | author={Wu, Haiping and Chen, Yuntao and Wang, 
Naiyan and Zhang, Zhaoxiang}, 11 | journal={ICCV 2019}, 12 | year={2019} 13 | } 14 | ``` 15 | -------------------------------------------------------------------------------- /models/TSD/metric.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loss metric for QueryDet training 3 | 4 | Author: Chenhongyi Yang 5 | ''' 6 | import numpy as np 7 | import mxnet as mx 8 | from core.detection_metric import EvalMetricWithSummary 9 | 10 | 11 | class LossMetric(EvalMetricWithSummary): 12 | def __init__(self, name, output_names, label_names, **kwargs): 13 | super().__init__(name, output_names, label_names, **kwargs) 14 | 15 | def update(self, labels, preds): 16 | self.sum_metric += preds[0].asnumpy().sum() 17 | self.num_inst += 1 18 | 19 | -------------------------------------------------------------------------------- /scripts/train_hpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gpucount=8 4 | num_node=2 5 | num_servers=${num_node} 6 | root_dir="/path/to/simpledet" 7 | sync_dir="/tmp/simpledet_sync" 8 | 9 | hostfile=hostfile.txt 10 | conffile=faster_r50v2c4_c5_256roi_1x 11 | singularity_image=simpledet.img 12 | 13 | export DMLC_INTERFACE=eth0 14 | python -u ../../launcher/tools/launch.py \ 15 | -n ${num_node} \ 16 | --num-servers ${num_servers} \ 17 | --sync-dst-dir ${sync_dir} \ 18 | --launcher ssh -H ${hostfile} \ 19 | scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \ 20 | 2>&1 | tee -a ${root_dir}/log/${conffile}.log 21 | -------------------------------------------------------------------------------- /models/maskrcnn/process_output.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pycocotools import mask as mask_util 3 | 4 | from models.maskrcnn.utils import segm_results 5 | 6 | def process_output(all_outputs, roidb): 7 | for output_record in all_outputs: 8 | rec_id = int(output_record['rec_id']) 9 | bbox_xyxy = output_record['bbox_xyxy'] 10 | cls_score = output_record['cls_score'] 11 | cls = output_record['cls'] 12 | mask = output_record['mask'] 13 | 14 | im_h = roidb[rec_id]["h"] 15 | im_w = roidb[rec_id]["w"] 16 | segm = segm_results(bbox_xyxy, cls, mask, im_h, im_w) 17 | output_record['segm'] = segm 18 | del output_record['mask'] 19 | return all_outputs 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Screenshots or stack traces** 14 | If applicable, add screenshots or stack traces to help explain your problem. 15 | 16 | **Which config are you using** 17 | 18 | **Which dataset are you using ** 19 | 20 | **Hardware info** 21 | CPU, GPU, Storage(Disk or NFS) 22 | 23 | **Software info** 24 | driver, CUDA, cuDNN versions 25 | OS verison 26 | 27 | **How did you set up your MXNet for SimpleDet** 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 
31 | -------------------------------------------------------------------------------- /models/msrcnn/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_metric import EvalMetricWithSummary 5 | 6 | 7 | class SigmoidCELossMetric(EvalMetricWithSummary): 8 | def __init__(self, name, output_names, label_names, **kwargs): 9 | super().__init__(name, output_names, label_names, **kwargs) 10 | 11 | def update(self, labels, preds): 12 | self.sum_metric += preds[0].mean().asscalar() 13 | self.num_inst += 1 14 | 15 | class L2(EvalMetricWithSummary): 16 | def __init__(self, name, output_names, label_names, **kwargs): 17 | super().__init__(name, output_names, label_names, **kwargs) 18 | 19 | def update(self, labels, preds): 20 | self.sum_metric += preds[0].mean().asscalar() 21 | self.num_inst += 1 -------------------------------------------------------------------------------- /utils/json_to_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import pickle as pkl 4 | from os.path import basename 5 | 6 | import numpy as np 7 | 8 | 9 | def parse_argument(): 10 | parser = argparse.ArgumentParser("Convert json gt to roidb") 11 | parser.add_argument("--json", type=str, required=True) 12 | args = parser.parse_args() 13 | return args.json 14 | 15 | 16 | def json_to_roidb(json_path): 17 | with open(json_path) as f: 18 | json_gt = json.load(f) 19 | 20 | for obj in json_gt: 21 | obj["gt_class"] = np.array(obj["gt_class"], dtype=np.float32) 22 | obj["gt_bbox"] = np.array(obj["gt_bbox"], dtype=np.float32) 23 | with open("data/cache/%s.roidb" % basename(json_path).replace("json", "roidb"), "wb") as fout: 24 | pkl.dump(json_gt, fout) 25 | 26 | 27 | if __name__ == "__main__": 28 | json_to_roidb(parse_argument()) 29 | -------------------------------------------------------------------------------- /models/msrcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Mask Scoring RCNN 2 | 3 | This repository implements [**Mask Scoring RCNN**](https://arxiv.org/abs/1903.00241) in the SimpleDet framework. 
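The key idea of Mask Scoring R-CNN is an extra MaskIoU head that predicts how well each predicted mask overlaps its ground-truth mask, and this prediction is used to rescore instances at test time. Below is a minimal sketch of that rescoring step; the function and argument names are illustrative and are not the actual API of this repository.

```python
import numpy as np

def rescore_masks(cls_scores, pred_mask_ious):
    """Mask Scoring R-CNN ranks masks by cls_score * predicted_mask_iou
    instead of the classification score alone.

    cls_scores:     (N,) classification scores from the box head
    pred_mask_ious: (N,) mask IoU values predicted by the MaskIoU head
    """
    return np.asarray(cls_scores) * np.asarray(pred_mask_ious)
```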
4 | 5 | ### Set Up 6 | You need a newer [mxnet-cu100-20191214](https://1dv.aflat.top/mxnet_cu100-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) or [mxnet-cu101-20191214](https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 7 | 8 | ### Quick Start 9 | ```bash 10 | # train 11 | python3 detection_train.py --config config/ms_r50v1_fpn_1x.py 12 | 13 | # test 14 | python3 mask_test.py --config config/ms_r50v1_fpn_1x.py 15 | ``` 16 | 17 | ### Performance 18 | |Model|Backbone|Head|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP(Mask AP)|Link| 19 | |-----|--------|----|--------------|---|---------|----|---------|-----------|---------------|----| 20 | |Mask Scoring|R50v1-FPN|2MLP+4CONV|1X|8X 1080Ti|2|no|8.1G(3.6G)|23 img/s|37.2(35.0)|[model](https://1dv.aflat.top/ms_r50v1_fpn_1x.zip) 21 | -------------------------------------------------------------------------------- /utils/download_pretrain.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request 3 | 4 | 5 | def report(block_count, block_size, content_size): 6 | if block_count % (content_size // block_size // 5) == 1: 7 | print("Downloaded %.1f/100" % (block_size * block_count / content_size * 100)) 8 | 9 | 10 | def download(prefix, epoch): 11 | dir_name = os.path.dirname(prefix) 12 | if not os.path.exists(dir_name): 13 | os.makedirs(dir_name) 14 | base_name = prefix.replace("pretrain_model/", "") + "-%04d.params" % epoch 15 | save_name = "%s-%04d.params" % (prefix, epoch) 16 | base_url = os.environ.get("SIMPLEDET_BASE_URL", "https://1dv.aflat.top/") 17 | full_url = base_url + base_name 18 | 19 | try: 20 | print("Downloading %s from %s" % (save_name, full_url)) 21 | urllib.request.urlretrieve(full_url, save_name, report) 22 | except Exception as e: 23 | print("Fail to download %s. You can manually download it from %s and put it at %s" % (base_name, full_url, save_name)) 24 | os.remove(save_name) 25 | raise e 26 | -------------------------------------------------------------------------------- /models/KD/README.md: -------------------------------------------------------------------------------- 1 | ## KD 2 | 3 | This repository implements [**Knowledge Distillation**](https://arxiv.org/abs/1503.02531) in the SimpleDet framework. 4 | 5 | ### Quick Start 6 | ```bash 7 | python3 detection_train.py --config config/kd/retina_r50v1b_fpn_1x_fitnet_g10.py 8 | python3 detection_test.py --config config/kd/retina_r50v1b_fpn_1x_fitnet_g10.py 9 | ``` 10 | 11 | ### Results and Models 12 | All AP results are reported on the minival2014 split of the [COCO](http://cocodataset.org) dataset. 
13 | 14 | |Model|Backbone|Head|Train Schedule|AP|AP50|AP75|APs|APm|APl| 15 | |-----|--------|----|--------------|--|----|----|---|---|---| 16 | |Retina|R50v1b-FPN|4Conv|1X|36.6|56.9|39.0|20.3|40.7|47.2| 17 | |Retina|R50v1b-FPN-TR152v1b1X|4Conv|1X|38.9|59.0|41.6|21.4|43.3|52.1| 18 | |Retina|R50v1b-FPN-TR152v1b1X|4Conv|2X|40.1|60.6|43.1|21.8|44.5|54.3| 19 | |Faster|R50v1b-FPN|2MLP|1X|37.2|59.4|40.4|22.3|41.3|47.6| 20 | |Faster|R50v1b-FPN|2MLP|2X|38.0|59.7|41.5|22.2|41.6|48.8| 21 | |Faster|R50v1b-FPN-TR152v1b2X|2MLP|1X|39.9|61.3|43.6|22.7|44.2|52.7| 22 | |Faster|R50v1b-FPN-TR152v1b2X|2MLP|2X|40.5|62.2|43.9|23.1|44.7|53.9| 23 | -------------------------------------------------------------------------------- /scripts/terminate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "usage: $0 comma_separated_worker_hostnames" 5 | exit -1 6 | fi 7 | 8 | hosts=$1 9 | 10 | # extract workers and check reachability 11 | IFS=, read -r -a host_array <<< $hosts 12 | for host in ${host_array[@]}; do 13 | # check reachability 14 | echo "check reachability of $host" 15 | ssh -q $host exit 16 | if [ $? -ne 0 ]; then 17 | echo "$host is not reachable" 18 | exit -1 19 | fi 20 | 21 | # check availability (retreat if remote host is in use) 22 | echo "check availability of $host" 23 | for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 24 | x="${x//[$'\t\r\n ']}" # remove trailing whitespace 25 | if [ $x -gt 5 ]; then 26 | echo "$host has gpu utilization of $x%"; 27 | fi; 28 | done 29 | 30 | # cleanup potentially dead python process (march since we checked it) 31 | ssh $host ps aux | grep python 32 | echo -e "\n" 33 | echo "Terminate tasks on $host in 5s" 34 | sleep 5 35 | ssh -q $host pkill python 36 | done 37 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from pytz import utc, timezone 4 | 5 | 6 | def config_logger(path): 7 | def custom_time(*args): 8 | utc_dt = utc.localize(datetime.utcnow()) 9 | my_tz = timezone("Asia/Shanghai") 10 | converted = utc_dt.astimezone(my_tz) 11 | return converted.timetuple() 12 | 13 | logging.basicConfig() 14 | logging.getLogger().handlers.pop() 15 | 16 | fmt = '%(asctime)s %(message)s' 17 | date_fmt = '%m-%d %H:%M:%S' 18 | formatter = logging.Formatter(fmt=fmt, datefmt=date_fmt) 19 | formatter.converter = custom_time 20 | 21 | logging.getLogger().setLevel(logging.INFO) 22 | 23 | log_file_save_name = path 24 | file_handler = logging.FileHandler(filename=log_file_save_name, mode='w') 25 | file_handler.setLevel(logging.INFO) 26 | file_handler.setFormatter(formatter) 27 | logging.getLogger().addHandler(file_handler) 28 | 29 | console = logging.StreamHandler() 30 | console.setLevel(logging.INFO) 31 | console.setFormatter(formatter) 32 | logging.getLogger().addHandler(console) -------------------------------------------------------------------------------- /models/TSD/readme.md: -------------------------------------------------------------------------------- 1 | # TSD 2 | This repository implements the CVPR 2020 paper [*Revisiting the Sibling Head in Object Detector*](https://openaccess.thecvf.com/content_CVPR_2020/papers/Song_Revisiting_the_Sibling_Head_in_Object_Detector_CVPR_2020_paper.pdf). 
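TSD (Task-aware Spatial Disentanglement) decouples classification and localization by giving each task its own pooled features and head, while the Progressive Constraint (PC) adds a margin term that pushes the disentangled head to outperform the original sibling head. The snippet below is only a sketch of such a PC-style margin loss; the margin value and the names are illustrative assumptions, not the exact formulation used in this config.

```python
import numpy as np

def progressive_constraint(score_sibling, score_tsd, margin=0.2):
    """Hinge-style margin loss: zero once the TSD branch beats the
    sibling head by at least `margin`, positive otherwise.

    score_sibling: (N,) per-RoI confidence (or IoU) from the classical head
    score_tsd:     (N,) per-RoI confidence (or IoU) from the TSD head
    """
    return np.maximum(0.0, np.asarray(score_sibling) - np.asarray(score_tsd) + margin)
```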
3 | 4 | ## Quick Start 5 | ``` 6 | # train 7 | python detection_train.py --config config/TSD/tsd_r50_rpn_1x.py 8 | 9 | # test 10 | python detection_test.py --config config/TSD/tsd_r50_rpn_1x.py 11 | ``` 12 | 13 | ## COCO minival Performance 14 | 15 | All results are reported using ResNet-50 and 1x schedule training. 16 | 17 | TSD: Task-aware Spatial Disentanglement 18 | 19 | PC: Progressive Constraint 20 | 21 | |Method|AP|AP_50|AP_75|AP_s|AP_m|AP_l| 22 | |------|--|-----|-----|----|----|----| 23 | |Baseline Faster RCNN|36.3|58.2|39.0|21.3|39.8|46.9| 24 | |+TSD|39.3|60.6|42.8|22.2|42.8|52.0| 25 | |+TSD and PC|38.9|60.2|42.2|22.0|42.4|51.6| 26 | 27 | ## Citation 28 | ``` 29 | @InProceedings{Song_2020_CVPR, 30 | author = {Song, Guanglu and Liu, Yu and Wang, Xiaogang}, 31 | title = {Revisiting the Sibling Head in Object Detector}, 32 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 33 | month = {June}, 34 | year = {2020} 35 | } 36 | ``` -------------------------------------------------------------------------------- /operator_py/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /doc/DISTRIBUTED.md: -------------------------------------------------------------------------------- 1 | #### Requirement 2 | Here we only provide a guide to launch distributed training with singularity, please make sure your singularity works by checking [INSTALL.md](./doc/INSTALL.md) 3 | 4 | #### Setup 5 | 1. obtain the mxnet launcher and place it in the parent directory of the simpledet working directory 6 | ```bash 7 | git clone https://github.com/RogerChern/mxnet-dist-lancher.git lancher 8 | ``` 9 | 10 | 2. mv `data`, `pretrain_model`, `experiments` outside of simpledet and symink them back. 11 | This step is to avoid unnecessary `rsync` of large binary files in the working directory during launching. 12 | 13 | 3. after step 1 and 2, your directory should be as following 14 | ``` 15 | lancher/ 16 | simpledet/ 17 | data -> /path/to/data 18 | pretrain_model -> /path/to/pretain_model 19 | experiments -> /path/to/experiments 20 | ... 21 | ``` 22 | 23 | 4. 
make a hostfile containing the hostnames of all nodes; these nodes must be reachable from the launch node via passwordless ssh 24 | simpledet/hostfile.txt 25 | ``` 26 | node1 27 | node2 28 | ``` 29 | 30 | 5. change the singularity mounting point in `scripts/dist_worker.sh` 31 | 32 | 6. launch distributed training with scripts 33 | ```bash 34 | bash scripts/launch.sh config/mask_r50v1_fpn_1x.py node1,node2 35 | ``` 36 | -------------------------------------------------------------------------------- /models/efficientnet/README.md: -------------------------------------------------------------------------------- 1 | ## EfficientNet for object detection 2 | This repository implements [**EfficientNet**](https://arxiv.org/abs/1905.11946) in the SimpleDet framework. EfficientNet B5 achieves the same mAP with **~1/10 FLOPs** compared with ResNet-50. 3 | 4 | ### Quick Start 5 | ```bash 6 | # train faster r-cnn with efficientnet fpn backbone 7 | python3 detection_train.py --config config/efficientnet/efficientnet_b5_fpn_bn_scratch_400_6x.py 8 | ``` 9 | 10 | ### Results and Models 11 | All AP results are reported on minival of the [COCO dataset](http://cocodataset.org). 12 | 13 | |Model|InputSize|Backbone|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP|Link| 14 | |-----|-----|--------|--------------|---|---------|----|---------|-----------|---------------|----| 15 | |Faster|400x600|B5-FPN|36 epoch(6X)|8X 1080Ti|8|yes|-|75 img/s|37.2|[model](https://1dv.aflat.top/efficientnet_b5_fpn_bn_scratch_400_6x.zip)| 16 | |Faster|400x600|B5-FPN|54 epoch(9X)|8X 1080Ti|8|yes|-|75 img/s|37.9|-| 17 | |Faster|400x600|B5-FPN|72 epoch(12X)|8X 1080Ti|8|yes|-|75 img/s|38.3|-| 18 | 19 | ### Reference 20 | ``` 21 | @inproceedings{tan2019, 22 | title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks}, 23 | author={Tan, Mingxing and Le, Quoc V}, 24 | booktitle={ICML}, 25 | year={2019} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /models/FreeAnchor/README.md: -------------------------------------------------------------------------------- 1 | ## FreeAnchor 2 | 3 | This repository implements [**FreeAnchor**](https://arxiv.org/abs/1909.02466) in the SimpleDet framework. 4 | FreeAnchor assigns anchors to ground-truth objects with a maximum likelihood estimation procedure. Built on top of RetinaNet, this method achieves a significant improvement in performance. 5 | 6 | ### Quick Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/FreeAnchor/free_anchor_r50v1_fpn_1x.py 10 | # test 11 | python3 detection_test.py --config config/FreeAnchor/free_anchor_r50v1_fpn_1x.py 12 | ``` 13 | 14 | ### Models 15 | All AP results are reported on minival2014 of the [COCO dataset](http://cocodataset.org). 
16 | 17 | |Method|Backbone|Schedule|AP|Link| 18 | |------|--------|--------|--|----| 19 | |FreeAnchor|R50v1-FPN|1x|38.3|[model](https://drive.google.com/open?id=1k043sSZa-sa6qeHuDG21OFOrze1SF364)| 20 | |FreeAnchor|R101v1-FPN|1x|40.4|[model](https://drive.google.com/open?id=1Rki-hZFsuMHleYJpoXFJQMplCFxkDfW-)| 21 | 22 | ### Reference 23 | ``` 24 | @inproceedings{zhang2019freeanchor, 25 | title={{FreeAnchor}: Learning to Match Anchors for Visual Object Detection}, 26 | author={Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang}, 27 | booktitle={Neural Information Processing Systems}, 28 | year={2019} 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /models/cascade_rcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Cascade R-CNN 2 | 3 | This repository implements [**Cascade R-CNN**](https://arxiv.org/abs/1712.00726) in the **SimpleDet** framework. Cascade R-CNN is a multi-stage object detector, aiming to reduce the overfitting problem by resampling of progressively improved hypotheses. 4 | 5 | ### How we build Cascade R-CNN 6 | 7 | #### Input 8 | 9 | Cascade R-CNN can share the origin Faster R-CNN input, so there is no need to implement an extra one. 10 | 11 | #### Symbol 12 | 13 | - ```CascadeRcnn```: detector with three ```R-CNN``` stages 14 | - ```CascadeBbox2fcHead```: header for ```R-CNN``` stages. Note that it is also required to generate proposal for next ```R-CNN``` stages, thus we add ```get_all_proposal``` to decode boxes predicted in this stage and ```get_sampled_proposal``` to generate ```bbox_target```. 15 | 16 | #### Config 17 | 18 | - ```BboxParam```, ```BboxParam2nd```, ```BboxParam3rd```: config for ```R-CNN``` stages, ```mean``` and ```std``` in ```regress_target``` aim to decode boxes predicted in this stage, and those in ```bbox_target``` is prepared to generate ```bbox_target``` for next ```R-CNN``` stage. Note that we add ```stage``` field to specify the weight used by ```R-CNN```, as in **test phase** ```bbox_head_1st``` and ```bbox_head_2nd``` forward twice with different input feature. 19 | 20 | -------------------------------------------------------------------------------- /operator_cxx/contrib/bbox_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file bbox_norm.cu 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./bbox_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BBoxNormParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BBoxNormOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_batch_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Hang Zhang 24 | */ 25 | 26 | #include "sync_batch_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | template<> 31 | Operator *CreateOp(SyncBatchNormParam param, int dtype) { 32 | return new SyncBatchNorm(param); 33 | } 34 | 35 | } // namespace op 36 | } // namespace mxnet 37 | 38 | -------------------------------------------------------------------------------- /operator_cxx/contrib/focal_loss.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file focal_loss.cu 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./focal_loss-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(FocalLossParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new FocalLossOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/broadcast_scale.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file broadcast_scale.cu 22 | * \brief 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "./broadcast_scale-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BroadcastScaleParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BroadcastScaleOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_inplace_activation_batch_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 
20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_inplace_activation_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "sync_inplace_activation_batch_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | template<> 31 | Operator *CreateOp(SyncInplaceABNParam param, int dtype) { 32 | return new SyncInplaceABN(param); 33 | } 34 | 35 | } // namespace op 36 | } // namespace mxnet 37 | 38 | -------------------------------------------------------------------------------- /models/FPG/README.md: -------------------------------------------------------------------------------- 1 | ## Feature Pyramid Grids & PAFPN 2 | 3 | This repository implements [**FPG**](https://arxiv.org/pdf/2004.03580.pdf) and [**PAFPN**](https://arxiv.org/abs/1803.01534) in the SimpleDet framework. 4 | 5 | ### Quick Start 6 | ```bash 7 | # train 8 | python detection_train.py --config config/FPG/faster_r50v1b_fpg6@128_syncbn_1x.py 9 | python detection_train.py --config config/pafpn/faster_r50v1b_pafpn3@256_syncbn_1x.py 10 | python detection_train.py --config config/pafpn/faster_r50v1b_pafpn3@384_syncbn_1x.py 11 | # test 12 | python detection_test.py --config config/FPG/faster_r50v1b_fpg6@128_syncbn_1x.py 13 | python detection_test.py --config config/pafpn/faster_r50v1b_pafpn3@256_syncbn_1x.py 14 | python detection_test.py --config config/pafpn/faster_r50v1b_pafpn3@384_syncbn_1x.py 15 | ``` 16 | 17 | ### Results 18 | 19 | | Detector | Pyramid | AP | AP50 | AP75 | APs | APm | APl | 20 | |----------|---------|----|------|------|-----|-----|-----| 21 | | Faster R50v1b | FPG 6@128 | 38.7 | 59.5 | 42.3 | 23.7 | 42.3 | 48.3| 22 | | Faster R50v1b | PAFPN 3@256 | 38.6 | 58.8 | 41.8 | 22.3 | 42.6 | 50.8 | 23 | | Faster R50v1b | PAFPN 3@384 | 39.4 | 59.9 | 42.8 | 23.9 | 43.2 | 50.9 | 24 | 25 | Note that SyncBN is only used in FPG neck but used in all BN layers under PAFPN settings according to the original papers. Besides, TDBU Neck in NASFPN folder is a special case of PAFPN with 3 stages and 384 channels, thus this setting is also appended in this config. -------------------------------------------------------------------------------- /operator_cxx/contrib/quantization_int8.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * Copyright (c) 2019 by Contributors 22 | * \file Quantization_int8.cu 23 | * \brief 24 | * \author Xiaotao Chen, Jingqiu Zhou, Ruize Hou 25 | */ 26 | #include "./quantization_int8-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(Quantization_int8Para param, int dtype) { 33 | Operator* op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new Quantization_int8Op(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ project files 2 | .idea 3 | *.iml 4 | out 5 | gen 6 | 7 | ### Vim template 8 | [._]*.s[a-w][a-z] 9 | [._]s[a-w][a-z] 10 | *.un~ 11 | Session.vim 12 | .netrwhist 13 | *~ 14 | 15 | ### IPythonNotebook template 16 | # Temporary data 17 | .ipynb_checkpoints/ 18 | 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib/ 38 | lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *,cover 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | *.ipynb 80 | *.params 81 | *.json 82 | .vscode/ 83 | 84 | model 85 | model/ 86 | visimg 87 | mxnet/ 88 | mxnext/ 89 | data 90 | experiments 91 | pretrain_model 92 | !data/cache/coco_micro_test.roidb 93 | -------------------------------------------------------------------------------- /models/sepc/README.md: -------------------------------------------------------------------------------- 1 | ## Scale-Equalizing Pyramid Convolution for Object Detection 2 | This repository implements [Scale-Equalizing Pyramid Convolution for Object Detection](https://arxiv.org/abs/2005.03101) in the SimpleDet framework. 
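The pyramid convolution (PConv) at the core of SEPC applies a shared 3x3 convolution across neighbouring FPN levels and sums the results: the finer level is convolved with stride 2, the current level with stride 1, and the output from the coarser level is upsampled before being added. The sketch below only shows this cross-level wiring for a single output level; the names are illustrative, weight sharing and the integrated BN of SEPC are omitted, and it is not the implementation used in this repository.

```python
import mxnet as mx

def pconv_level(p_finer, p_cur, p_coarser, num_filter, prefix):
    """One output level of a pyramid convolution (PConv), SEPC-style."""
    out = mx.sym.Convolution(p_cur, num_filter=num_filter, kernel=(3, 3),
                             pad=(1, 1), name=prefix + "_same")
    if p_finer is not None:
        # the finer (higher-resolution) level is reduced with a stride-2 convolution
        out = out + mx.sym.Convolution(p_finer, num_filter=num_filter, kernel=(3, 3),
                                       stride=(2, 2), pad=(1, 1), name=prefix + "_down")
    if p_coarser is not None:
        # the coarser level is convolved and then upsampled back to the current resolution
        up = mx.sym.Convolution(p_coarser, num_filter=num_filter, kernel=(3, 3),
                                pad=(1, 1), name=prefix + "_up")
        out = out + mx.sym.UpSampling(up, scale=2, sample_type="nearest",
                                      name=prefix + "_upsample")
    return out
```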
3 | 4 | ### Qucik Start 5 | 6 | ```python 7 | # train 8 | python detection_train.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 9 | 10 | # test 11 | python detection_test.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 12 | ``` 13 | 14 | ### Performance and Model 15 | All AP results are reported on COCO val2017: 16 | 17 | Model | Backbone | Train Schedule | GPU | Image/GPU| Train MEM| Train Speed| FP16| Box AP | link | 18 | ---------- | --------- | --------- | ---------- | ---------| ----------| ----------| ---------| -----------| ----------- 19 | retinanet (baseline) | res50v1b | 1x | 8X 2080Ti |4|8653M | 44 img/s| yes|35.9 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGJfW59R3IEelhxv?e=Ob9y4W)| 20 | retinanet_pconv | res50v1b | 1x | 8X 2080Ti |4| 9111M|43 img/s | yes|37.2 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGPiw3cfqOWZkUAB?e=PIHppA)| 21 | retinanet_pconv+ibn | res50v1b | 1x | 8X 2080Ti|4 |9467M | 40 img/s| yes|37.6 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGayQr_1Ew-dhRfA?e=W2AXi6)| 22 | retinanet_sepclite | res50v1b | 1x | 8X 2080Ti |4| 9467M|36 img/s |yes|38.6 |[model](https://1drv.ms/u/s!AhNcLYzCx6CCjGTAbLT7_YXjq3GF?e=ZHfIqn) | 23 | retinanet_sepc | res50v1b | 1x | 8X 2080Ti |4| 9471M|25 img/s | yes|**39.7** | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGWO02qo_adoy8km?e=30H1sl)| 24 | -------------------------------------------------------------------------------- /models/sepc/readme.md: -------------------------------------------------------------------------------- 1 | ## Scale-Equalizing Pyramid Convolution for Object Detection 2 | This repository implements [Scale-Equalizing Pyramid Convolution for Object Detection](https://arxiv.org/abs/2005.03101) in the SimpleDet framework. 3 | 4 | ### Qucik Start 5 | 6 | ```python 7 | # train 8 | python detection_train.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 9 | 10 | # test 11 | python detection_test.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 12 | ``` 13 | 14 | ### Performance and Model 15 | All AP results are reported on COCO val2017: 16 | 17 | Model | Backbone | Train Schedule | GPU | Image/GPU| Train MEM| Train Speed| FP16| Box AP | link | 18 | ---------- | --------- | --------- | ---------- | ---------| ----------| ----------| ---------| -----------| ----------- 19 | retinanet (baseline) | res50v1b | 1x | 8X 2080Ti |4|8653M | 44 img/s| yes|35.9 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGJfW59R3IEelhxv?e=Ob9y4W)| 20 | retinanet_pconv | res50v1b | 1x | 8X 2080Ti |4| 9111M|43 img/s | yes|37.2 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGPiw3cfqOWZkUAB?e=PIHppA)| 21 | retinanet_pconv+ibn | res50v1b | 1x | 8X 2080Ti|4 |9467M | 40 img/s| yes|37.6 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGayQr_1Ew-dhRfA?e=W2AXi6)| 22 | retinanet_sepclite | res50v1b | 1x | 8X 2080Ti |4| 9467M|36 img/s |yes|38.6 |[model](https://1drv.ms/u/s!AhNcLYzCx6CCjGTAbLT7_YXjq3GF?e=ZHfIqn) | 23 | retinanet_sepc | res50v1b | 1x | 8X 2080Ti |4| 9471M|25 img/s | yes|**39.7** | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGWO02qo_adoy8km?e=30H1sl)| 24 | -------------------------------------------------------------------------------- /utils/roidb_to_coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | 4 | from pycocotools.coco import COCO 5 | from operator_py.detectron_bbox_utils import xyxy_to_xywh 6 | 7 | 8 | def roidb_to_coco(roidb): 9 | # The whole coco dataset 10 | dataset = { 11 | 'categories': [], 12 | 'images': [], 13 | 'annotations': [] 14 | } 15 | 16 | 
category_ids = set() 17 | obj_id = 0 18 | for roirec in roidb: 19 | dataset['images'].append({ 20 | 'id': roirec['im_id'], 21 | 'width': roirec['w'], 22 | 'height': roirec['h'] 23 | }) 24 | roirec['gt_bbox'] = xyxy_to_xywh(roirec['gt_bbox']) 25 | for bbox, cls in zip(roirec['gt_bbox'], roirec['gt_class']): 26 | x, y, h, w = bbox.tolist() 27 | dataset["annotations"].append({ 28 | 'area': h * w, 29 | 'bbox': [x, y, h, w], 30 | 'category_id': float(cls), 31 | 'id': obj_id, 32 | 'image_id': roirec['im_id'], 33 | 'iscrowd': 0 34 | }) 35 | obj_id += 1 36 | category_ids.add(float(cls)) 37 | for class_id in category_ids: 38 | dataset['categories'].append({ 39 | 'id': class_id, 40 | 'name': class_id, 41 | 'supercategory': 'none' 42 | }) 43 | 44 | with tempfile.NamedTemporaryFile(mode="w") as f: 45 | json.dump(dataset, f) 46 | f.flush() 47 | coco = COCO(f.name) 48 | 49 | return coco -------------------------------------------------------------------------------- /models/retinanet/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_metric import EvalMetricWithSummary 5 | 6 | 7 | class FGAccMetric(EvalMetricWithSummary): 8 | def __init__(self, name, output_names, label_names, threshold=0, **kwargs): 9 | super().__init__(name, output_names, label_names, **kwargs) 10 | self.thr = threshold 11 | 12 | def update(self, labels, preds): 13 | if len(preds) == 1 and len(labels) == 1: 14 | pred = preds[0] 15 | label = labels[0] 16 | elif len(preds) == 2: 17 | pred = preds[0] 18 | label = preds[1] 19 | else: 20 | raise Exception( 21 | "unknown loss output: len(preds): {}, len(labels): {}".format( 22 | len(preds), len(labels) 23 | ) 24 | ) 25 | 26 | label = label.asnumpy().astype('int32') 27 | keep_inds = np.where(label >= 1) 28 | 29 | # treat as foreground if score larger than threshold 30 | # select class with maximum score as prediction 31 | pred_score = pred.max(axis=-1) 32 | pred_label = pred.argmax(axis=-1) + 1 33 | if self.thr != 0: 34 | pred_label *= pred_score > self.thr 35 | 36 | pred_label = pred_label.asnumpy().astype('int32') 37 | 38 | pred_label = pred_label[keep_inds] 39 | label = label[keep_inds] 40 | 41 | self.sum_metric += np.sum(pred_label.flat == label.flat) 42 | self.num_inst += len(pred_label.flat) 43 | -------------------------------------------------------------------------------- /doc/TENSORBOARD.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | ### Setup tensorboard 4 | ```bash 5 | pip3 install mxboard tensorboard --user 6 | ``` 7 | 8 | ### Modify your config 9 | You need to import the `SummaryWriter` and pass it to your metric in the config. 
10 | 11 | ```python 12 | from mxboard import SummaryWriter 13 | 14 | # modify the logdir as you like 15 | sw = SummaryWriter(logdir="./tflogs", flush_secs=5) 16 | 17 | rpn_acc_metric = metric.AccWithIgnore( 18 | name="RpnAcc", 19 | output_names=["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], 20 | label_names=[], 21 | summary=sw 22 | ) 23 | rpn_l1_metric = metric.L1( 24 | name="RpnL1", 25 | output_names=["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], 26 | label_names=[], 27 | summary=sw 28 | ) 29 | box_acc_metric = metric.AccWithIgnore( 30 | name="RcnnAcc", 31 | output_names=["bbox_cls_loss_output", "bbox_label_blockgrad_output"], 32 | label_names=[], 33 | summary=sw 34 | ) 35 | box_l1_metric = metric.L1( 36 | name="RcnnL1", 37 | output_names=["bbox_reg_loss_output", "bbox_label_blockgrad_output"], 38 | label_names=[], 39 | summary=sw 40 | ) 41 | mask_cls_metric = SigmoidCELossMetric( 42 | name="MaskCE", 43 | output_names=["mask_loss_output"], 44 | label_names=[], 45 | summary=sw 46 | ) 47 | ``` 48 | 49 | 50 | ### Launch tensorborad on the shell 51 | ```bash 52 | # you can specify the logdir in your 53 | tensorboard --logdir tflogs > /dev/null 2>&1 & 54 | 55 | python detection_train --config path/to/your/config.py 56 | ``` 57 | 58 | Now open the web browser you can see the training curve like 59 | ![training_curve](image/tensorboard_screenshot.png) 60 | -------------------------------------------------------------------------------- /models/crowdhuman/README.md: -------------------------------------------------------------------------------- 1 | ## Crowdhuman Dataset 2 | 3 | This repository implements Faster-RCNN and [**Double Pred**](https://arxiv.org/abs/2003.09163) on [**CrowdHuman**](https://arxiv.org/abs/1805.00123) dataset in the SimpleDet framework. 4 | 5 | ### Quick Start 6 | 7 | #### 1. Prepare Crowdhuman Format Dataset 8 | ```bash 9 | # Ensure that the directory of crowdhuman dataset looks like: 10 | # data/crowdhuman 11 | # ---------/images/xxxx.jpg 12 | # ---------/annotations/xxxx.ogdt 13 | python utils/create_crowdhuman_roidb.py --dataset crowdhuman --dataset-split train --num-threads 45 14 | ``` 15 | 16 | #### 2. Train Model 17 | ```bash 18 | # train 19 | python detection_train.py --config config/crowdhuman/faster_r50v1b_fpn_1x.py 20 | python detection_train.py --config config/doublepred_r50v1b_fpn_1x.py 21 | python detection_train.py --config config/doublepred_r50v1b_fpn_1x_refine.py 22 | 23 | # test 24 | python detection_test.py --config config/crowdhuman/faster_r50v1b_fpn_1x.py 25 | python detection_test.py --config config/doublepred_r50v1b_fpn_1x.py 26 | python detection_test.py --config config/doublepred_r50v1b_fpn_1x_refine.py 27 | ``` 28 | 29 | ### Results on CrowdHuman 30 | 31 | | Detector | AP | MR | 32 | |----------|---------|----| 33 | | Faster R50v1b | 84.77 | 46.72 | 34 | | DoublePred R50v1b | 88.64 | 45.52 | 35 | | DoublePred R50v1b + Refine | 88.81 | 45.02 | 36 | 37 | Note that crowdhuman is different from COCO-like dataset, since it contains **ignore region**. We followed the procedure shared by Zheng Ge([Talk Link](https://www.bilibili.com/video/av455989666/)) by ignoring anchors in RPN and adding BN in FPN. A simple Toolkit to evaluate AP and MR with ignore region can refer to [here](https://github.com/Purkialo/CrowdDet/tree/master/lib/evaluate). 
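As a rough illustration of the "ignoring anchors in RPN" step mentioned above, the sketch below marks anchors that are mostly covered by an ignore region with label -1 so that they contribute neither as positives nor as negatives to the RPN loss. The helper name, the threshold and the -1 convention are assumptions made for illustration rather than the exact code behind this config.

```python
import numpy as np

def suppress_ignored_anchors(anchor_labels, ignore_overlap, thresh=0.5):
    """Set RPN anchor labels to -1 (don't care) when an anchor overlaps
    an ignore region by more than `thresh`.

    anchor_labels:  (N,) array with 1 = positive, 0 = negative
    ignore_overlap: (N,) max overlap of each anchor with any ignore region
    """
    labels = np.array(anchor_labels, dtype=np.int64, copy=True)
    labels[np.asarray(ignore_overlap) > thresh] = -1  # excluded from the RPN loss
    return labels
```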
-------------------------------------------------------------------------------- /models/crowdhuman/softmax_entropy_op.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | class SoftmaxEntropyOperator(mx.operator.CustomOp): 5 | def __init__(self): 6 | super().__init__() 7 | 8 | def forward(self, is_train, req, in_data, out_data, aux): 9 | data = in_data[0] 10 | label = in_data[1] 11 | 12 | num_reg_class = data.shape[-1] 13 | label = mx.nd.one_hot(label, depth=num_reg_class) 14 | 15 | data = mx.nd.softmax(data, axis=-1) 16 | loss = - label * mx.nd.log(data + 1e-10) 17 | self.assign(out_data[0], req[0], loss) 18 | 19 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 20 | data = in_data[0] 21 | label = in_data[1] 22 | 23 | batch_roi, num_reg_class = data.shape 24 | onehot_label = mx.nd.one_hot(label, depth=num_reg_class) 25 | 26 | d_grad = mx.nd.softmax(data, axis=-1) - onehot_label 27 | # since we directly backward grad from here, we need to normalize gradient right! 28 | d_grad *= out_grad[0] 29 | 30 | self.assign(in_grad[0], req[0], d_grad) 31 | self.assign(in_grad[1], req[1], mx.nd.zeros_like(label)) 32 | 33 | 34 | @mx.operator.register('softmax_entropy') 35 | class SoftmaxEntropyProp(mx.operator.CustomOpProp): 36 | def __init__(self): 37 | super().__init__(need_top_grad=True) 38 | 39 | def list_arguments(self): 40 | return ['data', 'label'] 41 | 42 | def list_outputs(self): 43 | return ['output'] 44 | 45 | def infer_shape(self, in_shape): 46 | return [in_shape[0], in_shape[1]], [in_shape[0]] 47 | 48 | def create_operator(self, ctx, shapes, dtypes): 49 | return SoftmaxEntropyOperator() -------------------------------------------------------------------------------- /utils/load_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import glob 4 | 5 | 6 | def get_latest_ckpt_epoch(prefix): 7 | """ 8 | Get latest checkpoint epoch by prefix 9 | """ 10 | def get_checkpoint_epoch(prefix): 11 | return int(prefix[prefix.rfind('.params')-4:prefix.rfind('.params')]) 12 | 13 | checkpoints = glob.glob(prefix + '*.params') 14 | assert len(checkpoints), 'can not find params startswith {}'.prefix 15 | return max([get_checkpoint_epoch(x) for x in checkpoints]) 16 | 17 | 18 | def load_checkpoint(prefix, epoch): 19 | """ 20 | Load model checkpoint from file. 21 | :param prefix: Prefix of model name. 22 | :param epoch: Epoch number of model we would like to load. 23 | :return: (arg_params, aux_params) 24 | arg_params : dict of str to NDArray 25 | Model parameter, dict of name to NDArray of net's weights. 26 | aux_params : dict of str to NDArray 27 | Model parameter, dict of name to NDArray of net's auxiliary states. 
28 | """ 29 | print('load %s-%04d.params' % (prefix, epoch)) 30 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 31 | arg_params = {} 32 | aux_params = {} 33 | for k, v in save_dict.items(): 34 | tp, name = k.split(':', 1) 35 | if tp == 'arg': 36 | arg_params[name] = v 37 | if tp == 'aux': 38 | aux_params[name] = v 39 | return arg_params, aux_params 40 | 41 | 42 | def convert_context(params, ctx): 43 | """ 44 | :param params: dict of str to NDArray 45 | :param ctx: the context to convert to 46 | :return: dict of str of NDArray with context ctx 47 | """ 48 | new_params = dict() 49 | for k, v in params.items(): 50 | new_params[k] = v.as_in_context(ctx) 51 | return new_params 52 | 53 | -------------------------------------------------------------------------------- /models/RepPoints/README.md: -------------------------------------------------------------------------------- 1 | ## RepPoints 2 | 3 | This repository implements [**RepPoints**](https://arxiv.org/abs/1904.11490) in the SimpleDet framework. 4 | RPDet is a state-of-the-art anchor-free detector, utilizing a point set as the representation of objects in localization and recognition. 5 | 6 | ### Qucik Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/reppoints_moment_r50v1_fpn_1x.py 10 | 11 | # test 12 | python3 detection_test.py --config config/reppoints_moment_r50v1_fpn_1x.py 13 | ``` 14 | 15 | ### Models 16 | All AP results are reported on minival2014 of the [COCO dataset](http://cocodataset.org). 17 | 18 | |Method|Backbone|Transform|Schedule|AP (paper)|AP (re-impl)|Link| 19 | |------|--------|---------|--------|----------|------------|----| 20 | |RepPoints|R50v1-FPN|MinMax|1x|38.2|38.0|[model](https://drive.google.com/open?id=1BNF7cLJDLgOUpSgQ3bcXm2iSHop5G3Rp)| 21 | |RepPoints|R50v1-FPN|Moment|1x|38.3|38.3|[model](https://drive.google.com/open?id=1q0mFJl0qG22Y6AlRQ95HSIFT0GKuQRLS)| 22 | |RepPoints|R101v1-FPN|Moment|2x|40.3|40.7|[model](https://drive.google.com/open?id=1dslqEcvlPh-8NoRhU--7ypan7XnAP_S5)| 23 | |RepPoints|R101v1b-FPN-DCNv1|Moment|2x, multi-scale training & testing|-|46.4|[model](https://drive.google.com/open?id=1SreAuNE7ILXcBx8_-NHyftZTgS94kzO6)| 24 | |RepPoints|R101v1b-FPN-DCNv2|Moment|2x, multi-scale training & testing|-|47.0|[model](https://drive.google.com/open?id=14GFKGeXU9FVBFDQUS-4jlH2raSLzt8Zd)| 25 | 26 | ### Reference 27 | ``` 28 | @inproceedings{yang2019reppoints, 29 | title={RepPoints: Point Set Representation for Object Detection}, 30 | author={Yang, Ze and Liu, Shaohui and Hu, Han and Wang, Liwei and Lin, Stephen}, 31 | booktitle={The IEEE International Conference on Computer Vision (ICCV)}, 32 | month={Oct}, 33 | year={2019} 34 | } 35 | ``` 36 | -------------------------------------------------------------------------------- /models/FCOS/metric.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | class LossWithIgnore(mx.metric.EvalMetric): 5 | def __init__(self, name, output_names, label_names, ignore_label=-1): 6 | super().__init__(name, output_names, label_names) 7 | self.ignore_label = ignore_label 8 | 9 | def update(self, labels, preds): 10 | raise NotImplementedError 11 | 12 | class ClsAccWithIgnore(LossWithIgnore): 13 | def __init__(self, stride, name, output_names, label_names, ignore_label=-1): 14 | super().__init__(name, output_names, label_names, ignore_label) 15 | self.stride = stride 16 | 17 | def reset(self): 18 | self.sum_metric = 0 19 | self.num_inst = 0 20 | 21 | def 
update(self, labels, preds): 22 | pred = preds[:len(self.stride)] 23 | label = labels[len(self.stride):len(self.stride)*2] 24 | 25 | for i in range(len(self.stride)): 26 | self.sum_metric += mx.nd.sum(mx.nd.logical_and(pred[i] > 0.5, label[i])).asscalar() 27 | self.num_inst += mx.nd.sum(label[i]).asscalar() 28 | 29 | class LossMeter(mx.metric.EvalMetric): 30 | def __init__(self, stride, pred_id_start, pred_id_end, name='LossMeter'): 31 | self.stride = stride 32 | self.pred_id_start = pred_id_start 33 | self.pred_id_end = pred_id_end 34 | super(LossMeter, self).__init__(name=name) 35 | 36 | def reset(self): 37 | self.sum_metric = 0 38 | self.num_inst = 0 39 | 40 | def update(self, labels, preds): 41 | for i, pred in enumerate(preds[self.pred_id_start:self.pred_id_end]): 42 | if len(pred.shape) > 1: 43 | valid_pred = pred.mean().asnumpy() 44 | else: 45 | valid_pred = pred.asnumpy() 46 | 47 | self.sum_metric += valid_pred 48 | self.num_inst += 1 49 | -------------------------------------------------------------------------------- /models/retinanet/README.md: -------------------------------------------------------------------------------- 1 | ## RetinaNet 2 | 3 | This repository implements [**RetinaNet**](https://arxiv.org/abs/1708.02002) in the **SimpleDet** framework. RetinaNet is a state-of-the-art single-stage detector that uses focal loss to prevent the vast number of easy negatives from overwhelming the detector. 4 | 5 | ### How we build RetinaNet 6 | 7 | #### Input 8 | 9 | The pyramid label part of **RetinaNet** is similar to that of **Feature Pyramid Network**; you can refer to the [FPN README](../FPN/README.md). In addition, the label assignment method differs from that of **Faster R-CNN**, so we override ```_assign_label_to_anchor``` and ```apply``` of ```AnchorTarget2D``` in a subclass named ```PyramidAnchorTarget2DBase``` to obtain class-aware labels and avoid sampling RoIs. 10 | 11 | #### Operators 12 | 13 | - **bbox_norm**, passes data through unchanged in the forward pass and normalizes the gradient by the number of positive samples in the backward pass 14 | - **focal_loss**, acts the same as sigmoid in the forward pass and returns the focal loss gradient in the backward pass (see the sketch at the end of this README) 15 | - **decode_retina**, reuses the code from [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test_retinanet.py) to decode boxes and scores. Note that ```min_det_score``` is moved to ```RpnParam.proposal``` as it requires a different threshold for results from the **P7** level. 16 | 17 | #### Symbol 18 | 19 | - ```RetinaNet```, detector with RPN only 20 | - ```RetinaNetHead```, classification and regression head with shared weights 21 | - ```RetinaNetNeck```, top-down pathway for **FPN** in **RetinaNet** 22 | 23 | #### Config 24 | 25 | - ```min_det_score``` in ```TestParam``` is set to 0 to remove the appended boxes with zero scores 26 | - To avoid sharing parameters of the same config field across pyramid levels, e.g. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass an ```AnchorTarget2DParam``` instance rather than the class for anchor generation.
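The following is a minimal NumPy sketch of the forward/backward contract of ```bbox_norm``` and ```focal_loss``` described above, assuming default ```alpha```/```gamma``` values and ignoring the ignore-label handling of the real operators; it is an illustration, not the actual operator implementation.

```python
import numpy as np

def focal_loss_like(x, label, alpha=0.25, gamma=2.0):
    """Forward: plain sigmoid scores. Backward: gradient of the focal loss w.r.t. x."""
    p = np.clip(1.0 / (1.0 + np.exp(-x)), 1e-12, 1.0 - 1e-12)
    # d/dx of -alpha * (1 - p)^gamma * log(p) for positives and
    # d/dx of -(1 - alpha) * p^gamma * log(1 - p) for negatives, with p = sigmoid(x)
    grad_pos = alpha * (1.0 - p) ** gamma * (gamma * p * np.log(p) + p - 1.0)
    grad_neg = (1.0 - alpha) * p ** gamma * (p - gamma * (1.0 - p) * np.log(1.0 - p))
    return p, np.where(label > 0, grad_pos, grad_neg)

def bbox_norm_like(in_grad, label):
    """Forward passes data through unchanged; backward divides the gradient by the number of positives."""
    num_pos = max(int((label > 0).sum()), 1)
    return in_grad / num_pos
```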
27 | -------------------------------------------------------------------------------- /utils/patch_config.py: -------------------------------------------------------------------------------- 1 | import types 2 | import inspect 3 | 4 | 5 | class NoThrowBase: 6 | def __getattr__(self, item): 7 | return None 8 | 9 | 10 | class NoThrowMeta(type): 11 | def __getattr__(self, item): 12 | return None 13 | 14 | 15 | def patch_config_as_nothrow(instance): 16 | if "NoThrow" in [instance.__name__, instance.__class__.__name__]: 17 | return instance 18 | 19 | if type(instance) == type: 20 | instance = types.new_class(instance.__name__ + "NoThrow", (instance, ), dict(metaclass=NoThrowMeta)) 21 | for (k, v) in inspect.getmembers(instance): 22 | if not k.startswith("__") and type(v) == type: 23 | type.__setattr__(instance, k, patch_config_as_nothrow(v)) 24 | else: 25 | for (k, v) in inspect.getmembers(instance.__class__): 26 | if not k.startswith("__") and type(v) == type: 27 | type.__setattr__(instance.__class__, k, patch_config_as_nothrow(v)) 28 | instance.__class__ = type(instance.__class__.__name__ + "NoThrow", (instance.__class__, NoThrowBase), {}) 29 | 30 | return instance 31 | 32 | 33 | if __name__ == "__main__": 34 | class A: 35 | a = 1 36 | 37 | A = patch_config_as_nothrow(A) 38 | assert A.non_exist is None 39 | assert A.a == 1 40 | 41 | class B: 42 | b = 1 43 | class B1: 44 | b1 = 2 45 | 46 | B = patch_config_as_nothrow(B) 47 | assert B.non_exist is None 48 | assert B.B1.non_exist is None 49 | assert B.b == 1 50 | assert B.B1.b1 == 2 51 | 52 | class B: 53 | b = 1 54 | class B1: 55 | b1 = 2 56 | def b1f(): 57 | return 3 58 | 59 | b = B() 60 | b = patch_config_as_nothrow(b) 61 | assert b.non_exist is None 62 | assert b.B1.non_exist is None 63 | assert b.b == 1 64 | assert b.B1.b1 == 2 65 | assert b.B1.b1f() == 3 66 | -------------------------------------------------------------------------------- /scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 2 ]; then 4 | echo "usage: $0 config_path comma_separated_worker_hostnames" 5 | exit -1 6 | fi 7 | 8 | conffile=$1 9 | hosts=$2 10 | 11 | # extract worker and check reachablity 12 | IFS=, read -r -a host_array <<< $hosts 13 | for host in ${host_array[@]}; do 14 | # check reachability 15 | echo "check reachability of $host" 16 | ssh -q $host exit 17 | if [ $? -ne 0 ]; then 18 | echo "$host is not reachable" 19 | exit -1 20 | fi 21 | 22 | # check availablity (retreat if remote host is in use) 23 | echo "check availability of $host" 24 | for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 25 | x="${x//[$'\t\r\n ']}" # remove trailing whitespace 26 | if [ $x -gt 10 ]; then 27 | echo "$host has gpu utilization of $x%"; 28 | exit -1 29 | fi; 30 | done 31 | 32 | # cleanup potentially dead python process (march since we checked it) 33 | ssh -q $host pkill python 34 | done 35 | 36 | gpucount=8 37 | num_node=${#host_array[@]} 38 | num_servers=${num_node} 39 | root_dir="/mnt/tscpfs/yuntao.chen/simpledet/simpledet_open" 40 | sync_dir="/tmp/simpledet_sync" 41 | singularity_image=/mnt/tscpfs/yuntao.chen/simpledet.img 42 | 43 | # check existence of config file 44 | if [ ! 
-f ${conffile} ]; then 45 | echo "${conffile} does not exsit" 46 | exit -1 47 | fi 48 | 49 | # dump hosts in a hostfile for launch.py 50 | IFS=, 51 | output="" 52 | for id in $hosts 53 | do output+="${id}\n" 54 | done 55 | unset IFS 56 | echo -e ${output::-2} > scripts/hosts.txt 57 | sleep 1 58 | 59 | logfile=${conffile#config/} 60 | logfile=${logfile%.py} 61 | 62 | export DMLC_INTERFACE=eth0 63 | python -u /mnt/tscpfs/yuntao.chen/dist-mxnet/tools/launch.py \ 64 | -n ${num_node} \ 65 | -s ${num_servers} \ 66 | --launcher ssh \ 67 | -H scripts/hosts.txt \ 68 | scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \ 69 | 2>&1 | tee -a ${root_dir}/log/${logfile}.log 70 | -------------------------------------------------------------------------------- /models/sepc/sepc_dconv.py: -------------------------------------------------------------------------------- 1 | import mxnext as X 2 | import mxnet as mx 3 | from mxnext import conv, relu 4 | 5 | 6 | def DeformConv(x, offset, name, out_channels, kernel_size, stride=1, padding=0, dilation=1, 7 | groups=1, deformable_groups=1, no_bias=False, weight=None, bias=None): 8 | assert weight is not None 9 | if not no_bias: 10 | assert bias is not None 11 | assert out_channels % groups == 0, 'out_channels {} cannot be divisible by groups {}'.format(out_channels, groups) 12 | out = mx.sym.contrib.DeformableConvolution( 13 | x, offset, weight=weight, bias=bias if not no_bias else None, kernel=(kernel_size,kernel_size), 14 | stride=(stride,stride), dilate=(dilation,dilation), pad=(padding,padding), num_filter=out_channels, 15 | num_group=groups, num_deformable_group=deformable_groups, no_bias=no_bias, name=name) 16 | return out 17 | 18 | 19 | def sepc_conv(x, name, out_channels, kernel_size, i, stride=1, padding=0, dilation=1, 20 | groups=1, deformable_groups=1, part_deform=False, start_level=1, 21 | weight=None, bias=None, weight_offset=None, bias_offset=None): 22 | assert weight is not None and bias is not None 23 | if part_deform: 24 | assert weight_offset is not None and bias_offset is not None 25 | if i < start_level or not part_deform: 26 | return conv(x, name, filter=out_channels, kernel=kernel_size, stride=stride, pad=kernel_size//2, 27 | dilate=dilation, num_group=groups, no_bias=False, weight=weight, bias=bias) 28 | offset = conv(x, name+'offset', filter=deformable_groups*2*kernel_size*kernel_size, kernel=kernel_size, stride=stride, 29 | pad=kernel_size//2, dilate=dilation, num_group=groups, no_bias=False, weight=weight_offset, bias=bias_offset) 30 | return DeformConv(x, offset, name, out_channels, kernel_size, stride, padding=padding, dilation=dilation, 31 | groups=groups, deformable_groups=deformable_groups, no_bias=False, weight=weight, bias=bias) -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # install dependency 5 | sudo apt update && sudo apt install -y git wget make python3-dev libglib2.0-0 libsm6 libxext6 libxrender-dev unzip 6 | 7 | # create conda env 8 | conda create -n simpledet python=3.7 9 | conda activate simpledet 10 | 11 | # fetch CUDA environment 12 | conda install cudatoolkit=10.1 13 | 14 | # install python dependency 15 | pip install 'matplotlib<3.1' opencv-python pytz 16 | 17 | # download and intall pre-built wheel for CUDA 10.1 18 | pip install https://1dv.alarge.space/mxnet_cu101-1.6.0b20190820-py2.py3-none-manylinux1_x86_64.whl 19 | 20 | # install 
pycocotools 21 | pip install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' 22 | 23 | # install mxnext, a wrapper around MXNet symbolic API 24 | pip install 'git+https://github.com/RogerChern/mxnext#egg=mxnext' 25 | 26 | # get simpledet 27 | git clone https://github.com/tusimple/simpledet 28 | cd simpledet 29 | make 30 | 31 | # make data dir 32 | mkdir -p data/coco/images data/src 33 | 34 | # skip this if you have the zip files 35 | wget -c http://images.cocodataset.org/zips/train2017.zip -O data/src/train2017.zip 36 | wget -c http://images.cocodataset.org/zips/val2017.zip -O data/src/val2017.zip 37 | wget -c http://images.cocodataset.org/zips/test2017.zip -O data/src/test2017.zip 38 | wget -c http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O data/src/annotations_trainval2017.zip 39 | wget -c http://images.cocodataset.org/annotations/image_info_test2017.zip -O data/src/image_info_test2017.zip 40 | 41 | unzip data/src/train2017.zip -d data/coco/images 42 | unzip data/src/val2017.zip -d data/coco/images 43 | unzip data/src/test2017.zip -d data/coco/images 44 | unzip data/src/annotations_trainval2017.zip -d data/coco 45 | unzip data/src/image_info_test2017.zip -d data/coco 46 | 47 | python utils/create_coco_roidb.py --dataset coco --dataset-split train2017 48 | python utils/create_coco_roidb.py --dataset coco --dataset-split val2017 49 | python utils/create_coco_roidb.py --dataset coco --dataset-split test-dev2017 50 | -------------------------------------------------------------------------------- /operator_cxx/contrib/focal_loss.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file focal_loss.cc 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./focal_loss-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(FocalLossParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new FocalLossOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *FocalLossProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(FocalLossParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_FocalLoss, FocalLossProp) 52 | .describe("Focal loss for dense object detection") 53 | .add_argument("data", "NDArray-or-Symbol", "Data") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(FocalLossParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /operator_cxx/contrib/bbox_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file bbox_norm.cc 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./bbox_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BBoxNormParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BBoxNormOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *BBoxNormProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(BBoxNormParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BBoxNorm, BBoxNormProp) 52 | .describe("Normalize those boxes with positive label") 53 | .add_argument("data", "NDArray-or-Symbol", "Data to normalize") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(BBoxNormParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /operator_cxx/contrib/broadcast_scale.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file broadcast_scale.cc 22 | * \brief 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "./broadcast_scale-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BroadcastScaleParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BroadcastScaleOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *BroadcastScaleProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(BroadcastScaleParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BroadcastScale, BroadcastScaleProp) 52 | .describe("Broadcast_scale to enable in-place scaling of tensor") 53 | .add_argument("data", "NDArray-or-Symbol", "Data") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(BroadcastScaleParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /models/NASFPN/README.md: -------------------------------------------------------------------------------- 1 | ## NAS-FPN 2 | 3 | This repository implements [**NAS-FPN**](https://arxiv.org/abs/1904.07392) in the SimpleDet framework. 4 | 5 | ### Qucik Start 6 | ```bash 7 | # train baseline retinanet following the setting of NAS-FPN 8 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_fpn_640_1@256_25epoch.py 9 | 10 | # train NAS-FPN 11 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_640_7@256_25epoch.py 12 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1024_7@256_25epoch.py 13 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1280_7@384_25epoch.py 14 | 15 | # train hand-crafted neck 16 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_tdbu_1280_3@384_25epoch.py 17 | ``` 18 | 19 | ### Results and Models 20 | All AP results are reported on test-dev of the [COCO dataset](http://cocodataset.org). 21 | 22 | |Model|InputSize|Backbone|Neck|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP(Mask AP)|Link| 23 | |-----|-----|--------|----|--------------|---|---------|----|---------|-----------|---------------|----| 24 | |RetinaNet|640|R50v1b-FPN|1@256|25 epoch|8X 1080Ti|8|yes|6.6G|85 img/s|37.4|[model](https://1dv.aflat.top/retina_r50v1b_fpn_640640_25epoch.zip)| 25 | |NAS-FPN|640|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|8|yes|7.8G|66 img/s|40.1|[model](https://1dv.aflat.top/retina_r50v1b_nasfpn_640640_25epoch.zip)| 26 | |NAS-FPN|1024|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|4|yes|9.1G|17 img/s|44.2|[model](https://1dv.aflat.top/retina_r50v1b_nasfpn_1024_7%40256_25epoch.zip)| 27 | |NAS-FPN|1280|R50v1b-FPN|7@384|25 epoch|8X 1080Ti|2|yes|8.9G|10 img/s|45.3|[model](https://1dv.aflat.top/retina_r50v1b_nasfpn_1280_7%40384_25epoch.zip)| 28 | |TD-BU*|1280|R50v1b-FPN|3@384|25 epoch|8X 1080Ti|3|yes|10.5G|12 img/s|44.7|[model](https://1dv.aflat.top/retina_r50v1b_tdbu_1280_3%40384_25epoch.zip)| 29 | 30 | \* Short for TopDown-BottomUp neck which is highly symmetric proposed by Zehao. 
31 | ### Reference 32 | ``` 33 | @inproceedings{ghiasi2019fpn, 34 | title={NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection}, 35 | author={Ghiasi, Golnaz and Lin, Tsung-Yi and Pang, Ruoming and Le, Quoc V}, 36 | booktitle={CVPR}, 37 | year={2019} 38 | } 39 | ``` 40 | -------------------------------------------------------------------------------- /models/FPN/README.md: -------------------------------------------------------------------------------- 1 | ## Feature Pyramid Networks for Object Detection 2 | 3 | Here we introduce how [**Feature Pyramid Network**](https://arxiv.org/abs/1612.03144) is built in the **simpledet** framework. The following sections explain the implementation in detail. 4 | 5 | #### AnchorTarget 6 | 7 | Since **FPN** uses a **Feature Pyramid** as its backbone, we cannot directly use ```AnchorTarget2D```, which only generates anchor targets for the single stride declared in ```RpnParam```. Instead, we implement ```PyramidAnchorTarget2D``` to create a list of ```AnchorTarget2D``` instances, each generating anchor targets for a single pyramid stride, and then collect them together. More specifically, we create one instance per pyramid stride to generate anchors. To collect anchors from different pyramid levels, we override the ```v_all_anchor``` and ```h_all_anchor``` properties, which return the concatenation of anchors from different levels, and assign them to the primary instance. We also override the ```apply``` function to obtain labels, sampled anchors, targets and weights from the primary instances, then split and concatenate them along a certain axis. 8 | 9 | #### Operators 10 | 11 | - **get_top_proposal**, since **FPN** has multi-scale proposals, we concatenate the multi-scale proposals together and keep the top-K proposals for RoIPooling or RoIAlign 12 | - **assign_layer_fpn**, **FPN** assigns the proposals to target levels (P2, P3, P4, P5) according to their areas, so we use this operator to assign a feature level to each proposal (a sketch of this rule is given at the end of this README) 13 | 14 | 15 | #### Symbol 16 | 17 | - ```Detector```, the detector is the same as FasterRcnn 18 | - ```FPNNeck```, top-down pathway for **Feature Pyramid Network** 19 | - ```FPNRpnHead```, classification and regression head with shared weights for FPN-RPN 20 | - ```FPNRoiAlign```, we use this module to extract RoI features for the proposals of each level respectively, then add the features from the different levels together for the subsequent R-CNN head (each proposal is non-zero at only its assigned level, so the add acts as a selection) 21 | 22 | #### Config 23 | 24 | - ```TestParam``` is the same as the setting in [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md) 25 | - To avoid sharing parameters of the same config field across pyramid levels, e.g. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass an ```AnchorTarget2DParam``` instance rather than the class for anchor generation.
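The area-based rule used by ```assign_layer_fpn``` can be sketched as below. The canonical scale and level (224 and 4) are the common FPN defaults and are configurable through ```roi_canonical_scale``` / ```roi_canonical_level```, so treat the constants here as assumptions rather than the exact values of any particular config.

```python
import numpy as np

def assign_fpn_level(rois, strides=(4, 8, 16, 32), canonical_scale=224, canonical_level=4):
    """Map each RoI (x1, y1, x2, y2) to a pyramid stride by its area."""
    w = rois[:, 2] - rois[:, 0] + 1
    h = rois[:, 3] - rois[:, 1] + 1
    scale = np.sqrt(w * h)
    k_min, k_max = np.log2(min(strides)), np.log2(max(strides))
    target_lvl = np.floor(canonical_level + np.log2(scale / canonical_scale + 1e-6))
    target_lvl = np.clip(target_lvl, k_min, k_max)
    return (2 ** target_lvl).astype(np.int32)  # stride of the assigned level

# e.g. a 112x112 RoI maps to stride 8 (P3), a 448x448 RoI to stride 32 (P5)
```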
26 | -------------------------------------------------------------------------------- /models/KD/utils.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | import mxnet as mx 3 | 4 | from core.detection_module import DetModule 5 | from utils.load_model import load_checkpoint 6 | 7 | 8 | def create_teacher_module(pTeacherModel, worker_data_shape, input_batch_size, ctx, rank, logger): 9 | t_prefix = pTeacherModel.prefix 10 | t_epoch = pTeacherModel.epoch 11 | t_endpoint = pTeacherModel.endpoint 12 | t_data_name = pTeacherModel.data_name 13 | t_label_name = pTeacherModel.label_name 14 | if rank == 0: 15 | logger.info('Building teacher module with endpoint: {}'.format(t_endpoint)) 16 | t_sym = pTeacherModel.prefix + '-symbol.json' 17 | t_sym = mx.sym.load(t_sym) 18 | t_sym = mx.sym.Group([t_sym.get_internals()[out] for out in t_endpoint]) 19 | t_worker_data_shape = {key: worker_data_shape[key] for key in t_data_name} 20 | _, t_out_shape, _ = t_sym.infer_shape(**t_worker_data_shape) 21 | t_terminal_out_shape_dict = zip(t_sym.list_outputs(), t_out_shape) 22 | t_data_shape = [] 23 | for idx, data_name in enumerate(t_data_name): 24 | data_shape = t_worker_data_shape[data_name] 25 | data_shape = (input_batch_size,) + data_shape[1:] 26 | t_data_shape.append((data_name, data_shape)) 27 | t_label_shape = [] 28 | for idx, label_name in enumerate(t_label_name): 29 | label_shape = t_out_shape[idx] 30 | label_shape = (input_batch_size,) + label_shape[1:] 31 | t_label_shape.append((label_name, label_shape)) 32 | if rank == 0: 33 | logger.info('Teacher data_name: {}'.format(t_data_name)) 34 | logger.info('Teacher data_shape: {}'.format(t_data_shape)) 35 | logger.info('Teacher label_name: {}'.format(t_label_name)) 36 | logger.info('Teacher label_shape: {}'.format(t_label_shape)) 37 | 38 | if rank == 0: 39 | logger.info('Teacher terminal output shape') 40 | logger.info(pprint.pformat([i for i in t_terminal_out_shape_dict])) 41 | t_arg_params, t_aux_params = load_checkpoint(t_prefix, t_epoch) 42 | t_mod = DetModule(t_sym, data_names=t_data_name, label_names=None, 43 | logger=logger, context=ctx) 44 | t_mod.bind(data_shapes=t_data_shape, for_training=False, grad_req='null') 45 | t_mod.set_params(t_arg_params, t_aux_params) 46 | if rank == 0: 47 | logger.info('Finish teacher module build') 48 | return t_mod, t_label_name, t_label_shape -------------------------------------------------------------------------------- /models/TSD/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mxnet as mx 4 | import mxnext as X 5 | 6 | class FasterRcnn_TSD(object): 7 | _rpn_output = None 8 | 9 | def __init__(self): 10 | pass 11 | 12 | @classmethod 13 | def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 14 | gt_bbox = X.var("gt_bbox") 15 | im_info = X.var("im_info") 16 | 17 | rpn_feat = backbone.get_rpn_feature() 18 | rcnn_feat = backbone.get_rcnn_feature() 19 | rpn_feat = neck.get_rpn_feature(rpn_feat) 20 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 21 | 22 | rpn_head.get_anchor() 23 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 24 | proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info) 25 | 26 | roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal) 27 | bbox_loss = bbox_head.get_loss(proposal, roi_feat, rcnn_feat, bbox_cls, bbox_target, bbox_weight, gt_bbox) 28 | 29 | 
return X.group(rpn_loss + bbox_loss) 30 | 31 | @classmethod 32 | def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 33 | rec_id, im_id, im_info, proposal, proposal_score = \ 34 | FasterRcnn_TSD.get_rpn_test_symbol(backbone, neck, rpn_head) 35 | 36 | rcnn_feat = backbone.get_rcnn_feature() 37 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 38 | 39 | roi_feat = roi_extractor.get_roi_feature_test(rcnn_feat, proposal) 40 | 41 | preds = bbox_head.get_prediction(proposal, roi_feat, rcnn_feat, im_info) 42 | 43 | return X.group([rec_id, im_id, im_info] + list(preds)) 44 | 45 | @classmethod 46 | def get_rpn_test_symbol(cls, backbone, neck, rpn_head): 47 | if cls._rpn_output is not None: 48 | return cls._rpn_output 49 | 50 | im_info = X.var("im_info") 51 | im_id = X.var("im_id") 52 | rec_id = X.var("rec_id") 53 | 54 | rpn_head.get_anchor() 55 | rpn_feat = backbone.get_rpn_feature() 56 | rpn_feat = neck.get_rpn_feature(rpn_feat) 57 | 58 | (proposal, proposal_score) = rpn_head.get_all_proposal(rpn_feat, im_info) 59 | 60 | cls._rpn_output = X.group([rec_id, im_id, im_info, proposal, proposal_score]) 61 | return cls._rpn_output 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /operator_cxx/contrib/quantization_int8.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * Copyright (c) 2019 by Contributors 22 | * \file Quantization_int8.cc 23 | * \brief 24 | * \author Xiaotao Chen, Jingqiu Zhou, Ruize Hou 25 | */ 26 | 27 | #include "./quantization_int8-inl.h" 28 | 29 | #include 30 | 31 | namespace mxnet { 32 | namespace op { 33 | 34 | template<> 35 | Operator *CreateOp(Quantization_int8Para param, int dtype) { 36 | Operator* op = nullptr; 37 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 38 | op = new Quantization_int8Op(param); 39 | }); 40 | return op; 41 | } 42 | 43 | Operator *Quantization_int8Prop::CreateOperatorEx(Context ctx, std::vector *in_shape, 44 | std::vector *in_type) const { 45 | std::vector out_shape, aux_shape; 46 | std::vector out_type, aux_type; 47 | CHECK(InferType(in_type, &out_type, &aux_type)); 48 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 49 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 50 | } 51 | 52 | DMLC_REGISTER_PARAMETER(Quantization_int8Para); 53 | 54 | MXNET_REGISTER_OP_PROPERTY(_contrib_Quantization_int8, Quantization_int8Prop) 55 | .describe(R"code(perform simulated int8 quatization)code" ADD_FILELINE) 56 | .add_argument("data", "NDArray-or-Symbol", "Input data to activation function.") 57 | .add_argument("minmax", "NDArray-or-Symbol", "minmax array") 58 | .add_arguments(Quantization_int8Para::__FIELDS__()); 59 | 60 | } // namespace op 61 | } // namespace mxnet 62 | -------------------------------------------------------------------------------- /models/maskrcnn/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from pycocotools import mask as mask_util 5 | 6 | 7 | def expand_boxes(boxes, scale): 8 | """Expand an array of boxes by a given scale.""" 9 | w_half = (boxes[:, 2] - boxes[:, 0]) * .5 10 | h_half = (boxes[:, 3] - boxes[:, 1]) * .5 11 | x_c = (boxes[:, 2] + boxes[:, 0]) * .5 12 | y_c = (boxes[:, 3] + boxes[:, 1]) * .5 13 | 14 | w_half *= scale 15 | h_half *= scale 16 | 17 | boxes_exp = np.zeros(boxes.shape) 18 | boxes_exp[:, 0] = x_c - w_half 19 | boxes_exp[:, 2] = x_c + w_half 20 | boxes_exp[:, 1] = y_c - h_half 21 | boxes_exp[:, 3] = y_c + h_half 22 | 23 | return boxes_exp 24 | 25 | 26 | def segm_results(bbox_xyxy, cls, masks, im_h, im_w): 27 | # Modify from Detectron 28 | # To work around an issue with cv2.resize (it seems to automatically pad 29 | # with repeated border values), we manually zero-pad the masks by 1 pixel 30 | # prior to resizing back to the original image resolution. This prevents 31 | # "top hat" artifacts. We therefore need to expand the reference boxes by an 32 | # appropriate factor. 
33 | segms = [] 34 | M = masks.shape[-1] 35 | scale = (M + 2.0) / M 36 | ref_boxes = expand_boxes(bbox_xyxy, scale) 37 | ref_boxes = ref_boxes.astype(np.int32) 38 | padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) 39 | 40 | for ref_box_i, mask_i, cls_i in zip(ref_boxes, masks, cls): 41 | padded_mask[1:-1, 1:-1] = mask_i[cls_i, :, :] 42 | 43 | w = ref_box_i[2] - ref_box_i[0] + 1 44 | h = ref_box_i[3] - ref_box_i[1] + 1 45 | w = np.maximum(w, 1) 46 | h = np.maximum(h, 1) 47 | 48 | mask = cv2.resize(padded_mask, (w, h)) 49 | mask = np.array(mask > 0.5, dtype=np.uint8) 50 | im_mask = np.zeros((im_h, im_w), dtype=np.uint8) 51 | 52 | x_0 = max(ref_box_i[0], 0) 53 | x_1 = min(ref_box_i[2] + 1, im_w) 54 | y_0 = max(ref_box_i[1], 0) 55 | y_1 = min(ref_box_i[3] + 1, im_h) 56 | 57 | im_mask[y_0:y_1, x_0:x_1] = mask[ 58 | (y_0 - ref_box_i[1]):(y_1 - ref_box_i[1]), 59 | (x_0 - ref_box_i[0]):(x_1 - ref_box_i[0]) 60 | ] 61 | 62 | # Get RLE encoding used by the COCO evaluation API 63 | rle = mask_util.encode( 64 | np.array(im_mask[:, :, np.newaxis], order='F') 65 | )[0] 66 | segms.append(rle) 67 | segms = np.array(segms) 68 | return segms -------------------------------------------------------------------------------- /models/FPN/get_top_proposal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collect top proposals across all levels for FPN 3 | author: Yi Jiang, Chenxia Han 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class GetTopProposalOperator(mx.operator.CustomOp): 11 | def __init__(self, top_n): 12 | super().__init__() 13 | self.top_n = top_n 14 | 15 | def forward(self, is_train, req, in_data, out_data, aux): 16 | bboxes = in_data[0] 17 | scores = in_data[1] 18 | 19 | num_image = bboxes.shape[0] 20 | top_n = self.top_n 21 | top_bboxes = [] 22 | top_scores = [] 23 | 24 | for i in range(num_image): 25 | image_bboxes = bboxes[i] 26 | image_scores = scores[i] 27 | argsort_ind = mx.nd.argsort(image_scores[:,0], is_ascend=False) 28 | image_bboxes = image_bboxes[argsort_ind] 29 | image_bboxes = image_bboxes[:top_n] 30 | image_scores = image_scores[argsort_ind] 31 | image_scores = image_scores[:top_n] 32 | 33 | top_bboxes.append(image_bboxes) 34 | top_scores.append(image_scores) 35 | 36 | top_bboxes = mx.nd.stack(*top_bboxes) 37 | top_scores = mx.nd.stack(*top_scores) 38 | 39 | self.assign(out_data[0], req[0], top_bboxes) 40 | self.assign(out_data[1], req[1], top_scores) 41 | 42 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 43 | self.assign(in_grad[0], req[0], 0) 44 | self.assign(in_grad[1], req[1], 0) 45 | 46 | 47 | @mx.operator.register('get_top_proposal') 48 | class GetTopProposalProp(mx.operator.CustomOpProp): 49 | def __init__(self, top_n): 50 | super().__init__(need_top_grad=False) 51 | self.top_n = int(top_n) 52 | 53 | def list_arguments(self): 54 | return ['bbox', 'score'] 55 | 56 | def list_outputs(self): 57 | return ['bbox', 'score'] 58 | 59 | def infer_shape(self, in_shape): 60 | bbox_shape = in_shape[0] 61 | score_shape = in_shape[1] 62 | num_image = bbox_shape[0] 63 | 64 | top_bbox_shape = (num_image, self.top_n, 4) 65 | top_score_shape = (num_image, self.top_n, 1) 66 | 67 | return [bbox_shape, score_shape], \ 68 | [top_bbox_shape, top_score_shape] 69 | 70 | def create_operator(self, ctx, shapes, dtypes): 71 | return GetTopProposalOperator(self.top_n) 72 | 73 | def declare_backward_dependency(self, out_grad, in_data, out_data): 74 | return [] 75 | 
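# Usage sketch (an assumption for illustration, not part of the original file): the op
# registered above is invoked through mx.sym.Custom with inputs named after
# list_arguments(), e.g.
#
#   import mxnet as mx
#   bbox = mx.sym.Variable('bbox')    # (num_image, N, 4) proposals concatenated over levels
#   score = mx.sym.Variable('score')  # (num_image, N, 1) matching scores
#   out = mx.sym.Custom(bbox=bbox, score=score, op_type='get_top_proposal', top_n=1000)
#   top_bbox, top_score = out[0], out[1]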
-------------------------------------------------------------------------------- /utils/deprecated.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util comes from https://stackoverflow.com/questions/2536307/decorators-in-the-python-standard-lib-deprecated-specifically 3 | """ 4 | 5 | import functools 6 | import inspect 7 | import warnings 8 | 9 | string_types = (type(b''), type(u'')) 10 | 11 | 12 | def deprecated(reason): 13 | """ 14 | This is a decorator which can be used to mark functions 15 | as deprecated. It will result in a warning being emitted 16 | when the function is used. 17 | """ 18 | 19 | if isinstance(reason, string_types): 20 | 21 | # The @deprecated is used with a 'reason'. 22 | # 23 | # .. code-block:: python 24 | # 25 | # @deprecated("please, use another function") 26 | # def old_function(x, y): 27 | # pass 28 | 29 | def decorator(func1): 30 | 31 | if inspect.isclass(func1): 32 | fmt1 = "Call to deprecated class {name} ({reason})." 33 | else: 34 | fmt1 = "Call to deprecated function {name} ({reason})." 35 | 36 | @functools.wraps(func1) 37 | def new_func1(*args, **kwargs): 38 | warnings.simplefilter('always', DeprecationWarning) 39 | warnings.warn( 40 | fmt1.format(name=func1.__name__, reason=reason), 41 | category=DeprecationWarning, 42 | stacklevel=2 43 | ) 44 | warnings.simplefilter('default', DeprecationWarning) 45 | return func1(*args, **kwargs) 46 | 47 | return new_func1 48 | 49 | return decorator 50 | 51 | elif inspect.isclass(reason) or inspect.isfunction(reason): 52 | 53 | # The @deprecated is used without any 'reason'. 54 | # 55 | # .. code-block:: python 56 | # 57 | # @deprecated 58 | # def old_function(x, y): 59 | # pass 60 | 61 | func2 = reason 62 | 63 | if inspect.isclass(func2): 64 | fmt2 = "Call to deprecated class {name}." 65 | else: 66 | fmt2 = "Call to deprecated function {name}." 67 | 68 | @functools.wraps(func2) 69 | def new_func2(*args, **kwargs): 70 | warnings.simplefilter('always', DeprecationWarning) 71 | warnings.warn( 72 | fmt2.format(name=func2.__name__), 73 | category=DeprecationWarning, 74 | stacklevel=2 75 | ) 76 | warnings.simplefilter('default', DeprecationWarning) 77 | return func2(*args, **kwargs) 78 | 79 | return new_func2 80 | 81 | else: 82 | raise TypeError(repr(type(reason))) -------------------------------------------------------------------------------- /utils/contrib/edit_model_weight.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script allows you to edit the model weight from https://github.com/TuSimple/simpledet/blob/master/MODEL_ZOO.md 4 | for your own training. In this script, it assumes you don't change the layer of model, but only edit the content in 5 | the layer that models of MODEL_ZOO have. For example, you just change the training classes but not add or delete a 6 | unit of resnet. 7 | 8 | Example: Edit the weight from 80 + 1 classes to 3 + 1 classes training. 9 | - Train by your own configuration for one epoch, the configuration should have edited for 3 + 1 classes training. 10 | - Edit the constant in this file. 11 | - SIMPLEDET_WEIGHT_FOLDER the path to the weight folder you download 12 | - TRAINED_WEIGHT_FOLDER the path to the weight folder you need the shape 13 | - EDIT_KEY the key of layer which you want to edit the weight, you can show the key by 14 | print(arg_params_src), in this example, the key names are 15 | "bbox_cls_logit_weight", "bbox_cls_logit_bias" 16 | - Run the code! 
17 | 18 | Note: The newly generated model weight file will overwrite your original downloaded weight file; if you do not want this, 19 | you can edit the last line of the code. 20 | 21 | TODO: Before you run the code, you should train a model for one epoch and edit the code following the instructions above. 22 | """ 23 | 24 | import mxnet as mx 25 | import numpy as np 26 | import os 27 | 28 | # TODO: Edit the path. 29 | SIMPLEDET_WEIGHT_FOLDER = " " 30 | TRAINED_WEIGHT_FOLDER = " " 31 | 32 | # TODO: Edit the key names which you want to modify. 33 | EDIT_KEY = ["bbox_cls_logit_weight", "bbox_cls_logit_bias"] 34 | 35 | def change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst): 36 | for key in EDIT_KEY: 37 | arg_params_src[key] = arg_params_dst[key] 38 | return arg_params_src 39 | 40 | if __name__ == "__main__": 41 | sym, arg_params_src, aux_params = \ 42 | mx.model.load_checkpoint(os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint"), 6) 43 | _, arg_params_dst, _ = \ 44 | mx.model.load_checkpoint(os.path.join(TRAINED_WEIGHT_FOLDER, "checkpoint"), 1) 45 | 46 | # print(arg_params_src) to show the key names. 47 | # arg_params_src is the weight you want to change, downloaded from simpledet; 48 | # arg_params_dst is the weight whose shapes you need. 49 | arg_params = change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst) 50 | 51 | mx.model.save_checkpoint(os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint"), 1, sym, arg_params, aux_params) 52 | -------------------------------------------------------------------------------- /models/maskrcnn/detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mxnext as X 4 | import mxnet as mx 5 | 6 | from symbol.detector import FasterRcnn, RpnHead 7 | from models.FPN.builder import FPNRpnHead 8 | 9 | from models.maskrcnn import bbox_post_processing 10 | from utils.patch_config import patch_config_as_nothrow 11 | 12 | 13 | class MaskRcnn(object): 14 | def __init__(self): 15 | pass 16 | 17 | @staticmethod 18 | def get_train_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head): 19 | gt_bbox = X.var("gt_bbox") 20 | gt_poly = X.var("gt_poly") 21 | im_info = X.var("im_info") 22 | 23 | rpn_feat = backbone.get_rpn_feature() 24 | rcnn_feat = backbone.get_rcnn_feature() 25 | rpn_feat = neck.get_rpn_feature(rpn_feat) 26 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 27 | 28 | rpn_head.get_anchor() 29 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 30 | proposal, bbox_cls, bbox_target, bbox_weight, mask_proposal, mask_target, mask_ind = \ 31 | rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, gt_poly, im_info) 32 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 33 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, mask_proposal) 34 | 35 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 36 | mask_loss = mask_head.get_loss(mask_roi_feat, mask_target, mask_ind) 37 | return X.group(rpn_loss + bbox_loss + mask_loss) 38 | 39 | @staticmethod 40 | def get_test_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor): 41 | rec_id, im_id, im_info, proposal, proposal_score = \ 42 | MaskRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 43 | 44 | rcnn_feat = backbone.get_rcnn_feature() 45 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 46 | 47 | roi_feat = 
bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 48 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 49 | 50 | post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy) 51 | 52 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy) 53 | mask = mask_head.get_prediction(mask_roi_feat) 54 | 55 | mask_score = mx.sym.ones((1, ), name='maskiou_prediction') 56 | return X.group([rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls, mask, mask_score]) 57 | 58 | @staticmethod 59 | def get_rpn_test_symbol(backbone, neck, rpn_head): 60 | return FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) -------------------------------------------------------------------------------- /models/msrcnn/maskiou_compute.py: -------------------------------------------------------------------------------- 1 | """ 2 | IoU convert Operator 3 | Compute MaskIoU Target Given feature, mask ratio, mask target and mask predict logits 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class MaskIoUComputeOperator(mx.operator.CustomOp): 11 | def __init__(self): 12 | super().__init__() 13 | 14 | def forward(self, is_train, req, in_data, out_data, aux): 15 | mask_pred_logits = in_data[0].asnumpy() 16 | mask_target = in_data[1].asnumpy() 17 | mask_ratio = in_data[2].asnumpy().reshape(-1, ) 18 | mask_inds = in_data[3].asnumpy().reshape(-1, ) 19 | 20 | mask_pred = np.array(mask_pred_logits > 0.5, dtype=np.bool) 21 | 22 | intersec = mask_target * mask_pred 23 | mask_pred_sum = np.sum(mask_pred, axis=(1, 2)) 24 | intersec_sum = np.sum(intersec, axis=(1, 2)) 25 | mask_target_sum = np.sum(mask_target, axis=(1, 2)).astype(np.float) 26 | mask_target_sum /= mask_ratio 27 | union = mask_target_sum + mask_pred_sum - intersec_sum 28 | union = np.maximum(union, 1) 29 | intersec_sum = np.maximum(intersec_sum, 0) 30 | iou = np.reshape(intersec_sum / union , (-1, 1)) 31 | 32 | positive_inds = np.where(mask_inds > 0)[0] 33 | weight_list = np.zeros_like(mask_inds) 34 | weight_list[positive_inds] = 1 35 | weight_list = weight_list.reshape(-1, 1) 36 | 37 | self.assign(out_data[0], req[0], iou) 38 | self.assign(out_data[1], req[1], weight_list) 39 | 40 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 41 | self.assign(in_grad[0], req[0], 0) 42 | self.assign(in_grad[1], req[1], 0) 43 | self.assign(in_grad[2], req[2], 0) 44 | 45 | @mx.operator.register('maskiou_compute') 46 | class MaskIoUComputeProp(mx.operator.CustomOpProp): 47 | def __init__(self): 48 | super().__init__(need_top_grad=False) 49 | 50 | def list_arguments(self): 51 | return ['mask_pred_logits', 'mask_target', 'mask_ratio', 'mask_inds'] 52 | 53 | def infer_shape(self, in_shape): 54 | mask_pred_logits_shape = in_shape[0] 55 | mask_target_shape = in_shape[1] 56 | mask_ratio_shape = in_shape[2] 57 | mask_ind_shape = in_shape[3] 58 | 59 | maskiou_target_shape = (mask_target_shape[0], 1) 60 | weight_shape = (mask_target_shape[0], 1) 61 | return [mask_pred_logits_shape, mask_target_shape, mask_ratio_shape, mask_ind_shape], [maskiou_target_shape, weight_shape] 62 | 63 | def list_outputs(self): 64 | return ['maskiou_target', 'weight_list'] 65 | 66 | def create_operator(self, ctx, shapes, dtypes): 67 | return MaskIoUComputeOperator() 68 | 69 | def declare_backward_dependency(self, out_grad, in_data, out_data): 70 | return [] 71 | 72 | -------------------------------------------------------------------------------- 
/operator_py/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps_cython( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps -------------------------------------------------------------------------------- /models/FPN/assign_layer_fpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Assign Layer operator for FPN 3 | author: Yi Jiang, Chenxia Han 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class AssignLayerFPNOperator(mx.operator.CustomOp): 11 | def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level): 12 | super().__init__() 13 | self.rcnn_stride = rcnn_stride 14 | self.roi_canonical_scale = roi_canonical_scale 15 | self.roi_canonical_level = roi_canonical_level 16 | 17 | def forward(self, is_train, req, in_data, out_data, aux): 18 | all_rois = in_data[0] 19 | 20 | rcnn_stride = self.rcnn_stride 21 | scale0 = self.roi_canonical_scale 22 | lvl0 = self.roi_canonical_level 23 | k_min = np.log2(min(rcnn_stride)) 24 | k_max = np.log2(max(rcnn_stride)) 25 | 26 | rois_area = (all_rois[:, :, 2] - all_rois[:, :, 0] + 1) \ 27 | * (all_rois[:, :, 3] - 
all_rois[:, :, 1] + 1) 28 | 29 | scale = mx.nd.sqrt(rois_area) 30 | target_lvls = mx.nd.floor(lvl0 + mx.nd.log2(scale / scale0 + 1e-6)) 31 | target_lvls = mx.nd.clip(target_lvls, k_min, k_max) 32 | target_stride = (2 ** target_lvls).astype('uint8') 33 | 34 | for i, s in enumerate(rcnn_stride): 35 | lvl_rois = mx.nd.zeros_like(all_rois) 36 | lvl_inds = mx.nd.expand_dims(target_stride == s, axis=2).astype('float32') 37 | lvl_inds = mx.nd.broadcast_like(lvl_inds, lvl_rois) 38 | lvl_rois = mx.nd.where(lvl_inds, all_rois, lvl_rois) 39 | 40 | self.assign(out_data[i], req[i], lvl_rois) 41 | 42 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 43 | self.assign(in_grad[0], req[0], 0) 44 | 45 | 46 | @mx.operator.register('assign_layer_fpn') 47 | class AssignLayerFPNProp(mx.operator.CustomOpProp): 48 | def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level): 49 | super().__init__(need_top_grad=False) 50 | self.rcnn_stride = eval(rcnn_stride) 51 | self.roi_canonical_scale = int(roi_canonical_scale) 52 | self.roi_canonical_level = int(roi_canonical_level) 53 | 54 | def list_arguments(self): 55 | return ['rois'] 56 | 57 | def list_outputs(self): 58 | rois_list = ['rois_s{}'.format(s) for s in self.rcnn_stride] 59 | return rois_list 60 | 61 | def infer_shape(self, in_shape): 62 | rpn_rois_shape = in_shape[0] 63 | 64 | output_rois_shape = [rpn_rois_shape] * len(self.rcnn_stride) 65 | 66 | return [rpn_rois_shape], output_rois_shape 67 | 68 | def create_operator(self, ctx, shapes, dtypes): 69 | return AssignLayerFPNOperator(self.rcnn_stride, self.roi_canonical_scale, 70 | self.roi_canonical_level) 71 | 72 | def declare_backward_dependency(self, out_grad, in_data, out_data): 73 | return [] 74 | -------------------------------------------------------------------------------- /operator_py/cython/bbox_self.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_selfoverlaps_cython( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | (N, k) ndarray of self overlap with query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps_self = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua, ub 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ub = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) 70 | ) 71 | ua = float( 72 | ub + box_area - iw * ih 73 | ) 74 | overlaps_self[n, k] = iw * ih / ub 75 | return overlaps_self -------------------------------------------------------------------------------- /models/msrcnn/detector.py: -------------------------------------------------------------------------------- 1 | import mxnext as X 2 | import mxnet as mx 3 | 4 | from symbol.detector import FasterRcnn, RpnHead 5 | from models.FPN.builder import FPNRpnHead 6 | 7 | from models.maskrcnn import bbox_post_processing 8 | from models.msrcnn import maskiou_compute 9 | 10 | from utils.patch_config import patch_config_as_nothrow 11 | 12 | 13 | class MaskScoringRcnn(object): 14 | def __init__(self): 15 | pass 16 | 17 | @staticmethod 18 | def get_train_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, maskiou_head): 19 | gt_bbox = X.var("gt_bbox") 20 | gt_poly = X.var("gt_poly") 21 | im_info = X.var("im_info") 22 | 23 | rpn_feat = backbone.get_rpn_feature() 24 | rcnn_feat = backbone.get_rcnn_feature() 25 | rpn_feat = neck.get_rpn_feature(rpn_feat) 26 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 27 | 28 | rpn_head.get_anchor() 29 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 30 | proposal, bbox_cls, bbox_target, bbox_weight, mask_proposal, mask_target, mask_ind, mask_ratio = \ 31 | rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, gt_poly, im_info) 32 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 33 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, mask_proposal) 34 | 35 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 36 | mask_loss, mask_pred_logits = mask_head.get_loss(mask_roi_feat, mask_target, mask_ind) 37 | 38 | iou_loss = maskiou_head.get_loss(mask_roi_feat, mask_pred_logits, mask_target, mask_ind, mask_ratio) 39 | 
return X.group(rpn_loss + bbox_loss + mask_loss + iou_loss) 40 | 41 | @staticmethod 42 | def get_test_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor, maskiou_head): 43 | rec_id, im_id, im_info, proposal, proposal_score = \ 44 | MaskScoringRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 45 | 46 | rcnn_feat = backbone.get_rcnn_feature() 47 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 48 | 49 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 50 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 51 | 52 | post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy) 53 | 54 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy) 55 | mask = mask_head.get_prediction(mask_roi_feat) 56 | 57 | iou_pred = maskiou_head.get_maskiou_prediction(mask, mask_roi_feat, post_cls) 58 | 59 | return X.group([rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls, mask, iou_pred]) 60 | 61 | @staticmethod 62 | def get_rpn_test_symbol(backbone, neck, rpn_head): 63 | return FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) -------------------------------------------------------------------------------- /utils/contrib/data_to_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script allows you to transfer your own data from your own data format to coco format. 4 | 5 | Attention: This is not the official format, it does not require licenses and other redundant info, but can generate 6 | coco-like dataset which can be accepted by Simpledet. 7 | 8 | TODO: You should reimplement the code from line 31 to the end, this file only describe the format of dataset 9 | and the way to save it. 
10 | """ 11 | 12 | import json 13 | import sys 14 | 15 | def main(): 16 | if len(sys.argv) < 3: 17 | print("Usage: python data_to_coco.py infile outfile") 18 | exit(1) 19 | input_file = sys.argv[1] 20 | output_file = sys.argv[2] 21 | 22 | # The whole coco dataset 23 | dataset = { 24 | 'licenses': [], 25 | 'info': {}, 26 | 'categories': [], # Required 27 | 'images': [], # Required 28 | 'annotations': [] # Required 29 | } 30 | 31 | # TODO: class_map maps the class, which would be added into dataset['categories'] 32 | class_map = { 33 | "box": 1, 34 | "can": 2, 35 | "bottle": 3 36 | } 37 | for class_name, class_id in class_map.items(): 38 | dataset['categories'].append({ 39 | 'id': class_id, 40 | 'name': class_name, 41 | 'supercategory': 'supercategory_name' 42 | }) 43 | 44 | # TODO: Load your own data 45 | self_data_list = [] 46 | with open(input_file, 'r') as in_file: 47 | for line in in_file: 48 | self_data_list.append(json.loads(line)) 49 | 50 | # TODO: Dataset images info, normally you should implement an iter here to append the info 51 | dataset['images'].append({ 52 | 'coco_url': '', 53 | 'date_captured': '', 54 | 'file_name': '', # Required (str) image file name 55 | 'flickr_url': '', 56 | 'id': int(), # Required (int) id of image 57 | 'license': '', 58 | 'width': int(), # Required (int) width of image 59 | 'height': int() # Required (int) height of image 60 | }) 61 | 62 | # TODO: Dataset annotation info, normally you should implement an iter here to append the info 63 | dataset["annotations"].append({ 64 | 'area': int(), # Required (int) image area 65 | 'bbox': [int()] * 4, # Required (int) one of the image bboxes 66 | 'category_id': int(), # Required (int) class id of this bbox 67 | 'id': int(), # Required (int) bbox id in this image 68 | 'image_id': int(), # Required (int) image id of this bbox 69 | 'iscrowd': 0, # Optional, required only if you want to train for semantic segmentation 70 | 'segmentation': [] # Optional, required only if you want to train for semantic segmentation 71 | }) 72 | 73 | with open(output_file, 'w') as ofile: 74 | json.dump(dataset, ofile, sort_keys=True, indent=2) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /doc/BUILD_WHEEL.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | This document describes the process of packaging our custom mxnet as a python wheel for local installation. 3 | 4 | ### Platform 5 | In order to suppport CentOS 7, which is popular in production, we have to make the wheel largely comformant to `manylinux2014` tag of pypi. 6 | The `manylinux2014` tag specify the maximum version of some system library as 7 | ``` 8 | GLIBC_2.17 9 | CXXABI_1.3.7 10 | GLIBCXX_3.4.19 11 | GCC_4.8.5 12 | ``` 13 | Compiling the library under such restriction from a newer platform could be quite tricky, since this is essential cross-compiling. 14 | So here we complile from the Ubuntu 14.04 which is also `manylinux2014` comformant. 
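As a quick sanity check before building, the build host's glibc can be inspected from Python; this is only an illustrative snippet and not part of the repository:

```python
# illustrative check, not part of SimpleDet
import platform

libc, version = platform.libc_ver()
print(libc, version)  # e.g. ('glibc', '2.19') on Ubuntu 14.04

# manylinux2014 caps the required glibc at GLIBC_2.17 (see the table above);
# building on a host with a much newer glibc risks pulling in newer symbols.
```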
15 | 16 | ### Setup toolchains 17 | ```bash 18 | sudo apt-get update && \ 19 | sudo apt-get install -y git \ 20 | vim \ 21 | libcurl4-openssl-dev \ 22 | unzip \ 23 | gcc-4.8 \ 24 | g++-4.8 \ 25 | gfortran \ 26 | gfortran-4.8 \ 27 | binutils \ 28 | nasm \ 29 | libtool \ 30 | curl \ 31 | wget \ 32 | sudo \ 33 | gnupg \ 34 | gnupg2 \ 35 | gnupg-agent \ 36 | pandoc \ 37 | python3-pip \ 38 | automake \ 39 | pkg-config 40 | 41 | wget https://github.com/Kitware/CMake/releases/download/v3.15.2/cmake-3.15.2.tar.gz && \ 42 | tar xzf cmake-3.15.2.tar.gz && \ 43 | cd cmake-3.15.2 && \ 44 | ./configure && make install -j4 && cd .. && \ 45 | rm -r cmake-3.15.2 cmake-3.15.2.tar.gz 46 | 47 | # change the url to your repo link if you are doing PR 48 | export SIMPLEDET_URL=https://github.com/tusimple/simpledet 49 | git clone --recursive --depth=1 https://github.com/apache/incubator-mxnet /work/mxnet && \ 50 | cd /work/mxnet && \ 51 | git clone $SIMPLEDET_URL /work/simpledet && \ 52 | cp -r /work/simpledet/operator_cxx/* /work/mxnet/src/operator && \ 53 | git clone https://github.com/RogerChern/cocoapi /work/cocoapi && \ 54 | mkdir -p src/coco_api && \ 55 | cp -r /work/cocoapi/common src/coco_api && \ 56 | rm /work/mxnet/src/operator/nn/group_norm* && \ 57 | rm -r /work/cocoapi /work/simpledet 58 | ``` 59 | 60 | ### Compile `libmxnet.so` with static dependancy 61 | ``` 62 | cd /work/mxnet 63 | # remove sm_30 64 | sed -i 's/KNOWN_CUDA_ARCHS :=.*/KNOWN_CUDA_ARCHS := 35 50 60 70/' Makefile 65 | # change build config according to the target CUDA version 66 | tools/staticbuild/build.sh cu100 pip 67 | # tools/staticbuild/build.sh cu101 pip 68 | ``` 69 | 70 | ### Package wheel 71 | ``` 72 | cd /work/mxnet/tools/pip 73 | ln -s /work/mxnet mxnet-build 74 | 75 | # change the path according to the target CUDA version 76 | LD_LIBRARY_PATH=/work/mxnet/staticdeps/usr/local/cuda-10.0/lib64:/work/mxnet/staticdeps/usr/lib/x86_64-linux-gnu:/work/mxnet/staticdeps/usr/lib/nvidia-410 77 | # LD_LIBRARY_PATH=/work/mxnet/staticdeps/usr/local/cuda-10.1/lib64:/work/mxnet/staticdeps/usr/lib/x86_64-linux-gnu:/work/mxnet/staticdeps/usr/lib/nvidia-418 78 | export LD_LIBRARY_PATH 79 | mxnet_variant=CU100 python3 setup.py bdist_wheel 80 | # mxnet_variant=CU101 python3 setup.py bdist_wheel 81 | ``` 82 | 83 | The built wheel file is in `dist/` 84 | -------------------------------------------------------------------------------- /unittest/test_loader.py: -------------------------------------------------------------------------------- 1 | import pickle as pkl 2 | import unittest 3 | import mxnet as mx 4 | 5 | from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ 6 | ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ 7 | RenameRecord, AnchorTarget2D, AnchorLoader 8 | from config import detection_config 9 | 10 | 11 | class TestLoader(unittest.TestCase): 12 | 13 | def test_empty_v_loader(self): 14 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 15 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 16 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 17 | all_v_roidbs = [roidb for roidb in roidbs if roidb['h'] >= roidb['w']] 18 | 19 | loader = AnchorLoader( 20 | roidb=all_v_roidbs, 21 | transform=transform, 22 | data_name=data_name, 23 | label_name=label_name, 24 | batch_size=1, 25 | shuffle=True, 26 | num_worker=1, 27 | kv=mx.kvstore.create(pKv.kvstore) 28 | ) 29 | with self.assertRaises(StopIteration): 30 | while True: 31 | 
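                # keep pulling batches until the loader is exhausted and raises StopIteration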
data_batch = loader.next() 32 | 33 | def test_empty_h_loader(self): 34 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 35 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 36 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 37 | all_h_roidbs = [roidb for roidb in roidbs if roidb['h'] < roidb['w']] 38 | 39 | loader = AnchorLoader( 40 | roidb=all_h_roidbs, 41 | transform=transform, 42 | data_name=data_name, 43 | label_name=label_name, 44 | batch_size=1, 45 | shuffle=True, 46 | num_worker=1, 47 | kv=mx.kvstore.create(pKv.kvstore) 48 | ) 49 | with self.assertRaises(StopIteration): 50 | while True: 51 | data_batch = loader.next() 52 | 53 | def test_record_num(self): 54 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 55 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 56 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 57 | batch_size = 4 58 | 59 | loader = AnchorLoader( 60 | roidb=roidbs, 61 | transform=transform, 62 | data_name=data_name, 63 | label_name=label_name, 64 | batch_size=batch_size, 65 | shuffle=True, 66 | num_worker=1, 67 | kv=mx.kvstore.create(pKv.kvstore) 68 | ) 69 | 70 | num_batch = 0 71 | while True: 72 | try: 73 | data_batch = loader.next() 74 | num_batch += 1 75 | except StopIteration: 76 | break 77 | self.assertEqual(batch_size * num_batch, loader.total_record) 78 | 79 | 80 | -------------------------------------------------------------------------------- /operator_cxx/contrib/axpy.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * Copyright (c) 2018 by Contributors 22 | * \file axpy.cc 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Yuntao Chen 25 | */ 26 | #include "./axpy-inl.h" 27 | 28 | namespace mshadow { 29 | template 30 | inline void AxpyForwardLauncher(const Tensor &scale_data, 31 | const Tensor &x_data, 32 | const Tensor &y_data, 33 | const Tensor &out) { 34 | LOG(FATAL) << "NotImplemented"; 35 | } 36 | 37 | template 38 | inline void AxpyBackwardLauncher(const Tensor &scale_data, 39 | const Tensor &x_data, 40 | const Tensor &y_data, 41 | const Tensor &scale_grad, 42 | const Tensor &x_grad, 43 | const Tensor &y_grad, 44 | const Tensor &out_grad, 45 | Stream *s) { 46 | LOG(FATAL) << "NotImplemented"; 47 | } 48 | } // namespace mshadow 49 | 50 | namespace mxnet { 51 | namespace op { 52 | 53 | template<> 54 | Operator *CreateOp(AxpyParam param, int dtype) { 55 | Operator* op = NULL; 56 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 57 | op = new AxpyOp(param); 58 | }); 59 | return op; 60 | } 61 | 62 | Operator *AxpyProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 63 | std::vector *in_type) const { 64 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(AxpyParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_Axpy, AxpyProp) 70 | .describe(R"code(Accelerate Squeeze and Excitation Network)code" ADD_FILELINE) 71 | .add_argument("scale", "NDArray-or-Symbol", "channel scaling factor") 72 | .add_argument("x", "NDArray-or-Symbol", "resnet increase output") 73 | .add_argument("y", "NDArray-or-Symbol", "resnet shortcut output") 74 | .add_arguments(AxpyParam::__FIELDS__()); 75 | } // namespace op 76 | } // namespace mxnet 77 | -------------------------------------------------------------------------------- /utils/create_voc_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import glob 4 | import pickle 5 | import json 6 | import xml.etree.ElementTree as ET 7 | 8 | import numpy as np 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database for PASCAL VOC dataset') 13 | parser.add_argument('--data-dir', help='Path to VOC-like dataset', type=str) 14 | parser.add_argument('--label-map', help='A json file containing the map from class name to training id', 15 | type=str, default="data/label_map/voc_label_map.json") 16 | parser.add_argument('--split', help='Dataset split', type=str, default=None) 17 | 18 | args = parser.parse_args() 19 | with open(args.label_map) as f: 20 | label_map = json.load(f) 21 | return args.data_dir, label_map, args.split 22 | 23 | 24 | def create_roidb(data_dir, label_map, split): 25 | # sanity check 26 | if not os.path.exists(data_dir): 27 | raise Exception("{} is not accessible".format(data_dir)) 28 | for subdir in ["Annotations", "JPEGImages", "ImageSets"]: 29 | if not os.path.exists(os.path.join(data_dir, subdir)): 30 | raise Exception("{}/{} is not accessible".format(data_dir, subdir)) 31 | 32 | if split is not None: 33 | subset = set() 34 | with open("{}/ImageSets/Main/{}.txt".format(data_dir, split)) as f: 35 | for line in f: 36 | subset.add("{}/Annotations/{}.xml".format(data_dir, line.strip())) 37 | else: 38 | subset = glob.glob("{}/Annotations/*.xml".format(data_dir)) 39 | 40 | roidb = [] 41 | for i, anno_name in enumerate(sorted(subset)): 42 | tree = ET.parse(anno_name) 43 | root = tree.getroot() 44 | h = int(root.find("size/height").text) 45 | w = 
int(root.find("size/width").text) 46 | filename = root.find("filename").text 47 | image_url = os.path.abspath(os.path.join(data_dir, "JPEGImages", filename)) 48 | assert os.path.exists(image_url) 49 | im_id = i 50 | gt_class, gt_bbox = list(), list() 51 | for obj in root.findall("object"): 52 | gt_class.append(label_map[obj.find("name").text]) 53 | x1 = float(obj.find("bndbox/xmin").text) 54 | y1 = float(obj.find("bndbox/ymin").text) 55 | x2 = float(obj.find("bndbox/xmax").text) 56 | y2 = float(obj.find("bndbox/ymax").text) 57 | gt_bbox.append([x1, y1, x2, y2]) 58 | 59 | roidb.append(dict( 60 | gt_class=np.array(gt_class, dtype=np.float32), 61 | gt_bbox=np.array(gt_bbox, dtype=np.float32), 62 | flipped=False, 63 | h=h, 64 | w=w, 65 | image_url=image_url, 66 | im_id=im_id)) 67 | 68 | dataset_name = os.path.basename(data_dir).lower() 69 | if split is not None: 70 | roidb_name = "data/cache/{}_{}.roidb".format(dataset_name, split) 71 | else: 72 | roidb_name = "data/cache/{}.roidb".format(dataset_name) 73 | 74 | with open(roidb_name, "wb") as f: 75 | pickle.dump(roidb, f) 76 | 77 | 78 | if __name__ == "__main__": 79 | os.makedirs("data/cache", exist_ok=True) 80 | create_roidb(*parse_args()) 81 | -------------------------------------------------------------------------------- /models/maskrcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Mask-RCNN 2 | 3 | This repository implements [**Mask-RCNN**](https://arxiv.org/abs/1703.06870) in the SimpleDet framework. 4 | Mask-RCNN is a simple and effective approach for object instance segmentation. By simply extending Faster-RCNN with a mask branch, Mask-RCNN can generate a high-quality segmentation mask for each instance. In the following, we will introduce how we build Mask-RCNN in the SimpleDet framework. Currently, we only provide FPN based Mask-RCNN. 5 | 6 | ### Qucik Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/mask_r50v1_fpn_1x.py 10 | 11 | # test 12 | python3 mask_test.py --config config/mask_r50v1_fpn_1x.py 13 | ``` 14 | ### How we build Mask-RCNN 15 | #### Input 16 | First, we need mask label. 17 | 18 | Instead of providing binary masks to the network, we adopt poly format in the current implementation. Since each instance may contain several parts, we adopt a list of lists ([[ax1, ay1, ax2, ay2,...], [bx1, by1, bx2, by2,...], ...) to represent each instance following COCO. For simplicity, we note [ax1, ay1, ax2, ay2, ...] as a segm. 19 | 20 | We implement these transforms for poly format mask label: 21 | - **PreprocessGtPoly**: convert each segm in a instance into ndarray. 22 | - **EncodeGtPoly**: encode each instance into a fixed length format ([class_id, num_segms, len_segm1, len_segm2, segm1, segm2]). 23 | 24 | For data augmentation, we extend several transfroms from Faster-RCNN: 25 | - **Resize2DImageBboxMask**: based on **Resize2DImageBbox** 26 | - **Flip2DImageBboxMask**: based on **Flip2DImageBbox** 27 | - **Pad2DImageBboxMask**: based on **Pad2DImageBbox** 28 | 29 | #### Operators 30 | Then, we extend proposal_target to get sampled mask target for mask branch training: 31 | - **proposal_mask_target**, decodes encoded gt poly into binary mask and samples a fixed amount of masks as mask target. For acceleration, we only provide mask target for fg roi. So the number of mask target is ```int(image_roi * fg_fraction)```. Currently we only support class specific mask target. 
So the shape of mask target is ```(batch_size, int(image_roi * fg_fraction), num_class (81 in COCO), mask_size, mask_size)```. 32 | 33 | In order to test mask in an end-to-end manner, we reuses the code from detection_test.py and implement a bbox post processing operator: 34 | - **bbox_post_processing**, adopts NMS for multi-class bbox and get final bbox results. 35 | 36 | For loss function, we implement sigmoid cross entropy: 37 | - **sigmoid_cross_entropy**, a general sigmoid cross entropy loss function. 38 | 39 | #### Symbol 40 | - **MaskFasterRcnn**, detector for MaskRCNN 41 | - **MaskFPNRpnHead**, a new RpnHead inherited from FPNRpnHead, note that we slice the proposal sampled from proposal_mask_target since the mask target provided by this operator is only for fg roi. 42 | - **MaskFasterRcnnHead**, mask head for MaskRCNN 43 | - **MaskFasterRcnn4ConvHead**, a specific mask head with 4 convolutions. 44 | - **BboxPostProcessor**, a bbox post processor for end-to-end test. 45 | 46 | ### How to build Mask-RCNN without FPN 47 | - Implement **MaskRpnHead** following **MaskFPNRpnHead**. 48 | - Implement your own MaskHead following **MaskFasterRcnn4ConvHead** 49 | - Write your own config following **mask_r50v1_fpn_1x.py** and **faster_r50v1c4_c5_512roi_1x.py** 50 | 51 | -------------------------------------------------------------------------------- /symbol/detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mxnet as mx 4 | import mxnext as X 5 | from utils.patch_config import patch_config_as_nothrow 6 | 7 | 8 | class Rpn(object): 9 | _rpn_output = None 10 | 11 | def __init__(self): 12 | pass 13 | 14 | @classmethod 15 | def get_train_symbol(cls, backbone, neck, rpn_head): 16 | rpn_feat = backbone.get_rpn_feature() 17 | rpn_feat = neck.get_rpn_feature(rpn_feat) 18 | 19 | rpn_loss = rpn_head.get_loss(rpn_feat, None, None) 20 | 21 | return X.group(rpn_loss) 22 | 23 | @classmethod 24 | def get_rpn_test_symbol(cls, backbone, neck, rpn_head): 25 | if cls._rpn_output is not None: 26 | return cls._rpn_output 27 | 28 | im_info = X.var("im_info") 29 | im_id = X.var("im_id") 30 | rec_id = X.var("rec_id") 31 | 32 | rpn_feat = backbone.get_rpn_feature() 33 | rpn_feat = neck.get_rpn_feature(rpn_feat) 34 | 35 | (proposal, proposal_score) = rpn_head.get_all_proposal(rpn_feat, im_info) 36 | 37 | cls._rpn_output = X.group([rec_id, im_id, im_info, proposal, proposal_score]) 38 | return cls._rpn_output 39 | 40 | 41 | class FasterRcnn(object): 42 | _rpn_output = None 43 | 44 | def __init__(self): 45 | pass 46 | 47 | @classmethod 48 | def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 49 | gt_bbox = X.var("gt_bbox") 50 | im_info = X.var("im_info") 51 | 52 | rpn_feat = backbone.get_rpn_feature() 53 | rcnn_feat = backbone.get_rcnn_feature() 54 | rpn_feat = neck.get_rpn_feature(rpn_feat) 55 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 56 | 57 | rpn_head.get_anchor() 58 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 59 | proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info) 60 | roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal) 61 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 62 | 63 | return X.group(rpn_loss + bbox_loss) 64 | 65 | @classmethod 66 | def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 67 | rec_id, im_id, im_info, proposal, proposal_score = \ 68 
| FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 69 | 70 | rcnn_feat = backbone.get_rcnn_feature() 71 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 72 | 73 | roi_feat = roi_extractor.get_roi_feature_test(rcnn_feat, proposal) 74 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 75 | 76 | return X.group([rec_id, im_id, im_info, cls_score, bbox_xyxy]) 77 | 78 | @classmethod 79 | def get_rpn_test_symbol(cls, backbone, neck, rpn_head): 80 | if cls._rpn_output is not None: 81 | return cls._rpn_output 82 | 83 | im_info = X.var("im_info") 84 | im_id = X.var("im_id") 85 | rec_id = X.var("rec_id") 86 | 87 | rpn_head.get_anchor() 88 | rpn_feat = backbone.get_rpn_feature() 89 | rpn_feat = neck.get_rpn_feature(rpn_feat) 90 | 91 | (proposal, proposal_score) = rpn_head.get_all_proposal(rpn_feat, im_info) 92 | 93 | cls._rpn_output = X.group([rec_id, im_id, im_info, proposal, proposal_score]) 94 | return cls._rpn_output -------------------------------------------------------------------------------- /detection_infer_speed.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import importlib 3 | import time 4 | from utils.patch_config import patch_config_as_nothrow 5 | from core.detection_module import DetModule 6 | 7 | import mxnet as mx 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Test detector inference speed') 12 | # general 13 | parser.add_argument('--config', help='config file path', type=str, required=True) 14 | parser.add_argument('--shape', help='specify input 2d image shape', metavar=('SHORT', 'LONG'), type=int, nargs=2, required=True) 15 | parser.add_argument('--gpu', help='GPU index', type=int, default=0) 16 | parser.add_argument('--count', help='number of runs, final result will be averaged', type=int, default=100) 17 | args = parser.parse_args() 18 | 19 | config = importlib.import_module(args.config.replace('.py', '').replace('/', '.')) 20 | return config, args.gpu, args.shape, args.count 21 | 22 | 23 | if __name__ == "__main__": 24 | config, gpu, shape, count = parse_args() 25 | 26 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 27 | transform, data_name, label_name, metric_list = config.get_config(is_train=False) 28 | sym = pModel.test_symbol 29 | 30 | # create dummy data batch 31 | data = mx.nd.ones(shape=[1, 3] + shape) 32 | im_info = mx.nd.array([x / 2.0 for x in shape] + [2.0]).reshape(1, 3) 33 | im_id = mx.nd.array([1]) 34 | rec_id = mx.nd.array([1]) 35 | data_names = ["data", "im_info", "im_id", "rec_id"] 36 | data_shape = [[1, 3] + shape, [1, 3], [1], [1]] 37 | data_shape = [(name, shape) for name, shape in zip(data_names, data_shape)] 38 | data_batch = mx.io.DataBatch(data=[data, im_info, im_id, rec_id]) 39 | 40 | ''' 41 | there are some conflicts between `mergebn` and `attach_quantized_node` in graph_optimize.py 42 | when mergebn ahead of attach_quantized_node 43 | such as `Symbol.ComposeKeyword` 44 | ''' 45 | pModel = patch_config_as_nothrow(pModel) 46 | if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag: 47 | pQuant = pModel.QuantizeTrainingParam 48 | assert pGen.fp16 == False, "current quantize training only support fp32 mode." 
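        # rewrite the test symbol with INT8 quantization nodes so the timed graph matches the quantized model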
49 | from utils.graph_optimize import attach_quantize_node 50 | worker_data_shape = dict([(name, tuple(shape)) for name, shape in data_shape]) 51 | # print(worker_data_shape) 52 | # raise NotImplementedError 53 | _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape) 54 | out_shape_dictoinary = dict(zip(sym.get_internals().list_outputs(), out_shape)) 55 | sym = attach_quantize_node(sym, out_shape_dictoinary, pQuant.WeightQuantizeParam, 56 | pQuant.ActQuantizeParam, pQuant.quantized_op) 57 | sym.save(pTest.model.prefix + "_infer_speed.json") 58 | 59 | 60 | ctx = mx.gpu(gpu) 61 | mod = DetModule(sym, data_names=data_names, context=ctx) 62 | mod.bind(data_shapes=data_shape, for_training=False) 63 | mod.set_params({}, {}, True) 64 | 65 | # let AUTOTUNE run for once 66 | mod.forward(data_batch, is_train=False) 67 | for output in mod.get_outputs(): 68 | output.wait_to_read() 69 | 70 | tic = time.time() 71 | for _ in range(count): 72 | mod.forward(data_batch, is_train=False) 73 | for output in mod.get_outputs(): 74 | output.wait_to_read() 75 | toc = time.time() 76 | 77 | print((toc - tic) / count * 1000) 78 | 79 | -------------------------------------------------------------------------------- /utils/create_coco_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle as pkl 4 | import numpy as np 5 | from pycocotools.coco import COCO 6 | 7 | 8 | dataset_split_mapping = { 9 | "valminusminival2014": "val2014", 10 | "minival2014": "val2014", 11 | "test-dev2017": "test2017" 12 | } 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database for COCO-like dataset') 17 | parser.add_argument('--dataset', help='dataset name', type=str) 18 | parser.add_argument('--dataset-split', help='dataset split, e.g. 
train2017, minival2014', type=str) 19 | 20 | args = parser.parse_args() 21 | return args.dataset, args.dataset_split 22 | 23 | 24 | def generate_groundtruth_database(dataset_name, dataset_split): 25 | annotation_type = 'image_info' if 'test' in dataset_split else 'instances' 26 | annotation_path = "data/%s/annotations/%s_%s.json" % (dataset_name, annotation_type, dataset_split) 27 | assert os.path.exists(annotation_path) 28 | 29 | dataset = COCO(annotation_path) 30 | img_ids = dataset.getImgIds() 31 | roidb = [] 32 | for img_id in img_ids: 33 | img_anno = dataset.loadImgs(img_id)[0] 34 | im_filename = img_anno['file_name'] 35 | im_w = img_anno['width'] 36 | im_h = img_anno['height'] 37 | 38 | ins_anno_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=False) 39 | trainid_to_datasetid = dict({i + 1: cid for i, cid in enumerate(dataset.getCatIds())}) # 0 for bg 40 | datasetid_to_trainid = dict({cid: tid for tid, cid in trainid_to_datasetid.items()}) 41 | instances = dataset.loadAnns(ins_anno_ids) 42 | 43 | # sanitize bboxes 44 | valid_instances = [] 45 | for inst in instances: 46 | x, y, box_w, box_h = inst['bbox'] 47 | x1 = max(0, x) 48 | y1 = max(0, y) 49 | x2 = min(im_w - 1, x1 + max(0, box_w - 1)) 50 | y2 = min(im_h - 1, y1 + max(0, box_h - 1)) 51 | if inst['area'] > 0 and x2 >= x1 and y2 >= y1: 52 | inst['clean_bbox'] = [x1, y1, x2, y2] 53 | valid_instances.append(inst) 54 | num_instance = len(valid_instances) 55 | 56 | gt_bbox = np.zeros((num_instance, 4), dtype=np.float32) 57 | gt_class = np.zeros((num_instance, ), dtype=np.int32) 58 | gt_poly = [None] * num_instance 59 | 60 | for i, inst in enumerate(valid_instances): 61 | cls = datasetid_to_trainid[inst['category_id']] 62 | gt_bbox[i, :] = inst['clean_bbox'] 63 | gt_class[i] = cls 64 | gt_poly[i] = 'segmentation' in inst and inst['segmentation'] or gt_poly[i] 65 | 66 | # split mapping is specific to coco as it uses annotation files to manage split 67 | split = dataset_split in dataset_split_mapping and dataset_split_mapping[dataset_split] or dataset_split 68 | 69 | image_url = 'data/%s/images/%s/%s' % (dataset_name, split, im_filename) 70 | assert os.path.exists(image_url) 71 | roi_rec = { 72 | 'image_url': image_url, 73 | 'im_id': img_id, 74 | 'h': im_h, 75 | 'w': im_w, 76 | 'gt_class': gt_class, 77 | 'gt_bbox': gt_bbox, 78 | 'gt_poly': gt_poly, 79 | 'flipped': False} 80 | 81 | roidb.append(roi_rec) 82 | 83 | return roidb 84 | 85 | 86 | if __name__ == "__main__": 87 | d, dsplit = parse_args() 88 | roidb = generate_groundtruth_database(d, dsplit) 89 | os.makedirs("data/cache", exist_ok=True) 90 | with open("data/cache/%s_%s.roidb" % (d, dsplit), "wb") as fout: 91 | pkl.dump(roidb, fout) 92 | -------------------------------------------------------------------------------- /config/int8/README.md: -------------------------------------------------------------------------------- 1 | ## Quantization during Training 2 | 3 | #### Motivation 4 | Low precision weight and activation could greatly reduce the storage and memory footprint of detection models and improve the inference latency. We provide the inference time measured on TensorRT of INT8 and FP32 version of `faster_r50v1c4_c5_512roi_1x` as an example below. 
5 | 6 | | dtype | time(ms) | minival mAP| 7 | | ----- | -------- | -----------| 8 | | fp32 | 260 | 35.7 | 9 | | int8 | 100 | 35.8 | 10 | 11 | **detailed configs** 12 | 13 | ```shell 14 | batch size=1 15 | device = GTX 1080 16 | data shape = (1, 3, 800, 1200) 17 | ``` 18 | 19 | ### Implementation Details 20 | 21 | #### The Quantization Methods 22 | 23 | **for model weight:** 24 | ```shell 25 | nbits = 8 26 | QUANT_LEVEL = 2 ** (nbits - 1) - 1 27 | threshold = max(abs(w_tensor)) 28 | quant_unit = threshold / QUANT_LEVEL 29 | quantized_w = round(w_tensor / quant_unit) * quant_unit 30 | ``` 31 | 32 | **for model activation:** The threshold is maintained as an exponential moving average of the maximum absolute activation. [ref]() 33 | 34 | ```shell 35 | nbits = 8 36 | QUANT_LEVEL = 2**(nbits -1) -1 37 | history_threshold; # initialized by max(abs(act_tensor)) 38 | curr_max = max(abs(act_tensor)) 39 | threshold = 0.99 * history_threshold + 0.01 * curr_max 40 | quant_unit = threshold / QUANT_LEVEL 41 | quantized_act = round(act_tensor / quant_unit) * quant_unit 42 | ``` 43 | 44 | ### Quantization Configs 45 | The quantization configs are in the `ModelParam.QuantizeTrainingParam` class, which gives users more flexibility during quantization. 46 | 47 | **quantize_flag:** whether to quantize the model. 48 | 49 | **quantized_op:** the operators to quantize. 50 | 51 | `WeightQuantizeParam` and `ActQuantizeParam` are the attribute sets needed by the `Quantization_int8` operator for quantizing `weight` and `activation`. 52 | 53 | ### Attributes of the `quantization_int8` operator 54 | 55 | **delay_quant:** quantization only takes effect after `delay_quant` iterations. 56 | 57 | **ema_decay:** the decay factor used to update the activation threshold. 58 | 59 | **grad_mode:** the mode for passing gradients. There are two modes: `ste` or `clip`. `ste` passes the output gradients straight through to the data; `clip` only passes gradients for data values within the range [-threshold, threshold] and sets the gradients outside this range to 0. 60 | 61 | **workspace:** the temporary space used when grad_mode='clip'. 62 | 63 | **is_weight:** whether the tensor to be quantized is a weight. 64 | 65 | **is_weight_perchannel:** the quantization granularity for weights: per tensor or per channel. Only used when the tensor is a weight. Currently, only the per-tensor mode is supported. 66 | 67 | **quant_mode:** the quantization method: `minmax` or `power2`. Currently, only the minmax mode is supported. 68 | 69 | 70 | ### How to reproduce the result 71 | 1. Install a newer version of MXNet 72 | [[CUDA100]](https://1dv.aflat.top/mxnet_cu100-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 73 | [[CUDA101]](https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 74 | 2. Train an fp32 model with the default config. 75 | 3. Finetune the trained fp32 model with quantization training. Our finetuning settings are: `begin_epoch=6` and `end_epoch=12`. All other configs remain the same as the fp32 training configs. 76 | 4. We provide an example [model](https://1dv.aflat.top/faster_r50v1bc4_c5_512roi_1x_int8.zip) for `faster_r50v1c4_c5_512roi_1x`. 77 | 78 | ### Drawbacks 79 | TensorRT does not provide an API to set the quantization `scale` to a user-supplied value instead of the `scale` it calculates itself, so the learned `threshold` cannot be directly deployed to TensorRT at the moment. You may need to tweak the weight file generated by TensorRT.
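For reference, the fake-quantization scheme from the Implementation Details section can be sketched in a few lines of NumPy. This is an illustration only, not the actual CUDA operator; the clamping to the int8 range is an assumption of the sketch.

```python
# illustrative NumPy sketch of the minmax fake quantization described above
import numpy as np

QUANT_LEVEL = 2 ** (8 - 1) - 1  # 127 for int8

def fake_quantize_weight(w_tensor):
    threshold = np.abs(w_tensor).max()
    quant_unit = threshold / QUANT_LEVEL
    return np.round(w_tensor / quant_unit) * quant_unit

def fake_quantize_act(act_tensor, history_threshold, ema_decay=0.99):
    curr_max = np.abs(act_tensor).max()
    threshold = ema_decay * history_threshold + (1 - ema_decay) * curr_max
    quant_unit = threshold / QUANT_LEVEL
    # clamping to [-QUANT_LEVEL, QUANT_LEVEL] is assumed here for illustration
    q = np.clip(np.round(act_tensor / quant_unit), -QUANT_LEVEL, QUANT_LEVEL) * quant_unit
    return q, threshold  # the updated threshold is carried to the next iteration
```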
80 | -------------------------------------------------------------------------------- /utils/create_crowdhuman_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle as pkl 4 | import numpy as np 5 | import random 6 | from PIL import Image 7 | import concurrent.futures 8 | import json 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database for Crowdhuman-like dataset') 12 | parser.add_argument('--dataset', help='dataset name', type=str) 13 | parser.add_argument('--dataset-split', help='dataset split, e.g. train, val', type=str) 14 | parser.add_argument('--num-threads', help='number of threads to process', default=4, type=int) 15 | 16 | args = parser.parse_args() 17 | return args.dataset, args.dataset_split, args.num_threads 18 | 19 | def load_func(fpath): 20 | assert os.path.exists(fpath) 21 | with open(fpath,'r') as fid: 22 | lines = fid.readlines() 23 | records = [json.loads(line.strip('\n')) for line in lines] 24 | return records 25 | 26 | def decode_annotations(rec_id): 27 | global dataset_path 28 | img_id = records[rec_id]['ID'] 29 | img_url = dataset_path + 'images/' + img_id + '.jpg' 30 | assert os.path.exists(img_url) 31 | im = Image.open(img_url) 32 | im_w, im_h = im.width, im.height 33 | 34 | gt_box = records[rec_id]['gtboxes'] 35 | gt_box_len = len(gt_box) 36 | valid_bboxes = [] 37 | valid_classes = [] 38 | for ii in range(gt_box_len): 39 | each_data = gt_box[ii] 40 | x, y, w, h = each_data['fbox'] 41 | 42 | if w <= 0 or h <= 0: 43 | continue 44 | 45 | x1 = x; y1 = y; x2 = x + w; y2 = y + h 46 | 47 | valid_bbox = [x1, y1, x2, y2] 48 | valid_bboxes.append(valid_bbox) 49 | if each_data['tag'] == 'person': 50 | tag = 1 51 | else: 52 | tag = -2 53 | if 'extra' in each_data: 54 | if 'ignore' in each_data['extra']: 55 | if each_data['extra']['ignore'] != 0: 56 | tag = -2 57 | valid_classes.append(tag) 58 | 59 | valid_bboxes = np.array(valid_bboxes).reshape(-1, 4) 60 | valid_classes = np.array(valid_classes).reshape(-1,) 61 | 62 | valid_num = valid_bboxes.shape[0] 63 | rand_ind = np.arange(valid_num) 64 | np.random.shuffle(rand_ind) 65 | gt_bbox = valid_bboxes[rand_ind] 66 | gt_class = valid_classes[rand_ind] 67 | 68 | roi_rec = { 69 | 'image_url': img_url, 70 | 'im_id': rec_id, 71 | 'id': img_id, 72 | 'h': im_h, 73 | 'w': im_w, 74 | 'gt_class': gt_class, 75 | 'gt_bbox': gt_bbox, 76 | 'flipped': False 77 | } 78 | return roi_rec, gt_bbox.shape[0] 79 | 80 | if __name__ == "__main__": 81 | dataset_name, dataset_type, num_threads = parse_args() 82 | 83 | dataset_path = 'data/%s/' % dataset_name 84 | ch_file_path = dataset_path + 'annotations/annotation_%s.odgt' % dataset_type 85 | json_file_path = dataset_path + 'annotations/annotation_%s.json' % dataset_type 86 | 87 | records = load_func(ch_file_path) 88 | print("Loading Annotations Done") 89 | 90 | roidbs = []; num_bbox = 0 91 | rec_ids = list(range(len(records))) 92 | 93 | with concurrent.futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 94 | for index, res_data in enumerate(executor.map(decode_annotations, rec_ids)): 95 | roidb, cnt = res_data 96 | roidbs.append(roidb) 97 | num_bbox += cnt 98 | if index % 1000 == 0: 99 | print("Finished %d/%d" % (index, len(rec_ids))) 100 | print("Parsing Bbox Number: %d" % num_bbox) 101 | os.makedirs("data/cache", exist_ok=True) 102 | with open("data/cache/%s_%s.roidb" % (dataset_name, dataset_type), "wb") as fout: 103 | pkl.dump(roidbs, 
fout) 104 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sigmoid_cross_entropy.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file sigmoid_cross_entropy.cc 23 | * \brief 24 | * \author Yuntao Chen 25 | */ 26 | 27 | #include "./sigmoid_cross_entropy-inl.h" 28 | 29 | namespace mshadow { 30 | 31 | template 32 | inline void SigmoidCrossEntropyForward(const Tensor &data, 33 | const Tensor &label, 34 | Tensor &loss, 35 | Tensor &loss_sum, 36 | Tensor &count, 37 | Tensor &count_sum, 38 | Tensor &out, 39 | T scale) { 40 | LOG(FATAL) << "NotImplemented"; 41 | } 42 | 43 | template 44 | inline void SigmoidCrossEntropyBackward(const Tensor &data, 45 | const Tensor &label, 46 | Tensor &d_data, 47 | Tensor &count, 48 | Tensor &count_sum, 49 | T scale) { 50 | LOG(FATAL) << "NotImplemented"; 51 | } 52 | 53 | } 54 | 55 | namespace mxnet { 56 | namespace op { 57 | template<> 58 | Operator *CreateOp(SigmoidCrossEntropyParam param, int dtype) { 59 | Operator *op = NULL; 60 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 61 | op = new SigmoidCrossEntropyOp(param); 62 | }) 63 | return op; 64 | } 65 | 66 | // DO_BIND_DISPATCH comes from operator_common.h 67 | Operator *SigmoidCrossEntropyProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 68 | std::vector *in_type) const { 69 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 70 | } 71 | 72 | DMLC_REGISTER_PARAMETER(SigmoidCrossEntropyParam); 73 | 74 | MXNET_REGISTER_OP_PROPERTY(_contrib_SigmoidCrossEntropy, SigmoidCrossEntropyProp) 75 | .describe(R"DOC( 76 | Compute sigmoid activations followed by averaged binary cross entropy loss. The 77 | target values may be in {-1, 0, 1}, where -1 indicates that the corresponding 78 | sample should be ignored and {0, 1} correspond to the binary classes 0 and 1. By 79 | default the loss is divided by the number of targets > -1 and then multiplied by 80 | the `grad_scale` op argument. The divisive normalization may be disable by setting 81 | the op argument `normalize` to 0 (the multiplication by `scale` still takes 82 | effect). 83 | This op fuses sigmoid and cross entropy for numerical stability in both forward 84 | and gradient computation. 
85 | )DOC" ADD_FILELINE) 86 | .add_argument("data", "NDArray-or-Symbol", "Input array.") 87 | .add_argument("label", "NDArray-or-Symbol", "Ground truth label.") 88 | .add_arguments(SigmoidCrossEntropyParam::__FIELDS__()); 89 | 90 | 91 | } // namespace op 92 | } // namespace mxnet 93 | -------------------------------------------------------------------------------- /operator_cxx/contrib/global_average_pooling.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file global_average_pooling.cc 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Chenxia Han 25 | */ 26 | #include "./global_average_pooling-inl.h" 27 | 28 | namespace mshadow { 29 | template 30 | inline void GAPForward(const Tensor &out, 31 | const Tensor &data) { 32 | // NOT_IMPLEMENTED 33 | return; 34 | } 35 | 36 | template 37 | inline void GAPBackward(const Tensor &in_grad, 38 | const Tensor &out_grad) { 39 | // NOT_IMPLEMENTED 40 | return; 41 | } 42 | } // namespace mshadow 43 | 44 | namespace mxnet { 45 | namespace op { 46 | 47 | template<> 48 | Operator *CreateOp(GAPParam param, int dtype) { 49 | Operator *op = NULL; 50 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 51 | op = new GAPOp(param); 52 | }); 53 | 54 | return op; 55 | } 56 | 57 | // DO_BIND_DISPATCH comes from operator_common.h 58 | Operator* GAPProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 59 | std::vector *in_type) const { 60 | std::vector out_shape, aux_shape; 61 | std::vector out_type, aux_type; 62 | CHECK(InferType(in_type, &out_type, &aux_type)); 63 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 64 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(GAPParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_GAP, GAPProp) 70 | .describe(R"code(This operator is DEPRECATED. 71 | Perform pooling on the input. 72 | 73 | The shapes for 2-D pooling is 74 | 75 | - **data**: *(batch_size, channel, height, width)* 76 | - **out**: *(batch_size, num_filter, out_height, out_width)*, with:: 77 | 78 | out_height = f(height, kernel[0], pad[0], stride[0]) 79 | out_width = f(width, kernel[1], pad[1], stride[1]) 80 | 81 | The definition of *f* depends on ``pooling_convention``, which has two options: 82 | 83 | - **valid** (default):: 84 | 85 | f(x, k, p, s) = floor((x+2*p-k)/s)+1 86 | 87 | - **full**, which is compatible with Caffe:: 88 | 89 | f(x, k, p, s) = ceil((x+2*p-k)/s)+1 90 | 91 | But ``global_pool`` is set to be true, then do a global pooling, namely reset 92 | ``kernel=(height, width)``. 
93 | 94 | Three pooling options are supported by ``pool_type``: 95 | 96 | - **avg**: average pooling 97 | - **max**: max pooling 98 | - **sum**: sum pooling 99 | 100 | 1-D pooling is special case of 2-D pooling with *weight=1* and 101 | *kernel[1]=1*. 102 | 103 | For 3-D pooling, an additional *depth* dimension is added before 104 | *height*. Namely the input data will have shape *(batch_size, channel, depth, 105 | height, width)*. 106 | 107 | )code" ADD_FILELINE) 108 | .add_argument("data", "NDArray-or-Symbol", "Input data to the pooling operator.") 109 | .add_arguments(GAPParam::__FIELDS__()); 110 | 111 | } // namespace op 112 | } // namespace mxnet 113 | -------------------------------------------------------------------------------- /models/tridentnet/README.md: -------------------------------------------------------------------------------- 1 | ## Scale-Aware Trident Networks for Object Detection 2 | 3 | Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang 4 | 5 |

6 | 7 | ### Introduction 8 | 9 | This repository implements [TridentNet](https://arxiv.org/abs/1901.01892) in the SimpleDet framework. 10 | 11 | Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. Then, we propose a scale-aware training scheme to specialize each branch by sampling object instances of proper scales for training. As a bonus, a fast approximation version of TridentNet could achieve significant improvements without any additional parameters and computational cost. On the COCO dataset, our TridentNet with ResNet-101 backbone achieves state-of-the-art single-model results by obtaining an mAP of 48.4. 12 | 13 | #### Trident Blocks 14 | 15 | - Dilated convolution for efficient scale enumeration 16 | - Weight sharing between convs for uniform representation 17 | 18 |
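As a rough illustration of these two ideas, a 3-branch Trident convolution can be sketched with the plain MXNet symbol API. The function and variable names below are made up for the example; this is not the repository's actual builder code.

```python
# illustrative sketch of a 3-branch Trident convolution with shared weights
import mxnet as mx

def trident_conv(data, name, num_filter, dilates=(1, 2, 3)):
    # one weight/bias pair shared by all branches
    weight = mx.sym.var(name + "_weight")
    bias = mx.sym.var(name + "_bias")
    branches = []
    for d in dilates:
        branches.append(mx.sym.Convolution(
            data=data, weight=weight, bias=bias, num_filter=num_filter,
            kernel=(3, 3), pad=(d, d), dilate=(d, d),
            name="{}_dilate{}".format(name, d)))
    return branches  # same parameters, three different receptive fields
```

Because every branch reuses the same `weight` and `bias`, the block adds no parameters over a single convolution, while the dilation rates give each branch a different receptive field.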

19 | 20 | The above figure shows how to convert bottleneck residual blocks to 3-branch Trident Blocks. The dilation rate of three branches are set as 1, 2 and 3, respectively. 21 | 22 | ### Use TridentNet 23 | 24 | Please setup SimpleDet following [README](../../README.md) and [INSTALL](../../doc/INSTALL.md) and use the TridentNet configuration files in the `config` folder. 25 | 26 | ### Results on MS-COCO 27 | 28 | | | Backbone | Test data | mAP@[0.5:0.95] | Link | 29 | | --------------------------- | ---------- | --------- | :------------: | -----| 30 | | Faster R-CNN, 1x | ResNet-101 | minival | 37.6 |[model](https://1dv.aflat.top/faster_r101v2c4_c5_256roi_1x.zip)| 31 | | TridentNet, 1x | ResNet-101 | minival | 40.6 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_1x.zip)| 32 | | TridentNet, 1x, Fast Approx | ResNet-101 | minival | 39.9 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_fastapprox_1x.zip)| 33 | | TridentNet, 2x | ResNet-101 | test-dev | 42.8 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_addminival_2x.zip)| 34 | | TridentNet*, 3x | ResNet-101 | test-dev | 48.4 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.zip)| 35 | 36 | Note: 37 | 1. These models are not trained in SimpleDet. Re-training these models in SimpleDet gives a slightly better result. 38 | 2. TridentNet* - TridentNet = extended training + softNMS + multi-scale training/testing + syncBN + DCNv1. 39 | 40 | ### Results on MS-COCO with stronger baselines 41 | All config files are available in [config/resnet_v1b](../../config/resnet_v1b). 42 | 43 | |Model|Backbone|Head|Train Schedule|AP|AP50|AP75|APs|APm|APl| 44 | |-----|--------|----|--------------|--|----|----|---|---|---| 45 | |Faster|R50v1b-C4|C5-512ROI|2X|36.9|57.9|39.3|19.9|41.4|50.2| 46 | |Trident|R50v1b-C4|C5-128ROI|2X|39.6|60.9|42.9|22.5|44.5|53.9| 47 | |TridentFast|R50v1b-C4|C5-128ROI|2X|39.0|60.2|41.8|20.8|43.6|53.8| 48 | |Faster|R101v1b-C4|C5-512ROI|2X|40.5|61.2|43.8|22.5|44.8|55.4| 49 | |Trident|R101v1b-C4|C5-128ROI|2X|43.0|64.3|46.3|25.3|47.9|58.4| 50 | |TridentFast|R101v1b-C4|C5-128ROI|2X|42.5|63.7|46.0|23.3|46.7|59.3| 51 | |Faster|R152v1b-C4|C5-512ROI|2X|41.8|62.4|45.2|23.2|46.0|56.9| 52 | |Trident|R152v1b-C4|C5-128ROI|2X|44.4|65.4|48.3|26.4|49.4|59.6| 53 | |TridentFast|R152v1b-C4|C5-128ROI|2X|43.9|65.1|47.0|25.1|48.1|60.4| 54 | 55 | ### Citing TridentNet 56 | 57 | ``` 58 | @article{li2019scale, 59 | title={Scale-Aware Trident Networks for Object Detection}, 60 | author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, 61 | journal={ICCV 2019}, 62 | year={2019} 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /doc/DATASET.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | This document describes the process of creating roidb from COCO-format, VOC-format or JSON-format annotations. 3 | 4 | ### COCO format 5 | In this section, we create roidb from coco-format annotaions of PASCAL VOC dataset. 
6 | 7 | ```bash 8 | # enter simpledet main directory 9 | cd simpledet 10 | 11 | # create data dir 12 | mkdir -p data/src 13 | pushd data/src 14 | 15 | # download and extract VOC2007 trainval 16 | wget http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 17 | tar xf data/src/VOCtrainval_06-Nov-2007.tar 18 | 19 | # download and extract VOC annotaitons provided by COCO 20 | wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip 21 | unzip PASCAL_VOC.zip 22 | popd 23 | 24 | # create soft links 25 | mkdir -p data/pascal_voc/annotations 26 | ln -s data/src/PASCAL_VOC/pascal_train2007.json data/pascal_voc/annotations/instances_train2007.json 27 | ln -s data/src/PASCAL_VOC/pascal_val2007.json data/pascal_voc/annotations/instances_val2007.json 28 | 29 | mkdir -p data/pascal_voc/images 30 | ln -s data/src/VOCdevkit/VOC2007/JPEGImages data/pascal_voc/images/train2007 31 | ln -s data/src/VOCdevkit/VOC2007/JPEGImages data/pascal_voc/images/val2007 32 | 33 | # annotations/instances_split.json should correspond with images/split 34 | pascal_voc 35 | ├── annotations 36 | │   ├── instances_train2007.json -> data/src/PASCAL_VOC/pascal_train2007.json 37 | │   └── instances_val2007.json -> data/src/PASCAL_VOC/pascal_val2007.json 38 | └── images 39 | ├── train2007 -> data/src/VOCdevkit/VOC2007/JPEGImages 40 | └── val2007 -> data/src/VOCdevkit/VOC2007/JPEGImages 41 | 42 | # generate roidbs 43 | python3 utils/create_coco_roidb.py --dataset pascal_voc --dataset-split train2007 44 | python3 utils/create_coco_roidb.py --dataset pascal_voc --dataset-split val2007 45 | ``` 46 | 47 | 48 | ### VOC format 49 | In this section, we create roidb from voc-format annotaions of clipart dataset. 50 | ```bash 51 | # enter simpledet main directory 52 | cd simpledet 53 | 54 | # create data dir 55 | mkdir -p data/src 56 | pushd data/src 57 | 58 | # download and extract clipart.zip 59 | # courtesy to "Towards Universal Object Detection by Domain Attention" 60 | wget https://1dv.aflat.top/clipart.zip -O clipart.zip 61 | unzip clipart.zip 62 | popd 63 | 64 | # generate roidbs 65 | python3 utils/create_voc_roidb.py --data-dir data/src/clipart --split train 66 | ``` 67 | 68 | ### JSON format 69 | In this section, we create roidb from json-format annotaions of clipart dataset. 70 | 71 | Prepare your own data like the example 72 | ```json 73 | # example.json 74 | [ 75 | { 76 | "gt_class": [1, 5], 77 | "gt_bbox": [[48, 240, 195, 371], [8, 12, 352, 498]], 78 | "flipped": false, 79 | "h": 500, 80 | "w": 353, 81 | "image_url": "/absolute/path/to/VOCdevkit/VOC2007/JPEGImages/000001.jpg", 82 | "im_id": 1 83 | }, 84 | ... 
85 | ] 86 | ``` 87 | 88 | ```bash 89 | python3 utils/json_to_roidb.py --json path/to/your.json 90 | ``` 91 | 92 | ### Existing Annotations 93 | - Cityscapes (coco format) 94 | Check [this](https://github.com/facebookresearch/Detectron/blob/master/tools/convert_cityscapes_to_coco.py) script 95 | - COCO (coco format) 96 | http://cocodataset.org/#download 97 | - DeepLesion (voc format) 98 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 99 | - DOTA (voc format) 100 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 101 | - Kitchen (voc format) 102 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 103 | - KITTI (voc format) 104 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 105 | - VOC (voc format) 106 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 107 | - WiderFace (voc format) 108 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 109 | -------------------------------------------------------------------------------- /operator_cxx/contrib/group_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 15 | and IDIAP Research Institute nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | /* 32 | * Licensed to the Apache Software Foundation (ASF) under one 33 | * or more contributor license agreements. See the NOTICE file 34 | * distributed with this work for additional information 35 | * regarding copyright ownership. The ASF licenses this file 36 | * to you under the Apache License, Version 2.0 (the 37 | * "License"); you may not use this file except in compliance 38 | * with the License. 
You may obtain a copy of the License at 39 | * 40 | * http://www.apache.org/licenses/LICENSE-2.0 41 | * 42 | * Unless required by applicable law or agreed to in writing, 43 | * software distributed under the License is distributed on an 44 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 45 | * KIND, either express or implied. See the License for the 46 | * specific language governing permissions and limitations 47 | * under the License. 48 | */ 49 | 50 | /*! 51 | * \file group_norm.cc 52 | * \author Yuntao Chen 53 | */ 54 | 55 | #include "./group_norm-inl.h" 56 | 57 | namespace mxnet { 58 | namespace op { 59 | template <> 60 | Operator* CreateOp(GroupNormParam param, int dtype) { 61 | LOG(FATAL) << "not implemented."; 62 | return NULL; 63 | } 64 | 65 | // DO_BIND_DISPATCH comes from operator_common.h 66 | Operator* GroupNormProp::CreateOperatorEx(Context ctx, 67 | std::vector* in_shape, 68 | std::vector* in_type) const { 69 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 70 | } 71 | 72 | DMLC_REGISTER_PARAMETER(GroupNormParam); 73 | 74 | MXNET_REGISTER_OP_PROPERTY(_contrib_GroupNorm, GroupNormProp) 75 | .add_argument("data", "NDArray-or-Symbol", 76 | "An n-dimensional input array (n > 2) of the form [batch, " 77 | "channel, spatial_dim1, spatial_dim2, ...].") 78 | .add_argument("gamma", "NDArray-or-Symbol", 79 | "A vector of length \'channel\', which multiplies the " 80 | "normalized input.") 81 | .add_argument("beta", "NDArray-or-Symbol", 82 | "A vector of length \'channel\', which is added to the " 83 | "product of the normalized input and the weight.") 84 | .add_arguments(GroupNormParam::__FIELDS__()) 85 | .describe(R"code(Group Normalization (GN) operation: https://arxiv.org/abs/1803.08494)code" ADD_FILELINE); 86 | } // namespace op 87 | } // namespace mxnet 88 | -------------------------------------------------------------------------------- /models/se/builder.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnext as X 3 | from mxnext import conv, relu, add, sigmoid 4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit 5 | from symbol.builder import Backbone 6 | from models.efficientnet.builder import se 7 | from models.dcn.builder import hybrid_resnet_fpn_builder 8 | from models.maskrcnn.builder import MaskFasterRcnnHead 9 | 10 | 11 | def se_resnet_v1b_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 12 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 13 | bn1 = norm(conv1, name=name + "_bn1") 14 | relu1 = relu(bn1, name=name + "_relu1") 15 | 16 | conv2 = conv(relu1, name=name + "_conv2", stride=stride, filter=filter // 4, kernel=3) 17 | bn2 = norm(conv2, name=name + "_bn2") 18 | relu2 = relu(bn2, name=name + "_relu2") 19 | 20 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 21 | bn3 = norm(conv3, name=name + "_bn3") 22 | bn3 = se(bn3, prefix=name + "_se3", f_down=filter // 16, f_up=filter) 23 | 24 | 25 | if proj: 26 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 27 | shortcut = norm(shortcut, name=name + "_sc_bn") 28 | else: 29 | shortcut = input 30 | 31 | eltwise = add(bn3, shortcut, name=name + "_plus") 32 | 33 | return relu(eltwise, name=name + "_relu") 34 | 35 | 36 | def se_v2_resnet_v1b_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 37 | """ 38 | diff with v1: move the SE module to 3x3 conv 39 | """ 40 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 41 | bn1 = norm(conv1, name=name + 
"_bn1") 42 | relu1 = relu(bn1, name=name + "_relu1") 43 | 44 | conv2 = conv(relu1, name=name + "_conv2", stride=stride, filter=filter // 4, kernel=3) 45 | bn2 = norm(conv2, name=name + "_bn2") 46 | relu2 = relu(bn2, name=name + "_relu2") 47 | relu2 = se(relu2, prefix=name + "_se2", f_down=filter // 16, f_up=filter // 4) 48 | 49 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 50 | bn3 = norm(conv3, name=name + "_bn3") 51 | 52 | if proj: 53 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 54 | shortcut = norm(shortcut, name=name + "_sc_bn") 55 | else: 56 | shortcut = input 57 | 58 | eltwise = add(bn3, shortcut, name=name + "_plus") 59 | 60 | return relu(eltwise, name=name + "_relu") 61 | 62 | 63 | SEResNetV1bFPN = hybrid_resnet_fpn_builder(se_resnet_v1b_unit) 64 | SEv2ResNetV1bFPN = hybrid_resnet_fpn_builder(se_v2_resnet_v1b_unit) 65 | 66 | 67 | class MaskRcnnSe4convHead(MaskFasterRcnnHead): 68 | def __init__(self, pBbox, pMask, pMaskRoi): 69 | super().__init__(pBbox, pMask, pMaskRoi) 70 | 71 | def _get_mask_head_logit(self, conv_feat): 72 | if self._head_feat is not None: 73 | return self._head_feat 74 | 75 | up_stride = int(self.pMask.resolution // self.pMaskRoi.out_size) 76 | dim_reduced = self.pMask.dim_reduced 77 | 78 | msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2) 79 | 80 | current = conv_feat 81 | for i in range(4): 82 | current = X.conv( 83 | current, 84 | name="mask_fcn_conv{}".format(i + 1), 85 | filter=dim_reduced, 86 | kernel=3, 87 | no_bias=False, 88 | init=msra_init 89 | ) 90 | current = self.add_norm(current) 91 | current = X.relu(current) 92 | current = se(current, "mask_fcn_se{}".format(i + 1), f_down=dim_reduced // 4, f_up=dim_reduced) 93 | 94 | mask_up = current 95 | for i in range(up_stride // 2): 96 | weight = X.var( 97 | name="mask_up{}_weight".format(i), 98 | init=msra_init, 99 | lr_mult=1, 100 | wd_mult=1) 101 | mask_up = mx.sym.Deconvolution( 102 | mask_up, 103 | kernel=(2, 2), 104 | stride=(2, 2), 105 | num_filter=dim_reduced, 106 | no_bias=False, 107 | weight=weight, 108 | name="mask_up{}".format(i) 109 | ) 110 | mask_up = X.relu( 111 | mask_up, 112 | name="mask_up{}_relu".format(i)) 113 | 114 | mask_up = X.to_fp32(mask_up, name='mask_up_to_fp32') 115 | self._head_feat = mask_up 116 | 117 | return self._head_feat -------------------------------------------------------------------------------- /models/KD/builder.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import math 5 | import mxnet as mx 6 | import mxnext as X 7 | 8 | from symbol.builder import FasterRcnn, RpnHead, Backbone, Neck 9 | from models.retinanet.builder import RetinaNet 10 | 11 | 12 | class FitNetHead(object): 13 | def __init__(self, pKD): 14 | super().__init__() 15 | self.p = pKD 16 | self._student_feat = None 17 | 18 | def get_student_feat(self, mimic_feat, mimic_channel): 19 | if self._student_feat: 20 | return self._student_feat 21 | 22 | mimic_channel = self.p.channel 23 | student_hint = mx.sym.Convolution(data=mimic_feat, 24 | num_filter=mimic_channel, 25 | kernel=(1, 1), 26 | stride=(1, 1), 27 | pad=(0, 0), 28 | name="student_hint_conv") 29 | student_hint = mx.sym.Activation(data=student_hint, 30 | act_type='relu', 31 | name="student_hint_relu") 32 | return student_hint 33 | 34 | def get_loss(self, feat_dict, label): 35 | mimic_stage = self.p.stage 36 | mimic_channel = self.p.channel 37 | 
mimic_grad_scale = self.p.grad_scale 38 | 39 | student_feat = self.get_student_feat(feat_dict[mimic_stage], mimic_channel) 40 | fit_loss = mx.sym.mean(mx.sym.square(student_feat - label)) 41 | fit_loss = mx.sym.MakeLoss(fit_loss, grad_scale=mimic_grad_scale, name="fit_loss") 42 | return fit_loss 43 | 44 | 45 | class FitNetRetinaNet(RetinaNet): 46 | def __init__(self): 47 | super().__init__() 48 | 49 | @staticmethod 50 | def get_train_symbol(backbone, neck, head, kd_head): 51 | rpn_cls_label = X.var("rpn_cls_label") 52 | rpn_reg_target = X.var("rpn_reg_target") 53 | rpn_reg_weight = X.var("rpn_reg_weight") 54 | teacher_label = X.var("teacher_label") 55 | 56 | feat = backbone.get_rpn_feature() 57 | c2, c3, c4, c5 = feat 58 | feat_dict = {'c2': c2, 59 | 'c3': c3, 60 | 'c4': c4, 61 | 'c5': c5} 62 | feat = neck.get_rpn_feature(feat) 63 | 64 | loss = head.get_loss(feat, rpn_cls_label, rpn_reg_target, rpn_reg_weight) 65 | kd_loss = kd_head.get_loss(feat_dict, teacher_label) 66 | 67 | return X.group(loss + (kd_loss, )) 68 | 69 | 70 | class FitNetFasterRcnn(FasterRcnn): 71 | def __init__(self): 72 | super().__init__() 73 | 74 | @classmethod 75 | def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head, kd_head): 76 | gt_bbox = X.var("gt_bbox") 77 | im_info = X.var("im_info") 78 | teacher_label = X.var("teacher_label") 79 | 80 | rpn_feat = backbone.get_rpn_feature() 81 | c2, c3, c4, c5 = rpn_feat 82 | feat_dict = {'c2': c2, 83 | 'c3': c3, 84 | 'c4': c4, 85 | 'c5': c5} 86 | rcnn_feat = backbone.get_rcnn_feature() 87 | rpn_feat = neck.get_rpn_feature(rpn_feat) 88 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 89 | 90 | rpn_head.get_anchor() 91 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 92 | proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info) 93 | roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal) 94 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 95 | 96 | kd_loss = kd_head.get_loss(feat_dict, teacher_label) 97 | 98 | return X.group(rpn_loss + bbox_loss + (kd_loss, )) 99 | 100 | @classmethod 101 | def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 102 | rec_id, im_id, im_info, proposal, proposal_score = \ 103 | FitNetFasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 104 | 105 | rcnn_feat = backbone.get_rcnn_feature() 106 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 107 | 108 | roi_feat = roi_extractor.get_roi_feature_test(rcnn_feat, proposal) 109 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 110 | 111 | return X.group([rec_id, im_id, im_info, cls_score, bbox_xyxy]) 112 | -------------------------------------------------------------------------------- /doc/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Setup Locally with Pre-built Wheel 2 | We provide pre-built wheel for python >= 3.4, Ubuntu >= 14.04 or CentOS >=7. The wheels are staticly linked so no dependency other than CUDA is required. 
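Once one of the wheels below is installed, a quick sanity check can confirm that MXNet imports and can place an array on the GPU. This is a minimal sketch, not part of the official install steps; it assumes a working NVIDIA driver and at least one visible GPU.

```python
# Hypothetical verification snippet (assumption: CUDA wheel installed, GPU 0 visible):
# print the MXNet version and allocate a tiny NDArray on GPU 0.
import mxnet as mx

print(mx.__version__)
print(mx.nd.zeros((1,), ctx=mx.gpu(0)))
```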
3 | 4 | [Download wheel for CUDA-9.0(2019.8.20)](https://1dv.aflat.top/mxnet_cu90-1.6.0b20190820-py2.py3-none-manylinux1_x86_64.whl) 5 | 6 | [Download wheel for CUDA-10.0(2019.12.14)](https://1dv.aflat.top/mxnet_cu100-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 7 | 8 | [Download wheel for CUDA-10.1(2019.12.14)](https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 9 | 10 | Install the wheel with 11 | ```bash 12 | pip3 install https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl --user 13 | ``` 14 | 15 | 16 | 17 | ## Setup with Docker 18 | We provide pre-built Docker images for both CUDA 9.0 and CUDA 10.0. 19 | 20 | Maxwell, Pascal, Volta and Turing GPUs are supported. 21 | 22 | For nvidia-driver >= 410.48, the cuda10 image is recommended. 23 | 24 | For nvidia-driver >= 384.81, the cuda9 image is recommended. 25 | 26 | An Aliyun Beijing mirror is provided for users pulling from China. 27 | 28 | ```bash 29 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda9 zsh 30 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda10 zsh 31 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda9 zsh 32 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda10 zsh 33 | ``` 34 | 35 | ## Setup with Singularity 36 | We recommend that users adopt Singularity as the default environment manager to minimize configuration effort. 37 | Singularity is a virtual environment manager like virtualenv, but at the system level. 38 | 39 | #### Install Singularity >= 2.6 40 | ```bash 41 | # install dependency 42 | sudo apt update 43 | sudo apt install build-essential python libarchive-dev 44 | 45 | # install singularity 46 | wget https://github.com/sylabs/singularity/releases/download/2.6.1/singularity-2.6.1.tar.gz 47 | tar xzfv singularity-2.6.1.tar.gz 48 | cd singularity-2.6.1 49 | ./configure --prefix=/usr/local 50 | make 51 | sudo make install 52 | ``` 53 | 54 | #### Download singularity image for SimpleDet 55 | ```bash 56 | wget https://1dv.aflat.top/simpledet.img 57 | ``` 58 | 59 | #### Invoke simpledet shell 60 | Here we need to map the working directory into the singularity shell. Note that **symlinks to files outside the working directory will not work** since singularity has its own filesystem. Thus we recommend mapping the whole data storage into singularity by replacing $WORKDIR with something like `/data` or `/mnt/`.
61 | 62 | ```bash 63 | sudo singularity shell --no-home --nv -s /usr/bin/zsh --bind $WORKDIR /path/to/simpledet.img 64 | ``` 65 | 66 | ## Setup from Scratch 67 | #### System Requirements 68 | - Ubuntu 16.04 69 | - Python >= 3.5 70 | 71 | #### Install CUDA, cuDNN and NCCL 72 | 73 | #### Install cocotools 74 | ```bash 75 | # Install a patched cocotools for python3 76 | pip3 install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' 77 | ``` 78 | 79 | #### Install MXNet 80 | ```bash 81 | # Install dependency 82 | sudo apt-get update 83 | sudo apt-get install -y build-essential git 84 | sudo apt-get install -y libopenblas-dev 85 | ``` 86 | 87 | ```bash 88 | git clone --recursive https://github.com/apache/incubator-mxnet /tmp/mxnet && \ 89 | git clone https://github.com/Tusimple/simpledet /tmp/simpledet && \ 90 | git clone https://github.com/RogerChern/cocoapi /tmp/cocoapi && \ 91 | cp -r /tmp/simpledet/operator_cxx/* /tmp/mxnet/src/operator && \ 92 | mkdir -p /tmp/mxnet/src/coco_api && \ 93 | cp -r /tmp/cocoapi/common /tmp/mxnet/src/coco_api && \ 94 | cd /tmp/mxnet && \ 95 | echo "USE_SIGNAL_HANDLER = 1" >> ./config.mk && \ 96 | echo "USE_OPENCV = 0" >> ./config.mk && \ 97 | echo "USE_MKLDNN = 0" >> ./config.mk && \ 98 | echo "USE_BLAS = openblas" >> ./config.mk && \ 99 | echo "USE_CUDA = 1" >> ./config.mk && \ 100 | echo "USE_CUDA_PATH = /usr/local/cuda" >> ./config.mk && \ 101 | echo "USE_CUDNN = 1" >> ./config.mk && \ 102 | echo "USE_NCCL = 1" >> ./config.mk && \ 103 | echo "USE_DIST_KVSTORE = 1" >> ./config.mk && \ 104 | echo "CUDA_ARCH = -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70" >> ./config.mk && \ 105 | rm /tmp/mxnet/src/operator/nn/group_norm* && \ 106 | make -j$((`nproc`-1)) && \ 107 | cd python && \ 108 | python3 setup.py install && \ 109 | rm -rf /tmp/mxnet /tmp/simpledet /tmp/cocoapi 110 | ``` 111 | -------------------------------------------------------------------------------- /utils/callback.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import mxnet as mx 4 | 5 | 6 | class Speedometer(object): 7 | def __init__(self, batch_size, total_iter, frequent=50): 8 | self.batch_size = batch_size 9 | self.total_iter = total_iter 10 | self.frequent = frequent 11 | self.init = False 12 | self.tic = 0 13 | self.last_count = 0 14 | 15 | def __call__(self, param): 16 | """Callback to Show speed.""" 17 | count = param.nbatch 18 | if self.last_count > count: 19 | self.init = False 20 | self.last_count = count 21 | 22 | if self.init: 23 | if count % self.frequent == 0: 24 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 25 | if param.eval_metric is not None: 26 | name, value = param.eval_metric.get() 27 | s = "Epoch[%d] Batch [%d]\tIter: %d/%d\tLr: %.5f\tSpeed: %.2f samples/sec\tTrain-" % \ 28 | (param.epoch, count, param.iter, self.total_iter, param.lr, speed) 29 | for n, v in zip(name, value): 30 | s += "%s=%f,\t" % (n, v) 31 | logging.info(s) 32 | else: 33 | logging.info("Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec", 34 | param.epoch, count, speed) 35 | self.tic = time.time() 36 | else: 37 | self.init = True 38 | self.tic = time.time() 39 | 40 | class DetailSpeedometer(object): 41 | def __init__(self, batch_size, frequent=50): 42 | self.batch_size = batch_size 43 | self.frequent = frequent 44 | self.init = False 45 | self.tic = 0 46 | self.last_count = 0 47 | 48 | def 
__call__(self, param): 49 | """Callback to Show speed.""" 50 | count = param.nbatch 51 | rank = param.rank 52 | total_iter = param.total_iter 53 | 54 | if self.last_count > count: 55 | self.init = False 56 | self.last_count = count 57 | 58 | if self.init: 59 | if count % self.frequent == 0: 60 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 61 | if param.eval_metric is not None: 62 | name, value = param.eval_metric.get() 63 | s = "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\t" \ 64 | "data:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec\tTrain-" % ( 65 | param.epoch, rank, count, total_iter, 66 | param.cur_batch_time, param.avg_batch_time, 67 | param.cur_kvstore_sync_time, param.avg_kvstore_sync_time, 68 | param.cur_data_time, param.avg_data_time, 69 | param.cur_iter_total_time, param.avg_iter_total_time, 70 | speed) 71 | for n, v in zip(name, value): 72 | s += "%s=%f,\t" % (n, v) 73 | logging.info(s) 74 | else: 75 | logging.info( 76 | "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\tdata:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec", 77 | param.epoch, rank, count, total_iter, 78 | param.cur_batch_time, param.avg_batch_time, 79 | param.cur_kvstore_sync_time, param.avg_kvstore_sync_time, 80 | param.cur_data_time, param.avg_data_time, 81 | param.cur_iter_total_time, param.avg_iter_total_time, 82 | speed) 83 | 84 | self.tic = time.time() 85 | else: 86 | self.init = True 87 | self.tic = time.time() 88 | 89 | 90 | def do_checkpoint(prefix): 91 | def _callback(iter_no, sym, arg, aux): 92 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 93 | return _callback 94 | 95 | 96 | def do_checkpoint_iter(prefix, checkpoint_iter): 97 | def _callback(param): 98 | if checkpoint_iter == param.locals["total_iter"]: 99 | arg_params, aux_params = param.locals["self"].get_params() 100 | save_dict = {('arg:%s' % k) : v.as_in_context(mx.cpu()) for k, v in arg_params.items()} 101 | save_dict.update({('aux:%s' % k) : v.as_in_context(mx.cpu()) for k, v in aux_params.items()}) 102 | param_name = '%s-iter-%s.params' % (prefix, checkpoint_iter) 103 | mx.nd.save(param_name, save_dict) 104 | logging.info('Saved checkpoint to \"%s\"', param_name) 105 | return _callback 106 | -------------------------------------------------------------------------------- /doc/FRAMEWOKR_OVERVIEW.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | ### Scripts 4 | - `detection_train.py` 5 | - `detection_test.py` 6 | - `rpn_test.py` 7 | - `mask_test.py` 8 | 9 | 10 | ### Top-level Singletons (Detectors) 11 | Top-level singletons generally represent a unique kind of detection method (detector). 12 | By unique we mean that the detector has a pipeline that cannot be easily adapted from any existing detector. 13 | Detectors now include 14 | - RPN 15 | - RetinaNet 16 | - KD RetinaNet 17 | - Fast R-CNN 18 | - Faster R-CNN 19 | - KD Faster R-CNN 20 | - Mask R-CNN 21 | - Cascade R-CNN 22 | 23 | Every detector has a `get_train_symbol` method. 24 | Each may have one or more of `get_bbox_test_symbol`, `get_mask_test_symbol`, `get_rpn_test_symbol`, and `get_kp_test_symbol` methods. 25 | 26 | Here we leave the API design of `get_xxx_symbol` to the user because 27 | - Detector design should not be bound by the framework. 28 | - The user is responsible for constructing the `train_sym` used in `detection_train.py` in the config.
29 | 30 | We provide a detailed annotated Mask R-CNN as an example. 31 | ``` python 32 | class MaskRcnn(object): 33 | def __init__(self): 34 | pass 35 | 36 | @staticmethod 37 | def get_train_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head): 38 | # mask r-cnn needs ground truth bboxes and instance polygons to generate the target for training 39 | gt_bbox = X.var("gt_bbox") 40 | gt_poly = X.var("gt_poly") 41 | # im_info contains the width and height of image before padding and is use to remove anchors or 42 | # proposals out of image 43 | im_info = X.var("im_info") 44 | 45 | # backbone network provide feature map for sub-networks 46 | rpn_feat = backbone.get_rpn_feature() 47 | rcnn_feat = backbone.get_rcnn_feature() 48 | # neck is used for feature fusion across scales or dimension reduction 49 | rpn_feat = neck.get_rpn_feature(rpn_feat) 50 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 51 | 52 | # cache anchor for generating anchor target and bbox target 53 | rpn_head.get_anchor() 54 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 55 | # calculate bbox_target, mask_target from rpn proposals 56 | proposal, bbox_cls, bbox_target, bbox_weight, mask_proposal, mask_target, mask_ind = \ 57 | rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, gt_poly, im_info) 58 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 59 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, mask_proposal) 60 | 61 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 62 | mask_loss = mask_head.get_loss(mask_roi_feat, mask_target, mask_ind) 63 | return X.group(rpn_loss + bbox_loss + mask_loss) 64 | 65 | @staticmethod 66 | def get_test_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor): 67 | rec_id, im_id, im_info, proposal, proposal_score = \ 68 | MaskRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 69 | 70 | rcnn_feat = backbone.get_rcnn_feature() 71 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 72 | 73 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 74 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 75 | 76 | # during test, in order to save computation, only the top 100 bbox after NMS are used for mask prediction 77 | post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy) 78 | 79 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy) 80 | mask = mask_head.get_prediction(mask_roi_feat) 81 | 82 | # the layout is fixed for mask_test.py 83 | return X.group([rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls, mask]) 84 | 85 | @staticmethod 86 | def get_rpn_test_symbol(backbone, neck, rpn_head): 87 | return FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 88 | ``` 89 | 90 | 91 | ### Components 92 | Components now include 93 | - Backbone 94 | - ResNet 95 | - ResNeXt 96 | - TridentNet 97 | - EfficientNet 98 | - DCNv1/v2 99 | - Neck 100 | - ReduceNeck 101 | - FPNNeck 102 | - NASFPNNeck 103 | - RpnHead 104 | - RpnHead 105 | - FPNRpnHead 106 | - MaskRpnHead 107 | - TridentRpnHead 108 | - RetinaNetHead 109 | - RoIExtractor 110 | - RoIAlign 111 | - FPNRoIAlign 112 | - BboxHead 113 | - BboxResNetv1C5Head 114 | - BboxResNetv2C5Head 115 | - BboxResNeXtC5Head 116 | - Bbox2fcHead 117 | - Bbox4conv1fcHead 118 | - BboxDualHead 119 | - MaskHead 120 | - MaskHead 121 | 
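To make the component contract concrete, below is a minimal, hypothetical wiring sketch of the call a config makes, mirroring the `get_train_symbol` signature used by `FitNetFasterRcnn` in `models/KD/builder.py` (minus its KD head). The helper name `build_train_symbol` is an illustrative assumption rather than code shipped in this repository; each argument is a pre-constructed component instance drawn from the families listed above (e.g. a `DCNResNetFPN` backbone, an `FPNNeck`, an `FPNRpnHead`, an `FPNRoIAlign`, a `Bbox2fcHead`).

``` python
# Hypothetical sketch: the config builds component instances and the detector
# merely orchestrates them; no detector code changes when a component is swapped.
from symbol.builder import FasterRcnn


def build_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head):
    # Each argument is a component instance (Backbone, Neck, RpnHead,
    # RoIExtractor, BboxHead). The detector chains their get_*_feature and
    # get_loss calls, exactly as the annotated MaskRcnn example above does.
    return FasterRcnn.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head)
```

Swapping a backbone or a neck therefore only changes which instances the config constructs; the detector itself stays untouched.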
-------------------------------------------------------------------------------- /operator_cxx/contrib/generate_anchor.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file generate_anchor.cc 22 | * \brief 23 | * \author Yanghao Li, Chenxia Han 24 | */ 25 | 26 | #include "./generate_anchor-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template 32 | class GenAnchorOp : public Operator{ 33 | public: 34 | explicit GenAnchorOp(GenAnchorParam param) { 35 | this->param_ = param; 36 | } 37 | 38 | virtual void Forward(const OpContext &ctx, 39 | const std::vector &in_data, 40 | const std::vector &req, 41 | const std::vector &out_data, 42 | const std::vector &aux_states) { 43 | using namespace mshadow; 44 | using namespace mshadow::expr; 45 | CHECK_EQ(in_data.size(), 1); 46 | CHECK_EQ(out_data.size(), 1); 47 | CHECK_EQ(req.size(), 1); 48 | CHECK_EQ(req[gen_anchor::kOut], kWriteTo); 49 | 50 | Stream *s = ctx.get_stream(); 51 | Tensor scores = in_data[gen_anchor::kClsProb].get(s); 52 | 53 | Tensor out = out_data[gen_anchor::kOut].get(s); 54 | 55 | std::vector scales(param_.scales.begin(), param_.scales.end()); 56 | std::vector ratios(param_.ratios.begin(), param_.ratios.end()); 57 | 58 | int num_anchors = scales.size() * ratios.size(); 59 | int height = scores.size(2); 60 | int width = scores.size(3); 61 | 62 | // Generate anchors 63 | std::vector base_anchor({ 64 | 0.0f, 0.0f, param_.feature_stride - 1.0f, param_.feature_stride - 1.0f 65 | }); 66 | std::vector anchors; 67 | gen_anchor_utils::GenerateAnchors( 68 | base_anchor, ratios, scales, anchors 69 | ); 70 | 71 | // Enumerate all shifted anchors 72 | for (index_t i = 0; i < num_anchors; ++i) { 73 | for (index_t j = 0; j < height; ++j) { 74 | for (index_t k = 0; k < width; ++k) { 75 | index_t index = j * (width * num_anchors) + k * (num_anchors) + i; 76 | out[index][0] = static_cast(anchors[i * 4 + 0] + k * param_.feature_stride); 77 | out[index][1] = static_cast(anchors[i * 4 + 1] + j * param_.feature_stride); 78 | out[index][2] = static_cast(anchors[i * 4 + 2] + k * param_.feature_stride); 79 | out[index][3] = static_cast(anchors[i * 4 + 3] + j * param_.feature_stride); 80 | } 81 | } 82 | } 83 | } 84 | 85 | virtual void Backward(const OpContext &ctx, 86 | const std::vector &out_grad, 87 | const std::vector &in_data, 88 | const std::vector &out_data, 89 | const std::vector &req, 90 | const std::vector &in_grad, 91 | const std::vector &aux_states) { 92 | using namespace mshadow; 93 | using namespace mshadow::expr; 94 | CHECK_EQ(in_grad.size(), 1); 95 | 96 | Stream *s = ctx.get_stream(); 97 | Tensor gscores = 
in_grad[gen_anchor::kClsProb].get(s); 98 | 99 | // can not assume the grad would be zero 100 | Assign(gscores, req[gen_anchor::kClsProb], 0); 101 | } 102 | 103 | private: 104 | GenAnchorParam param_; 105 | }; // class GenAnchorOp 106 | 107 | template<> 108 | Operator *CreateOp(GenAnchorParam param) { 109 | return new GenAnchorOp(param); 110 | } 111 | 112 | Operator* GenAnchorProp::CreateOperator(Context ctx) const { 113 | DO_BIND_DISPATCH(CreateOp, param_); 114 | } 115 | 116 | DMLC_REGISTER_PARAMETER(GenAnchorParam); 117 | 118 | MXNET_REGISTER_OP_PROPERTY(_contrib_GenAnchor, GenAnchorProp) 119 | .describe("Generate region anchors") 120 | .add_argument("cls_prob", "NDArray-or-Symbol", "Probability of how likely proposal is object.") 121 | .add_arguments(GenAnchorParam::__FIELDS__()); 122 | 123 | } // namespace op 124 | } // namespace mxnet 125 | -------------------------------------------------------------------------------- /models/dcn/builder.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnext as X 3 | from mxnext import conv, relu, add 4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit 5 | from symbol.builder import Backbone 6 | 7 | 8 | def dcn_resnet_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 9 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 10 | bn1 = norm(conv1, name=name + "_bn1") 11 | relu1 = relu(bn1, name=name + "_relu1") 12 | 13 | # conv2 filter router 14 | conv2_offset = conv(relu1, name=name + "_conv2_offset", filter=72, kernel=3, stride=stride, dilate=dilate) 15 | conv2 = mx.sym.contrib.DeformableConvolution(relu1, conv2_offset, kernel=(3, 3), 16 | stride=(stride, stride), dilate=(dilate, dilate), pad=(1, 1), num_filter=filter // 4, 17 | num_deformable_group=4, no_bias=True, name=name + "_conv2") 18 | bn2 = norm(conv2, name=name + "_bn2") 19 | relu2 = relu(bn2, name=name + "_relu2") 20 | 21 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 22 | bn3 = norm(conv3, name=name + "_bn3") 23 | 24 | if proj: 25 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 26 | shortcut = norm(shortcut, name=name + "_sc_bn") 27 | else: 28 | shortcut = input 29 | 30 | eltwise = add(bn3, shortcut, name=name + "_plus") 31 | 32 | return relu(eltwise, name=name + "_relu") 33 | 34 | 35 | def hybrid_resnet_stage(data, name, num_block, num_special_block, special_res_unit, filter, 36 | stride, dilate, norm, **kwargs): 37 | s, d = stride, dilate 38 | 39 | for i in range(1, num_block + 1 - num_special_block): 40 | proj = True if i == 1 else False 41 | s = stride if i == 1 else 1 42 | d = dilate 43 | data = resnet_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm) 44 | 45 | for i in range(num_block + 1 - num_special_block, num_block + 1): 46 | proj = True if i == 1 else False 47 | s = stride if i == 1 else 1 48 | d = dilate 49 | data = special_res_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm, **kwargs) 50 | 51 | return data 52 | 53 | 54 | def hybrid_resnet_c4_builder(special_resnet_unit): 55 | class ResNetC4(Backbone): 56 | def __init__(self, pBackbone): 57 | super().__init__(pBackbone) 58 | p = self.p 59 | 60 | import mxnext.backbone.resnet_v1b_helper as helper 61 | num_c2, num_c3, num_c4, _ = helper.depth_config[p.depth] 62 | 63 | data = X.var("data") 64 | if p.fp16: 65 | data = data.astype("float16") 66 | c1 = helper.resnet_c1(data, p.normalizer) 67 | c2 = helper.resnet_c2(c1, num_c2, 1, 1, p.normalizer) 68 | c3 = 
hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1, 69 | p.normalizer, params=p) 70 | c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1, 71 | p.normalizer, params=p) 72 | 73 | self.symbol = c4 74 | 75 | def get_rpn_feature(self): 76 | return self.symbol 77 | 78 | def get_rcnn_feature(self): 79 | return self.symbol 80 | 81 | return ResNetC4 82 | 83 | 84 | def hybrid_resnet_fpn_builder(special_resnet_unit): 85 | class ResNetFPN(Backbone): 86 | def __init__(self, pBackbone): 87 | super().__init__(pBackbone) 88 | p = self.p 89 | 90 | import mxnext.backbone.resnet_v1b_helper as helper 91 | num_c2, num_c3, num_c4, num_c5 = helper.depth_config[p.depth] 92 | 93 | data = X.var("data") 94 | if p.fp16: 95 | data = data.astype("float16") 96 | c1 = helper.resnet_c1(data, p.normalizer) 97 | c2 = hybrid_resnet_stage(c1, "stage1", num_c2, p.num_c2_block or 0, special_resnet_unit, 256, 1, 1, 98 | p.normalizer, params=p) 99 | c3 = hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1, 100 | p.normalizer, params=p) 101 | c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1, 102 | p.normalizer, params=p) 103 | c5 = hybrid_resnet_stage(c4, "stage4", num_c5, p.num_c5_block or 0, special_resnet_unit, 2048, 2, 1, 104 | p.normalizer, params=p) 105 | 106 | self.symbol = (c2, c3, c4, c5) 107 | 108 | def get_rpn_feature(self): 109 | return self.symbol 110 | 111 | def get_rcnn_feature(self): 112 | return self.symbol 113 | 114 | return ResNetFPN 115 | 116 | 117 | DCNResNetC4 = hybrid_resnet_c4_builder(dcn_resnet_unit) 118 | DCNResNetFPN = hybrid_resnet_fpn_builder(dcn_resnet_unit) 119 | -------------------------------------------------------------------------------- /models/tridentnet/input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_input import DetectionAugmentation, AnchorTarget2D 5 | from operator_py.cython.bbox import bbox_overlaps_cython 6 | 7 | 8 | class ScaleAwareRange(DetectionAugmentation): 9 | def __init__(self, pScaleRange): 10 | super().__init__() 11 | self.p = pScaleRange 12 | 13 | def apply(self, input_record): 14 | p = self.p 15 | 16 | im_info = input_record['im_info'] 17 | 18 | # input_record["valid_ranges_on_origin"] = p.cal_on_origin 19 | input_record["valid_ranges"] = np.array(p.valid_ranges, dtype=np.float32).reshape(-1, 2) 20 | if p.cal_on_origin: 21 | input_record["valid_ranges"] *= im_info[2] 22 | # replace -1 with max_size 23 | inds = np.where(input_record["valid_ranges"][:, 1] < 0)[0] 24 | input_record["valid_ranges"][inds, 1] = max(im_info[0], im_info[1]) 25 | 26 | 27 | class TridentAnchorTarget2D(AnchorTarget2D): 28 | """ 29 | input: image_meta: tuple(h, w, scale) 30 | gt_bbox, ndarry(max_num_gt, 4) 31 | output: anchor_label, ndarray(num_branch, num_anchor * 2, h, w) 32 | anchor_bbox_target, ndarray(num_branch, num_anchor * 4, h, w) 33 | anchor_bbox_weight, ndarray(num_branch, num_anchor * 4, h, w) 34 | valid_ranges, ndarray(num_branch, 2) 35 | """ 36 | 37 | def __init__(self, pAnchor): 38 | super().__init__(pAnchor) 39 | 40 | def _filter_anchor_by_scale_range(self, cls_label, valid_anchor, gt_bbox, valid_range, invalid_anchor_threshd): 41 | if len(gt_bbox) == 0: 42 | return 43 | gt_bbox_sizes = (gt_bbox[:, 2] - gt_bbox[:, 0] + 1.0) * (gt_bbox[:, 3] - gt_bbox[:, 1] + 1.0) 44 | invalid_gt_bbox_inds = 
np.where((gt_bbox_sizes < valid_range[0]**2) | (gt_bbox_sizes > valid_range[1]**2))[0] 45 | invalid_gt_bbox = gt_bbox[invalid_gt_bbox_inds] 46 | if len(invalid_gt_bbox) > 0: 47 | invalid_overlaps = bbox_overlaps_cython( 48 | valid_anchor.astype(np.float32, copy=False), invalid_gt_bbox.astype(np.float32, copy=False)) 49 | invalid_argmax_overlaps = invalid_overlaps.argmax(axis=1) 50 | invalid_max_overlaps = invalid_overlaps[np.arange(len(valid_anchor)), invalid_argmax_overlaps] 51 | 52 | # ignore anchors overlapped with invalid gt boxes 53 | disable_inds = np.where((invalid_max_overlaps > invalid_anchor_threshd))[0] 54 | cls_label[disable_inds] = -1 55 | 56 | def apply(self, input_record): 57 | p = self.p 58 | 59 | im_info = input_record["im_info"] 60 | gt_bbox = input_record["gt_bbox"] 61 | valid_ranges = input_record["valid_ranges"] 62 | assert isinstance(gt_bbox, np.ndarray) 63 | assert gt_bbox.dtype == np.float32 64 | 65 | valid = np.where(gt_bbox[:, 0] != -1)[0] 66 | gt_bbox = gt_bbox[valid] 67 | 68 | if gt_bbox.shape[1] == 5: 69 | gt_bbox = gt_bbox[:, :4] 70 | 71 | h, w = im_info[:2] 72 | if h >= w: 73 | fh, fw = p.generate.long, p.generate.short 74 | else: 75 | fh, fw = p.generate.short, p.generate.long 76 | 77 | valid_index, valid_anchor = self._gather_valid_anchor(im_info) 78 | 79 | valid_cls_label, valid_anchor_label = \ 80 | self._assign_label_to_anchor(valid_anchor, gt_bbox, 81 | p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr) 82 | 83 | cls_labels, reg_targets, reg_weights = [], [], [] 84 | for valid_range in valid_ranges: 85 | cls_label = valid_cls_label.copy() 86 | self._filter_anchor_by_scale_range(cls_label, valid_anchor, gt_bbox, 87 | valid_range, p.trident.invalid_anchor_threshd) 88 | self._sample_anchor(cls_label, p.sample.image_anchor, p.sample.pos_fraction) 89 | reg_target, reg_weight = self._cal_anchor_target(cls_label, valid_anchor, gt_bbox, valid_anchor_label) 90 | cls_label, reg_target, reg_weight = \ 91 | self._scatter_valid_anchor(valid_index, cls_label, reg_target, reg_weight) 92 | 93 | cls_labels.append(cls_label.reshape((fh, fw, -1)).transpose(2, 0, 1).reshape(-1)) 94 | reg_targets.append(reg_target.reshape((fh, fw, -1)).transpose(2, 0, 1)) 95 | reg_weights.append(reg_weight.reshape((fh, fw, -1)).transpose(2, 0, 1)) 96 | 97 | input_record["rpn_cls_label"] = np.stack(cls_labels) 98 | input_record["rpn_reg_target"] = np.stack(reg_targets) 99 | input_record["rpn_reg_weight"] = np.stack(reg_weights) 100 | 101 | return input_record["rpn_cls_label"], \ 102 | input_record["rpn_reg_target"], \ 103 | input_record["rpn_reg_weight"] 104 | 105 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # modify from https://github.com/NVIDIA/TensorRT/blob/master/docker/ubuntu.Dockerfile 2 | ARG CUDA_VERSION=11.1 3 | ARG CUDNN_VERSION=8 4 | ARG OS_VERSION=16.04 5 | ARG NVCR_SUFFIX= 6 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${OS_VERSION}${NVCR_SUFFIX} 7 | 8 | LABEL maintainer="Simpledet" 9 | 10 | WORKDIR workspace 11 | 12 | # basic 13 | RUN apt-get update && \ 14 | apt-get install -y --no-install-recommends && \ 15 | apt-get install -y build-essential python-dev python3-dev && \ 16 | apt-get install -y git wget sudo curl openssh-server openssh-client bash-completion command-not-found \ 17 | vim htop tmux zsh rsync bzip2 zip unzip patch time make cmake locales locales-all libgtk2.0-dev libgl1-mesa-glx python3-tk 
\ 18 | ninja-build libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev llvm libncurses5-dev libncursesw5-dev \ 19 | xz-utils tk-dev libffi-dev liblzma-dev python-openssl libopenblas-dev && \ 20 | rm -rf /var/lib/apt/lists/* 21 | RUN ln -sfv /usr/bin/python3 /usr/bin/python 22 | 23 | # zsh and fzf 24 | RUN wget https://github.com/robbyrussell/oh-my-zsh/raw/master/tools/install.sh -O - | zsh || true && \ 25 | git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions && \ 26 | sed -i 's/robbyrussell/fishy/' ~/.zshrc && \ 27 | sed -i 's/(git)/(git zsh-autosuggestions)/' ~/.zshrc && \ 28 | sed -i 's/# DISABLE_AUTO_UPDATE/DISABLE_AUTO_UPDATE/' ~/.zshrc && \ 29 | git clone --depth 1 https://github.com/junegunn/fzf.git ~/.fzf && ~/.fzf/install --all 30 | 31 | # use pyenv to manage python version 32 | RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ 33 | echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.zshrc && \ 34 | echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.zshrc && \ 35 | echo 'export PYTHON_CONFIGURE_OPTS="--enable-shared"' >> ~/.zshrc && \ 36 | echo 'if command -v pyenv 1>/dev/null 2>&1; then\n eval "$(pyenv init -)"\nfi' >> ~/.zshrc 37 | 38 | RUN /usr/bin/zsh -c "source ~/.zshrc && \ 39 | pyenv install 3.6.8 && \ 40 | pyenv global 3.6.8 && \ 41 | eval zsh && \ 42 | pip install -U pipenv setuptools && \ 43 | pip install ipython numpy scipy scikit-learn tqdm graphviz easydict matplotlib pyarrow pyzmq pillow cython requests pytz opencv-python tensorboard && \ 44 | rm -rf ~/.cache" 45 | 46 | # build mxnet 47 | RUN /usr/bin/zsh -c "source ~/.zshrc && \ 48 | git clone --recursive https://github.com/apache/incubator-mxnet /tmp/mxnet -b 1.6.0 && \ 49 | git clone https://github.com/Tusimple/simpledet /tmp/simpledet && \ 50 | git clone https://github.com/RogerChern/cocoapi /tmp/cocoapi && \ 51 | cp -r /tmp/simpledet/operator_cxx/* /tmp/mxnet/src/operator && \ 52 | mkdir -p /tmp/mxnet/src/coco_api && \ 53 | cp -r /tmp/cocoapi/common /tmp/mxnet/src/coco_api && \ 54 | cd /tmp/mxnet && \ 55 | echo 'USE_SIGNAL_HANDLER = 1' >> ./config.mk && \ 56 | echo 'USE_OPENCV = 0' >> ./config.mk && \ 57 | echo 'USE_MKLDNN = 0' >> ./config.mk && \ 58 | echo 'USE_BLAS = openblas' >> ./config.mk && \ 59 | echo 'USE_CUDA = 1' >> ./config.mk && \ 60 | echo 'USE_CUDA_PATH = /usr/local/cuda' >> ./config.mk && \ 61 | echo 'USE_CUDNN = 1' >> ./config.mk && \ 62 | echo 'USE_NCCL = 1' >> ./config.mk && \ 63 | echo 'USE_DIST_KVSTORE = 1' >> ./config.mk && \ 64 | echo 'CUDA_ARCH = -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86' >> ./config.mk && \ 65 | rm /tmp/mxnet/src/operator/nn/group_norm* && \ 66 | make -j$((`nproc`-1)) && \ 67 | cd python && \ 68 | python3 setup.py install && \ 69 | rm -rf /tmp/mxnet /tmp/simpledet /tmp/cocoapi" 70 | 71 | # install pycocotools and mxnext 72 | RUN /usr/bin/zsh -c "source ~/.zshrc && \ 73 | pip install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' && \ 74 | pip install 'git+https://github.com/RogerChern/mxnext#egg=mxnext'" 75 | 76 | # ssh 77 | RUN chsh -s /usr/bin/zsh root && \ 78 | mkdir /var/run/sshd && \ 79 | echo 'root:simpledet' | chpasswd && \ 80 | sed -i '/PermitRootLogin/s/prohibit-password/yes/' /etc/ssh/sshd_config 81 | EXPOSE 22 82 | 83 | # env 84 | RUN echo "export PATH=/usr/local/cuda/bin:\$PATH" >> 
~/.zshrc && \ 85 | echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:\$LD_LIBRARY_PATH" >> ~/.zsrhc 86 | 87 | CMD ["/usr/sbin/sshd", "-D"] 88 | 89 | -------------------------------------------------------------------------------- /operator_cxx/contrib/nms-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file nms-inl.h 22 | * \brief NMS Operator 23 | * \author Yanghao Li 24 | */ 25 | #ifndef MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 26 | #define MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "../operator_common.h" 39 | #include "../mshadow_op.h" 40 | 41 | namespace mxnet { 42 | namespace op { 43 | 44 | namespace nms { 45 | enum NMSOpInputs {kBBox}; 46 | enum NMSOpOutputs {kOut, kScore}; 47 | enum NMSForwardResource {kTempSpace}; 48 | } // nms 49 | 50 | struct NMSParam : public dmlc::Parameter { 51 | int rpn_pre_nms_top_n; 52 | int rpn_post_nms_top_n; 53 | float threshold; 54 | bool output_score; 55 | bool already_sorted; 56 | uint64_t workspace; 57 | 58 | DMLC_DECLARE_PARAMETER(NMSParam) { 59 | DMLC_DECLARE_FIELD(rpn_pre_nms_top_n).set_default(6000) 60 | .describe("Number of top scoring boxes to keep before applying NMS to RPN proposals"); 61 | DMLC_DECLARE_FIELD(rpn_post_nms_top_n).set_default(300) 62 | .describe("Number of top scoring boxes to keep after applying NMS to RPN proposals"); 63 | DMLC_DECLARE_FIELD(threshold).set_default(0.7) 64 | .describe("NMS value, below which to suppress."); 65 | DMLC_DECLARE_FIELD(output_score).set_default(false) 66 | .describe("Add score to outputs"); 67 | DMLC_DECLARE_FIELD(already_sorted).set_default(false) 68 | .describe("if input rois have been sorted by confidence"); 69 | DMLC_DECLARE_FIELD(workspace).set_default(256) 70 | .describe("Workspace for NMS in MB, default to 256"); 71 | } 72 | }; 73 | 74 | template 75 | Operator *CreateOp(NMSParam param); 76 | 77 | #if DMLC_USE_CXX11 78 | class NMSProp : public OperatorProperty { 79 | public: 80 | void Init(const std::vector >& kwargs) override { 81 | param_.Init(kwargs); 82 | } 83 | 84 | std::map GetParams() const override { 85 | return param_.__DICT__(); 86 | } 87 | 88 | bool InferShape(std::vector *in_shape, 89 | std::vector *out_shape, 90 | std::vector *aux_shape) const override { 91 | using namespace mshadow; 92 | CHECK_EQ(in_shape->size(), 1) << "Input:[bbox]"; 93 | const TShape &dshape = in_shape->at(nms::kBBox); 94 | if (dshape.ndim() == 0) return false; 95 | out_shape->clear(); 96 | // output 97 | out_shape->push_back(Shape3(dshape[0], 
param_.rpn_post_nms_top_n, 4)); 98 | // score 99 | out_shape->push_back(Shape3(dshape[0], param_.rpn_post_nms_top_n, 1)); 100 | return true; 101 | } 102 | 103 | OperatorProperty* Copy() const override { 104 | auto ptr = new NMSProp(); 105 | ptr->param_ = param_; 106 | return ptr; 107 | } 108 | 109 | std::string TypeString() const override { 110 | return "_contrib_NMS"; 111 | } 112 | 113 | std::vector ForwardResource( 114 | const std::vector &in_shape) const override { 115 | return {ResourceRequest::kTempSpace}; 116 | } 117 | 118 | std::vector DeclareBackwardDependency( 119 | const std::vector &out_grad, 120 | const std::vector &in_data, 121 | const std::vector &out_data) const override { 122 | return {}; 123 | } 124 | 125 | int NumVisibleOutputs() const override { 126 | if (param_.output_score) { 127 | return 2; 128 | } else { 129 | return 1; 130 | } 131 | } 132 | 133 | int NumOutputs() const override { 134 | return 2; 135 | } 136 | 137 | std::vector ListArguments() const override { 138 | return {"rois"}; 139 | } 140 | 141 | std::vector ListOutputs() const override { 142 | return {"output", "score"}; 143 | } 144 | 145 | Operator* CreateOperator(Context ctx) const override; 146 | 147 | private: 148 | NMSParam param_; 149 | }; // class NMSProp 150 | 151 | #endif // DMLC_USE_CXX11 152 | } // namespace op 153 | } // namespace mxnet 154 | 155 | #endif // MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 156 | -------------------------------------------------------------------------------- /models/maskrcnn/bbox_post_processing.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | from operator_py.nms import py_nms_wrapper 5 | 6 | def multiclass_nms(nms, cls_score, bbox_xyxy, min_det_score, max_det_per_image): 7 | # remove background 8 | cls_score = cls_score[:, 1:] 9 | # TODO: the output shape of class_agnostic box is [n, 4], while class_aware box is [n, 4 * (1 + class)] 10 | bbox_xyxy = bbox_xyxy[:, 4:] if bbox_xyxy.shape[1] != 4 else bbox_xyxy 11 | num_class = cls_score.shape[1] 12 | 13 | cls_det = [np.empty((0, 6), dtype=np.float32) for _ in range(num_class)] # [x1, y1, x2, y2, score, cls] 14 | 15 | for cid in range(num_class): 16 | score = cls_score[:, cid] 17 | if bbox_xyxy.shape[1] != 4: 18 | _bbox_xyxy = bbox_xyxy[:, cid * 4:(cid + 1) * 4] 19 | else: 20 | _bbox_xyxy = bbox_xyxy 21 | valid_inds = np.where(score > min_det_score)[0] 22 | box = _bbox_xyxy[valid_inds] 23 | score = score[valid_inds] 24 | det = np.concatenate((box, score.reshape(-1, 1)), axis=1).astype(np.float32) 25 | det = nms(det) 26 | cls = np.full((det.shape[0], 1), cid, dtype=np.float32) 27 | cls_det[cid] = np.hstack((det, cls)) 28 | 29 | cls_det = np.vstack([det for det in cls_det]) 30 | scores = cls_det[:, -2] 31 | top_index = np.argsort(scores)[::-1][:max_det_per_image] 32 | return cls_det[top_index] 33 | 34 | 35 | class BboxPostProcessingOperator(mx.operator.CustomOp): 36 | def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr): 37 | super().__init__() 38 | self.max_det_per_image = max_det_per_image 39 | self.min_det_score = min_det_score 40 | self.nms_type = nms_type 41 | self.nms_thr = nms_thr 42 | 43 | def forward(self, is_train, req, in_data, out_data, aux): 44 | if self.nms_type == 'nms': 45 | nms = py_nms_wrapper(self.nms_thr) 46 | else: 47 | raise NotImplementedError 48 | 49 | cls_score = in_data[0].asnumpy() 50 | bbox_xyxy = in_data[1].asnumpy() 51 | 52 | cls_score_shape = cls_score.shape # (b, n, num_class_withbg) 53 | 
bbox_xyxy_shape = bbox_xyxy.shape # (b, n, 4) or (b, n, 4 * num_class_withbg) 54 | batch_image = cls_score_shape[0] 55 | num_bbox = cls_score_shape[1] 56 | num_class_withbg = cls_score_shape[2] 57 | 58 | post_score = np.zeros((batch_image, self.max_det_per_image, 1), dtype=np.float32) 59 | post_bbox_xyxy = np.zeros((batch_image, self.max_det_per_image, 4), dtype=np.float32) 60 | post_cls = np.full((batch_image, self.max_det_per_image, 1), -1, dtype=np.float32) 61 | 62 | for i, (per_image_cls_score, per_image_bbox_xyxy) in enumerate(zip(cls_score, bbox_xyxy)): 63 | cls_det = multiclass_nms(nms, per_image_cls_score, per_image_bbox_xyxy, \ 64 | self.min_det_score, self.max_det_per_image) 65 | num_det = cls_det.shape[0] 66 | post_bbox_xyxy[i, :num_det] = cls_det[:, :4] 67 | post_score[i, :num_det] = cls_det[:, -2][:, np.newaxis] # convert to (n, 1) 68 | post_cls[i, :num_det] = cls_det[:, -1][:, np.newaxis] # convert to (n, 1) 69 | 70 | self.assign(out_data[0], req[0], post_score) 71 | self.assign(out_data[1], req[1], post_bbox_xyxy) 72 | self.assign(out_data[2], req[2], post_cls) 73 | 74 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 75 | self.assign(in_grad[0], req[0], 0) 76 | self.assign(in_grad[1], req[1], 0) 77 | 78 | 79 | @mx.operator.register("BboxPostProcessing") 80 | class BboxPostProcessingProp(mx.operator.CustomOpProp): 81 | def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr): 82 | super().__init__(need_top_grad=False) 83 | self.max_det_per_image = int(max_det_per_image) 84 | self.min_det_score = float(min_det_score) 85 | self.nms_type = str(nms_type) 86 | self.nms_thr = float(nms_thr) 87 | 88 | def list_arguments(self): 89 | return ['cls_score', 'bbox_xyxy'] 90 | 91 | def list_outputs(self): 92 | return ['post_score', 'post_bbox_xyxy', 'post_cls'] 93 | 94 | def infer_shape(self, in_shape): 95 | cls_score_shape = in_shape[0] # (b, n, num_class_withbg) 96 | bbox_xyxy_shape = in_shape[1] # (b, n, 4) or (b, n, 4 * num_class_withbg) 97 | 98 | batch_image = cls_score_shape[0] 99 | 100 | post_score_shape = (batch_image, self.max_det_per_image, 1) 101 | post_bbox_xyxy_shape = (batch_image, self.max_det_per_image, 4) 102 | post_cls_shape = (batch_image, self.max_det_per_image, 1) 103 | 104 | return [cls_score_shape, bbox_xyxy_shape], \ 105 | [post_score_shape, post_bbox_xyxy_shape, post_cls_shape] 106 | 107 | def create_operator(self, ctx, shapes, dtypes): 108 | return BboxPostProcessingOperator(self.max_det_per_image, self.min_det_score, self.nms_type, self.nms_thr) 109 | 110 | def declare_backward_dependency(self, out_grad, in_data, out_data): 111 | return [] 112 | -------------------------------------------------------------------------------- /operator_cxx/contrib/global_average_pooling.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file global_average_pooling.cu 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Chenxia Han 25 | */ 26 | #include 27 | #include 28 | #include "../mxnet_op.h" 29 | #include "../../common/cuda_utils.h" 30 | #include "./global_average_pooling-inl.h" 31 | 32 | #define GAP_CUDA_CHECK(condition) \ 33 | /* Code block avoids redefinition of cudaError_t error */ \ 34 | do { \ 35 | cudaError_t error = condition; \ 36 | CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 37 | } while (0) 38 | #define CUDA_KERNEL_LOOP(i, n) \ 39 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 40 | i < (n); \ 41 | i += blockDim.x * gridDim.x) 42 | 43 | constexpr int CAFFE_CUDA_NUM_THREADS = 512; 44 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096; 45 | 46 | inline int CAFFE_GET_BLOCKS(const int N) { 47 | return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS, 48 | CAFFE_MAXIMUM_NUM_BLOCKS); 49 | } 50 | 51 | namespace mshadow { 52 | namespace cuda { 53 | 54 | template 55 | __global__ void GlobalAvePoolForwardKernel(const int spatial_dim, 56 | const Dtype* bottom_data, Dtype* top_data) { 57 | __shared__ Dtype buffer[CAFFE_CUDA_NUM_THREADS]; 58 | unsigned int tid = threadIdx.x; 59 | buffer[tid] = 0; 60 | __syncthreads(); 61 | 62 | for (int j = tid; j < spatial_dim; j += blockDim.x) { 63 | buffer[tid] += bottom_data[blockIdx.x * spatial_dim + j]; 64 | } 65 | __syncthreads(); 66 | 67 | for (int i = blockDim.x / 2; i > 0; i >>= 1) { 68 | if (tid < i) { 69 | buffer[threadIdx.x] += buffer[threadIdx.x + i]; 70 | } 71 | __syncthreads(); 72 | } 73 | 74 | if (tid == 0) { 75 | top_data[blockIdx.x] = buffer[0] / spatial_dim; 76 | } 77 | } 78 | 79 | template 80 | inline void GAPForward(const Tensor &out, 81 | const Tensor &data) { 82 | const DType *bottom_data = data.dptr_; 83 | DType *top_data = out.dptr_; 84 | const int nblocks = data.shape_.ProdShape(0, 2); 85 | const int spatial_dim = data.shape_.ProdShape(2, 4); 86 | cudaStream_t stream = Stream::GetStream(out.stream_); 87 | GlobalAvePoolForwardKernel << > >(spatial_dim, bottom_data, top_data); 89 | GAP_CUDA_CHECK(cudaPeekAtLastError()); 90 | } 91 | 92 | template 93 | __global__ void GlobalAvePoolBackwardKernel(const int nthreads, const int spatial_dim, 94 | const Dtype* top_diff, Dtype* bottom_diff) { 95 | CUDA_KERNEL_LOOP(index, nthreads) { 96 | const int n = index / spatial_dim; 97 | bottom_diff[index] = top_diff[n] / spatial_dim; 98 | } 99 | } 100 | 101 | template 102 | inline void GAPBackward(const Tensor &in_grad, 103 | const Tensor &out_grad) { 104 | const DType *top_diff = out_grad.dptr_; 105 | DType *bottom_diff = in_grad.dptr_; 106 | const int count = in_grad.shape_.Size(); 107 | const int spatial_dim = in_grad.shape_.ProdShape(2, 4); 108 | cudaStream_t stream = Stream::GetStream(in_grad.stream_); 109 | GlobalAvePoolBackwardKernel << > >(count, spatial_dim, top_diff, bottom_diff); 111 | GAP_CUDA_CHECK(cudaPeekAtLastError()); 112 | } 113 | 114 | } // namespace cuda 115 | 116 | template 117 
| inline void GAPForward(const Tensor &out, 118 | const Tensor &data) { 119 | cuda::GAPForward(out, data); 120 | } 121 | 122 | template 123 | inline void GAPBackward(const Tensor &in_grad, 124 | const Tensor &out_grad) { 125 | cuda::GAPBackward(in_grad, out_grad); 126 | } 127 | 128 | } // namespace mshadow 129 | 130 | namespace mxnet { 131 | namespace op { 132 | template<> 133 | Operator *CreateOp(GAPParam param, int dtype) { 134 | Operator *op = NULL; 135 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 136 | op = new GAPOp(param); 137 | }); 138 | return op; 139 | } 140 | 141 | } // namespace op 142 | } // namespace mxnet 143 | 144 | --------------------------------------------------------------------------------