├── config ├── __init__.py ├── NASFPN │ └── __init__.py └── int8 │ └── README.md ├── core └── __init__.py ├── models ├── __init__.py ├── FPG │ ├── __init__.py │ └── README.md ├── KD │ ├── __init__.py │ ├── README.md │ ├── utils.py │ └── builder.py ├── NASFPN │ ├── __init__.py │ └── README.md ├── maskrcnn │ ├── __init__.py │ ├── metric.py │ ├── process_output.py │ ├── utils.py │ ├── detector.py │ ├── README.md │ └── bbox_post_processing.py ├── msrcnn │ ├── __init__.py │ ├── metric.py │ ├── README.md │ ├── maskiou_compute.py │ └── detector.py ├── retinanet │ ├── __init__.py │ ├── metric.py │ └── README.md ├── cascade_rcnn │ ├── __init__.py │ └── README.md ├── tridentnet │ ├── __init__.py │ ├── README.md │ └── input.py ├── selsa │ └── README.md ├── TSD │ ├── metric.py │ ├── readme.md │ └── faster_rcnn.py ├── efficientnet │ └── README.md ├── FreeAnchor │ └── README.md ├── sepc │ ├── README.md │ ├── readme.md │ └── sepc_dconv.py ├── crowdhuman │ ├── README.md │ └── softmax_entropy_op.py ├── RepPoints │ └── README.md ├── FCOS │ └── metric.py ├── FPN │ ├── README.md │ ├── get_top_proposal.py │ └── assign_layer_fpn.py ├── se │ └── builder.py └── dcn │ └── builder.py ├── symbol ├── __init__.py └── detector.py ├── utils ├── __init__.py ├── json_to_roidb.py ├── download_pretrain.py ├── logger.py ├── roidb_to_coco.py ├── load_model.py ├── patch_config.py ├── deprecated.py ├── contrib │ ├── edit_model_weight.py │ └── data_to_coco.py ├── create_voc_roidb.py ├── create_coco_roidb.py ├── create_crowdhuman_roidb.py └── callback.py ├── operator_py ├── __init__.py └── cython │ ├── __init__.py │ ├── .gitignore │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── bbox.pyx │ └── bbox_self.pyx ├── doc ├── image │ ├── detector.png │ ├── diagram.png │ ├── diagram_v2.png │ ├── trident_block.png │ ├── tensorboard_screenshot.png │ └── trident_block_details.png ├── DISTRIBUTED.md ├── TENSORBOARD.md ├── BUILD_WHEEL.md ├── DATASET.md ├── INSTALL.md └── FRAMEWOKR_OVERVIEW.md ├── Makefile ├── docker ├── README.md └── Dockerfile ├── scripts ├── dist_worker.sh ├── train_hpc.sh ├── terminate.sh ├── launch.sh └── setup.sh ├── .github └── ISSUE_TEMPLATE │ └── bug_report.md ├── operator_cxx └── contrib │ ├── bbox_norm.cu │ ├── sync_batch_norm.cu │ ├── focal_loss.cu │ ├── broadcast_scale.cu │ ├── sync_inplace_activation_batch_norm.cu │ ├── quantization_int8.cu │ ├── focal_loss.cc │ ├── bbox_norm.cc │ ├── broadcast_scale.cc │ ├── quantization_int8.cc │ ├── axpy.cc │ ├── sigmoid_cross_entropy.cc │ ├── global_average_pooling.cc │ ├── group_norm.cc │ ├── generate_anchor.cc │ ├── nms-inl.h │ └── global_average_pooling.cu ├── .gitignore ├── unittest └── test_loader.py └── detection_infer_speed.py /config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /symbol/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /models/FPG/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/KD/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/NASFPN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/NASFPN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/maskrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/msrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/cascade_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/tridentnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/cython/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /operator_py/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /doc/image/detector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/detector.png -------------------------------------------------------------------------------- /doc/image/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/diagram.png -------------------------------------------------------------------------------- /doc/image/diagram_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/diagram_v2.png -------------------------------------------------------------------------------- /doc/image/trident_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/trident_block.png -------------------------------------------------------------------------------- 
/doc/image/tensorboard_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/tensorboard_screenshot.png -------------------------------------------------------------------------------- /doc/image/trident_block_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tusen-ai/simpledet/HEAD/doc/image/trident_block_details.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd operator_py/cython/; python3 setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | clean: 4 | cd operator_py/cython/; rm *.so *.c *.cpp; cd ../../ 5 | -------------------------------------------------------------------------------- /operator_py/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | ## Build 3 | ``` 4 | cd docker 5 | # build for cuda11.1 cudnn8 6 | docker build --network=host --build-arg OS_VERSION=16.04 --build-arg CUDA_VERSION=11.1 --build-arg CUDNN_VERSION=8 --tag simpledet . 7 | ``` 8 | 9 | ## Launch 10 | ``` 11 | docker run -it --gpus all simpledet zsh 12 | ``` 13 | -------------------------------------------------------------------------------- /models/maskrcnn/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_metric import EvalMetricWithSummary 5 | 6 | 7 | class SigmoidCELossMetric(EvalMetricWithSummary): 8 | def __init__(self, name, output_names, label_names, **kwargs): 9 | super().__init__(name, output_names, label_names, **kwargs) 10 | 11 | def update(self, labels, preds): 12 | self.sum_metric += preds[0].mean().asscalar() 13 | self.num_inst += 1 -------------------------------------------------------------------------------- /scripts/dist_worker.sh: -------------------------------------------------------------------------------- 1 | root_dir=$1 2 | singularity_image=$2 3 | conffile=$3 4 | 5 | if test $(which singularity); then 6 | singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "MXNET_UPDATE_ON_KVSTORE=0 MXNET_OPTIMIZER_AGGREGATION_SIZE=20 python -u detection_train.py --config ${conffile}" 7 | else 8 | singularity exec -B /mnt:/mnt -s /usr/bin/zsh --no-home --nv ${singularity_image} zsh -ic "python -u detection_train.py" 9 | fi 10 | -------------------------------------------------------------------------------- /models/selsa/README.md: -------------------------------------------------------------------------------- 1 | ## Sequence Level Semantics Aggregation for Video Object Detection 2 | 3 | Haiping Wu, Yuntao Chen, Naiyan Wang, Zhaoxiang Zhang 4 | 5 | The project is currently released at https://github.com/happywu/Sequence-Level-Semantics-Aggregation 6 | ### Citing SELSA 7 | ``` 8 | @article{wu2019selsa, 9 | title={Sequence Level Semantics Aggregation for Video Object Detection}, 10 | author={Wu, Haiping and Chen, Yuntao and Wang, 
Naiyan and Zhang, Zhaoxiang}, 11 | journal={ICCV 2019}, 12 | year={2019} 13 | } 14 | ``` 15 | -------------------------------------------------------------------------------- /models/TSD/metric.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loss metric for QueryDet training 3 | 4 | Author: Chenhongyi Yang 5 | ''' 6 | import numpy as np 7 | import mxnet as mx 8 | from core.detection_metric import EvalMetricWithSummary 9 | 10 | 11 | class LossMetric(EvalMetricWithSummary): 12 | def __init__(self, name, output_names, label_names, **kwargs): 13 | super().__init__(name, output_names, label_names, **kwargs) 14 | 15 | def update(self, labels, preds): 16 | self.sum_metric += preds[0].asnumpy().sum() 17 | self.num_inst += 1 18 | 19 | -------------------------------------------------------------------------------- /scripts/train_hpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gpucount=8 4 | num_node=2 5 | num_servers=${num_node} 6 | root_dir="/path/to/simpledet" 7 | sync_dir="/tmp/simpledet_sync" 8 | 9 | hostfile=hostfile.txt 10 | conffile=faster_r50v2c4_c5_256roi_1x 11 | singularity_image=simpledet.img 12 | 13 | export DMLC_INTERFACE=eth0 14 | python -u ../../launcher/tools/launch.py \ 15 | -n ${num_node} \ 16 | --num-servers ${num_servers} \ 17 | --sync-dst-dir ${sync_dir} \ 18 | --launcher ssh -H ${hostfile} \ 19 | scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \ 20 | 2>&1 | tee -a ${root_dir}/log/${conffile}.log 21 | -------------------------------------------------------------------------------- /models/maskrcnn/process_output.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pycocotools import mask as mask_util 3 | 4 | from models.maskrcnn.utils import segm_results 5 | 6 | def process_output(all_outputs, roidb): 7 | for output_record in all_outputs: 8 | rec_id = int(output_record['rec_id']) 9 | bbox_xyxy = output_record['bbox_xyxy'] 10 | cls_score = output_record['cls_score'] 11 | cls = output_record['cls'] 12 | mask = output_record['mask'] 13 | 14 | im_h = roidb[rec_id]["h"] 15 | im_w = roidb[rec_id]["w"] 16 | segm = segm_results(bbox_xyxy, cls, mask, im_h, im_w) 17 | output_record['segm'] = segm 18 | del output_record['mask'] 19 | return all_outputs 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Screenshots or stack traces** 14 | If applicable, add screenshots or stack traces to help explain your problem. 15 | 16 | **Which config are you using** 17 | 18 | **Which dataset are you using ** 19 | 20 | **Hardware info** 21 | CPU, GPU, Storage(Disk or NFS) 22 | 23 | **Software info** 24 | driver, CUDA, cuDNN versions 25 | OS verison 26 | 27 | **How did you set up your MXNet for SimpleDet** 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 
31 | -------------------------------------------------------------------------------- /models/msrcnn/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_metric import EvalMetricWithSummary 5 | 6 | 7 | class SigmoidCELossMetric(EvalMetricWithSummary): 8 | def __init__(self, name, output_names, label_names, **kwargs): 9 | super().__init__(name, output_names, label_names, **kwargs) 10 | 11 | def update(self, labels, preds): 12 | self.sum_metric += preds[0].mean().asscalar() 13 | self.num_inst += 1 14 | 15 | class L2(EvalMetricWithSummary): 16 | def __init__(self, name, output_names, label_names, **kwargs): 17 | super().__init__(name, output_names, label_names, **kwargs) 18 | 19 | def update(self, labels, preds): 20 | self.sum_metric += preds[0].mean().asscalar() 21 | self.num_inst += 1 -------------------------------------------------------------------------------- /utils/json_to_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import pickle as pkl 4 | from os.path import basename 5 | 6 | import numpy as np 7 | 8 | 9 | def parse_argument(): 10 | parser = argparse.ArgumentParser("Convert json gt to roidb") 11 | parser.add_argument("--json", type=str, required=True) 12 | args = parser.parse_args() 13 | return args.json 14 | 15 | 16 | def json_to_roidb(json_path): 17 | with open(json_path) as f: 18 | json_gt = json.load(f) 19 | 20 | for obj in json_gt: 21 | obj["gt_class"] = np.array(obj["gt_class"], dtype=np.float32) 22 | obj["gt_bbox"] = np.array(obj["gt_bbox"], dtype=np.float32) 23 | with open("data/cache/%s.roidb" % basename(json_path).replace("json", "roidb"), "wb") as fout: 24 | pkl.dump(json_gt, fout) 25 | 26 | 27 | if __name__ == "__main__": 28 | json_to_roidb(parse_argument()) 29 | -------------------------------------------------------------------------------- /models/msrcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Mask Scoring RCNN 2 | 3 | This repository implements [**Mask Scoring RCNN**](https://arxiv.org/abs/1903.00241) in the SimpleDet framework. 
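The key idea of Mask Scoring R-CNN is an extra MaskIoU head that predicts how well each predicted mask overlaps its ground-truth mask, and this prediction is used to rescore instances at test time. Below is a minimal sketch of that rescoring step; the function and argument names are illustrative and are not the actual API of this repository.

```python
import numpy as np

def rescore_masks(cls_scores, pred_mask_ious):
    """Mask Scoring R-CNN ranks masks by cls_score * predicted_mask_iou
    instead of the classification score alone.

    cls_scores:     (N,) classification scores from the box head
    pred_mask_ious: (N,) mask IoU values predicted by the MaskIoU head
    """
    return np.asarray(cls_scores) * np.asarray(pred_mask_ious)
```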
4 | 5 | ### Set Up 6 | You need a newer [mxnet-cu100-20191214](https://1dv.aflat.top/mxnet_cu100-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) or [mxnet-cu101-20191214](https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 7 | 8 | ### Quick Start 9 | ```bash 10 | # train 11 | python3 detection_train.py --config config/ms_r50v1_fpn_1x.py 12 | 13 | # test 14 | python3 mask_test.py --config config/ms_r50v1_fpn_1x.py 15 | ``` 16 | 17 | ### Performance 18 | |Model|Backbone|Head|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP(Mask AP)|Link| 19 | |-----|--------|----|--------------|---|---------|----|---------|-----------|---------------|----| 20 | |Mask Scoring|R50v1-FPN|2MLP+4CONV|1X|8X 1080Ti|2|no|8.1G(3.6G)|23 img/s|37.2(35.0)|[model](https://1dv.aflat.top/ms_r50v1_fpn_1x.zip) 21 | -------------------------------------------------------------------------------- /utils/download_pretrain.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request 3 | 4 | 5 | def report(block_count, block_size, content_size): 6 | if block_count % (content_size // block_size // 5) == 1: 7 | print("Downloaded %.1f/100" % (block_size * block_count / content_size * 100)) 8 | 9 | 10 | def download(prefix, epoch): 11 | dir_name = os.path.dirname(prefix) 12 | if not os.path.exists(dir_name): 13 | os.makedirs(dir_name) 14 | base_name = prefix.replace("pretrain_model/", "") + "-%04d.params" % epoch 15 | save_name = "%s-%04d.params" % (prefix, epoch) 16 | base_url = os.environ.get("SIMPLEDET_BASE_URL", "https://1dv.aflat.top/") 17 | full_url = base_url + base_name 18 | 19 | try: 20 | print("Downloading %s from %s" % (save_name, full_url)) 21 | urllib.request.urlretrieve(full_url, save_name, report) 22 | except Exception as e: 23 | print("Fail to download %s. You can manually download it from %s and put it at %s" % (base_name, full_url, save_name)) 24 | os.remove(save_name) 25 | raise e 26 | -------------------------------------------------------------------------------- /models/KD/README.md: -------------------------------------------------------------------------------- 1 | ## KD 2 | 3 | This repository implements [**Knowledge Distillation**](https://arxiv.org/abs/1503.02531) in the SimpleDet framework. 4 | 5 | ### Quick Start 6 | ```bash 7 | python3 detection_train.py --config config/kd/retina_r50v1b_fpn_1x_fitnet_g10.py 8 | python3 detection_test.py --config config/kd/retina_r50v1b_fpn_1x_fitnet_g10.py 9 | ``` 10 | 11 | ### Results and Models 12 | All AP results are reported on the minival2014 split of the [COCO](http://cocodataset.org) dataset. 
13 | 14 | |Model|Backbone|Head|Train Schedule|AP|AP50|AP75|APs|APm|APl| 15 | |-----|--------|----|--------------|--|----|----|---|---|---| 16 | |Retina|R50v1b-FPN|4Conv|1X|36.6|56.9|39.0|20.3|40.7|47.2| 17 | |Retina|R50v1b-FPN-TR152v1b1X|4Conv|1X|38.9|59.0|41.6|21.4|43.3|52.1| 18 | |Retina|R50v1b-FPN-TR152v1b1X|4Conv|2X|40.1|60.6|43.1|21.8|44.5|54.3| 19 | |Faster|R50v1b-FPN|2MLP|1X|37.2|59.4|40.4|22.3|41.3|47.6| 20 | |Faster|R50v1b-FPN|2MLP|2X|38.0|59.7|41.5|22.2|41.6|48.8| 21 | |Faster|R50v1b-FPN-TR152v1b2X|2MLP|1X|39.9|61.3|43.6|22.7|44.2|52.7| 22 | |Faster|R50v1b-FPN-TR152v1b2X|2MLP|2X|40.5|62.2|43.9|23.1|44.7|53.9| 23 | -------------------------------------------------------------------------------- /scripts/terminate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "usage: $0 comma_separated_worker_hostnames" 5 | exit -1 6 | fi 7 | 8 | hosts=$1 9 | 10 | # extract workers and check reachability 11 | IFS=, read -r -a host_array <<< $hosts 12 | for host in ${host_array[@]}; do 13 | # check reachability 14 | echo "check reachability of $host" 15 | ssh -q $host exit 16 | if [ $? -ne 0 ]; then 17 | echo "$host is not reachable" 18 | exit -1 19 | fi 20 | 21 | # check availability (retreat if remote host is in use) 22 | echo "check availability of $host" 23 | for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 24 | x="${x//[$'\t\r\n ']}" # remove trailing whitespace 25 | if [ $x -gt 5 ]; then 26 | echo "$host has gpu utilization of $x%"; 27 | fi; 28 | done 29 | 30 | # cleanup potentially dead python process (march since we checked it) 31 | ssh $host ps aux | grep python 32 | echo -e "\n" 33 | echo "Terminate tasks on $host in 5s" 34 | sleep 5 35 | ssh -q $host pkill python 36 | done 37 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from pytz import utc, timezone 4 | 5 | 6 | def config_logger(path): 7 | def custom_time(*args): 8 | utc_dt = utc.localize(datetime.utcnow()) 9 | my_tz = timezone("Asia/Shanghai") 10 | converted = utc_dt.astimezone(my_tz) 11 | return converted.timetuple() 12 | 13 | logging.basicConfig() 14 | logging.getLogger().handlers.pop() 15 | 16 | fmt = '%(asctime)s %(message)s' 17 | date_fmt = '%m-%d %H:%M:%S' 18 | formatter = logging.Formatter(fmt=fmt, datefmt=date_fmt) 19 | formatter.converter = custom_time 20 | 21 | logging.getLogger().setLevel(logging.INFO) 22 | 23 | log_file_save_name = path 24 | file_handler = logging.FileHandler(filename=log_file_save_name, mode='w') 25 | file_handler.setLevel(logging.INFO) 26 | file_handler.setFormatter(formatter) 27 | logging.getLogger().addHandler(file_handler) 28 | 29 | console = logging.StreamHandler() 30 | console.setLevel(logging.INFO) 31 | console.setFormatter(formatter) 32 | logging.getLogger().addHandler(console) -------------------------------------------------------------------------------- /models/TSD/readme.md: -------------------------------------------------------------------------------- 1 | # TSD 2 | This repository implements the CVPR 2020 paper [*Revisiting the Sibling Head in Object Detector*](https://openaccess.thecvf.com/content_CVPR_2020/papers/Song_Revisiting_the_Sibling_Head_in_Object_Detector_CVPR_2020_paper.pdf). 
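TSD (Task-aware Spatial Disentanglement) decouples classification and localization by giving each task its own pooled features and head, while the Progressive Constraint (PC) adds a margin term that pushes the disentangled head to outperform the original sibling head. The snippet below is only a sketch of such a PC-style margin loss; the margin value and the names are illustrative assumptions, not the exact formulation used in this config.

```python
import numpy as np

def progressive_constraint(score_sibling, score_tsd, margin=0.2):
    """Hinge-style margin loss: zero once the TSD branch beats the
    sibling head by at least `margin`, positive otherwise.

    score_sibling: (N,) per-RoI confidence (or IoU) from the classical head
    score_tsd:     (N,) per-RoI confidence (or IoU) from the TSD head
    """
    return np.maximum(0.0, np.asarray(score_sibling) - np.asarray(score_tsd) + margin)
```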
3 | 4 | ## Quick Start 5 | ``` 6 | # train 7 | python detection_train.py --config config/TSD/tsd_r50_rpn_1x.py 8 | 9 | # test 10 | python detection_test.py --config config/TSD/tsd_r50_rpn_1x.py 11 | ``` 12 | 13 | ## COCO minival Performance 14 | 15 | All results are reported using ResNet-50 and 1x schedule training. 16 | 17 | TSD: Task-aware Spatial Disentanglement 18 | 19 | PC: Progressive Constraint 20 | 21 | |Method|AP|AP_50|AP_75|AP_s|AP_m|AP_l| 22 | |------|--|-----|-----|----|----|----| 23 | |Baseline Faster RCNN|36.3|58.2|39.0|21.3|39.8|46.9| 24 | |+TSD|39.3|60.6|42.8|22.2|42.8|52.0| 25 | |+TSD and PC|38.9|60.2|42.2|22.0|42.4|51.6| 26 | 27 | ## Citation 28 | ``` 29 | @InProceedings{Song_2020_CVPR, 30 | author = {Song, Guanglu and Liu, Yu and Wang, Xiaogang}, 31 | title = {Revisiting the Sibling Head in Object Detector}, 32 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 33 | month = {June}, 34 | year = {2020} 35 | } 36 | ``` -------------------------------------------------------------------------------- /operator_py/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /doc/DISTRIBUTED.md: -------------------------------------------------------------------------------- 1 | #### Requirement 2 | Here we only provide a guide to launch distributed training with singularity, please make sure your singularity works by checking [INSTALL.md](./doc/INSTALL.md) 3 | 4 | #### Setup 5 | 1. obtain the mxnet launcher and place it in the parent directory of the simpledet working directory 6 | ```bash 7 | git clone https://github.com/RogerChern/mxnet-dist-lancher.git lancher 8 | ``` 9 | 10 | 2. mv `data`, `pretrain_model`, `experiments` outside of simpledet and symink them back. 11 | This step is to avoid unnecessary `rsync` of large binary files in the working directory during launching. 12 | 13 | 3. after step 1 and 2, your directory should be as following 14 | ``` 15 | lancher/ 16 | simpledet/ 17 | data -> /path/to/data 18 | pretrain_model -> /path/to/pretain_model 19 | experiments -> /path/to/experiments 20 | ... 21 | ``` 22 | 23 | 4. 
make a hostfile containing the hostnames of all nodes; these nodes must be reachable from the launch node via passwordless ssh 24 | simpledet/hostfile.txt 25 | ``` 26 | node1 27 | node2 28 | ``` 29 | 30 | 5. change the singularity mounting point in `scripts/dist_worker.sh` 31 | 32 | 6. launch distributed training with scripts 33 | ```bash 34 | bash scripts/launch.sh config/mask_r50v1_fpn_1x.py node1,node2 35 | ``` 36 | -------------------------------------------------------------------------------- /models/efficientnet/README.md: -------------------------------------------------------------------------------- 1 | ## EfficientNet for object detection 2 | This repository implements [**EfficientNet**](https://arxiv.org/abs/1905.11946) in the SimpleDet framework. EfficientNet B5 achieves the same mAP with **~1/10 FLOPs** compared with ResNet-50. 3 | 4 | ### Quick Start 5 | ```bash 6 | # train faster r-cnn with efficientnet fpn backbone 7 | python3 detection_train.py --config config/efficientnet/efficientnet_b5_fpn_bn_scratch_400_6x.py 8 | ``` 9 | 10 | ### Results and Models 11 | All AP results are reported on minival of the [COCO dataset](http://cocodataset.org). 12 | 13 | |Model|InputSize|Backbone|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP|Link| 14 | |-----|-----|--------|--------------|---|---------|----|---------|-----------|---------------|----| 15 | |Faster|400x600|B5-FPN|36 epoch(6X)|8X 1080Ti|8|yes|-|75 img/s|37.2|[model](https://1dv.aflat.top/efficientnet_b5_fpn_bn_scratch_400_6x.zip)| 16 | |Faster|400x600|B5-FPN|54 epoch(9X)|8X 1080Ti|8|yes|-|75 img/s|37.9|-| 17 | |Faster|400x600|B5-FPN|72 epoch(12X)|8X 1080Ti|8|yes|-|75 img/s|38.3|-| 18 | 19 | ### Reference 20 | ``` 21 | @inproceedings{tan2019, 22 | title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks}, 23 | author={Tan, Mingxing and Le, Quoc V}, 24 | booktitle={ICML}, 25 | year={2019} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /models/FreeAnchor/README.md: -------------------------------------------------------------------------------- 1 | ## FreeAnchor 2 | 3 | This repository implements [**FreeAnchor**](https://arxiv.org/abs/1909.02466) in the SimpleDet framework. 4 | FreeAnchor assigns anchors to ground-truth objects with a maximum likelihood estimation procedure. Built on top of RetinaNet, this method achieves a significant improvement in performance. 5 | 6 | ### Quick Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/FreeAnchor/free_anchor_r50v1_fpn_1x.py 10 | # test 11 | python3 detection_test.py --config config/FreeAnchor/free_anchor_r50v1_fpn_1x.py 12 | ``` 13 | 14 | ### Models 15 | All AP results are reported on minival2014 of the [COCO dataset](http://cocodataset.org). 
16 | 17 | |Method|Backbone|Schedule|AP|Link| 18 | |------|--------|--------|--|----| 19 | |FreeAnchor|R50v1-FPN|1x|38.3|[model](https://drive.google.com/open?id=1k043sSZa-sa6qeHuDG21OFOrze1SF364)| 20 | |FreeAnchor|R101v1-FPN|1x|40.4|[model](https://drive.google.com/open?id=1Rki-hZFsuMHleYJpoXFJQMplCFxkDfW-)| 21 | 22 | ### Reference 23 | ``` 24 | @inproceedings{zhang2019freeanchor, 25 | title={{FreeAnchor}: Learning to Match Anchors for Visual Object Detection}, 26 | author={Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang}, 27 | booktitle={Neural Information Processing Systems}, 28 | year={2019} 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /models/cascade_rcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Cascade R-CNN 2 | 3 | This repository implements [**Cascade R-CNN**](https://arxiv.org/abs/1712.00726) in the **SimpleDet** framework. Cascade R-CNN is a multi-stage object detector, aiming to reduce the overfitting problem by resampling of progressively improved hypotheses. 4 | 5 | ### How we build Cascade R-CNN 6 | 7 | #### Input 8 | 9 | Cascade R-CNN can share the origin Faster R-CNN input, so there is no need to implement an extra one. 10 | 11 | #### Symbol 12 | 13 | - ```CascadeRcnn```: detector with three ```R-CNN``` stages 14 | - ```CascadeBbox2fcHead```: header for ```R-CNN``` stages. Note that it is also required to generate proposal for next ```R-CNN``` stages, thus we add ```get_all_proposal``` to decode boxes predicted in this stage and ```get_sampled_proposal``` to generate ```bbox_target```. 15 | 16 | #### Config 17 | 18 | - ```BboxParam```, ```BboxParam2nd```, ```BboxParam3rd```: config for ```R-CNN``` stages, ```mean``` and ```std``` in ```regress_target``` aim to decode boxes predicted in this stage, and those in ```bbox_target``` is prepared to generate ```bbox_target``` for next ```R-CNN``` stage. Note that we add ```stage``` field to specify the weight used by ```R-CNN```, as in **test phase** ```bbox_head_1st``` and ```bbox_head_2nd``` forward twice with different input feature. 19 | 20 | -------------------------------------------------------------------------------- /operator_cxx/contrib/bbox_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file bbox_norm.cu 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./bbox_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BBoxNormParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BBoxNormOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_batch_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Hang Zhang 24 | */ 25 | 26 | #include "sync_batch_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | template<> 31 | Operator *CreateOp(SyncBatchNormParam param, int dtype) { 32 | return new SyncBatchNorm(param); 33 | } 34 | 35 | } // namespace op 36 | } // namespace mxnet 37 | 38 | -------------------------------------------------------------------------------- /operator_cxx/contrib/focal_loss.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file focal_loss.cu 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./focal_loss-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(FocalLossParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new FocalLossOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/broadcast_scale.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file broadcast_scale.cu 22 | * \brief 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "./broadcast_scale-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BroadcastScaleParam param, int dtype) { 33 | Operator *op = NULL; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BroadcastScaleOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet 42 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sync_inplace_activation_batch_norm.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /*! 
20 | * Copyright (c) 2018 by Contributors 21 | * \file sync_inplace_activation_batch_norm.cc 22 | * \brief Synchronized BatchNorm modified from BatchNormV1 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "sync_inplace_activation_batch_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | template<> 31 | Operator *CreateOp(SyncInplaceABNParam param, int dtype) { 32 | return new SyncInplaceABN(param); 33 | } 34 | 35 | } // namespace op 36 | } // namespace mxnet 37 | 38 | -------------------------------------------------------------------------------- /models/FPG/README.md: -------------------------------------------------------------------------------- 1 | ## Feature Pyramid Grids & PAFPN 2 | 3 | This repository implements [**FPG**](https://arxiv.org/pdf/2004.03580.pdf) and [**PAFPN**](https://arxiv.org/abs/1803.01534) in the SimpleDet framework. 4 | 5 | ### Quick Start 6 | ```bash 7 | # train 8 | python detection_train.py --config config/FPG/faster_r50v1b_fpg6@128_syncbn_1x.py 9 | python detection_train.py --config config/pafpn/faster_r50v1b_pafpn3@256_syncbn_1x.py 10 | python detection_train.py --config config/pafpn/faster_r50v1b_pafpn3@384_syncbn_1x.py 11 | # test 12 | python detection_test.py --config config/FPG/faster_r50v1b_fpg6@128_syncbn_1x.py 13 | python detection_test.py --config config/pafpn/faster_r50v1b_pafpn3@256_syncbn_1x.py 14 | python detection_test.py --config config/pafpn/faster_r50v1b_pafpn3@384_syncbn_1x.py 15 | ``` 16 | 17 | ### Results 18 | 19 | | Detector | Pyramid | AP | AP50 | AP75 | APs | APm | APl | 20 | |----------|---------|----|------|------|-----|-----|-----| 21 | | Faster R50v1b | FPG 6@128 | 38.7 | 59.5 | 42.3 | 23.7 | 42.3 | 48.3| 22 | | Faster R50v1b | PAFPN 3@256 | 38.6 | 58.8 | 41.8 | 22.3 | 42.6 | 50.8 | 23 | | Faster R50v1b | PAFPN 3@384 | 39.4 | 59.9 | 42.8 | 23.9 | 43.2 | 50.9 | 24 | 25 | Note that SyncBN is only used in FPG neck but used in all BN layers under PAFPN settings according to the original papers. Besides, TDBU Neck in NASFPN folder is a special case of PAFPN with 3 stages and 384 channels, thus this setting is also appended in this config. -------------------------------------------------------------------------------- /operator_cxx/contrib/quantization_int8.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * Copyright (c) 2019 by Contributors 22 | * \file Quantization_int8.cu 23 | * \brief 24 | * \author Xiaotao Chen, Jingqiu Zhou, Ruize Hou 25 | */ 26 | #include "./quantization_int8-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(Quantization_int8Para param, int dtype) { 33 | Operator* op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new Quantization_int8Op(param); 36 | }); 37 | return op; 38 | } 39 | 40 | } // namespace op 41 | } // namespace mxnet -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ project files 2 | .idea 3 | *.iml 4 | out 5 | gen 6 | 7 | ### Vim template 8 | [._]*.s[a-w][a-z] 9 | [._]s[a-w][a-z] 10 | *.un~ 11 | Session.vim 12 | .netrwhist 13 | *~ 14 | 15 | ### IPythonNotebook template 16 | # Temporary data 17 | .ipynb_checkpoints/ 18 | 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib/ 38 | lib64/ 39 | parts/ 40 | sdist/ 41 | var/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *,cover 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | *.ipynb 80 | *.params 81 | *.json 82 | .vscode/ 83 | 84 | model 85 | model/ 86 | visimg 87 | mxnet/ 88 | mxnext/ 89 | data 90 | experiments 91 | pretrain_model 92 | !data/cache/coco_micro_test.roidb 93 | -------------------------------------------------------------------------------- /models/sepc/README.md: -------------------------------------------------------------------------------- 1 | ## Scale-Equalizing Pyramid Convolution for Object Detection 2 | This repository implements [Scale-Equalizing Pyramid Convolution for Object Detection](https://arxiv.org/abs/2005.03101) in the SimpleDet framework. 
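The pyramid convolution (PConv) at the core of SEPC applies a shared 3x3 convolution across neighbouring FPN levels and sums the results: the finer level is convolved with stride 2, the current level with stride 1, and the output from the coarser level is upsampled before being added. The sketch below only shows this cross-level wiring for a single output level; the names are illustrative, weight sharing and the integrated BN of SEPC are omitted, and it is not the implementation used in this repository.

```python
import mxnet as mx

def pconv_level(p_finer, p_cur, p_coarser, num_filter, prefix):
    """One output level of a pyramid convolution (PConv), SEPC-style."""
    out = mx.sym.Convolution(p_cur, num_filter=num_filter, kernel=(3, 3),
                             pad=(1, 1), name=prefix + "_same")
    if p_finer is not None:
        # the finer (higher-resolution) level is reduced with a stride-2 convolution
        out = out + mx.sym.Convolution(p_finer, num_filter=num_filter, kernel=(3, 3),
                                       stride=(2, 2), pad=(1, 1), name=prefix + "_down")
    if p_coarser is not None:
        # the coarser level is convolved and then upsampled back to the current resolution
        up = mx.sym.Convolution(p_coarser, num_filter=num_filter, kernel=(3, 3),
                                pad=(1, 1), name=prefix + "_up")
        out = out + mx.sym.UpSampling(up, scale=2, sample_type="nearest",
                                      name=prefix + "_upsample")
    return out
```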
3 | 4 | ### Qucik Start 5 | 6 | ```python 7 | # train 8 | python detection_train.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 9 | 10 | # test 11 | python detection_test.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 12 | ``` 13 | 14 | ### Performance and Model 15 | All AP results are reported on COCO val2017: 16 | 17 | Model | Backbone | Train Schedule | GPU | Image/GPU| Train MEM| Train Speed| FP16| Box AP | link | 18 | ---------- | --------- | --------- | ---------- | ---------| ----------| ----------| ---------| -----------| ----------- 19 | retinanet (baseline) | res50v1b | 1x | 8X 2080Ti |4|8653M | 44 img/s| yes|35.9 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGJfW59R3IEelhxv?e=Ob9y4W)| 20 | retinanet_pconv | res50v1b | 1x | 8X 2080Ti |4| 9111M|43 img/s | yes|37.2 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGPiw3cfqOWZkUAB?e=PIHppA)| 21 | retinanet_pconv+ibn | res50v1b | 1x | 8X 2080Ti|4 |9467M | 40 img/s| yes|37.6 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGayQr_1Ew-dhRfA?e=W2AXi6)| 22 | retinanet_sepclite | res50v1b | 1x | 8X 2080Ti |4| 9467M|36 img/s |yes|38.6 |[model](https://1drv.ms/u/s!AhNcLYzCx6CCjGTAbLT7_YXjq3GF?e=ZHfIqn) | 23 | retinanet_sepc | res50v1b | 1x | 8X 2080Ti |4| 9471M|25 img/s | yes|**39.7** | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGWO02qo_adoy8km?e=30H1sl)| 24 | -------------------------------------------------------------------------------- /models/sepc/readme.md: -------------------------------------------------------------------------------- 1 | ## Scale-Equalizing Pyramid Convolution for Object Detection 2 | This repository implements [Scale-Equalizing Pyramid Convolution for Object Detection](https://arxiv.org/abs/2005.03101) in the SimpleDet framework. 3 | 4 | ### Qucik Start 5 | 6 | ```python 7 | # train 8 | python detection_train.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 9 | 10 | # test 11 | python detection_test.py --config config/sepc/retina_r50v1b_fpn_sepc_1x.py 12 | ``` 13 | 14 | ### Performance and Model 15 | All AP results are reported on COCO val2017: 16 | 17 | Model | Backbone | Train Schedule | GPU | Image/GPU| Train MEM| Train Speed| FP16| Box AP | link | 18 | ---------- | --------- | --------- | ---------- | ---------| ----------| ----------| ---------| -----------| ----------- 19 | retinanet (baseline) | res50v1b | 1x | 8X 2080Ti |4|8653M | 44 img/s| yes|35.9 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGJfW59R3IEelhxv?e=Ob9y4W)| 20 | retinanet_pconv | res50v1b | 1x | 8X 2080Ti |4| 9111M|43 img/s | yes|37.2 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGPiw3cfqOWZkUAB?e=PIHppA)| 21 | retinanet_pconv+ibn | res50v1b | 1x | 8X 2080Ti|4 |9467M | 40 img/s| yes|37.6 | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGayQr_1Ew-dhRfA?e=W2AXi6)| 22 | retinanet_sepclite | res50v1b | 1x | 8X 2080Ti |4| 9467M|36 img/s |yes|38.6 |[model](https://1drv.ms/u/s!AhNcLYzCx6CCjGTAbLT7_YXjq3GF?e=ZHfIqn) | 23 | retinanet_sepc | res50v1b | 1x | 8X 2080Ti |4| 9471M|25 img/s | yes|**39.7** | [model](https://1drv.ms/u/s!AhNcLYzCx6CCjGWO02qo_adoy8km?e=30H1sl)| 24 | -------------------------------------------------------------------------------- /utils/roidb_to_coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | 4 | from pycocotools.coco import COCO 5 | from operator_py.detectron_bbox_utils import xyxy_to_xywh 6 | 7 | 8 | def roidb_to_coco(roidb): 9 | # The whole coco dataset 10 | dataset = { 11 | 'categories': [], 12 | 'images': [], 13 | 'annotations': [] 14 | } 15 | 16 | 
category_ids = set() 17 | obj_id = 0 18 | for roirec in roidb: 19 | dataset['images'].append({ 20 | 'id': roirec['im_id'], 21 | 'width': roirec['w'], 22 | 'height': roirec['h'] 23 | }) 24 | roirec['gt_bbox'] = xyxy_to_xywh(roirec['gt_bbox']) 25 | for bbox, cls in zip(roirec['gt_bbox'], roirec['gt_class']): 26 | x, y, h, w = bbox.tolist() 27 | dataset["annotations"].append({ 28 | 'area': h * w, 29 | 'bbox': [x, y, h, w], 30 | 'category_id': float(cls), 31 | 'id': obj_id, 32 | 'image_id': roirec['im_id'], 33 | 'iscrowd': 0 34 | }) 35 | obj_id += 1 36 | category_ids.add(float(cls)) 37 | for class_id in category_ids: 38 | dataset['categories'].append({ 39 | 'id': class_id, 40 | 'name': class_id, 41 | 'supercategory': 'none' 42 | }) 43 | 44 | with tempfile.NamedTemporaryFile(mode="w") as f: 45 | json.dump(dataset, f) 46 | f.flush() 47 | coco = COCO(f.name) 48 | 49 | return coco -------------------------------------------------------------------------------- /models/retinanet/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_metric import EvalMetricWithSummary 5 | 6 | 7 | class FGAccMetric(EvalMetricWithSummary): 8 | def __init__(self, name, output_names, label_names, threshold=0, **kwargs): 9 | super().__init__(name, output_names, label_names, **kwargs) 10 | self.thr = threshold 11 | 12 | def update(self, labels, preds): 13 | if len(preds) == 1 and len(labels) == 1: 14 | pred = preds[0] 15 | label = labels[0] 16 | elif len(preds) == 2: 17 | pred = preds[0] 18 | label = preds[1] 19 | else: 20 | raise Exception( 21 | "unknown loss output: len(preds): {}, len(labels): {}".format( 22 | len(preds), len(labels) 23 | ) 24 | ) 25 | 26 | label = label.asnumpy().astype('int32') 27 | keep_inds = np.where(label >= 1) 28 | 29 | # treat as foreground if score larger than threshold 30 | # select class with maximum score as prediction 31 | pred_score = pred.max(axis=-1) 32 | pred_label = pred.argmax(axis=-1) + 1 33 | if self.thr != 0: 34 | pred_label *= pred_score > self.thr 35 | 36 | pred_label = pred_label.asnumpy().astype('int32') 37 | 38 | pred_label = pred_label[keep_inds] 39 | label = label[keep_inds] 40 | 41 | self.sum_metric += np.sum(pred_label.flat == label.flat) 42 | self.num_inst += len(pred_label.flat) 43 | -------------------------------------------------------------------------------- /doc/TENSORBOARD.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | ### Setup tensorboard 4 | ```bash 5 | pip3 install mxboard tensorboard --user 6 | ``` 7 | 8 | ### Modify your config 9 | You need to import the `SummaryWriter` and pass it to your metric in the config. 
10 | 11 | ```python 12 | from mxboard import SummaryWriter 13 | 14 | # modify the logdir as you like 15 | sw = SummaryWriter(logdir="./tflogs", flush_secs=5) 16 | 17 | rpn_acc_metric = metric.AccWithIgnore( 18 | name="RpnAcc", 19 | output_names=["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], 20 | label_names=[], 21 | summary=sw 22 | ) 23 | rpn_l1_metric = metric.L1( 24 | name="RpnL1", 25 | output_names=["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], 26 | label_names=[], 27 | summary=sw 28 | ) 29 | box_acc_metric = metric.AccWithIgnore( 30 | name="RcnnAcc", 31 | output_names=["bbox_cls_loss_output", "bbox_label_blockgrad_output"], 32 | label_names=[], 33 | summary=sw 34 | ) 35 | box_l1_metric = metric.L1( 36 | name="RcnnL1", 37 | output_names=["bbox_reg_loss_output", "bbox_label_blockgrad_output"], 38 | label_names=[], 39 | summary=sw 40 | ) 41 | mask_cls_metric = SigmoidCELossMetric( 42 | name="MaskCE", 43 | output_names=["mask_loss_output"], 44 | label_names=[], 45 | summary=sw 46 | ) 47 | ``` 48 | 49 | 50 | ### Launch tensorborad on the shell 51 | ```bash 52 | # you can specify the logdir in your 53 | tensorboard --logdir tflogs > /dev/null 2>&1 & 54 | 55 | python detection_train --config path/to/your/config.py 56 | ``` 57 | 58 | Now open the web browser you can see the training curve like 59 | ![training_curve](image/tensorboard_screenshot.png) 60 | -------------------------------------------------------------------------------- /models/crowdhuman/README.md: -------------------------------------------------------------------------------- 1 | ## Crowdhuman Dataset 2 | 3 | This repository implements Faster-RCNN and [**Double Pred**](https://arxiv.org/abs/2003.09163) on [**CrowdHuman**](https://arxiv.org/abs/1805.00123) dataset in the SimpleDet framework. 4 | 5 | ### Quick Start 6 | 7 | #### 1. Prepare Crowdhuman Format Dataset 8 | ```bash 9 | # Ensure that the directory of crowdhuman dataset looks like: 10 | # data/crowdhuman 11 | # ---------/images/xxxx.jpg 12 | # ---------/annotations/xxxx.ogdt 13 | python utils/create_crowdhuman_roidb.py --dataset crowdhuman --dataset-split train --num-threads 45 14 | ``` 15 | 16 | #### 2. Train Model 17 | ```bash 18 | # train 19 | python detection_train.py --config config/crowdhuman/faster_r50v1b_fpn_1x.py 20 | python detection_train.py --config config/doublepred_r50v1b_fpn_1x.py 21 | python detection_train.py --config config/doublepred_r50v1b_fpn_1x_refine.py 22 | 23 | # test 24 | python detection_test.py --config config/crowdhuman/faster_r50v1b_fpn_1x.py 25 | python detection_test.py --config config/doublepred_r50v1b_fpn_1x.py 26 | python detection_test.py --config config/doublepred_r50v1b_fpn_1x_refine.py 27 | ``` 28 | 29 | ### Results on CrowdHuman 30 | 31 | | Detector | AP | MR | 32 | |----------|---------|----| 33 | | Faster R50v1b | 84.77 | 46.72 | 34 | | DoublePred R50v1b | 88.64 | 45.52 | 35 | | DoublePred R50v1b + Refine | 88.81 | 45.02 | 36 | 37 | Note that crowdhuman is different from COCO-like dataset, since it contains **ignore region**. We followed the procedure shared by Zheng Ge([Talk Link](https://www.bilibili.com/video/av455989666/)) by ignoring anchors in RPN and adding BN in FPN. A simple Toolkit to evaluate AP and MR with ignore region can refer to [here](https://github.com/Purkialo/CrowdDet/tree/master/lib/evaluate). 
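As a rough illustration of the "ignoring anchors in RPN" step mentioned above, the sketch below marks anchors that are mostly covered by an ignore region with label -1 so that they contribute neither as positives nor as negatives to the RPN loss. The helper name, the threshold and the -1 convention are assumptions made for illustration rather than the exact code behind this config.

```python
import numpy as np

def suppress_ignored_anchors(anchor_labels, ignore_overlap, thresh=0.5):
    """Set RPN anchor labels to -1 (don't care) when an anchor overlaps
    an ignore region by more than `thresh`.

    anchor_labels:  (N,) array with 1 = positive, 0 = negative
    ignore_overlap: (N,) max overlap of each anchor with any ignore region
    """
    labels = np.array(anchor_labels, dtype=np.int64, copy=True)
    labels[np.asarray(ignore_overlap) > thresh] = -1  # excluded from the RPN loss
    return labels
```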
-------------------------------------------------------------------------------- /models/crowdhuman/softmax_entropy_op.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | class SoftmaxEntropyOperator(mx.operator.CustomOp): 5 | def __init__(self): 6 | super().__init__() 7 | 8 | def forward(self, is_train, req, in_data, out_data, aux): 9 | data = in_data[0] 10 | label = in_data[1] 11 | 12 | num_reg_class = data.shape[-1] 13 | label = mx.nd.one_hot(label, depth=num_reg_class) 14 | 15 | data = mx.nd.softmax(data, axis=-1) 16 | loss = - label * mx.nd.log(data + 1e-10) 17 | self.assign(out_data[0], req[0], loss) 18 | 19 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 20 | data = in_data[0] 21 | label = in_data[1] 22 | 23 | batch_roi, num_reg_class = data.shape 24 | onehot_label = mx.nd.one_hot(label, depth=num_reg_class) 25 | 26 | d_grad = mx.nd.softmax(data, axis=-1) - onehot_label 27 | # since we directly backward grad from here, we need to normalize gradient right! 28 | d_grad *= out_grad[0] 29 | 30 | self.assign(in_grad[0], req[0], d_grad) 31 | self.assign(in_grad[1], req[1], mx.nd.zeros_like(label)) 32 | 33 | 34 | @mx.operator.register('softmax_entropy') 35 | class SoftmaxEntropyProp(mx.operator.CustomOpProp): 36 | def __init__(self): 37 | super().__init__(need_top_grad=True) 38 | 39 | def list_arguments(self): 40 | return ['data', 'label'] 41 | 42 | def list_outputs(self): 43 | return ['output'] 44 | 45 | def infer_shape(self, in_shape): 46 | return [in_shape[0], in_shape[1]], [in_shape[0]] 47 | 48 | def create_operator(self, ctx, shapes, dtypes): 49 | return SoftmaxEntropyOperator() -------------------------------------------------------------------------------- /utils/load_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | import glob 4 | 5 | 6 | def get_latest_ckpt_epoch(prefix): 7 | """ 8 | Get latest checkpoint epoch by prefix 9 | """ 10 | def get_checkpoint_epoch(prefix): 11 | return int(prefix[prefix.rfind('.params')-4:prefix.rfind('.params')]) 12 | 13 | checkpoints = glob.glob(prefix + '*.params') 14 | assert len(checkpoints), 'can not find params startswith {}'.prefix 15 | return max([get_checkpoint_epoch(x) for x in checkpoints]) 16 | 17 | 18 | def load_checkpoint(prefix, epoch): 19 | """ 20 | Load model checkpoint from file. 21 | :param prefix: Prefix of model name. 22 | :param epoch: Epoch number of model we would like to load. 23 | :return: (arg_params, aux_params) 24 | arg_params : dict of str to NDArray 25 | Model parameter, dict of name to NDArray of net's weights. 26 | aux_params : dict of str to NDArray 27 | Model parameter, dict of name to NDArray of net's auxiliary states. 
28 | """ 29 | print('load %s-%04d.params' % (prefix, epoch)) 30 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 31 | arg_params = {} 32 | aux_params = {} 33 | for k, v in save_dict.items(): 34 | tp, name = k.split(':', 1) 35 | if tp == 'arg': 36 | arg_params[name] = v 37 | if tp == 'aux': 38 | aux_params[name] = v 39 | return arg_params, aux_params 40 | 41 | 42 | def convert_context(params, ctx): 43 | """ 44 | :param params: dict of str to NDArray 45 | :param ctx: the context to convert to 46 | :return: dict of str of NDArray with context ctx 47 | """ 48 | new_params = dict() 49 | for k, v in params.items(): 50 | new_params[k] = v.as_in_context(ctx) 51 | return new_params 52 | 53 | -------------------------------------------------------------------------------- /models/RepPoints/README.md: -------------------------------------------------------------------------------- 1 | ## RepPoints 2 | 3 | This repository implements [**RepPoints**](https://arxiv.org/abs/1904.11490) in the SimpleDet framework. 4 | RPDet is a state-of-the-art anchor-free detector, utilizing a point set as the representation of objects in localization and recognition. 5 | 6 | ### Qucik Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/reppoints_moment_r50v1_fpn_1x.py 10 | 11 | # test 12 | python3 detection_test.py --config config/reppoints_moment_r50v1_fpn_1x.py 13 | ``` 14 | 15 | ### Models 16 | All AP results are reported on minival2014 of the [COCO dataset](http://cocodataset.org). 17 | 18 | |Method|Backbone|Transform|Schedule|AP (paper)|AP (re-impl)|Link| 19 | |------|--------|---------|--------|----------|------------|----| 20 | |RepPoints|R50v1-FPN|MinMax|1x|38.2|38.0|[model](https://drive.google.com/open?id=1BNF7cLJDLgOUpSgQ3bcXm2iSHop5G3Rp)| 21 | |RepPoints|R50v1-FPN|Moment|1x|38.3|38.3|[model](https://drive.google.com/open?id=1q0mFJl0qG22Y6AlRQ95HSIFT0GKuQRLS)| 22 | |RepPoints|R101v1-FPN|Moment|2x|40.3|40.7|[model](https://drive.google.com/open?id=1dslqEcvlPh-8NoRhU--7ypan7XnAP_S5)| 23 | |RepPoints|R101v1b-FPN-DCNv1|Moment|2x, multi-scale training & testing|-|46.4|[model](https://drive.google.com/open?id=1SreAuNE7ILXcBx8_-NHyftZTgS94kzO6)| 24 | |RepPoints|R101v1b-FPN-DCNv2|Moment|2x, multi-scale training & testing|-|47.0|[model](https://drive.google.com/open?id=14GFKGeXU9FVBFDQUS-4jlH2raSLzt8Zd)| 25 | 26 | ### Reference 27 | ``` 28 | @inproceedings{yang2019reppoints, 29 | title={RepPoints: Point Set Representation for Object Detection}, 30 | author={Yang, Ze and Liu, Shaohui and Hu, Han and Wang, Liwei and Lin, Stephen}, 31 | booktitle={The IEEE International Conference on Computer Vision (ICCV)}, 32 | month={Oct}, 33 | year={2019} 34 | } 35 | ``` 36 | -------------------------------------------------------------------------------- /models/FCOS/metric.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | class LossWithIgnore(mx.metric.EvalMetric): 5 | def __init__(self, name, output_names, label_names, ignore_label=-1): 6 | super().__init__(name, output_names, label_names) 7 | self.ignore_label = ignore_label 8 | 9 | def update(self, labels, preds): 10 | raise NotImplementedError 11 | 12 | class ClsAccWithIgnore(LossWithIgnore): 13 | def __init__(self, stride, name, output_names, label_names, ignore_label=-1): 14 | super().__init__(name, output_names, label_names, ignore_label) 15 | self.stride = stride 16 | 17 | def reset(self): 18 | self.sum_metric = 0 19 | self.num_inst = 0 20 | 21 | def 
update(self, labels, preds): 22 | pred = preds[:len(self.stride)] 23 | label = labels[len(self.stride):len(self.stride)*2] 24 | 25 | for i in range(len(self.stride)): 26 | self.sum_metric += mx.nd.sum(mx.nd.logical_and(pred[i] > 0.5, label[i])).asscalar() 27 | self.num_inst += mx.nd.sum(label[i]).asscalar() 28 | 29 | class LossMeter(mx.metric.EvalMetric): 30 | def __init__(self, stride, pred_id_start, pred_id_end, name='LossMeter'): 31 | self.stride = stride 32 | self.pred_id_start = pred_id_start 33 | self.pred_id_end = pred_id_end 34 | super(LossMeter, self).__init__(name=name) 35 | 36 | def reset(self): 37 | self.sum_metric = 0 38 | self.num_inst = 0 39 | 40 | def update(self, labels, preds): 41 | for i, pred in enumerate(preds[self.pred_id_start:self.pred_id_end]): 42 | if len(pred.shape) > 1: 43 | valid_pred = pred.mean().asnumpy() 44 | else: 45 | valid_pred = pred.asnumpy() 46 | 47 | self.sum_metric += valid_pred 48 | self.num_inst += 1 49 | -------------------------------------------------------------------------------- /models/retinanet/README.md: -------------------------------------------------------------------------------- 1 | ## RetinaNet 2 | 3 | This repository implements [**RetinaNet**](https://arxiv.org/abs/1708.02002) in the **SimpleDet** framework. RetinaNet is a state-of-the-art single-stage detector that uses focal loss to prevent the vast number of easy negatives from overwhelming the detector. 4 | 5 | ### How we build RetinaNet 6 | 7 | #### Input 8 | 9 | The pyramid label part of **RetinaNet** is similar to that of **Feature Pyramid Network**; you can refer to the [FPN README](../FPN/README.md). In addition, the label assignment method differs from that of **Faster R-CNN**, so we override ```_assign_label_to_anchor``` and ```apply``` of ```AnchorTarget2D``` in a subclass named ```PyramidAnchorTarget2DBase``` to obtain class-aware labels and avoid sampling RoIs. 10 | 11 | #### Operators 12 | 13 | - **bbox_norm**, passes data through unchanged in the forward pass and normalizes the gradient by the number of positive samples in the backward pass 14 | - **focal_loss**, acts the same as sigmoid in the forward pass and returns the focal loss gradient in the backward pass (see the sketch at the end of this README) 15 | - **decode_retina**, reuses the code from [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test_retinanet.py) to decode boxes and scores. Note that ```min_det_score``` is moved to ```RpnParam.proposal``` as it requires a different threshold for results from the **P7** level. 16 | 17 | #### Symbol 18 | 19 | - ```RetinaNet```, detector with RPN only 20 | - ```RetinaNetHead```, classification and regression head with shared weights 21 | - ```RetinaNetNeck```, top-down pathway for **FPN** in **RetinaNet** 22 | 23 | #### Config 24 | 25 | - ```min_det_score``` in ```TestParam``` is set to 0 to remove the appended boxes with zero scores 26 | - To avoid sharing parameters of the same config field across pyramid levels, e.g. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass an ```AnchorTarget2DParam``` instance rather than the class for anchor generation.
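The following is a minimal NumPy sketch of the forward/backward contract of ```bbox_norm``` and ```focal_loss``` described above, assuming default ```alpha```/```gamma``` values and ignoring the ignore-label handling of the real operators; it is an illustration, not the actual operator implementation.

```python
import numpy as np

def focal_loss_like(x, label, alpha=0.25, gamma=2.0):
    """Forward: plain sigmoid scores. Backward: gradient of the focal loss w.r.t. x."""
    p = np.clip(1.0 / (1.0 + np.exp(-x)), 1e-12, 1.0 - 1e-12)
    # d/dx of -alpha * (1 - p)^gamma * log(p) for positives and
    # d/dx of -(1 - alpha) * p^gamma * log(1 - p) for negatives, with p = sigmoid(x)
    grad_pos = alpha * (1.0 - p) ** gamma * (gamma * p * np.log(p) + p - 1.0)
    grad_neg = (1.0 - alpha) * p ** gamma * (p - gamma * (1.0 - p) * np.log(1.0 - p))
    return p, np.where(label > 0, grad_pos, grad_neg)

def bbox_norm_like(in_grad, label):
    """Forward passes data through unchanged; backward divides the gradient by the number of positives."""
    num_pos = max(int((label > 0).sum()), 1)
    return in_grad / num_pos
```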
27 | -------------------------------------------------------------------------------- /utils/patch_config.py: -------------------------------------------------------------------------------- 1 | import types 2 | import inspect 3 | 4 | 5 | class NoThrowBase: 6 | def __getattr__(self, item): 7 | return None 8 | 9 | 10 | class NoThrowMeta(type): 11 | def __getattr__(self, item): 12 | return None 13 | 14 | 15 | def patch_config_as_nothrow(instance): 16 | if "NoThrow" in [instance.__name__, instance.__class__.__name__]: 17 | return instance 18 | 19 | if type(instance) == type: 20 | instance = types.new_class(instance.__name__ + "NoThrow", (instance, ), dict(metaclass=NoThrowMeta)) 21 | for (k, v) in inspect.getmembers(instance): 22 | if not k.startswith("__") and type(v) == type: 23 | type.__setattr__(instance, k, patch_config_as_nothrow(v)) 24 | else: 25 | for (k, v) in inspect.getmembers(instance.__class__): 26 | if not k.startswith("__") and type(v) == type: 27 | type.__setattr__(instance.__class__, k, patch_config_as_nothrow(v)) 28 | instance.__class__ = type(instance.__class__.__name__ + "NoThrow", (instance.__class__, NoThrowBase), {}) 29 | 30 | return instance 31 | 32 | 33 | if __name__ == "__main__": 34 | class A: 35 | a = 1 36 | 37 | A = patch_config_as_nothrow(A) 38 | assert A.non_exist is None 39 | assert A.a == 1 40 | 41 | class B: 42 | b = 1 43 | class B1: 44 | b1 = 2 45 | 46 | B = patch_config_as_nothrow(B) 47 | assert B.non_exist is None 48 | assert B.B1.non_exist is None 49 | assert B.b == 1 50 | assert B.B1.b1 == 2 51 | 52 | class B: 53 | b = 1 54 | class B1: 55 | b1 = 2 56 | def b1f(): 57 | return 3 58 | 59 | b = B() 60 | b = patch_config_as_nothrow(b) 61 | assert b.non_exist is None 62 | assert b.B1.non_exist is None 63 | assert b.b == 1 64 | assert b.B1.b1 == 2 65 | assert b.B1.b1f() == 3 66 | -------------------------------------------------------------------------------- /scripts/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 2 ]; then 4 | echo "usage: $0 config_path comma_separated_worker_hostnames" 5 | exit -1 6 | fi 7 | 8 | conffile=$1 9 | hosts=$2 10 | 11 | # extract worker and check reachablity 12 | IFS=, read -r -a host_array <<< $hosts 13 | for host in ${host_array[@]}; do 14 | # check reachability 15 | echo "check reachability of $host" 16 | ssh -q $host exit 17 | if [ $? -ne 0 ]; then 18 | echo "$host is not reachable" 19 | exit -1 20 | fi 21 | 22 | # check availablity (retreat if remote host is in use) 23 | echo "check availability of $host" 24 | for x in $(ssh $host nvidia-smi --query-gpu=utilization.gpu --format=csv,nounits,noheader); do 25 | x="${x//[$'\t\r\n ']}" # remove trailing whitespace 26 | if [ $x -gt 10 ]; then 27 | echo "$host has gpu utilization of $x%"; 28 | exit -1 29 | fi; 30 | done 31 | 32 | # cleanup potentially dead python process (march since we checked it) 33 | ssh -q $host pkill python 34 | done 35 | 36 | gpucount=8 37 | num_node=${#host_array[@]} 38 | num_servers=${num_node} 39 | root_dir="/mnt/tscpfs/yuntao.chen/simpledet/simpledet_open" 40 | sync_dir="/tmp/simpledet_sync" 41 | singularity_image=/mnt/tscpfs/yuntao.chen/simpledet.img 42 | 43 | # check existence of config file 44 | if [ ! 
-f ${conffile} ]; then 45 | echo "${conffile} does not exsit" 46 | exit -1 47 | fi 48 | 49 | # dump hosts in a hostfile for launch.py 50 | IFS=, 51 | output="" 52 | for id in $hosts 53 | do output+="${id}\n" 54 | done 55 | unset IFS 56 | echo -e ${output::-2} > scripts/hosts.txt 57 | sleep 1 58 | 59 | logfile=${conffile#config/} 60 | logfile=${logfile%.py} 61 | 62 | export DMLC_INTERFACE=eth0 63 | python -u /mnt/tscpfs/yuntao.chen/dist-mxnet/tools/launch.py \ 64 | -n ${num_node} \ 65 | -s ${num_servers} \ 66 | --launcher ssh \ 67 | -H scripts/hosts.txt \ 68 | scripts/dist_worker.sh ${root_dir} ${singularity_image} ${conffile} \ 69 | 2>&1 | tee -a ${root_dir}/log/${logfile}.log 70 | -------------------------------------------------------------------------------- /models/sepc/sepc_dconv.py: -------------------------------------------------------------------------------- 1 | import mxnext as X 2 | import mxnet as mx 3 | from mxnext import conv, relu 4 | 5 | 6 | def DeformConv(x, offset, name, out_channels, kernel_size, stride=1, padding=0, dilation=1, 7 | groups=1, deformable_groups=1, no_bias=False, weight=None, bias=None): 8 | assert weight is not None 9 | if not no_bias: 10 | assert bias is not None 11 | assert out_channels % groups == 0, 'out_channels {} cannot be divisible by groups {}'.format(out_channels, groups) 12 | out = mx.sym.contrib.DeformableConvolution( 13 | x, offset, weight=weight, bias=bias if not no_bias else None, kernel=(kernel_size,kernel_size), 14 | stride=(stride,stride), dilate=(dilation,dilation), pad=(padding,padding), num_filter=out_channels, 15 | num_group=groups, num_deformable_group=deformable_groups, no_bias=no_bias, name=name) 16 | return out 17 | 18 | 19 | def sepc_conv(x, name, out_channels, kernel_size, i, stride=1, padding=0, dilation=1, 20 | groups=1, deformable_groups=1, part_deform=False, start_level=1, 21 | weight=None, bias=None, weight_offset=None, bias_offset=None): 22 | assert weight is not None and bias is not None 23 | if part_deform: 24 | assert weight_offset is not None and bias_offset is not None 25 | if i < start_level or not part_deform: 26 | return conv(x, name, filter=out_channels, kernel=kernel_size, stride=stride, pad=kernel_size//2, 27 | dilate=dilation, num_group=groups, no_bias=False, weight=weight, bias=bias) 28 | offset = conv(x, name+'offset', filter=deformable_groups*2*kernel_size*kernel_size, kernel=kernel_size, stride=stride, 29 | pad=kernel_size//2, dilate=dilation, num_group=groups, no_bias=False, weight=weight_offset, bias=bias_offset) 30 | return DeformConv(x, offset, name, out_channels, kernel_size, stride, padding=padding, dilation=dilation, 31 | groups=groups, deformable_groups=deformable_groups, no_bias=False, weight=weight, bias=bias) -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # install dependency 5 | sudo apt update && sudo apt install -y git wget make python3-dev libglib2.0-0 libsm6 libxext6 libxrender-dev unzip 6 | 7 | # create conda env 8 | conda create -n simpledet python=3.7 9 | conda activate simpledet 10 | 11 | # fetch CUDA environment 12 | conda install cudatoolkit=10.1 13 | 14 | # install python dependency 15 | pip install 'matplotlib<3.1' opencv-python pytz 16 | 17 | # download and intall pre-built wheel for CUDA 10.1 18 | pip install https://1dv.alarge.space/mxnet_cu101-1.6.0b20190820-py2.py3-none-manylinux1_x86_64.whl 19 | 20 | # install 
pycocotools 21 | pip install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' 22 | 23 | # install mxnext, a wrapper around MXNet symbolic API 24 | pip install 'git+https://github.com/RogerChern/mxnext#egg=mxnext' 25 | 26 | # get simpledet 27 | git clone https://github.com/tusimple/simpledet 28 | cd simpledet 29 | make 30 | 31 | # make data dir 32 | mkdir -p data/coco/images data/src 33 | 34 | # skip this if you have the zip files 35 | wget -c http://images.cocodataset.org/zips/train2017.zip -O data/src/train2017.zip 36 | wget -c http://images.cocodataset.org/zips/val2017.zip -O data/src/val2017.zip 37 | wget -c http://images.cocodataset.org/zips/test2017.zip -O data/src/test2017.zip 38 | wget -c http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O data/src/annotations_trainval2017.zip 39 | wget -c http://images.cocodataset.org/annotations/image_info_test2017.zip -O data/src/image_info_test2017.zip 40 | 41 | unzip data/src/train2017.zip -d data/coco/images 42 | unzip data/src/val2017.zip -d data/coco/images 43 | unzip data/src/test2017.zip -d data/coco/images 44 | unzip data/src/annotations_trainval2017.zip -d data/coco 45 | unzip data/src/image_info_test2017.zip -d data/coco 46 | 47 | python utils/create_coco_roidb.py --dataset coco --dataset-split train2017 48 | python utils/create_coco_roidb.py --dataset coco --dataset-split val2017 49 | python utils/create_coco_roidb.py --dataset coco --dataset-split test-dev2017 50 | -------------------------------------------------------------------------------- /operator_cxx/contrib/focal_loss.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file focal_loss.cc 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./focal_loss-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(FocalLossParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new FocalLossOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *FocalLossProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(FocalLossParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_FocalLoss, FocalLossProp) 52 | .describe("Focal loss for dense object detection") 53 | .add_argument("data", "NDArray-or-Symbol", "Data") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(FocalLossParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /operator_cxx/contrib/bbox_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file bbox_norm.cc 22 | * \brief 23 | * \author Chenxia Han 24 | */ 25 | 26 | #include "./bbox_norm-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BBoxNormParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BBoxNormOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *BBoxNormProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(BBoxNormParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BBoxNorm, BBoxNormProp) 52 | .describe("Normalize those boxes with positive label") 53 | .add_argument("data", "NDArray-or-Symbol", "Data to normalize") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(BBoxNormParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /operator_cxx/contrib/broadcast_scale.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * \file broadcast_scale.cc 22 | * \brief 23 | * \author Yuntao Chen 24 | */ 25 | 26 | #include "./broadcast_scale-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template<> 32 | Operator *CreateOp(BroadcastScaleParam param, int dtype) { 33 | Operator *op = nullptr; 34 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 35 | op = new BroadcastScaleOp(param); 36 | }); 37 | return op; 38 | } 39 | 40 | Operator *BroadcastScaleProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 41 | std::vector *in_type) const { 42 | std::vector out_shape, aux_shape; 43 | std::vector out_type, aux_type; 44 | CHECK(InferType(in_type, &out_type, &aux_type)); 45 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 46 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 47 | } 48 | 49 | DMLC_REGISTER_PARAMETER(BroadcastScaleParam); 50 | 51 | MXNET_REGISTER_OP_PROPERTY(_contrib_BroadcastScale, BroadcastScaleProp) 52 | .describe("Broadcast_scale to enable in-place scaling of tensor") 53 | .add_argument("data", "NDArray-or-Symbol", "Data") 54 | .add_argument("label", "NDArray-or-Symbol", "Label") 55 | .add_arguments(BroadcastScaleParam::__FIELDS__()); 56 | 57 | } // namespace op 58 | } // namespace mxnet 59 | -------------------------------------------------------------------------------- /models/NASFPN/README.md: -------------------------------------------------------------------------------- 1 | ## NAS-FPN 2 | 3 | This repository implements [**NAS-FPN**](https://arxiv.org/abs/1904.07392) in the SimpleDet framework. 4 | 5 | ### Qucik Start 6 | ```bash 7 | # train baseline retinanet following the setting of NAS-FPN 8 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_fpn_640_1@256_25epoch.py 9 | 10 | # train NAS-FPN 11 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_640_7@256_25epoch.py 12 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1024_7@256_25epoch.py 13 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_nasfpn_1280_7@384_25epoch.py 14 | 15 | # train hand-crafted neck 16 | python3 detection_train.py --config config/NASFPN/retina_r50v1b_tdbu_1280_3@384_25epoch.py 17 | ``` 18 | 19 | ### Results and Models 20 | All AP results are reported on test-dev of the [COCO dataset](http://cocodataset.org). 21 | 22 | |Model|InputSize|Backbone|Neck|Train Schedule|GPU|Image/GPU|FP16|Train MEM|Train Speed|Box AP(Mask AP)|Link| 23 | |-----|-----|--------|----|--------------|---|---------|----|---------|-----------|---------------|----| 24 | |RetinaNet|640|R50v1b-FPN|1@256|25 epoch|8X 1080Ti|8|yes|6.6G|85 img/s|37.4|[model](https://1dv.aflat.top/retina_r50v1b_fpn_640640_25epoch.zip)| 25 | |NAS-FPN|640|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|8|yes|7.8G|66 img/s|40.1|[model](https://1dv.aflat.top/retina_r50v1b_nasfpn_640640_25epoch.zip)| 26 | |NAS-FPN|1024|R50v1b-FPN|7@256|25 epoch|8X 1080Ti|4|yes|9.1G|17 img/s|44.2|[model](https://1dv.aflat.top/retina_r50v1b_nasfpn_1024_7%40256_25epoch.zip)| 27 | |NAS-FPN|1280|R50v1b-FPN|7@384|25 epoch|8X 1080Ti|2|yes|8.9G|10 img/s|45.3|[model](https://1dv.aflat.top/retina_r50v1b_nasfpn_1280_7%40384_25epoch.zip)| 28 | |TD-BU*|1280|R50v1b-FPN|3@384|25 epoch|8X 1080Ti|3|yes|10.5G|12 img/s|44.7|[model](https://1dv.aflat.top/retina_r50v1b_tdbu_1280_3%40384_25epoch.zip)| 29 | 30 | \* Short for TopDown-BottomUp neck which is highly symmetric proposed by Zehao. 
31 | ### Reference 32 | ``` 33 | @inproceedings{ghiasi2019fpn, 34 | title={NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection}, 35 | author={Ghiasi, Golnaz and Lin, Tsung-Yi and Pang, Ruoming and Le, Quoc V}, 36 | booktitle={CVPR}, 37 | year={2019} 38 | } 39 | ``` 40 | -------------------------------------------------------------------------------- /models/FPN/README.md: -------------------------------------------------------------------------------- 1 | ## Feature Pyramid Networks for Object Detection 2 | 3 | Here we introduce how [**Feature Pyramid Network**](https://arxiv.org/abs/1612.03144) is built in the **simpledet** framework. The following sections explain the implementation in detail. 4 | 5 | #### AnchorTarget 6 | 7 | Since **FPN** uses a **Feature Pyramid** as its backbone, we cannot directly use ```AnchorTarget2D```, which only generates anchor targets for the single stride declared in ```RpnParam```. Instead, we implement ```PyramidAnchorTarget2D``` to create a list of ```AnchorTarget2D``` instances, each generating anchor targets for a single pyramid stride, and then collect them together. More specifically, we create one instance per pyramid stride to generate anchors. To collect anchors from different pyramid levels, we override the ```v_all_anchor``` and ```h_all_anchor``` properties, which return the concatenation of anchors from different levels, and assign them to the primary instance. We also override the ```apply``` function to obtain labels, sampled anchors, targets and weights from the primary instances, then split and concatenate them along a certain axis. 8 | 9 | #### Operators 10 | 11 | - **get_top_proposal**, since **FPN** has multi-scale proposals, we concatenate the multi-scale proposals together and keep the top-K proposals for RoIPooling or RoIAlign 12 | - **assign_layer_fpn**, **FPN** assigns the proposals to target levels (P2, P3, P4, P5) according to their areas, so we use this operator to assign a feature level to each proposal (a sketch of this rule is given at the end of this README) 13 | 14 | 15 | #### Symbol 16 | 17 | - ```Detector```, the detector is the same as FasterRcnn 18 | - ```FPNNeck```, top-down pathway for **Feature Pyramid Network** 19 | - ```FPNRpnHead```, classification and regression head with shared weights for FPN-RPN 20 | - ```FPNRoiAlign```, we use this module to extract RoI features for the proposals of each level respectively, then add the features from the different levels together for the subsequent R-CNN head (each proposal is non-zero at only its assigned level, so the add acts as a selection) 21 | 22 | #### Config 23 | 24 | - ```TestParam``` is the same as the setting in [**Detectron**](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md) 25 | - To avoid sharing parameters of the same config field across pyramid levels, e.g. ```stride```, we move ```stride```, ```long``` and ```short``` to ```__init__```, and pass an ```AnchorTarget2DParam``` instance rather than the class for anchor generation.
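The area-based rule used by ```assign_layer_fpn``` can be sketched as below. The canonical scale and level (224 and 4) are the common FPN defaults and are configurable through ```roi_canonical_scale``` / ```roi_canonical_level```, so treat the constants here as assumptions rather than the exact values of any particular config.

```python
import numpy as np

def assign_fpn_level(rois, strides=(4, 8, 16, 32), canonical_scale=224, canonical_level=4):
    """Map each RoI (x1, y1, x2, y2) to a pyramid stride by its area."""
    w = rois[:, 2] - rois[:, 0] + 1
    h = rois[:, 3] - rois[:, 1] + 1
    scale = np.sqrt(w * h)
    k_min, k_max = np.log2(min(strides)), np.log2(max(strides))
    target_lvl = np.floor(canonical_level + np.log2(scale / canonical_scale + 1e-6))
    target_lvl = np.clip(target_lvl, k_min, k_max)
    return (2 ** target_lvl).astype(np.int32)  # stride of the assigned level

# e.g. a 112x112 RoI maps to stride 8 (P3), a 448x448 RoI to stride 32 (P5)
```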
26 | -------------------------------------------------------------------------------- /models/KD/utils.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | import mxnet as mx 3 | 4 | from core.detection_module import DetModule 5 | from utils.load_model import load_checkpoint 6 | 7 | 8 | def create_teacher_module(pTeacherModel, worker_data_shape, input_batch_size, ctx, rank, logger): 9 | t_prefix = pTeacherModel.prefix 10 | t_epoch = pTeacherModel.epoch 11 | t_endpoint = pTeacherModel.endpoint 12 | t_data_name = pTeacherModel.data_name 13 | t_label_name = pTeacherModel.label_name 14 | if rank == 0: 15 | logger.info('Building teacher module with endpoint: {}'.format(t_endpoint)) 16 | t_sym = pTeacherModel.prefix + '-symbol.json' 17 | t_sym = mx.sym.load(t_sym) 18 | t_sym = mx.sym.Group([t_sym.get_internals()[out] for out in t_endpoint]) 19 | t_worker_data_shape = {key: worker_data_shape[key] for key in t_data_name} 20 | _, t_out_shape, _ = t_sym.infer_shape(**t_worker_data_shape) 21 | t_terminal_out_shape_dict = zip(t_sym.list_outputs(), t_out_shape) 22 | t_data_shape = [] 23 | for idx, data_name in enumerate(t_data_name): 24 | data_shape = t_worker_data_shape[data_name] 25 | data_shape = (input_batch_size,) + data_shape[1:] 26 | t_data_shape.append((data_name, data_shape)) 27 | t_label_shape = [] 28 | for idx, label_name in enumerate(t_label_name): 29 | label_shape = t_out_shape[idx] 30 | label_shape = (input_batch_size,) + label_shape[1:] 31 | t_label_shape.append((label_name, label_shape)) 32 | if rank == 0: 33 | logger.info('Teacher data_name: {}'.format(t_data_name)) 34 | logger.info('Teacher data_shape: {}'.format(t_data_shape)) 35 | logger.info('Teacher label_name: {}'.format(t_label_name)) 36 | logger.info('Teacher label_shape: {}'.format(t_label_shape)) 37 | 38 | if rank == 0: 39 | logger.info('Teacher terminal output shape') 40 | logger.info(pprint.pformat([i for i in t_terminal_out_shape_dict])) 41 | t_arg_params, t_aux_params = load_checkpoint(t_prefix, t_epoch) 42 | t_mod = DetModule(t_sym, data_names=t_data_name, label_names=None, 43 | logger=logger, context=ctx) 44 | t_mod.bind(data_shapes=t_data_shape, for_training=False, grad_req='null') 45 | t_mod.set_params(t_arg_params, t_aux_params) 46 | if rank == 0: 47 | logger.info('Finish teacher module build') 48 | return t_mod, t_label_name, t_label_shape -------------------------------------------------------------------------------- /models/TSD/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mxnet as mx 4 | import mxnext as X 5 | 6 | class FasterRcnn_TSD(object): 7 | _rpn_output = None 8 | 9 | def __init__(self): 10 | pass 11 | 12 | @classmethod 13 | def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 14 | gt_bbox = X.var("gt_bbox") 15 | im_info = X.var("im_info") 16 | 17 | rpn_feat = backbone.get_rpn_feature() 18 | rcnn_feat = backbone.get_rcnn_feature() 19 | rpn_feat = neck.get_rpn_feature(rpn_feat) 20 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 21 | 22 | rpn_head.get_anchor() 23 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 24 | proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info) 25 | 26 | roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal) 27 | bbox_loss = bbox_head.get_loss(proposal, roi_feat, rcnn_feat, bbox_cls, bbox_target, bbox_weight, gt_bbox) 28 | 29 | 
return X.group(rpn_loss + bbox_loss) 30 | 31 | @classmethod 32 | def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 33 | rec_id, im_id, im_info, proposal, proposal_score = \ 34 | FasterRcnn_TSD.get_rpn_test_symbol(backbone, neck, rpn_head) 35 | 36 | rcnn_feat = backbone.get_rcnn_feature() 37 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 38 | 39 | roi_feat = roi_extractor.get_roi_feature_test(rcnn_feat, proposal) 40 | 41 | preds = bbox_head.get_prediction(proposal, roi_feat, rcnn_feat, im_info) 42 | 43 | return X.group([rec_id, im_id, im_info] + list(preds)) 44 | 45 | @classmethod 46 | def get_rpn_test_symbol(cls, backbone, neck, rpn_head): 47 | if cls._rpn_output is not None: 48 | return cls._rpn_output 49 | 50 | im_info = X.var("im_info") 51 | im_id = X.var("im_id") 52 | rec_id = X.var("rec_id") 53 | 54 | rpn_head.get_anchor() 55 | rpn_feat = backbone.get_rpn_feature() 56 | rpn_feat = neck.get_rpn_feature(rpn_feat) 57 | 58 | (proposal, proposal_score) = rpn_head.get_all_proposal(rpn_feat, im_info) 59 | 60 | cls._rpn_output = X.group([rec_id, im_id, im_info, proposal, proposal_score]) 61 | return cls._rpn_output 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /operator_cxx/contrib/quantization_int8.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * Copyright (c) 2019 by Contributors 22 | * \file Quantization_int8.cc 23 | * \brief 24 | * \author Xiaotao Chen, Jingqiu Zhou, Ruize Hou 25 | */ 26 | 27 | #include "./quantization_int8-inl.h" 28 | 29 | #include 30 | 31 | namespace mxnet { 32 | namespace op { 33 | 34 | template<> 35 | Operator *CreateOp(Quantization_int8Para param, int dtype) { 36 | Operator* op = nullptr; 37 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 38 | op = new Quantization_int8Op(param); 39 | }); 40 | return op; 41 | } 42 | 43 | Operator *Quantization_int8Prop::CreateOperatorEx(Context ctx, std::vector *in_shape, 44 | std::vector *in_type) const { 45 | std::vector out_shape, aux_shape; 46 | std::vector out_type, aux_type; 47 | CHECK(InferType(in_type, &out_type, &aux_type)); 48 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 49 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 50 | } 51 | 52 | DMLC_REGISTER_PARAMETER(Quantization_int8Para); 53 | 54 | MXNET_REGISTER_OP_PROPERTY(_contrib_Quantization_int8, Quantization_int8Prop) 55 | .describe(R"code(perform simulated int8 quatization)code" ADD_FILELINE) 56 | .add_argument("data", "NDArray-or-Symbol", "Input data to activation function.") 57 | .add_argument("minmax", "NDArray-or-Symbol", "minmax array") 58 | .add_arguments(Quantization_int8Para::__FIELDS__()); 59 | 60 | } // namespace op 61 | } // namespace mxnet 62 | -------------------------------------------------------------------------------- /models/maskrcnn/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from pycocotools import mask as mask_util 5 | 6 | 7 | def expand_boxes(boxes, scale): 8 | """Expand an array of boxes by a given scale.""" 9 | w_half = (boxes[:, 2] - boxes[:, 0]) * .5 10 | h_half = (boxes[:, 3] - boxes[:, 1]) * .5 11 | x_c = (boxes[:, 2] + boxes[:, 0]) * .5 12 | y_c = (boxes[:, 3] + boxes[:, 1]) * .5 13 | 14 | w_half *= scale 15 | h_half *= scale 16 | 17 | boxes_exp = np.zeros(boxes.shape) 18 | boxes_exp[:, 0] = x_c - w_half 19 | boxes_exp[:, 2] = x_c + w_half 20 | boxes_exp[:, 1] = y_c - h_half 21 | boxes_exp[:, 3] = y_c + h_half 22 | 23 | return boxes_exp 24 | 25 | 26 | def segm_results(bbox_xyxy, cls, masks, im_h, im_w): 27 | # Modify from Detectron 28 | # To work around an issue with cv2.resize (it seems to automatically pad 29 | # with repeated border values), we manually zero-pad the masks by 1 pixel 30 | # prior to resizing back to the original image resolution. This prevents 31 | # "top hat" artifacts. We therefore need to expand the reference boxes by an 32 | # appropriate factor. 
33 | segms = [] 34 | M = masks.shape[-1] 35 | scale = (M + 2.0) / M 36 | ref_boxes = expand_boxes(bbox_xyxy, scale) 37 | ref_boxes = ref_boxes.astype(np.int32) 38 | padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) 39 | 40 | for ref_box_i, mask_i, cls_i in zip(ref_boxes, masks, cls): 41 | padded_mask[1:-1, 1:-1] = mask_i[cls_i, :, :] 42 | 43 | w = ref_box_i[2] - ref_box_i[0] + 1 44 | h = ref_box_i[3] - ref_box_i[1] + 1 45 | w = np.maximum(w, 1) 46 | h = np.maximum(h, 1) 47 | 48 | mask = cv2.resize(padded_mask, (w, h)) 49 | mask = np.array(mask > 0.5, dtype=np.uint8) 50 | im_mask = np.zeros((im_h, im_w), dtype=np.uint8) 51 | 52 | x_0 = max(ref_box_i[0], 0) 53 | x_1 = min(ref_box_i[2] + 1, im_w) 54 | y_0 = max(ref_box_i[1], 0) 55 | y_1 = min(ref_box_i[3] + 1, im_h) 56 | 57 | im_mask[y_0:y_1, x_0:x_1] = mask[ 58 | (y_0 - ref_box_i[1]):(y_1 - ref_box_i[1]), 59 | (x_0 - ref_box_i[0]):(x_1 - ref_box_i[0]) 60 | ] 61 | 62 | # Get RLE encoding used by the COCO evaluation API 63 | rle = mask_util.encode( 64 | np.array(im_mask[:, :, np.newaxis], order='F') 65 | )[0] 66 | segms.append(rle) 67 | segms = np.array(segms) 68 | return segms -------------------------------------------------------------------------------- /models/FPN/get_top_proposal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collect top proposals across all levels for FPN 3 | author: Yi Jiang, Chenxia Han 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class GetTopProposalOperator(mx.operator.CustomOp): 11 | def __init__(self, top_n): 12 | super().__init__() 13 | self.top_n = top_n 14 | 15 | def forward(self, is_train, req, in_data, out_data, aux): 16 | bboxes = in_data[0] 17 | scores = in_data[1] 18 | 19 | num_image = bboxes.shape[0] 20 | top_n = self.top_n 21 | top_bboxes = [] 22 | top_scores = [] 23 | 24 | for i in range(num_image): 25 | image_bboxes = bboxes[i] 26 | image_scores = scores[i] 27 | argsort_ind = mx.nd.argsort(image_scores[:,0], is_ascend=False) 28 | image_bboxes = image_bboxes[argsort_ind] 29 | image_bboxes = image_bboxes[:top_n] 30 | image_scores = image_scores[argsort_ind] 31 | image_scores = image_scores[:top_n] 32 | 33 | top_bboxes.append(image_bboxes) 34 | top_scores.append(image_scores) 35 | 36 | top_bboxes = mx.nd.stack(*top_bboxes) 37 | top_scores = mx.nd.stack(*top_scores) 38 | 39 | self.assign(out_data[0], req[0], top_bboxes) 40 | self.assign(out_data[1], req[1], top_scores) 41 | 42 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 43 | self.assign(in_grad[0], req[0], 0) 44 | self.assign(in_grad[1], req[1], 0) 45 | 46 | 47 | @mx.operator.register('get_top_proposal') 48 | class GetTopProposalProp(mx.operator.CustomOpProp): 49 | def __init__(self, top_n): 50 | super().__init__(need_top_grad=False) 51 | self.top_n = int(top_n) 52 | 53 | def list_arguments(self): 54 | return ['bbox', 'score'] 55 | 56 | def list_outputs(self): 57 | return ['bbox', 'score'] 58 | 59 | def infer_shape(self, in_shape): 60 | bbox_shape = in_shape[0] 61 | score_shape = in_shape[1] 62 | num_image = bbox_shape[0] 63 | 64 | top_bbox_shape = (num_image, self.top_n, 4) 65 | top_score_shape = (num_image, self.top_n, 1) 66 | 67 | return [bbox_shape, score_shape], \ 68 | [top_bbox_shape, top_score_shape] 69 | 70 | def create_operator(self, ctx, shapes, dtypes): 71 | return GetTopProposalOperator(self.top_n) 72 | 73 | def declare_backward_dependency(self, out_grad, in_data, out_data): 74 | return [] 75 | 
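# Usage sketch (an assumption for illustration, not part of the original file): the op
# registered above is invoked through mx.sym.Custom with inputs named after
# list_arguments(), e.g.
#
#   import mxnet as mx
#   bbox = mx.sym.Variable('bbox')    # (num_image, N, 4) proposals concatenated over levels
#   score = mx.sym.Variable('score')  # (num_image, N, 1) matching scores
#   out = mx.sym.Custom(bbox=bbox, score=score, op_type='get_top_proposal', top_n=1000)
#   top_bbox, top_score = out[0], out[1]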
-------------------------------------------------------------------------------- /utils/deprecated.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util comes from https://stackoverflow.com/questions/2536307/decorators-in-the-python-standard-lib-deprecated-specifically 3 | """ 4 | 5 | import functools 6 | import inspect 7 | import warnings 8 | 9 | string_types = (type(b''), type(u'')) 10 | 11 | 12 | def deprecated(reason): 13 | """ 14 | This is a decorator which can be used to mark functions 15 | as deprecated. It will result in a warning being emitted 16 | when the function is used. 17 | """ 18 | 19 | if isinstance(reason, string_types): 20 | 21 | # The @deprecated is used with a 'reason'. 22 | # 23 | # .. code-block:: python 24 | # 25 | # @deprecated("please, use another function") 26 | # def old_function(x, y): 27 | # pass 28 | 29 | def decorator(func1): 30 | 31 | if inspect.isclass(func1): 32 | fmt1 = "Call to deprecated class {name} ({reason})." 33 | else: 34 | fmt1 = "Call to deprecated function {name} ({reason})." 35 | 36 | @functools.wraps(func1) 37 | def new_func1(*args, **kwargs): 38 | warnings.simplefilter('always', DeprecationWarning) 39 | warnings.warn( 40 | fmt1.format(name=func1.__name__, reason=reason), 41 | category=DeprecationWarning, 42 | stacklevel=2 43 | ) 44 | warnings.simplefilter('default', DeprecationWarning) 45 | return func1(*args, **kwargs) 46 | 47 | return new_func1 48 | 49 | return decorator 50 | 51 | elif inspect.isclass(reason) or inspect.isfunction(reason): 52 | 53 | # The @deprecated is used without any 'reason'. 54 | # 55 | # .. code-block:: python 56 | # 57 | # @deprecated 58 | # def old_function(x, y): 59 | # pass 60 | 61 | func2 = reason 62 | 63 | if inspect.isclass(func2): 64 | fmt2 = "Call to deprecated class {name}." 65 | else: 66 | fmt2 = "Call to deprecated function {name}." 67 | 68 | @functools.wraps(func2) 69 | def new_func2(*args, **kwargs): 70 | warnings.simplefilter('always', DeprecationWarning) 71 | warnings.warn( 72 | fmt2.format(name=func2.__name__), 73 | category=DeprecationWarning, 74 | stacklevel=2 75 | ) 76 | warnings.simplefilter('default', DeprecationWarning) 77 | return func2(*args, **kwargs) 78 | 79 | return new_func2 80 | 81 | else: 82 | raise TypeError(repr(type(reason))) -------------------------------------------------------------------------------- /utils/contrib/edit_model_weight.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script allows you to edit the model weight from https://github.com/TuSimple/simpledet/blob/master/MODEL_ZOO.md 4 | for your own training. In this script, it assumes you don't change the layer of model, but only edit the content in 5 | the layer that models of MODEL_ZOO have. For example, you just change the training classes but not add or delete a 6 | unit of resnet. 7 | 8 | Example: Edit the weight from 80 + 1 classes to 3 + 1 classes training. 9 | - Train by your own configuration for one epoch, the configuration should have edited for 3 + 1 classes training. 10 | - Edit the constant in this file. 11 | - SIMPLEDET_WEIGHT_FOLDER the path to the weight folder you download 12 | - TRAINED_WEIGHT_FOLDER the path to the weight folder you need the shape 13 | - EDIT_KEY the key of layer which you want to edit the weight, you can show the key by 14 | print(arg_params_src), in this example, the key names are 15 | "bbox_cls_logit_weight", "bbox_cls_logit_bias" 16 | - Run the code! 
17 | 18 | Note: The newly generated model weight file will overwrite your original downloaded weight file; if you do not want this, 19 | you can edit the last line of the code. 20 | 21 | TODO: Before you run the code, you should train a model for one epoch and edit the code following the instructions above. 22 | """ 23 | 24 | import mxnet as mx 25 | import numpy as np 26 | import os 27 | 28 | # TODO: Edit the path. 29 | SIMPLEDET_WEIGHT_FOLDER = " " 30 | TRAINED_WEIGHT_FOLDER = " " 31 | 32 | # TODO: Edit the key names which you want to modify. 33 | EDIT_KEY = ["bbox_cls_logit_weight", "bbox_cls_logit_bias"] 34 | 35 | def change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst): 36 | for key in EDIT_KEY: 37 | arg_params_src[key] = arg_params_dst[key] 38 | return arg_params_src 39 | 40 | if __name__ == "__main__": 41 | sym, arg_params_src, aux_params = \ 42 | mx.model.load_checkpoint(os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint"), 6) 43 | _, arg_params_dst, _ = \ 44 | mx.model.load_checkpoint(os.path.join(TRAINED_WEIGHT_FOLDER, "checkpoint"), 1) 45 | 46 | # print(arg_params_src) to show the key names. 47 | # arg_params_src is the weight you want to change, downloaded from simpledet; 48 | # arg_params_dst is the weight whose shapes you need. 49 | arg_params = change_weight_by_copy_from_right_weight(arg_params_src, arg_params_dst) 50 | 51 | mx.model.save_checkpoint(os.path.join(SIMPLEDET_WEIGHT_FOLDER, "checkpoint"), 1, sym, arg_params, aux_params) 52 | -------------------------------------------------------------------------------- /models/maskrcnn/detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mxnext as X 4 | import mxnet as mx 5 | 6 | from symbol.detector import FasterRcnn, RpnHead 7 | from models.FPN.builder import FPNRpnHead 8 | 9 | from models.maskrcnn import bbox_post_processing 10 | from utils.patch_config import patch_config_as_nothrow 11 | 12 | 13 | class MaskRcnn(object): 14 | def __init__(self): 15 | pass 16 | 17 | @staticmethod 18 | def get_train_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head): 19 | gt_bbox = X.var("gt_bbox") 20 | gt_poly = X.var("gt_poly") 21 | im_info = X.var("im_info") 22 | 23 | rpn_feat = backbone.get_rpn_feature() 24 | rcnn_feat = backbone.get_rcnn_feature() 25 | rpn_feat = neck.get_rpn_feature(rpn_feat) 26 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 27 | 28 | rpn_head.get_anchor() 29 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 30 | proposal, bbox_cls, bbox_target, bbox_weight, mask_proposal, mask_target, mask_ind = \ 31 | rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, gt_poly, im_info) 32 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 33 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, mask_proposal) 34 | 35 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 36 | mask_loss = mask_head.get_loss(mask_roi_feat, mask_target, mask_ind) 37 | return X.group(rpn_loss + bbox_loss + mask_loss) 38 | 39 | @staticmethod 40 | def get_test_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor): 41 | rec_id, im_id, im_info, proposal, proposal_score = \ 42 | MaskRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 43 | 44 | rcnn_feat = backbone.get_rcnn_feature() 45 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 46 | 47 | roi_feat = 
bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 48 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 49 | 50 | post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy) 51 | 52 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy) 53 | mask = mask_head.get_prediction(mask_roi_feat) 54 | 55 | mask_score = mx.sym.ones((1, ), name='maskiou_prediction') 56 | return X.group([rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls, mask, mask_score]) 57 | 58 | @staticmethod 59 | def get_rpn_test_symbol(backbone, neck, rpn_head): 60 | return FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) -------------------------------------------------------------------------------- /models/msrcnn/maskiou_compute.py: -------------------------------------------------------------------------------- 1 | """ 2 | IoU convert Operator 3 | Compute MaskIoU Target Given feature, mask ratio, mask target and mask predict logits 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class MaskIoUComputeOperator(mx.operator.CustomOp): 11 | def __init__(self): 12 | super().__init__() 13 | 14 | def forward(self, is_train, req, in_data, out_data, aux): 15 | mask_pred_logits = in_data[0].asnumpy() 16 | mask_target = in_data[1].asnumpy() 17 | mask_ratio = in_data[2].asnumpy().reshape(-1, ) 18 | mask_inds = in_data[3].asnumpy().reshape(-1, ) 19 | 20 | mask_pred = np.array(mask_pred_logits > 0.5, dtype=np.bool) 21 | 22 | intersec = mask_target * mask_pred 23 | mask_pred_sum = np.sum(mask_pred, axis=(1, 2)) 24 | intersec_sum = np.sum(intersec, axis=(1, 2)) 25 | mask_target_sum = np.sum(mask_target, axis=(1, 2)).astype(np.float) 26 | mask_target_sum /= mask_ratio 27 | union = mask_target_sum + mask_pred_sum - intersec_sum 28 | union = np.maximum(union, 1) 29 | intersec_sum = np.maximum(intersec_sum, 0) 30 | iou = np.reshape(intersec_sum / union , (-1, 1)) 31 | 32 | positive_inds = np.where(mask_inds > 0)[0] 33 | weight_list = np.zeros_like(mask_inds) 34 | weight_list[positive_inds] = 1 35 | weight_list = weight_list.reshape(-1, 1) 36 | 37 | self.assign(out_data[0], req[0], iou) 38 | self.assign(out_data[1], req[1], weight_list) 39 | 40 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 41 | self.assign(in_grad[0], req[0], 0) 42 | self.assign(in_grad[1], req[1], 0) 43 | self.assign(in_grad[2], req[2], 0) 44 | 45 | @mx.operator.register('maskiou_compute') 46 | class MaskIoUComputeProp(mx.operator.CustomOpProp): 47 | def __init__(self): 48 | super().__init__(need_top_grad=False) 49 | 50 | def list_arguments(self): 51 | return ['mask_pred_logits', 'mask_target', 'mask_ratio', 'mask_inds'] 52 | 53 | def infer_shape(self, in_shape): 54 | mask_pred_logits_shape = in_shape[0] 55 | mask_target_shape = in_shape[1] 56 | mask_ratio_shape = in_shape[2] 57 | mask_ind_shape = in_shape[3] 58 | 59 | maskiou_target_shape = (mask_target_shape[0], 1) 60 | weight_shape = (mask_target_shape[0], 1) 61 | return [mask_pred_logits_shape, mask_target_shape, mask_ratio_shape, mask_ind_shape], [maskiou_target_shape, weight_shape] 62 | 63 | def list_outputs(self): 64 | return ['maskiou_target', 'weight_list'] 65 | 66 | def create_operator(self, ctx, shapes, dtypes): 67 | return MaskIoUComputeOperator() 68 | 69 | def declare_backward_dependency(self, out_grad, in_data, out_data): 70 | return [] 71 | 72 | -------------------------------------------------------------------------------- 
/operator_py/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_overlaps_cython( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ua = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) + 70 | box_area - iw * ih 71 | ) 72 | overlaps[n, k] = iw * ih / ua 73 | return overlaps -------------------------------------------------------------------------------- /models/FPN/assign_layer_fpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Assign Layer operator for FPN 3 | author: Yi Jiang, Chenxia Han 4 | """ 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | 10 | class AssignLayerFPNOperator(mx.operator.CustomOp): 11 | def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level): 12 | super().__init__() 13 | self.rcnn_stride = rcnn_stride 14 | self.roi_canonical_scale = roi_canonical_scale 15 | self.roi_canonical_level = roi_canonical_level 16 | 17 | def forward(self, is_train, req, in_data, out_data, aux): 18 | all_rois = in_data[0] 19 | 20 | rcnn_stride = self.rcnn_stride 21 | scale0 = self.roi_canonical_scale 22 | lvl0 = self.roi_canonical_level 23 | k_min = np.log2(min(rcnn_stride)) 24 | k_max = np.log2(max(rcnn_stride)) 25 | 26 | rois_area = (all_rois[:, :, 2] - all_rois[:, :, 0] + 1) \ 27 | * (all_rois[:, :, 3] - 
all_rois[:, :, 1] + 1) 28 | 29 | scale = mx.nd.sqrt(rois_area) 30 | target_lvls = mx.nd.floor(lvl0 + mx.nd.log2(scale / scale0 + 1e-6)) 31 | target_lvls = mx.nd.clip(target_lvls, k_min, k_max) 32 | target_stride = (2 ** target_lvls).astype('uint8') 33 | 34 | for i, s in enumerate(rcnn_stride): 35 | lvl_rois = mx.nd.zeros_like(all_rois) 36 | lvl_inds = mx.nd.expand_dims(target_stride == s, axis=2).astype('float32') 37 | lvl_inds = mx.nd.broadcast_like(lvl_inds, lvl_rois) 38 | lvl_rois = mx.nd.where(lvl_inds, all_rois, lvl_rois) 39 | 40 | self.assign(out_data[i], req[i], lvl_rois) 41 | 42 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 43 | self.assign(in_grad[0], req[0], 0) 44 | 45 | 46 | @mx.operator.register('assign_layer_fpn') 47 | class AssignLayerFPNProp(mx.operator.CustomOpProp): 48 | def __init__(self, rcnn_stride, roi_canonical_scale, roi_canonical_level): 49 | super().__init__(need_top_grad=False) 50 | self.rcnn_stride = eval(rcnn_stride) 51 | self.roi_canonical_scale = int(roi_canonical_scale) 52 | self.roi_canonical_level = int(roi_canonical_level) 53 | 54 | def list_arguments(self): 55 | return ['rois'] 56 | 57 | def list_outputs(self): 58 | rois_list = ['rois_s{}'.format(s) for s in self.rcnn_stride] 59 | return rois_list 60 | 61 | def infer_shape(self, in_shape): 62 | rpn_rois_shape = in_shape[0] 63 | 64 | output_rois_shape = [rpn_rois_shape] * len(self.rcnn_stride) 65 | 66 | return [rpn_rois_shape], output_rois_shape 67 | 68 | def create_operator(self, ctx, shapes, dtypes): 69 | return AssignLayerFPNOperator(self.rcnn_stride, self.roi_canonical_scale, 70 | self.roi_canonical_level) 71 | 72 | def declare_backward_dependency(self, out_grad, in_data, out_data): 73 | return [] 74 | -------------------------------------------------------------------------------- /operator_py/cython/bbox_self.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | # 16 | # Based on: 17 | # -------------------------------------------------------- 18 | # Fast R-CNN 19 | # Copyright (c) 2015 Microsoft 20 | # Licensed under The MIT License [see LICENSE for details] 21 | # Written by Sergey Karayev 22 | # -------------------------------------------------------- 23 | 24 | cimport cython 25 | import numpy as np 26 | cimport numpy as np 27 | 28 | DTYPE = np.float32 29 | ctypedef np.float32_t DTYPE_t 30 | 31 | @cython.boundscheck(False) 32 | def bbox_selfoverlaps_cython( 33 | np.ndarray[DTYPE_t, ndim=2] boxes, 34 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 35 | """ 36 | Parameters 37 | ---------- 38 | boxes: (N, 4) ndarray of float 39 | query_boxes: (K, 4) ndarray of float 40 | Returns 41 | ------- 42 | (N, k) ndarray of self overlap with query_boxes 43 | """ 44 | cdef unsigned int N = boxes.shape[0] 45 | cdef unsigned int K = query_boxes.shape[0] 46 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps_self = np.zeros((N, K), dtype=DTYPE) 47 | cdef DTYPE_t iw, ih, box_area 48 | cdef DTYPE_t ua, ub 49 | cdef unsigned int k, n 50 | with nogil: 51 | for k in range(K): 52 | box_area = ( 53 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 54 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 55 | ) 56 | for n in range(N): 57 | iw = ( 58 | min(boxes[n, 2], query_boxes[k, 2]) - 59 | max(boxes[n, 0], query_boxes[k, 0]) + 1 60 | ) 61 | if iw > 0: 62 | ih = ( 63 | min(boxes[n, 3], query_boxes[k, 3]) - 64 | max(boxes[n, 1], query_boxes[k, 1]) + 1 65 | ) 66 | if ih > 0: 67 | ub = float( 68 | (boxes[n, 2] - boxes[n, 0] + 1) * 69 | (boxes[n, 3] - boxes[n, 1] + 1) 70 | ) 71 | ua = float( 72 | ub + box_area - iw * ih 73 | ) 74 | overlaps_self[n, k] = iw * ih / ub 75 | return overlaps_self -------------------------------------------------------------------------------- /models/msrcnn/detector.py: -------------------------------------------------------------------------------- 1 | import mxnext as X 2 | import mxnet as mx 3 | 4 | from symbol.detector import FasterRcnn, RpnHead 5 | from models.FPN.builder import FPNRpnHead 6 | 7 | from models.maskrcnn import bbox_post_processing 8 | from models.msrcnn import maskiou_compute 9 | 10 | from utils.patch_config import patch_config_as_nothrow 11 | 12 | 13 | class MaskScoringRcnn(object): 14 | def __init__(self): 15 | pass 16 | 17 | @staticmethod 18 | def get_train_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, maskiou_head): 19 | gt_bbox = X.var("gt_bbox") 20 | gt_poly = X.var("gt_poly") 21 | im_info = X.var("im_info") 22 | 23 | rpn_feat = backbone.get_rpn_feature() 24 | rcnn_feat = backbone.get_rcnn_feature() 25 | rpn_feat = neck.get_rpn_feature(rpn_feat) 26 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 27 | 28 | rpn_head.get_anchor() 29 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 30 | proposal, bbox_cls, bbox_target, bbox_weight, mask_proposal, mask_target, mask_ind, mask_ratio = \ 31 | rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, gt_poly, im_info) 32 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 33 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, mask_proposal) 34 | 35 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 36 | mask_loss, mask_pred_logits = mask_head.get_loss(mask_roi_feat, mask_target, mask_ind) 37 | 38 | iou_loss = maskiou_head.get_loss(mask_roi_feat, mask_pred_logits, mask_target, mask_ind, mask_ratio) 39 | 
return X.group(rpn_loss + bbox_loss + mask_loss + iou_loss) 40 | 41 | @staticmethod 42 | def get_test_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor, maskiou_head): 43 | rec_id, im_id, im_info, proposal, proposal_score = \ 44 | MaskScoringRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 45 | 46 | rcnn_feat = backbone.get_rcnn_feature() 47 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 48 | 49 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 50 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 51 | 52 | post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy) 53 | 54 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy) 55 | mask = mask_head.get_prediction(mask_roi_feat) 56 | 57 | iou_pred = maskiou_head.get_maskiou_prediction(mask, mask_roi_feat, post_cls) 58 | 59 | return X.group([rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls, mask, iou_pred]) 60 | 61 | @staticmethod 62 | def get_rpn_test_symbol(backbone, neck, rpn_head): 63 | return FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) -------------------------------------------------------------------------------- /utils/contrib/data_to_coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This script allows you to transfer your own data from your own data format to coco format. 4 | 5 | Attention: This is not the official format, it does not require licenses and other redundant info, but can generate 6 | coco-like dataset which can be accepted by Simpledet. 7 | 8 | TODO: You should reimplement the code from line 31 to the end, this file only describe the format of dataset 9 | and the way to save it. 
10 | """ 11 | 12 | import json 13 | import sys 14 | 15 | def main(): 16 | if len(sys.argv) < 3: 17 | print("Usage: python data_to_coco.py infile outfile") 18 | exit(1) 19 | input_file = sys.argv[1] 20 | output_file = sys.argv[2] 21 | 22 | # The whole coco dataset 23 | dataset = { 24 | 'licenses': [], 25 | 'info': {}, 26 | 'categories': [], # Required 27 | 'images': [], # Required 28 | 'annotations': [] # Required 29 | } 30 | 31 | # TODO: class_map maps the class, which would be added into dataset['categories'] 32 | class_map = { 33 | "box": 1, 34 | "can": 2, 35 | "bottle": 3 36 | } 37 | for class_name, class_id in class_map.items(): 38 | dataset['categories'].append({ 39 | 'id': class_id, 40 | 'name': class_name, 41 | 'supercategory': 'supercategory_name' 42 | }) 43 | 44 | # TODO: Load your own data 45 | self_data_list = [] 46 | with open(input_file, 'r') as in_file: 47 | for line in in_file: 48 | self_data_list.append(json.loads(line)) 49 | 50 | # TODO: Dataset images info, normally you should implement an iter here to append the info 51 | dataset['images'].append({ 52 | 'coco_url': '', 53 | 'date_captured': '', 54 | 'file_name': '', # Required (str) image file name 55 | 'flickr_url': '', 56 | 'id': int(), # Required (int) id of image 57 | 'license': '', 58 | 'width': int(), # Required (int) width of image 59 | 'height': int() # Required (int) height of image 60 | }) 61 | 62 | # TODO: Dataset annotation info, normally you should implement an iter here to append the info 63 | dataset["annotations"].append({ 64 | 'area': int(), # Required (int) image area 65 | 'bbox': [int()] * 4, # Required (int) one of the image bboxes 66 | 'category_id': int(), # Required (int) class id of this bbox 67 | 'id': int(), # Required (int) bbox id in this image 68 | 'image_id': int(), # Required (int) image id of this bbox 69 | 'iscrowd': 0, # Optional, required only if you want to train for semantic segmentation 70 | 'segmentation': [] # Optional, required only if you want to train for semantic segmentation 71 | }) 72 | 73 | with open(output_file, 'w') as ofile: 74 | json.dump(dataset, ofile, sort_keys=True, indent=2) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /doc/BUILD_WHEEL.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | This document describes the process of packaging our custom mxnet as a python wheel for local installation. 3 | 4 | ### Platform 5 | In order to suppport CentOS 7, which is popular in production, we have to make the wheel largely comformant to `manylinux2014` tag of pypi. 6 | The `manylinux2014` tag specify the maximum version of some system library as 7 | ``` 8 | GLIBC_2.17 9 | CXXABI_1.3.7 10 | GLIBCXX_3.4.19 11 | GCC_4.8.5 12 | ``` 13 | Compiling the library under such restriction from a newer platform could be quite tricky, since this is essential cross-compiling. 14 | So here we complile from the Ubuntu 14.04 which is also `manylinux2014` comformant. 
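As a quick sanity check before building, the build host's glibc can be inspected from Python; this is only an illustrative snippet and not part of the repository:

```python
# illustrative check, not part of SimpleDet
import platform

libc, version = platform.libc_ver()
print(libc, version)  # e.g. ('glibc', '2.19') on Ubuntu 14.04

# manylinux2014 caps the required glibc at GLIBC_2.17 (see the table above);
# building on a host with a much newer glibc risks pulling in newer symbols.
```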
15 | 16 | ### Setup toolchains 17 | ```bash 18 | sudo apt-get update && \ 19 | sudo apt-get install -y git \ 20 | vim \ 21 | libcurl4-openssl-dev \ 22 | unzip \ 23 | gcc-4.8 \ 24 | g++-4.8 \ 25 | gfortran \ 26 | gfortran-4.8 \ 27 | binutils \ 28 | nasm \ 29 | libtool \ 30 | curl \ 31 | wget \ 32 | sudo \ 33 | gnupg \ 34 | gnupg2 \ 35 | gnupg-agent \ 36 | pandoc \ 37 | python3-pip \ 38 | automake \ 39 | pkg-config 40 | 41 | wget https://github.com/Kitware/CMake/releases/download/v3.15.2/cmake-3.15.2.tar.gz && \ 42 | tar xzf cmake-3.15.2.tar.gz && \ 43 | cd cmake-3.15.2 && \ 44 | ./configure && make install -j4 && cd .. && \ 45 | rm -r cmake-3.15.2 cmake-3.15.2.tar.gz 46 | 47 | # change the url to your repo link if you are doing PR 48 | export SIMPLEDET_URL=https://github.com/tusimple/simpledet 49 | git clone --recursive --depth=1 https://github.com/apache/incubator-mxnet /work/mxnet && \ 50 | cd /work/mxnet && \ 51 | git clone $SIMPLEDET_URL /work/simpledet && \ 52 | cp -r /work/simpledet/operator_cxx/* /work/mxnet/src/operator && \ 53 | git clone https://github.com/RogerChern/cocoapi /work/cocoapi && \ 54 | mkdir -p src/coco_api && \ 55 | cp -r /work/cocoapi/common src/coco_api && \ 56 | rm /work/mxnet/src/operator/nn/group_norm* && \ 57 | rm -r /work/cocoapi /work/simpledet 58 | ``` 59 | 60 | ### Compile `libmxnet.so` with static dependancy 61 | ``` 62 | cd /work/mxnet 63 | # remove sm_30 64 | sed -i 's/KNOWN_CUDA_ARCHS :=.*/KNOWN_CUDA_ARCHS := 35 50 60 70/' Makefile 65 | # change build config according to the target CUDA version 66 | tools/staticbuild/build.sh cu100 pip 67 | # tools/staticbuild/build.sh cu101 pip 68 | ``` 69 | 70 | ### Package wheel 71 | ``` 72 | cd /work/mxnet/tools/pip 73 | ln -s /work/mxnet mxnet-build 74 | 75 | # change the path according to the target CUDA version 76 | LD_LIBRARY_PATH=/work/mxnet/staticdeps/usr/local/cuda-10.0/lib64:/work/mxnet/staticdeps/usr/lib/x86_64-linux-gnu:/work/mxnet/staticdeps/usr/lib/nvidia-410 77 | # LD_LIBRARY_PATH=/work/mxnet/staticdeps/usr/local/cuda-10.1/lib64:/work/mxnet/staticdeps/usr/lib/x86_64-linux-gnu:/work/mxnet/staticdeps/usr/lib/nvidia-418 78 | export LD_LIBRARY_PATH 79 | mxnet_variant=CU100 python3 setup.py bdist_wheel 80 | # mxnet_variant=CU101 python3 setup.py bdist_wheel 81 | ``` 82 | 83 | The built wheel file is in `dist/` 84 | -------------------------------------------------------------------------------- /unittest/test_loader.py: -------------------------------------------------------------------------------- 1 | import pickle as pkl 2 | import unittest 3 | import mxnet as mx 4 | 5 | from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ 6 | ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ 7 | RenameRecord, AnchorTarget2D, AnchorLoader 8 | from config import detection_config 9 | 10 | 11 | class TestLoader(unittest.TestCase): 12 | 13 | def test_empty_v_loader(self): 14 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 15 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 16 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 17 | all_v_roidbs = [roidb for roidb in roidbs if roidb['h'] >= roidb['w']] 18 | 19 | loader = AnchorLoader( 20 | roidb=all_v_roidbs, 21 | transform=transform, 22 | data_name=data_name, 23 | label_name=label_name, 24 | batch_size=1, 25 | shuffle=True, 26 | num_worker=1, 27 | kv=mx.kvstore.create(pKv.kvstore) 28 | ) 29 | with self.assertRaises(StopIteration): 30 | while True: 31 | 
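                # keep pulling batches until the loader is exhausted and raises StopIteration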
data_batch = loader.next() 32 | 33 | def test_empty_h_loader(self): 34 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 35 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 36 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 37 | all_h_roidbs = [roidb for roidb in roidbs if roidb['h'] < roidb['w']] 38 | 39 | loader = AnchorLoader( 40 | roidb=all_h_roidbs, 41 | transform=transform, 42 | data_name=data_name, 43 | label_name=label_name, 44 | batch_size=1, 45 | shuffle=True, 46 | num_worker=1, 47 | kv=mx.kvstore.create(pKv.kvstore) 48 | ) 49 | with self.assertRaises(StopIteration): 50 | while True: 51 | data_batch = loader.next() 52 | 53 | def test_record_num(self): 54 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 55 | transform, data_name, label_name, metric_list = detection_config.get_config(is_train=True) 56 | roidbs = pkl.load(open("unittest/data/coco_micro_test.roidb", "rb"), encoding="latin1") 57 | batch_size = 4 58 | 59 | loader = AnchorLoader( 60 | roidb=roidbs, 61 | transform=transform, 62 | data_name=data_name, 63 | label_name=label_name, 64 | batch_size=batch_size, 65 | shuffle=True, 66 | num_worker=1, 67 | kv=mx.kvstore.create(pKv.kvstore) 68 | ) 69 | 70 | num_batch = 0 71 | while True: 72 | try: 73 | data_batch = loader.next() 74 | num_batch += 1 75 | except StopIteration: 76 | break 77 | self.assertEqual(batch_size * num_batch, loader.total_record) 78 | 79 | 80 | -------------------------------------------------------------------------------- /operator_cxx/contrib/axpy.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 
21 | * Copyright (c) 2018 by Contributors 22 | * \file axpy.cc 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Yuntao Chen 25 | */ 26 | #include "./axpy-inl.h" 27 | 28 | namespace mshadow { 29 | template 30 | inline void AxpyForwardLauncher(const Tensor &scale_data, 31 | const Tensor &x_data, 32 | const Tensor &y_data, 33 | const Tensor &out) { 34 | LOG(FATAL) << "NotImplemented"; 35 | } 36 | 37 | template 38 | inline void AxpyBackwardLauncher(const Tensor &scale_data, 39 | const Tensor &x_data, 40 | const Tensor &y_data, 41 | const Tensor &scale_grad, 42 | const Tensor &x_grad, 43 | const Tensor &y_grad, 44 | const Tensor &out_grad, 45 | Stream *s) { 46 | LOG(FATAL) << "NotImplemented"; 47 | } 48 | } // namespace mshadow 49 | 50 | namespace mxnet { 51 | namespace op { 52 | 53 | template<> 54 | Operator *CreateOp(AxpyParam param, int dtype) { 55 | Operator* op = NULL; 56 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 57 | op = new AxpyOp(param); 58 | }); 59 | return op; 60 | } 61 | 62 | Operator *AxpyProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 63 | std::vector *in_type) const { 64 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(AxpyParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_Axpy, AxpyProp) 70 | .describe(R"code(Accelerate Squeeze and Excitation Network)code" ADD_FILELINE) 71 | .add_argument("scale", "NDArray-or-Symbol", "channel scaling factor") 72 | .add_argument("x", "NDArray-or-Symbol", "resnet increase output") 73 | .add_argument("y", "NDArray-or-Symbol", "resnet shortcut output") 74 | .add_arguments(AxpyParam::__FIELDS__()); 75 | } // namespace op 76 | } // namespace mxnet 77 | -------------------------------------------------------------------------------- /utils/create_voc_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import glob 4 | import pickle 5 | import json 6 | import xml.etree.ElementTree as ET 7 | 8 | import numpy as np 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database for PASCAL VOC dataset') 13 | parser.add_argument('--data-dir', help='Path to VOC-like dataset', type=str) 14 | parser.add_argument('--label-map', help='A json file containing the map from class name to training id', 15 | type=str, default="data/label_map/voc_label_map.json") 16 | parser.add_argument('--split', help='Dataset split', type=str, default=None) 17 | 18 | args = parser.parse_args() 19 | with open(args.label_map) as f: 20 | label_map = json.load(f) 21 | return args.data_dir, label_map, args.split 22 | 23 | 24 | def create_roidb(data_dir, label_map, split): 25 | # sanity check 26 | if not os.path.exists(data_dir): 27 | raise Exception("{} is not accessible".format(data_dir)) 28 | for subdir in ["Annotations", "JPEGImages", "ImageSets"]: 29 | if not os.path.exists(os.path.join(data_dir, subdir)): 30 | raise Exception("{}/{} is not accessible".format(data_dir, subdir)) 31 | 32 | if split is not None: 33 | subset = set() 34 | with open("{}/ImageSets/Main/{}.txt".format(data_dir, split)) as f: 35 | for line in f: 36 | subset.add("{}/Annotations/{}.xml".format(data_dir, line.strip())) 37 | else: 38 | subset = glob.glob("{}/Annotations/*.xml".format(data_dir)) 39 | 40 | roidb = [] 41 | for i, anno_name in enumerate(sorted(subset)): 42 | tree = ET.parse(anno_name) 43 | root = tree.getroot() 44 | h = int(root.find("size/height").text) 45 | w = 
int(root.find("size/width").text) 46 | filename = root.find("filename").text 47 | image_url = os.path.abspath(os.path.join(data_dir, "JPEGImages", filename)) 48 | assert os.path.exists(image_url) 49 | im_id = i 50 | gt_class, gt_bbox = list(), list() 51 | for obj in root.findall("object"): 52 | gt_class.append(label_map[obj.find("name").text]) 53 | x1 = float(obj.find("bndbox/xmin").text) 54 | y1 = float(obj.find("bndbox/ymin").text) 55 | x2 = float(obj.find("bndbox/xmax").text) 56 | y2 = float(obj.find("bndbox/ymax").text) 57 | gt_bbox.append([x1, y1, x2, y2]) 58 | 59 | roidb.append(dict( 60 | gt_class=np.array(gt_class, dtype=np.float32), 61 | gt_bbox=np.array(gt_bbox, dtype=np.float32), 62 | flipped=False, 63 | h=h, 64 | w=w, 65 | image_url=image_url, 66 | im_id=im_id)) 67 | 68 | dataset_name = os.path.basename(data_dir).lower() 69 | if split is not None: 70 | roidb_name = "data/cache/{}_{}.roidb".format(dataset_name, split) 71 | else: 72 | roidb_name = "data/cache/{}.roidb".format(dataset_name) 73 | 74 | with open(roidb_name, "wb") as f: 75 | pickle.dump(roidb, f) 76 | 77 | 78 | if __name__ == "__main__": 79 | os.makedirs("data/cache", exist_ok=True) 80 | create_roidb(*parse_args()) 81 | -------------------------------------------------------------------------------- /models/maskrcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Mask-RCNN 2 | 3 | This repository implements [**Mask-RCNN**](https://arxiv.org/abs/1703.06870) in the SimpleDet framework. 4 | Mask-RCNN is a simple and effective approach for object instance segmentation. By simply extending Faster-RCNN with a mask branch, Mask-RCNN can generate a high-quality segmentation mask for each instance. In the following, we will introduce how we build Mask-RCNN in the SimpleDet framework. Currently, we only provide FPN based Mask-RCNN. 5 | 6 | ### Qucik Start 7 | ```bash 8 | # train 9 | python3 detection_train.py --config config/mask_r50v1_fpn_1x.py 10 | 11 | # test 12 | python3 mask_test.py --config config/mask_r50v1_fpn_1x.py 13 | ``` 14 | ### How we build Mask-RCNN 15 | #### Input 16 | First, we need mask label. 17 | 18 | Instead of providing binary masks to the network, we adopt poly format in the current implementation. Since each instance may contain several parts, we adopt a list of lists ([[ax1, ay1, ax2, ay2,...], [bx1, by1, bx2, by2,...], ...) to represent each instance following COCO. For simplicity, we note [ax1, ay1, ax2, ay2, ...] as a segm. 19 | 20 | We implement these transforms for poly format mask label: 21 | - **PreprocessGtPoly**: convert each segm in a instance into ndarray. 22 | - **EncodeGtPoly**: encode each instance into a fixed length format ([class_id, num_segms, len_segm1, len_segm2, segm1, segm2]). 23 | 24 | For data augmentation, we extend several transfroms from Faster-RCNN: 25 | - **Resize2DImageBboxMask**: based on **Resize2DImageBbox** 26 | - **Flip2DImageBboxMask**: based on **Flip2DImageBbox** 27 | - **Pad2DImageBboxMask**: based on **Pad2DImageBbox** 28 | 29 | #### Operators 30 | Then, we extend proposal_target to get sampled mask target for mask branch training: 31 | - **proposal_mask_target**, decodes encoded gt poly into binary mask and samples a fixed amount of masks as mask target. For acceleration, we only provide mask target for fg roi. So the number of mask target is ```int(image_roi * fg_fraction)```. Currently we only support class specific mask target. 
So the shape of mask target is ```(batch_size, int(image_roi * fg_fraction), num_class (81 in COCO), mask_size, mask_size)```. 32 | 33 | In order to test mask in an end-to-end manner, we reuses the code from detection_test.py and implement a bbox post processing operator: 34 | - **bbox_post_processing**, adopts NMS for multi-class bbox and get final bbox results. 35 | 36 | For loss function, we implement sigmoid cross entropy: 37 | - **sigmoid_cross_entropy**, a general sigmoid cross entropy loss function. 38 | 39 | #### Symbol 40 | - **MaskFasterRcnn**, detector for MaskRCNN 41 | - **MaskFPNRpnHead**, a new RpnHead inherited from FPNRpnHead, note that we slice the proposal sampled from proposal_mask_target since the mask target provided by this operator is only for fg roi. 42 | - **MaskFasterRcnnHead**, mask head for MaskRCNN 43 | - **MaskFasterRcnn4ConvHead**, a specific mask head with 4 convolutions. 44 | - **BboxPostProcessor**, a bbox post processor for end-to-end test. 45 | 46 | ### How to build Mask-RCNN without FPN 47 | - Implement **MaskRpnHead** following **MaskFPNRpnHead**. 48 | - Implement your own MaskHead following **MaskFasterRcnn4ConvHead** 49 | - Write your own config following **mask_r50v1_fpn_1x.py** and **faster_r50v1c4_c5_512roi_1x.py** 50 | 51 | -------------------------------------------------------------------------------- /symbol/detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mxnet as mx 4 | import mxnext as X 5 | from utils.patch_config import patch_config_as_nothrow 6 | 7 | 8 | class Rpn(object): 9 | _rpn_output = None 10 | 11 | def __init__(self): 12 | pass 13 | 14 | @classmethod 15 | def get_train_symbol(cls, backbone, neck, rpn_head): 16 | rpn_feat = backbone.get_rpn_feature() 17 | rpn_feat = neck.get_rpn_feature(rpn_feat) 18 | 19 | rpn_loss = rpn_head.get_loss(rpn_feat, None, None) 20 | 21 | return X.group(rpn_loss) 22 | 23 | @classmethod 24 | def get_rpn_test_symbol(cls, backbone, neck, rpn_head): 25 | if cls._rpn_output is not None: 26 | return cls._rpn_output 27 | 28 | im_info = X.var("im_info") 29 | im_id = X.var("im_id") 30 | rec_id = X.var("rec_id") 31 | 32 | rpn_feat = backbone.get_rpn_feature() 33 | rpn_feat = neck.get_rpn_feature(rpn_feat) 34 | 35 | (proposal, proposal_score) = rpn_head.get_all_proposal(rpn_feat, im_info) 36 | 37 | cls._rpn_output = X.group([rec_id, im_id, im_info, proposal, proposal_score]) 38 | return cls._rpn_output 39 | 40 | 41 | class FasterRcnn(object): 42 | _rpn_output = None 43 | 44 | def __init__(self): 45 | pass 46 | 47 | @classmethod 48 | def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 49 | gt_bbox = X.var("gt_bbox") 50 | im_info = X.var("im_info") 51 | 52 | rpn_feat = backbone.get_rpn_feature() 53 | rcnn_feat = backbone.get_rcnn_feature() 54 | rpn_feat = neck.get_rpn_feature(rpn_feat) 55 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 56 | 57 | rpn_head.get_anchor() 58 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 59 | proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info) 60 | roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal) 61 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 62 | 63 | return X.group(rpn_loss + bbox_loss) 64 | 65 | @classmethod 66 | def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 67 | rec_id, im_id, im_info, proposal, proposal_score = \ 68 
| FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 69 | 70 | rcnn_feat = backbone.get_rcnn_feature() 71 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 72 | 73 | roi_feat = roi_extractor.get_roi_feature_test(rcnn_feat, proposal) 74 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 75 | 76 | return X.group([rec_id, im_id, im_info, cls_score, bbox_xyxy]) 77 | 78 | @classmethod 79 | def get_rpn_test_symbol(cls, backbone, neck, rpn_head): 80 | if cls._rpn_output is not None: 81 | return cls._rpn_output 82 | 83 | im_info = X.var("im_info") 84 | im_id = X.var("im_id") 85 | rec_id = X.var("rec_id") 86 | 87 | rpn_head.get_anchor() 88 | rpn_feat = backbone.get_rpn_feature() 89 | rpn_feat = neck.get_rpn_feature(rpn_feat) 90 | 91 | (proposal, proposal_score) = rpn_head.get_all_proposal(rpn_feat, im_info) 92 | 93 | cls._rpn_output = X.group([rec_id, im_id, im_info, proposal, proposal_score]) 94 | return cls._rpn_output -------------------------------------------------------------------------------- /detection_infer_speed.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import importlib 3 | import time 4 | from utils.patch_config import patch_config_as_nothrow 5 | from core.detection_module import DetModule 6 | 7 | import mxnet as mx 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Test detector inference speed') 12 | # general 13 | parser.add_argument('--config', help='config file path', type=str, required=True) 14 | parser.add_argument('--shape', help='specify input 2d image shape', metavar=('SHORT', 'LONG'), type=int, nargs=2, required=True) 15 | parser.add_argument('--gpu', help='GPU index', type=int, default=0) 16 | parser.add_argument('--count', help='number of runs, final result will be averaged', type=int, default=100) 17 | args = parser.parse_args() 18 | 19 | config = importlib.import_module(args.config.replace('.py', '').replace('/', '.')) 20 | return config, args.gpu, args.shape, args.count 21 | 22 | 23 | if __name__ == "__main__": 24 | config, gpu, shape, count = parse_args() 25 | 26 | pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ 27 | transform, data_name, label_name, metric_list = config.get_config(is_train=False) 28 | sym = pModel.test_symbol 29 | 30 | # create dummy data batch 31 | data = mx.nd.ones(shape=[1, 3] + shape) 32 | im_info = mx.nd.array([x / 2.0 for x in shape] + [2.0]).reshape(1, 3) 33 | im_id = mx.nd.array([1]) 34 | rec_id = mx.nd.array([1]) 35 | data_names = ["data", "im_info", "im_id", "rec_id"] 36 | data_shape = [[1, 3] + shape, [1, 3], [1], [1]] 37 | data_shape = [(name, shape) for name, shape in zip(data_names, data_shape)] 38 | data_batch = mx.io.DataBatch(data=[data, im_info, im_id, rec_id]) 39 | 40 | ''' 41 | there are some conflicts between `mergebn` and `attach_quantized_node` in graph_optimize.py 42 | when mergebn ahead of attach_quantized_node 43 | such as `Symbol.ComposeKeyword` 44 | ''' 45 | pModel = patch_config_as_nothrow(pModel) 46 | if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag: 47 | pQuant = pModel.QuantizeTrainingParam 48 | assert pGen.fp16 == False, "current quantize training only support fp32 mode." 
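        # rewrite the test symbol with INT8 quantization nodes so the timed graph matches the quantized model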
49 | from utils.graph_optimize import attach_quantize_node 50 | worker_data_shape = dict([(name, tuple(shape)) for name, shape in data_shape]) 51 | # print(worker_data_shape) 52 | # raise NotImplementedError 53 | _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape) 54 | out_shape_dictoinary = dict(zip(sym.get_internals().list_outputs(), out_shape)) 55 | sym = attach_quantize_node(sym, out_shape_dictoinary, pQuant.WeightQuantizeParam, 56 | pQuant.ActQuantizeParam, pQuant.quantized_op) 57 | sym.save(pTest.model.prefix + "_infer_speed.json") 58 | 59 | 60 | ctx = mx.gpu(gpu) 61 | mod = DetModule(sym, data_names=data_names, context=ctx) 62 | mod.bind(data_shapes=data_shape, for_training=False) 63 | mod.set_params({}, {}, True) 64 | 65 | # let AUTOTUNE run for once 66 | mod.forward(data_batch, is_train=False) 67 | for output in mod.get_outputs(): 68 | output.wait_to_read() 69 | 70 | tic = time.time() 71 | for _ in range(count): 72 | mod.forward(data_batch, is_train=False) 73 | for output in mod.get_outputs(): 74 | output.wait_to_read() 75 | toc = time.time() 76 | 77 | print((toc - tic) / count * 1000) 78 | 79 | -------------------------------------------------------------------------------- /utils/create_coco_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle as pkl 4 | import numpy as np 5 | from pycocotools.coco import COCO 6 | 7 | 8 | dataset_split_mapping = { 9 | "valminusminival2014": "val2014", 10 | "minival2014": "val2014", 11 | "test-dev2017": "test2017" 12 | } 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database for COCO-like dataset') 17 | parser.add_argument('--dataset', help='dataset name', type=str) 18 | parser.add_argument('--dataset-split', help='dataset split, e.g. 
train2017, minival2014', type=str) 19 | 20 | args = parser.parse_args() 21 | return args.dataset, args.dataset_split 22 | 23 | 24 | def generate_groundtruth_database(dataset_name, dataset_split): 25 | annotation_type = 'image_info' if 'test' in dataset_split else 'instances' 26 | annotation_path = "data/%s/annotations/%s_%s.json" % (dataset_name, annotation_type, dataset_split) 27 | assert os.path.exists(annotation_path) 28 | 29 | dataset = COCO(annotation_path) 30 | img_ids = dataset.getImgIds() 31 | roidb = [] 32 | for img_id in img_ids: 33 | img_anno = dataset.loadImgs(img_id)[0] 34 | im_filename = img_anno['file_name'] 35 | im_w = img_anno['width'] 36 | im_h = img_anno['height'] 37 | 38 | ins_anno_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=False) 39 | trainid_to_datasetid = dict({i + 1: cid for i, cid in enumerate(dataset.getCatIds())}) # 0 for bg 40 | datasetid_to_trainid = dict({cid: tid for tid, cid in trainid_to_datasetid.items()}) 41 | instances = dataset.loadAnns(ins_anno_ids) 42 | 43 | # sanitize bboxes 44 | valid_instances = [] 45 | for inst in instances: 46 | x, y, box_w, box_h = inst['bbox'] 47 | x1 = max(0, x) 48 | y1 = max(0, y) 49 | x2 = min(im_w - 1, x1 + max(0, box_w - 1)) 50 | y2 = min(im_h - 1, y1 + max(0, box_h - 1)) 51 | if inst['area'] > 0 and x2 >= x1 and y2 >= y1: 52 | inst['clean_bbox'] = [x1, y1, x2, y2] 53 | valid_instances.append(inst) 54 | num_instance = len(valid_instances) 55 | 56 | gt_bbox = np.zeros((num_instance, 4), dtype=np.float32) 57 | gt_class = np.zeros((num_instance, ), dtype=np.int32) 58 | gt_poly = [None] * num_instance 59 | 60 | for i, inst in enumerate(valid_instances): 61 | cls = datasetid_to_trainid[inst['category_id']] 62 | gt_bbox[i, :] = inst['clean_bbox'] 63 | gt_class[i] = cls 64 | gt_poly[i] = 'segmentation' in inst and inst['segmentation'] or gt_poly[i] 65 | 66 | # split mapping is specific to coco as it uses annotation files to manage split 67 | split = dataset_split in dataset_split_mapping and dataset_split_mapping[dataset_split] or dataset_split 68 | 69 | image_url = 'data/%s/images/%s/%s' % (dataset_name, split, im_filename) 70 | assert os.path.exists(image_url) 71 | roi_rec = { 72 | 'image_url': image_url, 73 | 'im_id': img_id, 74 | 'h': im_h, 75 | 'w': im_w, 76 | 'gt_class': gt_class, 77 | 'gt_bbox': gt_bbox, 78 | 'gt_poly': gt_poly, 79 | 'flipped': False} 80 | 81 | roidb.append(roi_rec) 82 | 83 | return roidb 84 | 85 | 86 | if __name__ == "__main__": 87 | d, dsplit = parse_args() 88 | roidb = generate_groundtruth_database(d, dsplit) 89 | os.makedirs("data/cache", exist_ok=True) 90 | with open("data/cache/%s_%s.roidb" % (d, dsplit), "wb") as fout: 91 | pkl.dump(roidb, fout) 92 | -------------------------------------------------------------------------------- /config/int8/README.md: -------------------------------------------------------------------------------- 1 | ## Quantization during Training 2 | 3 | #### Motivation 4 | Low precision weight and activation could greatly reduce the storage and memory footprint of detection models and improve the inference latency. We provide the inference time measured on TensorRT of INT8 and FP32 version of `faster_r50v1c4_c5_512roi_1x` as an example below. 
5 | 6 | | dtype | time(ms) | minival mAP| 7 | | ----- | -------- | -----------| 8 | | fp32 | 260 | 35.7 | 9 | | int8 | 100 | 35.8 | 10 | 11 | **detailed configs** 12 | 13 | ```shell 14 | batch size=1 15 | device = GTX 1080 16 | data shape = (1, 3, 800, 1200) 17 | ``` 18 | 19 | ### Implementation Details 20 | 21 | #### The Quantization Methods 22 | 23 | **for model weight:** 24 | ```shell 25 | nbits = 8 26 | QUANT_LEVEL = 2 ** (nbits - 1) - 1 27 | threshold = max(abs(w_tensor)) 28 | quant_unit = threshold / QUANT_LEVEL 29 | quantized_w = round(w_tensor / quant_unit) * quant_unit 30 | ``` 31 | 32 | **for model activation:** The threshold is maintained as an exponential moving average of the maximum absolute activation. [ref]() 33 | 34 | ```shell 35 | nbits = 8 36 | QUANT_LEVEL = 2**(nbits -1) -1 37 | history_threshold; # initialized by max(abs(act_tensor)) 38 | curr_max = max(abs(act_tensor)) 39 | threshold = 0.99 * history_threshold + 0.01 * curr_max 40 | quant_unit = threshold / QUANT_LEVEL 41 | quantized_act = round(act_tensor / quant_unit) * quant_unit 42 | ``` 43 | 44 | ### Quantization Configs 45 | The quantization configs are in the `ModelParam.QuantizeTrainingParam` class, which gives users more flexibility during quantization. 46 | 47 | **quantize_flag:** whether to quantize the model. 48 | 49 | **quantized_op:** the operators to quantize. 50 | 51 | `WeightQuantizeParam` and `ActQuantizeParam` are the attribute sets needed by the `Quantization_int8` operator for quantizing `weight` and `activation`. 52 | 53 | ### Attributes of the `quantization_int8` operator 54 | 55 | **delay_quant:** quantization only takes effect after `delay_quant` iterations. 56 | 57 | **ema_decay:** the decay factor used to update the activation threshold. 58 | 59 | **grad_mode:** the mode for passing gradients. There are two modes: `ste` or `clip`. `ste` passes the output gradients straight through to the data; `clip` only passes gradients for data values within the range [-threshold, threshold] and sets the gradients outside this range to 0. 60 | 61 | **workspace:** the temporary space used when grad_mode='clip'. 62 | 63 | **is_weight:** whether the tensor to be quantized is a weight. 64 | 65 | **is_weight_perchannel:** the quantization granularity for weights: per tensor or per channel. Only used when the tensor is a weight. Currently, only the per-tensor mode is supported. 66 | 67 | **quant_mode:** the quantization method: `minmax` or `power2`. Currently, only the minmax mode is supported. 68 | 69 | 70 | ### How to reproduce the result 71 | 1. Install a newer version of MXNet 72 | [[CUDA100]](https://1dv.aflat.top/mxnet_cu100-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 73 | [[CUDA101]](https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 74 | 2. Train an fp32 model with the default config. 75 | 3. Finetune the trained fp32 model with quantization training. Our finetuning settings are: `begin_epoch=6` and `end_epoch=12`. All other configs remain the same as the fp32 training configs. 76 | 4. We provide an example [model](https://1dv.aflat.top/faster_r50v1bc4_c5_512roi_1x_int8.zip) for `faster_r50v1c4_c5_512roi_1x`. 77 | 78 | ### Drawbacks 79 | TensorRT does not provide an API to set the quantization `scale` to a user-supplied value instead of the `scale` it calculates itself, so the learned `threshold` cannot be directly deployed to TensorRT at the moment. You may need to tweak the weight file generated by TensorRT.
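For reference, the fake-quantization scheme from the Implementation Details section can be sketched in a few lines of NumPy. This is an illustration only, not the actual CUDA operator; the clamping to the int8 range is an assumption of the sketch.

```python
# illustrative NumPy sketch of the minmax fake quantization described above
import numpy as np

QUANT_LEVEL = 2 ** (8 - 1) - 1  # 127 for int8

def fake_quantize_weight(w_tensor):
    threshold = np.abs(w_tensor).max()
    quant_unit = threshold / QUANT_LEVEL
    return np.round(w_tensor / quant_unit) * quant_unit

def fake_quantize_act(act_tensor, history_threshold, ema_decay=0.99):
    curr_max = np.abs(act_tensor).max()
    threshold = ema_decay * history_threshold + (1 - ema_decay) * curr_max
    quant_unit = threshold / QUANT_LEVEL
    # clamping to [-QUANT_LEVEL, QUANT_LEVEL] is assumed here for illustration
    q = np.clip(np.round(act_tensor / quant_unit), -QUANT_LEVEL, QUANT_LEVEL) * quant_unit
    return q, threshold  # the updated threshold is carried to the next iteration
```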
80 | -------------------------------------------------------------------------------- /utils/create_crowdhuman_roidb.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle as pkl 4 | import numpy as np 5 | import random 6 | from PIL import Image 7 | import concurrent.futures 8 | import json 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Generate SimpleDet GroundTruth Database for Crowdhuman-like dataset') 12 | parser.add_argument('--dataset', help='dataset name', type=str) 13 | parser.add_argument('--dataset-split', help='dataset split, e.g. train, val', type=str) 14 | parser.add_argument('--num-threads', help='number of threads to process', default=4, type=int) 15 | 16 | args = parser.parse_args() 17 | return args.dataset, args.dataset_split, args.num_threads 18 | 19 | def load_func(fpath): 20 | assert os.path.exists(fpath) 21 | with open(fpath,'r') as fid: 22 | lines = fid.readlines() 23 | records = [json.loads(line.strip('\n')) for line in lines] 24 | return records 25 | 26 | def decode_annotations(rec_id): 27 | global dataset_path 28 | img_id = records[rec_id]['ID'] 29 | img_url = dataset_path + 'images/' + img_id + '.jpg' 30 | assert os.path.exists(img_url) 31 | im = Image.open(img_url) 32 | im_w, im_h = im.width, im.height 33 | 34 | gt_box = records[rec_id]['gtboxes'] 35 | gt_box_len = len(gt_box) 36 | valid_bboxes = [] 37 | valid_classes = [] 38 | for ii in range(gt_box_len): 39 | each_data = gt_box[ii] 40 | x, y, w, h = each_data['fbox'] 41 | 42 | if w <= 0 or h <= 0: 43 | continue 44 | 45 | x1 = x; y1 = y; x2 = x + w; y2 = y + h 46 | 47 | valid_bbox = [x1, y1, x2, y2] 48 | valid_bboxes.append(valid_bbox) 49 | if each_data['tag'] == 'person': 50 | tag = 1 51 | else: 52 | tag = -2 53 | if 'extra' in each_data: 54 | if 'ignore' in each_data['extra']: 55 | if each_data['extra']['ignore'] != 0: 56 | tag = -2 57 | valid_classes.append(tag) 58 | 59 | valid_bboxes = np.array(valid_bboxes).reshape(-1, 4) 60 | valid_classes = np.array(valid_classes).reshape(-1,) 61 | 62 | valid_num = valid_bboxes.shape[0] 63 | rand_ind = np.arange(valid_num) 64 | np.random.shuffle(rand_ind) 65 | gt_bbox = valid_bboxes[rand_ind] 66 | gt_class = valid_classes[rand_ind] 67 | 68 | roi_rec = { 69 | 'image_url': img_url, 70 | 'im_id': rec_id, 71 | 'id': img_id, 72 | 'h': im_h, 73 | 'w': im_w, 74 | 'gt_class': gt_class, 75 | 'gt_bbox': gt_bbox, 76 | 'flipped': False 77 | } 78 | return roi_rec, gt_bbox.shape[0] 79 | 80 | if __name__ == "__main__": 81 | dataset_name, dataset_type, num_threads = parse_args() 82 | 83 | dataset_path = 'data/%s/' % dataset_name 84 | ch_file_path = dataset_path + 'annotations/annotation_%s.odgt' % dataset_type 85 | json_file_path = dataset_path + 'annotations/annotation_%s.json' % dataset_type 86 | 87 | records = load_func(ch_file_path) 88 | print("Loading Annotations Done") 89 | 90 | roidbs = []; num_bbox = 0 91 | rec_ids = list(range(len(records))) 92 | 93 | with concurrent.futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 94 | for index, res_data in enumerate(executor.map(decode_annotations, rec_ids)): 95 | roidb, cnt = res_data 96 | roidbs.append(roidb) 97 | num_bbox += cnt 98 | if index % 1000 == 0: 99 | print("Finished %d/%d" % (index, len(rec_ids))) 100 | print("Parsing Bbox Number: %d" % num_bbox) 101 | os.makedirs("data/cache", exist_ok=True) 102 | with open("data/cache/%s_%s.roidb" % (dataset_name, dataset_type), "wb") as fout: 103 | pkl.dump(roidbs, 
fout) 104 | -------------------------------------------------------------------------------- /operator_cxx/contrib/sigmoid_cross_entropy.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file sigmoid_cross_entropy.cc 23 | * \brief 24 | * \author Yuntao Chen 25 | */ 26 | 27 | #include "./sigmoid_cross_entropy-inl.h" 28 | 29 | namespace mshadow { 30 | 31 | template 32 | inline void SigmoidCrossEntropyForward(const Tensor &data, 33 | const Tensor &label, 34 | Tensor &loss, 35 | Tensor &loss_sum, 36 | Tensor &count, 37 | Tensor &count_sum, 38 | Tensor &out, 39 | T scale) { 40 | LOG(FATAL) << "NotImplemented"; 41 | } 42 | 43 | template 44 | inline void SigmoidCrossEntropyBackward(const Tensor &data, 45 | const Tensor &label, 46 | Tensor &d_data, 47 | Tensor &count, 48 | Tensor &count_sum, 49 | T scale) { 50 | LOG(FATAL) << "NotImplemented"; 51 | } 52 | 53 | } 54 | 55 | namespace mxnet { 56 | namespace op { 57 | template<> 58 | Operator *CreateOp(SigmoidCrossEntropyParam param, int dtype) { 59 | Operator *op = NULL; 60 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 61 | op = new SigmoidCrossEntropyOp(param); 62 | }) 63 | return op; 64 | } 65 | 66 | // DO_BIND_DISPATCH comes from operator_common.h 67 | Operator *SigmoidCrossEntropyProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 68 | std::vector *in_type) const { 69 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 70 | } 71 | 72 | DMLC_REGISTER_PARAMETER(SigmoidCrossEntropyParam); 73 | 74 | MXNET_REGISTER_OP_PROPERTY(_contrib_SigmoidCrossEntropy, SigmoidCrossEntropyProp) 75 | .describe(R"DOC( 76 | Compute sigmoid activations followed by averaged binary cross entropy loss. The 77 | target values may be in {-1, 0, 1}, where -1 indicates that the corresponding 78 | sample should be ignored and {0, 1} correspond to the binary classes 0 and 1. By 79 | default the loss is divided by the number of targets > -1 and then multiplied by 80 | the `grad_scale` op argument. The divisive normalization may be disable by setting 81 | the op argument `normalize` to 0 (the multiplication by `scale` still takes 82 | effect). 83 | This op fuses sigmoid and cross entropy for numerical stability in both forward 84 | and gradient computation. 
85 | )DOC" ADD_FILELINE) 86 | .add_argument("data", "NDArray-or-Symbol", "Input array.") 87 | .add_argument("label", "NDArray-or-Symbol", "Ground truth label.") 88 | .add_arguments(SigmoidCrossEntropyParam::__FIELDS__()); 89 | 90 | 91 | } // namespace op 92 | } // namespace mxnet 93 | -------------------------------------------------------------------------------- /operator_cxx/contrib/global_average_pooling.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file global_average_pooling.cc 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Chenxia Han 25 | */ 26 | #include "./global_average_pooling-inl.h" 27 | 28 | namespace mshadow { 29 | template 30 | inline void GAPForward(const Tensor &out, 31 | const Tensor &data) { 32 | // NOT_IMPLEMENTED 33 | return; 34 | } 35 | 36 | template 37 | inline void GAPBackward(const Tensor &in_grad, 38 | const Tensor &out_grad) { 39 | // NOT_IMPLEMENTED 40 | return; 41 | } 42 | } // namespace mshadow 43 | 44 | namespace mxnet { 45 | namespace op { 46 | 47 | template<> 48 | Operator *CreateOp(GAPParam param, int dtype) { 49 | Operator *op = NULL; 50 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 51 | op = new GAPOp(param); 52 | }); 53 | 54 | return op; 55 | } 56 | 57 | // DO_BIND_DISPATCH comes from operator_common.h 58 | Operator* GAPProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 59 | std::vector *in_type) const { 60 | std::vector out_shape, aux_shape; 61 | std::vector out_type, aux_type; 62 | CHECK(InferType(in_type, &out_type, &aux_type)); 63 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 64 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(GAPParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_GAP, GAPProp) 70 | .describe(R"code(This operator is DEPRECATED. 71 | Perform pooling on the input. 72 | 73 | The shapes for 2-D pooling is 74 | 75 | - **data**: *(batch_size, channel, height, width)* 76 | - **out**: *(batch_size, num_filter, out_height, out_width)*, with:: 77 | 78 | out_height = f(height, kernel[0], pad[0], stride[0]) 79 | out_width = f(width, kernel[1], pad[1], stride[1]) 80 | 81 | The definition of *f* depends on ``pooling_convention``, which has two options: 82 | 83 | - **valid** (default):: 84 | 85 | f(x, k, p, s) = floor((x+2*p-k)/s)+1 86 | 87 | - **full**, which is compatible with Caffe:: 88 | 89 | f(x, k, p, s) = ceil((x+2*p-k)/s)+1 90 | 91 | But ``global_pool`` is set to be true, then do a global pooling, namely reset 92 | ``kernel=(height, width)``. 
93 | 94 | Three pooling options are supported by ``pool_type``: 95 | 96 | - **avg**: average pooling 97 | - **max**: max pooling 98 | - **sum**: sum pooling 99 | 100 | 1-D pooling is special case of 2-D pooling with *weight=1* and 101 | *kernel[1]=1*. 102 | 103 | For 3-D pooling, an additional *depth* dimension is added before 104 | *height*. Namely the input data will have shape *(batch_size, channel, depth, 105 | height, width)*. 106 | 107 | )code" ADD_FILELINE) 108 | .add_argument("data", "NDArray-or-Symbol", "Input data to the pooling operator.") 109 | .add_arguments(GAPParam::__FIELDS__()); 110 | 111 | } // namespace op 112 | } // namespace mxnet 113 | -------------------------------------------------------------------------------- /models/tridentnet/README.md: -------------------------------------------------------------------------------- 1 | ## Scale-Aware Trident Networks for Object Detection 2 | 3 | Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang 4 | 5 |

6 | 7 | ### Introduction 8 | 9 | This repository implements [TridentNet](https://arxiv.org/abs/1901.01892) in the SimpleDet framework. 10 | 11 | Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. Then, we propose a scale-aware training scheme to specialize each branch by sampling object instances of proper scales for training. As a bonus, a fast approximation version of TridentNet could achieve significant improvements without any additional parameters and computational cost. On the COCO dataset, our TridentNet with ResNet-101 backbone achieves state-of-the-art single-model results by obtaining an mAP of 48.4. 12 | 13 | #### Trident Blocks 14 | 15 | - Dilated convolution for efficient scale enumeration 16 | - Weight sharing between convs for uniform representation 17 | 18 |
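As a rough illustration of these two ideas, a 3-branch Trident convolution can be sketched with the plain MXNet symbol API. The function and variable names below are made up for the example; this is not the repository's actual builder code.

```python
# illustrative sketch of a 3-branch Trident convolution with shared weights
import mxnet as mx

def trident_conv(data, name, num_filter, dilates=(1, 2, 3)):
    # one weight/bias pair shared by all branches
    weight = mx.sym.var(name + "_weight")
    bias = mx.sym.var(name + "_bias")
    branches = []
    for d in dilates:
        branches.append(mx.sym.Convolution(
            data=data, weight=weight, bias=bias, num_filter=num_filter,
            kernel=(3, 3), pad=(d, d), dilate=(d, d),
            name="{}_dilate{}".format(name, d)))
    return branches  # same parameters, three different receptive fields
```

Because every branch reuses the same `weight` and `bias`, the block adds no parameters over a single convolution, while the dilation rates give each branch a different receptive field.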

19 | 20 | The above figure shows how to convert bottleneck residual blocks to 3-branch Trident Blocks. The dilation rate of three branches are set as 1, 2 and 3, respectively. 21 | 22 | ### Use TridentNet 23 | 24 | Please setup SimpleDet following [README](../../README.md) and [INSTALL](../../doc/INSTALL.md) and use the TridentNet configuration files in the `config` folder. 25 | 26 | ### Results on MS-COCO 27 | 28 | | | Backbone | Test data | mAP@[0.5:0.95] | Link | 29 | | --------------------------- | ---------- | --------- | :------------: | -----| 30 | | Faster R-CNN, 1x | ResNet-101 | minival | 37.6 |[model](https://1dv.aflat.top/faster_r101v2c4_c5_256roi_1x.zip)| 31 | | TridentNet, 1x | ResNet-101 | minival | 40.6 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_1x.zip)| 32 | | TridentNet, 1x, Fast Approx | ResNet-101 | minival | 39.9 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_fastapprox_1x.zip)| 33 | | TridentNet, 2x | ResNet-101 | test-dev | 42.8 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_addminival_2x.zip)| 34 | | TridentNet*, 3x | ResNet-101 | test-dev | 48.4 |[model](https://1dv.aflat.top/tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.zip)| 35 | 36 | Note: 37 | 1. These models are not trained in SimpleDet. Re-training these models in SimpleDet gives a slightly better result. 38 | 2. TridentNet* - TridentNet = extended training + softNMS + multi-scale training/testing + syncBN + DCNv1. 39 | 40 | ### Results on MS-COCO with stronger baselines 41 | All config files are available in [config/resnet_v1b](../../config/resnet_v1b). 42 | 43 | |Model|Backbone|Head|Train Schedule|AP|AP50|AP75|APs|APm|APl| 44 | |-----|--------|----|--------------|--|----|----|---|---|---| 45 | |Faster|R50v1b-C4|C5-512ROI|2X|36.9|57.9|39.3|19.9|41.4|50.2| 46 | |Trident|R50v1b-C4|C5-128ROI|2X|39.6|60.9|42.9|22.5|44.5|53.9| 47 | |TridentFast|R50v1b-C4|C5-128ROI|2X|39.0|60.2|41.8|20.8|43.6|53.8| 48 | |Faster|R101v1b-C4|C5-512ROI|2X|40.5|61.2|43.8|22.5|44.8|55.4| 49 | |Trident|R101v1b-C4|C5-128ROI|2X|43.0|64.3|46.3|25.3|47.9|58.4| 50 | |TridentFast|R101v1b-C4|C5-128ROI|2X|42.5|63.7|46.0|23.3|46.7|59.3| 51 | |Faster|R152v1b-C4|C5-512ROI|2X|41.8|62.4|45.2|23.2|46.0|56.9| 52 | |Trident|R152v1b-C4|C5-128ROI|2X|44.4|65.4|48.3|26.4|49.4|59.6| 53 | |TridentFast|R152v1b-C4|C5-128ROI|2X|43.9|65.1|47.0|25.1|48.1|60.4| 54 | 55 | ### Citing TridentNet 56 | 57 | ``` 58 | @article{li2019scale, 59 | title={Scale-Aware Trident Networks for Object Detection}, 60 | author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, 61 | journal={ICCV 2019}, 62 | year={2019} 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /doc/DATASET.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | This document describes the process of creating roidb from COCO-format, VOC-format or JSON-format annotations. 3 | 4 | ### COCO format 5 | In this section, we create roidb from coco-format annotaions of PASCAL VOC dataset. 
6 | 7 | ```bash 8 | # enter simpledet main directory 9 | cd simpledet 10 | 11 | # create data dir 12 | mkdir -p data/src 13 | pushd data/src 14 | 15 | # download and extract VOC2007 trainval 16 | wget http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 17 | tar xf data/src/VOCtrainval_06-Nov-2007.tar 18 | 19 | # download and extract VOC annotaitons provided by COCO 20 | wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip 21 | unzip PASCAL_VOC.zip 22 | popd 23 | 24 | # create soft links 25 | mkdir -p data/pascal_voc/annotations 26 | ln -s data/src/PASCAL_VOC/pascal_train2007.json data/pascal_voc/annotations/instances_train2007.json 27 | ln -s data/src/PASCAL_VOC/pascal_val2007.json data/pascal_voc/annotations/instances_val2007.json 28 | 29 | mkdir -p data/pascal_voc/images 30 | ln -s data/src/VOCdevkit/VOC2007/JPEGImages data/pascal_voc/images/train2007 31 | ln -s data/src/VOCdevkit/VOC2007/JPEGImages data/pascal_voc/images/val2007 32 | 33 | # annotations/instances_split.json should correspond with images/split 34 | pascal_voc 35 | ├── annotations 36 | │   ├── instances_train2007.json -> data/src/PASCAL_VOC/pascal_train2007.json 37 | │   └── instances_val2007.json -> data/src/PASCAL_VOC/pascal_val2007.json 38 | └── images 39 | ├── train2007 -> data/src/VOCdevkit/VOC2007/JPEGImages 40 | └── val2007 -> data/src/VOCdevkit/VOC2007/JPEGImages 41 | 42 | # generate roidbs 43 | python3 utils/create_coco_roidb.py --dataset pascal_voc --dataset-split train2007 44 | python3 utils/create_coco_roidb.py --dataset pascal_voc --dataset-split val2007 45 | ``` 46 | 47 | 48 | ### VOC format 49 | In this section, we create roidb from voc-format annotaions of clipart dataset. 50 | ```bash 51 | # enter simpledet main directory 52 | cd simpledet 53 | 54 | # create data dir 55 | mkdir -p data/src 56 | pushd data/src 57 | 58 | # download and extract clipart.zip 59 | # courtesy to "Towards Universal Object Detection by Domain Attention" 60 | wget https://1dv.aflat.top/clipart.zip -O clipart.zip 61 | unzip clipart.zip 62 | popd 63 | 64 | # generate roidbs 65 | python3 utils/create_voc_roidb.py --data-dir data/src/clipart --split train 66 | ``` 67 | 68 | ### JSON format 69 | In this section, we create roidb from json-format annotaions of clipart dataset. 70 | 71 | Prepare your own data like the example 72 | ```json 73 | # example.json 74 | [ 75 | { 76 | "gt_class": [1, 5], 77 | "gt_bbox": [[48, 240, 195, 371], [8, 12, 352, 498]], 78 | "flipped": false, 79 | "h": 500, 80 | "w": 353, 81 | "image_url": "/absolute/path/to/VOCdevkit/VOC2007/JPEGImages/000001.jpg", 82 | "im_id": 1 83 | }, 84 | ... 
85 | ] 86 | ``` 87 | 88 | ```bash 89 | python3 utils/json_to_roidb.py --json path/to/your.json 90 | ``` 91 | 92 | ### Existing Annotations 93 | - Cityscapes (coco format) 94 | Check [this](https://github.com/facebookresearch/Detectron/blob/master/tools/convert_cityscapes_to_coco.py) script 95 | - COCO (coco format) 96 | http://cocodataset.org/#download 97 | - DeepLesion (voc format) 98 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 99 | - DOTA (voc format) 100 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 101 | - Kitchen (voc format) 102 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 103 | - KITTI (voc format) 104 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 105 | - VOC (voc format) 106 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 107 | - WiderFace (voc format) 108 | Check [here](https://drive.google.com/drive/folders/1Uwnhg0qZ5k-3VZ7uSDt3WyETcnSdeA3M) 109 | -------------------------------------------------------------------------------- /operator_cxx/contrib/group_norm.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 15 | and IDIAP Research Institute nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | /* 32 | * Licensed to the Apache Software Foundation (ASF) under one 33 | * or more contributor license agreements. See the NOTICE file 34 | * distributed with this work for additional information 35 | * regarding copyright ownership. The ASF licenses this file 36 | * to you under the Apache License, Version 2.0 (the 37 | * "License"); you may not use this file except in compliance 38 | * with the License. 
You may obtain a copy of the License at 39 | * 40 | * http://www.apache.org/licenses/LICENSE-2.0 41 | * 42 | * Unless required by applicable law or agreed to in writing, 43 | * software distributed under the License is distributed on an 44 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 45 | * KIND, either express or implied. See the License for the 46 | * specific language governing permissions and limitations 47 | * under the License. 48 | */ 49 | 50 | /*! 51 | * \file group_norm.cc 52 | * \author Yuntao Chen 53 | */ 54 | 55 | #include "./group_norm-inl.h" 56 | 57 | namespace mxnet { 58 | namespace op { 59 | template <> 60 | Operator* CreateOp(GroupNormParam param, int dtype) { 61 | LOG(FATAL) << "not implemented."; 62 | return NULL; 63 | } 64 | 65 | // DO_BIND_DISPATCH comes from operator_common.h 66 | Operator* GroupNormProp::CreateOperatorEx(Context ctx, 67 | std::vector* in_shape, 68 | std::vector* in_type) const { 69 | DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); 70 | } 71 | 72 | DMLC_REGISTER_PARAMETER(GroupNormParam); 73 | 74 | MXNET_REGISTER_OP_PROPERTY(_contrib_GroupNorm, GroupNormProp) 75 | .add_argument("data", "NDArray-or-Symbol", 76 | "An n-dimensional input array (n > 2) of the form [batch, " 77 | "channel, spatial_dim1, spatial_dim2, ...].") 78 | .add_argument("gamma", "NDArray-or-Symbol", 79 | "A vector of length \'channel\', which multiplies the " 80 | "normalized input.") 81 | .add_argument("beta", "NDArray-or-Symbol", 82 | "A vector of length \'channel\', which is added to the " 83 | "product of the normalized input and the weight.") 84 | .add_arguments(GroupNormParam::__FIELDS__()) 85 | .describe(R"code(Group Normalization (GN) operation: https://arxiv.org/abs/1803.08494)code" ADD_FILELINE); 86 | } // namespace op 87 | } // namespace mxnet 88 | -------------------------------------------------------------------------------- /models/se/builder.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnext as X 3 | from mxnext import conv, relu, add, sigmoid 4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit 5 | from symbol.builder import Backbone 6 | from models.efficientnet.builder import se 7 | from models.dcn.builder import hybrid_resnet_fpn_builder 8 | from models.maskrcnn.builder import MaskFasterRcnnHead 9 | 10 | 11 | def se_resnet_v1b_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 12 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 13 | bn1 = norm(conv1, name=name + "_bn1") 14 | relu1 = relu(bn1, name=name + "_relu1") 15 | 16 | conv2 = conv(relu1, name=name + "_conv2", stride=stride, filter=filter // 4, kernel=3) 17 | bn2 = norm(conv2, name=name + "_bn2") 18 | relu2 = relu(bn2, name=name + "_relu2") 19 | 20 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 21 | bn3 = norm(conv3, name=name + "_bn3") 22 | bn3 = se(bn3, prefix=name + "_se3", f_down=filter // 16, f_up=filter) 23 | 24 | 25 | if proj: 26 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 27 | shortcut = norm(shortcut, name=name + "_sc_bn") 28 | else: 29 | shortcut = input 30 | 31 | eltwise = add(bn3, shortcut, name=name + "_plus") 32 | 33 | return relu(eltwise, name=name + "_relu") 34 | 35 | 36 | def se_v2_resnet_v1b_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 37 | """ 38 | diff with v1: move the SE module to 3x3 conv 39 | """ 40 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 41 | bn1 = norm(conv1, name=name + 
"_bn1") 42 | relu1 = relu(bn1, name=name + "_relu1") 43 | 44 | conv2 = conv(relu1, name=name + "_conv2", stride=stride, filter=filter // 4, kernel=3) 45 | bn2 = norm(conv2, name=name + "_bn2") 46 | relu2 = relu(bn2, name=name + "_relu2") 47 | relu2 = se(relu2, prefix=name + "_se2", f_down=filter // 16, f_up=filter // 4) 48 | 49 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 50 | bn3 = norm(conv3, name=name + "_bn3") 51 | 52 | if proj: 53 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 54 | shortcut = norm(shortcut, name=name + "_sc_bn") 55 | else: 56 | shortcut = input 57 | 58 | eltwise = add(bn3, shortcut, name=name + "_plus") 59 | 60 | return relu(eltwise, name=name + "_relu") 61 | 62 | 63 | SEResNetV1bFPN = hybrid_resnet_fpn_builder(se_resnet_v1b_unit) 64 | SEv2ResNetV1bFPN = hybrid_resnet_fpn_builder(se_v2_resnet_v1b_unit) 65 | 66 | 67 | class MaskRcnnSe4convHead(MaskFasterRcnnHead): 68 | def __init__(self, pBbox, pMask, pMaskRoi): 69 | super().__init__(pBbox, pMask, pMaskRoi) 70 | 71 | def _get_mask_head_logit(self, conv_feat): 72 | if self._head_feat is not None: 73 | return self._head_feat 74 | 75 | up_stride = int(self.pMask.resolution // self.pMaskRoi.out_size) 76 | dim_reduced = self.pMask.dim_reduced 77 | 78 | msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2) 79 | 80 | current = conv_feat 81 | for i in range(4): 82 | current = X.conv( 83 | current, 84 | name="mask_fcn_conv{}".format(i + 1), 85 | filter=dim_reduced, 86 | kernel=3, 87 | no_bias=False, 88 | init=msra_init 89 | ) 90 | current = self.add_norm(current) 91 | current = X.relu(current) 92 | current = se(current, "mask_fcn_se{}".format(i + 1), f_down=dim_reduced // 4, f_up=dim_reduced) 93 | 94 | mask_up = current 95 | for i in range(up_stride // 2): 96 | weight = X.var( 97 | name="mask_up{}_weight".format(i), 98 | init=msra_init, 99 | lr_mult=1, 100 | wd_mult=1) 101 | mask_up = mx.sym.Deconvolution( 102 | mask_up, 103 | kernel=(2, 2), 104 | stride=(2, 2), 105 | num_filter=dim_reduced, 106 | no_bias=False, 107 | weight=weight, 108 | name="mask_up{}".format(i) 109 | ) 110 | mask_up = X.relu( 111 | mask_up, 112 | name="mask_up{}_relu".format(i)) 113 | 114 | mask_up = X.to_fp32(mask_up, name='mask_up_to_fp32') 115 | self._head_feat = mask_up 116 | 117 | return self._head_feat -------------------------------------------------------------------------------- /models/KD/builder.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import math 5 | import mxnet as mx 6 | import mxnext as X 7 | 8 | from symbol.builder import FasterRcnn, RpnHead, Backbone, Neck 9 | from models.retinanet.builder import RetinaNet 10 | 11 | 12 | class FitNetHead(object): 13 | def __init__(self, pKD): 14 | super().__init__() 15 | self.p = pKD 16 | self._student_feat = None 17 | 18 | def get_student_feat(self, mimic_feat, mimic_channel): 19 | if self._student_feat: 20 | return self._student_feat 21 | 22 | mimic_channel = self.p.channel 23 | student_hint = mx.sym.Convolution(data=mimic_feat, 24 | num_filter=mimic_channel, 25 | kernel=(1, 1), 26 | stride=(1, 1), 27 | pad=(0, 0), 28 | name="student_hint_conv") 29 | student_hint = mx.sym.Activation(data=student_hint, 30 | act_type='relu', 31 | name="student_hint_relu") 32 | return student_hint 33 | 34 | def get_loss(self, feat_dict, label): 35 | mimic_stage = self.p.stage 36 | mimic_channel = self.p.channel 37 | 
mimic_grad_scale = self.p.grad_scale 38 | 39 | student_feat = self.get_student_feat(feat_dict[mimic_stage], mimic_channel) 40 | fit_loss = mx.sym.mean(mx.sym.square(student_feat - label)) 41 | fit_loss = mx.sym.MakeLoss(fit_loss, grad_scale=mimic_grad_scale, name="fit_loss") 42 | return fit_loss 43 | 44 | 45 | class FitNetRetinaNet(RetinaNet): 46 | def __init__(self): 47 | super().__init__() 48 | 49 | @staticmethod 50 | def get_train_symbol(backbone, neck, head, kd_head): 51 | rpn_cls_label = X.var("rpn_cls_label") 52 | rpn_reg_target = X.var("rpn_reg_target") 53 | rpn_reg_weight = X.var("rpn_reg_weight") 54 | teacher_label = X.var("teacher_label") 55 | 56 | feat = backbone.get_rpn_feature() 57 | c2, c3, c4, c5 = feat 58 | feat_dict = {'c2': c2, 59 | 'c3': c3, 60 | 'c4': c4, 61 | 'c5': c5} 62 | feat = neck.get_rpn_feature(feat) 63 | 64 | loss = head.get_loss(feat, rpn_cls_label, rpn_reg_target, rpn_reg_weight) 65 | kd_loss = kd_head.get_loss(feat_dict, teacher_label) 66 | 67 | return X.group(loss + (kd_loss, )) 68 | 69 | 70 | class FitNetFasterRcnn(FasterRcnn): 71 | def __init__(self): 72 | super().__init__() 73 | 74 | @classmethod 75 | def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head, kd_head): 76 | gt_bbox = X.var("gt_bbox") 77 | im_info = X.var("im_info") 78 | teacher_label = X.var("teacher_label") 79 | 80 | rpn_feat = backbone.get_rpn_feature() 81 | c2, c3, c4, c5 = rpn_feat 82 | feat_dict = {'c2': c2, 83 | 'c3': c3, 84 | 'c4': c4, 85 | 'c5': c5} 86 | rcnn_feat = backbone.get_rcnn_feature() 87 | rpn_feat = neck.get_rpn_feature(rpn_feat) 88 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 89 | 90 | rpn_head.get_anchor() 91 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 92 | proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info) 93 | roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal) 94 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 95 | 96 | kd_loss = kd_head.get_loss(feat_dict, teacher_label) 97 | 98 | return X.group(rpn_loss + bbox_loss + (kd_loss, )) 99 | 100 | @classmethod 101 | def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head): 102 | rec_id, im_id, im_info, proposal, proposal_score = \ 103 | FitNetFasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 104 | 105 | rcnn_feat = backbone.get_rcnn_feature() 106 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 107 | 108 | roi_feat = roi_extractor.get_roi_feature_test(rcnn_feat, proposal) 109 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 110 | 111 | return X.group([rec_id, im_id, im_info, cls_score, bbox_xyxy]) 112 | -------------------------------------------------------------------------------- /doc/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Setup Locally with Pre-built Wheel 2 | We provide pre-built wheel for python >= 3.4, Ubuntu >= 14.04 or CentOS >=7. The wheels are staticly linked so no dependency other than CUDA is required. 
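Once one of the wheels below is installed, a quick sanity check can confirm that MXNet imports and can place an array on the GPU. This is a minimal sketch, not part of the official install steps; it assumes a working NVIDIA driver and at least one visible GPU.

```python
# Hypothetical verification snippet (assumption: CUDA wheel installed, GPU 0 visible):
# print the MXNet version and allocate a tiny NDArray on GPU 0.
import mxnet as mx

print(mx.__version__)
print(mx.nd.zeros((1,), ctx=mx.gpu(0)))
```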
3 | 4 | [Download wheel for CUDA-9.0(2019.8.20)](https://1dv.aflat.top/mxnet_cu90-1.6.0b20190820-py2.py3-none-manylinux1_x86_64.whl) 5 | 6 | [Download wheel for CUDA-10.0(2019.12.14)](https://1dv.aflat.top/mxnet_cu100-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 7 | 8 | [Download wheel for CUDA-10.1(2019.12.14)](https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl) 9 | 10 | Install the wheel with 11 | ```bash 12 | pip3 install https://1dv.aflat.top/mxnet_cu101-1.6.0b20191214-py2.py3-none-manylinux1_x86_64.whl --user 13 | ``` 14 | 15 | 16 | 17 | ## Setup with Docker 18 | We provide pre-built Docker images for both CUDA 9.0 and CUDA 10.0. 19 | 20 | Maxwell, Pascal, Volta and Turing GPUs are supported. 21 | 22 | For nvidia-driver >= 410.48, the cuda10 image is recommended. 23 | 24 | For nvidia-driver >= 384.81, the cuda9 image is recommended. 25 | 26 | An Aliyun Beijing mirror is provided for users pulling from China. 27 | 28 | ```bash 29 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda9 zsh 30 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR rogerchen/simpledet:cuda10 zsh 31 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda9 zsh 32 | nvidia-docker run -it -v $HOST-SIMPLEDET-DIR:$CONTAINER-WORKDIR registry.cn-beijing.aliyuncs.com/rogerchen/simpledet:cuda10 zsh 33 | ``` 34 | 35 | ## Setup with Singularity 36 | We recommend that users adopt Singularity as the default environment manager to minimize configuration effort. 37 | Singularity is a virtual environment manager like virtualenv, but at the system level. 38 | 39 | #### Install Singularity >= 2.6 40 | ```bash 41 | # install dependency 42 | sudo apt update 43 | sudo apt install build-essential python libarchive-dev 44 | 45 | # install singularity 46 | wget https://github.com/sylabs/singularity/releases/download/2.6.1/singularity-2.6.1.tar.gz 47 | tar xzfv singularity-2.6.1.tar.gz 48 | cd singularity-2.6.1 49 | ./configure --prefix=/usr/local 50 | make 51 | sudo make install 52 | ``` 53 | 54 | #### Download singularity image for SimpleDet 55 | ```bash 56 | wget https://1dv.aflat.top/simpledet.img 57 | ``` 58 | 59 | #### Invoke simpledet shell 60 | Here we need to map the working directory into the singularity shell. Note that **symlinks to files outside the working directory will not work** since singularity has its own filesystem. Thus we recommend mapping the whole data storage into singularity by replacing $WORKDIR with something like `/data` or `/mnt/`.
61 | 62 | ```bash 63 | sudo singularity shell --no-home --nv -s /usr/bin/zsh --bind $WORKDIR /path/to/simpledet.img 64 | ``` 65 | 66 | ## Setup from Scratch 67 | #### System Requirements 68 | - Ubuntu 16.04 69 | - Python >= 3.5 70 | 71 | #### Install CUDA, cuDNN and NCCL 72 | 73 | #### Install cocotools 74 | ```bash 75 | # Install a patched cocotools for python3 76 | pip3 install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' 77 | ``` 78 | 79 | #### Install MXNet 80 | ```bash 81 | # Install dependency 82 | sudo apt-get update 83 | sudo apt-get install -y build-essential git 84 | sudo apt-get install -y libopenblas-dev 85 | ``` 86 | 87 | ```bash 88 | git clone --recursive https://github.com/apache/incubator-mxnet /tmp/mxnet && \ 89 | git clone https://github.com/Tusimple/simpledet /tmp/simpledet && \ 90 | git clone https://github.com/RogerChern/cocoapi /tmp/cocoapi && \ 91 | cp -r /tmp/simpledet/operator_cxx/* /tmp/mxnet/src/operator && \ 92 | mkdir -p /tmp/mxnet/src/coco_api && \ 93 | cp -r /tmp/cocoapi/common /tmp/mxnet/src/coco_api && \ 94 | cd /tmp/mxnet && \ 95 | echo "USE_SIGNAL_HANDLER = 1" >> ./config.mk && \ 96 | echo "USE_OPENCV = 0" >> ./config.mk && \ 97 | echo "USE_MKLDNN = 0" >> ./config.mk && \ 98 | echo "USE_BLAS = openblas" >> ./config.mk && \ 99 | echo "USE_CUDA = 1" >> ./config.mk && \ 100 | echo "USE_CUDA_PATH = /usr/local/cuda" >> ./config.mk && \ 101 | echo "USE_CUDNN = 1" >> ./config.mk && \ 102 | echo "USE_NCCL = 1" >> ./config.mk && \ 103 | echo "USE_DIST_KVSTORE = 1" >> ./config.mk && \ 104 | echo "CUDA_ARCH = -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70" >> ./config.mk && \ 105 | rm /tmp/mxnet/src/operator/nn/group_norm* && \ 106 | make -j$((`nproc`-1)) && \ 107 | cd python && \ 108 | python3 setup.py install && \ 109 | rm -rf /tmp/mxnet /tmp/simpledet /tmp/cocoapi 110 | ``` 111 | -------------------------------------------------------------------------------- /utils/callback.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import mxnet as mx 4 | 5 | 6 | class Speedometer(object): 7 | def __init__(self, batch_size, total_iter, frequent=50): 8 | self.batch_size = batch_size 9 | self.total_iter = total_iter 10 | self.frequent = frequent 11 | self.init = False 12 | self.tic = 0 13 | self.last_count = 0 14 | 15 | def __call__(self, param): 16 | """Callback to Show speed.""" 17 | count = param.nbatch 18 | if self.last_count > count: 19 | self.init = False 20 | self.last_count = count 21 | 22 | if self.init: 23 | if count % self.frequent == 0: 24 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 25 | if param.eval_metric is not None: 26 | name, value = param.eval_metric.get() 27 | s = "Epoch[%d] Batch [%d]\tIter: %d/%d\tLr: %.5f\tSpeed: %.2f samples/sec\tTrain-" % \ 28 | (param.epoch, count, param.iter, self.total_iter, param.lr, speed) 29 | for n, v in zip(name, value): 30 | s += "%s=%f,\t" % (n, v) 31 | logging.info(s) 32 | else: 33 | logging.info("Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec", 34 | param.epoch, count, speed) 35 | self.tic = time.time() 36 | else: 37 | self.init = True 38 | self.tic = time.time() 39 | 40 | class DetailSpeedometer(object): 41 | def __init__(self, batch_size, frequent=50): 42 | self.batch_size = batch_size 43 | self.frequent = frequent 44 | self.init = False 45 | self.tic = 0 46 | self.last_count = 0 47 | 48 | def 
__call__(self, param): 49 | """Callback to Show speed.""" 50 | count = param.nbatch 51 | rank = param.rank 52 | total_iter = param.total_iter 53 | 54 | if self.last_count > count: 55 | self.init = False 56 | self.last_count = count 57 | 58 | if self.init: 59 | if count % self.frequent == 0: 60 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 61 | if param.eval_metric is not None: 62 | name, value = param.eval_metric.get() 63 | s = "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\t" \ 64 | "data:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec\tTrain-" % ( 65 | param.epoch, rank, count, total_iter, 66 | param.cur_batch_time, param.avg_batch_time, 67 | param.cur_kvstore_sync_time, param.avg_kvstore_sync_time, 68 | param.cur_data_time, param.avg_data_time, 69 | param.cur_iter_total_time, param.avg_iter_total_time, 70 | speed) 71 | for n, v in zip(name, value): 72 | s += "%s=%f,\t" % (n, v) 73 | logging.info(s) 74 | else: 75 | logging.info( 76 | "Epoch[%d] Rank[%d] Batch[%d] TotalIter[%d] Train:%.3f(%.3f)\tkv_sync:%.3f(%.3f)\tdata:%.3f(%.3f)\titer_total_time:%.3f(%.3f)\tSpeed: %.2f samples/sec", 77 | param.epoch, rank, count, total_iter, 78 | param.cur_batch_time, param.avg_batch_time, 79 | param.cur_kvstore_sync_time, param.avg_kvstore_sync_time, 80 | param.cur_data_time, param.avg_data_time, 81 | param.cur_iter_total_time, param.avg_iter_total_time, 82 | speed) 83 | 84 | self.tic = time.time() 85 | else: 86 | self.init = True 87 | self.tic = time.time() 88 | 89 | 90 | def do_checkpoint(prefix): 91 | def _callback(iter_no, sym, arg, aux): 92 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 93 | return _callback 94 | 95 | 96 | def do_checkpoint_iter(prefix, checkpoint_iter): 97 | def _callback(param): 98 | if checkpoint_iter == param.locals["total_iter"]: 99 | arg_params, aux_params = param.locals["self"].get_params() 100 | save_dict = {('arg:%s' % k) : v.as_in_context(mx.cpu()) for k, v in arg_params.items()} 101 | save_dict.update({('aux:%s' % k) : v.as_in_context(mx.cpu()) for k, v in aux_params.items()}) 102 | param_name = '%s-iter-%s.params' % (prefix, checkpoint_iter) 103 | mx.nd.save(param_name, save_dict) 104 | logging.info('Saved checkpoint to \"%s\"', param_name) 105 | return _callback 106 | -------------------------------------------------------------------------------- /doc/FRAMEWOKR_OVERVIEW.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | ### Scripts 4 | - `detection_train.py` 5 | - `detection_test.py` 6 | - `rpn_test.py` 7 | - `mask_test.py` 8 | 9 | 10 | ### Top-level Singletons (Detectors) 11 | Top-level singletons generally represent a unique kind of detection method (detector). 12 | By unique we mean that the detector has a pipeline that cannot be easily adapted from any existing detector. 13 | Detectors now include 14 | - RPN 15 | - RetinaNet 16 | - KD RetinaNet 17 | - Fast R-CNN 18 | - Faster R-CNN 19 | - KD Faster R-CNN 20 | - Mask R-CNN 21 | - Cascade R-CNN 22 | 23 | Every detector has a `get_train_symbol` method. 24 | Each may have one or more of `get_bbox_test_symbol`, `get_mask_test_symbol`, `get_rpn_test_symbol`, and `get_kp_test_symbol` methods. 25 | 26 | Here we leave the API design of `get_xxx_symbol` to the user because 27 | - Detector design should not be bound by the framework. 28 | - The user is responsible for constructing the `train_sym` used in `detection_train.py` in the config.
29 | 30 | We provide a detailed annotated Mask R-CNN as an example. 31 | ``` python 32 | class MaskRcnn(object): 33 | def __init__(self): 34 | pass 35 | 36 | @staticmethod 37 | def get_train_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head): 38 | # mask r-cnn needs ground truth bboxes and instance polygons to generate the target for training 39 | gt_bbox = X.var("gt_bbox") 40 | gt_poly = X.var("gt_poly") 41 | # im_info contains the width and height of image before padding and is use to remove anchors or 42 | # proposals out of image 43 | im_info = X.var("im_info") 44 | 45 | # backbone network provide feature map for sub-networks 46 | rpn_feat = backbone.get_rpn_feature() 47 | rcnn_feat = backbone.get_rcnn_feature() 48 | # neck is used for feature fusion across scales or dimension reduction 49 | rpn_feat = neck.get_rpn_feature(rpn_feat) 50 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 51 | 52 | # cache anchor for generating anchor target and bbox target 53 | rpn_head.get_anchor() 54 | rpn_loss = rpn_head.get_loss(rpn_feat, gt_bbox, im_info) 55 | # calculate bbox_target, mask_target from rpn proposals 56 | proposal, bbox_cls, bbox_target, bbox_weight, mask_proposal, mask_target, mask_ind = \ 57 | rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, gt_poly, im_info) 58 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 59 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, mask_proposal) 60 | 61 | bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight) 62 | mask_loss = mask_head.get_loss(mask_roi_feat, mask_target, mask_ind) 63 | return X.group(rpn_loss + bbox_loss + mask_loss) 64 | 65 | @staticmethod 66 | def get_test_symbol(backbone, neck, rpn_head, bbox_roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor): 67 | rec_id, im_id, im_info, proposal, proposal_score = \ 68 | MaskRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 69 | 70 | rcnn_feat = backbone.get_rcnn_feature() 71 | rcnn_feat = neck.get_rcnn_feature(rcnn_feat) 72 | 73 | roi_feat = bbox_roi_extractor.get_roi_feature(rcnn_feat, proposal) 74 | cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info, proposal) 75 | 76 | # during test, in order to save computation, only the top 100 bbox after NMS are used for mask prediction 77 | post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy) 78 | 79 | mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy) 80 | mask = mask_head.get_prediction(mask_roi_feat) 81 | 82 | # the layout is fixed for mask_test.py 83 | return X.group([rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls, mask]) 84 | 85 | @staticmethod 86 | def get_rpn_test_symbol(backbone, neck, rpn_head): 87 | return FasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head) 88 | ``` 89 | 90 | 91 | ### Components 92 | Components now include 93 | - Backbone 94 | - ResNet 95 | - ResNeXt 96 | - TridentNet 97 | - EfficientNet 98 | - DCNv1/v2 99 | - Neck 100 | - ReduceNeck 101 | - FPNNeck 102 | - NASFPNNeck 103 | - RpnHead 104 | - RpnHead 105 | - FPNRpnHead 106 | - MaskRpnHead 107 | - TridentRpnHead 108 | - RetinaNetHead 109 | - RoIExtractor 110 | - RoIAlign 111 | - FPNRoIAlign 112 | - BboxHead 113 | - BboxResNetv1C5Head 114 | - BboxResNetv2C5Head 115 | - BboxResNeXtC5Head 116 | - Bbox2fcHead 117 | - Bbox4conv1fcHead 118 | - BboxDualHead 119 | - MaskHead 120 | - MaskHead 121 | 
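To make the component contract concrete, below is a minimal, hypothetical wiring sketch of the call a config makes, mirroring the `get_train_symbol` signature used by `FitNetFasterRcnn` in `models/KD/builder.py` (minus its KD head). The helper name `build_train_symbol` is an illustrative assumption rather than code shipped in this repository; each argument is a pre-constructed component instance drawn from the families listed above (e.g. a `DCNResNetFPN` backbone, an `FPNNeck`, an `FPNRpnHead`, an `FPNRoIAlign`, a `Bbox2fcHead`).

``` python
# Hypothetical sketch: the config builds component instances and the detector
# merely orchestrates them; no detector code changes when a component is swapped.
from symbol.builder import FasterRcnn


def build_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head):
    # Each argument is a component instance (Backbone, Neck, RpnHead,
    # RoIExtractor, BboxHead). The detector chains their get_*_feature and
    # get_loss calls, exactly as the annotated MaskRcnn example above does.
    return FasterRcnn.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head)
```

Swapping a backbone or a neck therefore only changes which instances the config constructs; the detector itself stays untouched.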
-------------------------------------------------------------------------------- /operator_cxx/contrib/generate_anchor.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file generate_anchor.cc 22 | * \brief 23 | * \author Yanghao Li, Chenxia Han 24 | */ 25 | 26 | #include "./generate_anchor-inl.h" 27 | 28 | namespace mxnet { 29 | namespace op { 30 | 31 | template 32 | class GenAnchorOp : public Operator{ 33 | public: 34 | explicit GenAnchorOp(GenAnchorParam param) { 35 | this->param_ = param; 36 | } 37 | 38 | virtual void Forward(const OpContext &ctx, 39 | const std::vector &in_data, 40 | const std::vector &req, 41 | const std::vector &out_data, 42 | const std::vector &aux_states) { 43 | using namespace mshadow; 44 | using namespace mshadow::expr; 45 | CHECK_EQ(in_data.size(), 1); 46 | CHECK_EQ(out_data.size(), 1); 47 | CHECK_EQ(req.size(), 1); 48 | CHECK_EQ(req[gen_anchor::kOut], kWriteTo); 49 | 50 | Stream *s = ctx.get_stream(); 51 | Tensor scores = in_data[gen_anchor::kClsProb].get(s); 52 | 53 | Tensor out = out_data[gen_anchor::kOut].get(s); 54 | 55 | std::vector scales(param_.scales.begin(), param_.scales.end()); 56 | std::vector ratios(param_.ratios.begin(), param_.ratios.end()); 57 | 58 | int num_anchors = scales.size() * ratios.size(); 59 | int height = scores.size(2); 60 | int width = scores.size(3); 61 | 62 | // Generate anchors 63 | std::vector base_anchor({ 64 | 0.0f, 0.0f, param_.feature_stride - 1.0f, param_.feature_stride - 1.0f 65 | }); 66 | std::vector anchors; 67 | gen_anchor_utils::GenerateAnchors( 68 | base_anchor, ratios, scales, anchors 69 | ); 70 | 71 | // Enumerate all shifted anchors 72 | for (index_t i = 0; i < num_anchors; ++i) { 73 | for (index_t j = 0; j < height; ++j) { 74 | for (index_t k = 0; k < width; ++k) { 75 | index_t index = j * (width * num_anchors) + k * (num_anchors) + i; 76 | out[index][0] = static_cast(anchors[i * 4 + 0] + k * param_.feature_stride); 77 | out[index][1] = static_cast(anchors[i * 4 + 1] + j * param_.feature_stride); 78 | out[index][2] = static_cast(anchors[i * 4 + 2] + k * param_.feature_stride); 79 | out[index][3] = static_cast(anchors[i * 4 + 3] + j * param_.feature_stride); 80 | } 81 | } 82 | } 83 | } 84 | 85 | virtual void Backward(const OpContext &ctx, 86 | const std::vector &out_grad, 87 | const std::vector &in_data, 88 | const std::vector &out_data, 89 | const std::vector &req, 90 | const std::vector &in_grad, 91 | const std::vector &aux_states) { 92 | using namespace mshadow; 93 | using namespace mshadow::expr; 94 | CHECK_EQ(in_grad.size(), 1); 95 | 96 | Stream *s = ctx.get_stream(); 97 | Tensor gscores = 
in_grad[gen_anchor::kClsProb].get(s); 98 | 99 | // can not assume the grad would be zero 100 | Assign(gscores, req[gen_anchor::kClsProb], 0); 101 | } 102 | 103 | private: 104 | GenAnchorParam param_; 105 | }; // class GenAnchorOp 106 | 107 | template<> 108 | Operator *CreateOp(GenAnchorParam param) { 109 | return new GenAnchorOp(param); 110 | } 111 | 112 | Operator* GenAnchorProp::CreateOperator(Context ctx) const { 113 | DO_BIND_DISPATCH(CreateOp, param_); 114 | } 115 | 116 | DMLC_REGISTER_PARAMETER(GenAnchorParam); 117 | 118 | MXNET_REGISTER_OP_PROPERTY(_contrib_GenAnchor, GenAnchorProp) 119 | .describe("Generate region anchors") 120 | .add_argument("cls_prob", "NDArray-or-Symbol", "Probability of how likely proposal is object.") 121 | .add_arguments(GenAnchorParam::__FIELDS__()); 122 | 123 | } // namespace op 124 | } // namespace mxnet 125 | -------------------------------------------------------------------------------- /models/dcn/builder.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import mxnext as X 3 | from mxnext import conv, relu, add 4 | from mxnext.backbone.resnet_v1b_helper import resnet_unit 5 | from symbol.builder import Backbone 6 | 7 | 8 | def dcn_resnet_unit(input, name, filter, stride, dilate, proj, norm, **kwargs): 9 | conv1 = conv(input, name=name + "_conv1", filter=filter // 4) 10 | bn1 = norm(conv1, name=name + "_bn1") 11 | relu1 = relu(bn1, name=name + "_relu1") 12 | 13 | # conv2 filter router 14 | conv2_offset = conv(relu1, name=name + "_conv2_offset", filter=72, kernel=3, stride=stride, dilate=dilate) 15 | conv2 = mx.sym.contrib.DeformableConvolution(relu1, conv2_offset, kernel=(3, 3), 16 | stride=(stride, stride), dilate=(dilate, dilate), pad=(1, 1), num_filter=filter // 4, 17 | num_deformable_group=4, no_bias=True, name=name + "_conv2") 18 | bn2 = norm(conv2, name=name + "_bn2") 19 | relu2 = relu(bn2, name=name + "_relu2") 20 | 21 | conv3 = conv(relu2, name=name + "_conv3", filter=filter) 22 | bn3 = norm(conv3, name=name + "_bn3") 23 | 24 | if proj: 25 | shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride) 26 | shortcut = norm(shortcut, name=name + "_sc_bn") 27 | else: 28 | shortcut = input 29 | 30 | eltwise = add(bn3, shortcut, name=name + "_plus") 31 | 32 | return relu(eltwise, name=name + "_relu") 33 | 34 | 35 | def hybrid_resnet_stage(data, name, num_block, num_special_block, special_res_unit, filter, 36 | stride, dilate, norm, **kwargs): 37 | s, d = stride, dilate 38 | 39 | for i in range(1, num_block + 1 - num_special_block): 40 | proj = True if i == 1 else False 41 | s = stride if i == 1 else 1 42 | d = dilate 43 | data = resnet_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm) 44 | 45 | for i in range(num_block + 1 - num_special_block, num_block + 1): 46 | proj = True if i == 1 else False 47 | s = stride if i == 1 else 1 48 | d = dilate 49 | data = special_res_unit(data, "{}_unit{}".format(name, i), filter, s, d, proj, norm, **kwargs) 50 | 51 | return data 52 | 53 | 54 | def hybrid_resnet_c4_builder(special_resnet_unit): 55 | class ResNetC4(Backbone): 56 | def __init__(self, pBackbone): 57 | super().__init__(pBackbone) 58 | p = self.p 59 | 60 | import mxnext.backbone.resnet_v1b_helper as helper 61 | num_c2, num_c3, num_c4, _ = helper.depth_config[p.depth] 62 | 63 | data = X.var("data") 64 | if p.fp16: 65 | data = data.astype("float16") 66 | c1 = helper.resnet_c1(data, p.normalizer) 67 | c2 = helper.resnet_c2(c1, num_c2, 1, 1, p.normalizer) 68 | c3 = 
hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1, 69 | p.normalizer, params=p) 70 | c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1, 71 | p.normalizer, params=p) 72 | 73 | self.symbol = c4 74 | 75 | def get_rpn_feature(self): 76 | return self.symbol 77 | 78 | def get_rcnn_feature(self): 79 | return self.symbol 80 | 81 | return ResNetC4 82 | 83 | 84 | def hybrid_resnet_fpn_builder(special_resnet_unit): 85 | class ResNetFPN(Backbone): 86 | def __init__(self, pBackbone): 87 | super().__init__(pBackbone) 88 | p = self.p 89 | 90 | import mxnext.backbone.resnet_v1b_helper as helper 91 | num_c2, num_c3, num_c4, num_c5 = helper.depth_config[p.depth] 92 | 93 | data = X.var("data") 94 | if p.fp16: 95 | data = data.astype("float16") 96 | c1 = helper.resnet_c1(data, p.normalizer) 97 | c2 = hybrid_resnet_stage(c1, "stage1", num_c2, p.num_c2_block or 0, special_resnet_unit, 256, 1, 1, 98 | p.normalizer, params=p) 99 | c3 = hybrid_resnet_stage(c2, "stage2", num_c3, p.num_c3_block or 0, special_resnet_unit, 512, 2, 1, 100 | p.normalizer, params=p) 101 | c4 = hybrid_resnet_stage(c3, "stage3", num_c4, p.num_c4_block or 0, special_resnet_unit, 1024, 2, 1, 102 | p.normalizer, params=p) 103 | c5 = hybrid_resnet_stage(c4, "stage4", num_c5, p.num_c5_block or 0, special_resnet_unit, 2048, 2, 1, 104 | p.normalizer, params=p) 105 | 106 | self.symbol = (c2, c3, c4, c5) 107 | 108 | def get_rpn_feature(self): 109 | return self.symbol 110 | 111 | def get_rcnn_feature(self): 112 | return self.symbol 113 | 114 | return ResNetFPN 115 | 116 | 117 | DCNResNetC4 = hybrid_resnet_c4_builder(dcn_resnet_unit) 118 | DCNResNetFPN = hybrid_resnet_fpn_builder(dcn_resnet_unit) 119 | -------------------------------------------------------------------------------- /models/tridentnet/input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mxnet as mx 3 | 4 | from core.detection_input import DetectionAugmentation, AnchorTarget2D 5 | from operator_py.cython.bbox import bbox_overlaps_cython 6 | 7 | 8 | class ScaleAwareRange(DetectionAugmentation): 9 | def __init__(self, pScaleRange): 10 | super().__init__() 11 | self.p = pScaleRange 12 | 13 | def apply(self, input_record): 14 | p = self.p 15 | 16 | im_info = input_record['im_info'] 17 | 18 | # input_record["valid_ranges_on_origin"] = p.cal_on_origin 19 | input_record["valid_ranges"] = np.array(p.valid_ranges, dtype=np.float32).reshape(-1, 2) 20 | if p.cal_on_origin: 21 | input_record["valid_ranges"] *= im_info[2] 22 | # replace -1 with max_size 23 | inds = np.where(input_record["valid_ranges"][:, 1] < 0)[0] 24 | input_record["valid_ranges"][inds, 1] = max(im_info[0], im_info[1]) 25 | 26 | 27 | class TridentAnchorTarget2D(AnchorTarget2D): 28 | """ 29 | input: image_meta: tuple(h, w, scale) 30 | gt_bbox, ndarry(max_num_gt, 4) 31 | output: anchor_label, ndarray(num_branch, num_anchor * 2, h, w) 32 | anchor_bbox_target, ndarray(num_branch, num_anchor * 4, h, w) 33 | anchor_bbox_weight, ndarray(num_branch, num_anchor * 4, h, w) 34 | valid_ranges, ndarray(num_branch, 2) 35 | """ 36 | 37 | def __init__(self, pAnchor): 38 | super().__init__(pAnchor) 39 | 40 | def _filter_anchor_by_scale_range(self, cls_label, valid_anchor, gt_bbox, valid_range, invalid_anchor_threshd): 41 | if len(gt_bbox) == 0: 42 | return 43 | gt_bbox_sizes = (gt_bbox[:, 2] - gt_bbox[:, 0] + 1.0) * (gt_bbox[:, 3] - gt_bbox[:, 1] + 1.0) 44 | invalid_gt_bbox_inds = 
np.where((gt_bbox_sizes < valid_range[0]**2) | (gt_bbox_sizes > valid_range[1]**2))[0] 45 | invalid_gt_bbox = gt_bbox[invalid_gt_bbox_inds] 46 | if len(invalid_gt_bbox) > 0: 47 | invalid_overlaps = bbox_overlaps_cython( 48 | valid_anchor.astype(np.float32, copy=False), invalid_gt_bbox.astype(np.float32, copy=False)) 49 | invalid_argmax_overlaps = invalid_overlaps.argmax(axis=1) 50 | invalid_max_overlaps = invalid_overlaps[np.arange(len(valid_anchor)), invalid_argmax_overlaps] 51 | 52 | # ignore anchors overlapped with invalid gt boxes 53 | disable_inds = np.where((invalid_max_overlaps > invalid_anchor_threshd))[0] 54 | cls_label[disable_inds] = -1 55 | 56 | def apply(self, input_record): 57 | p = self.p 58 | 59 | im_info = input_record["im_info"] 60 | gt_bbox = input_record["gt_bbox"] 61 | valid_ranges = input_record["valid_ranges"] 62 | assert isinstance(gt_bbox, np.ndarray) 63 | assert gt_bbox.dtype == np.float32 64 | 65 | valid = np.where(gt_bbox[:, 0] != -1)[0] 66 | gt_bbox = gt_bbox[valid] 67 | 68 | if gt_bbox.shape[1] == 5: 69 | gt_bbox = gt_bbox[:, :4] 70 | 71 | h, w = im_info[:2] 72 | if h >= w: 73 | fh, fw = p.generate.long, p.generate.short 74 | else: 75 | fh, fw = p.generate.short, p.generate.long 76 | 77 | valid_index, valid_anchor = self._gather_valid_anchor(im_info) 78 | 79 | valid_cls_label, valid_anchor_label = \ 80 | self._assign_label_to_anchor(valid_anchor, gt_bbox, 81 | p.assign.neg_thr, p.assign.pos_thr, p.assign.min_pos_thr) 82 | 83 | cls_labels, reg_targets, reg_weights = [], [], [] 84 | for valid_range in valid_ranges: 85 | cls_label = valid_cls_label.copy() 86 | self._filter_anchor_by_scale_range(cls_label, valid_anchor, gt_bbox, 87 | valid_range, p.trident.invalid_anchor_threshd) 88 | self._sample_anchor(cls_label, p.sample.image_anchor, p.sample.pos_fraction) 89 | reg_target, reg_weight = self._cal_anchor_target(cls_label, valid_anchor, gt_bbox, valid_anchor_label) 90 | cls_label, reg_target, reg_weight = \ 91 | self._scatter_valid_anchor(valid_index, cls_label, reg_target, reg_weight) 92 | 93 | cls_labels.append(cls_label.reshape((fh, fw, -1)).transpose(2, 0, 1).reshape(-1)) 94 | reg_targets.append(reg_target.reshape((fh, fw, -1)).transpose(2, 0, 1)) 95 | reg_weights.append(reg_weight.reshape((fh, fw, -1)).transpose(2, 0, 1)) 96 | 97 | input_record["rpn_cls_label"] = np.stack(cls_labels) 98 | input_record["rpn_reg_target"] = np.stack(reg_targets) 99 | input_record["rpn_reg_weight"] = np.stack(reg_weights) 100 | 101 | return input_record["rpn_cls_label"], \ 102 | input_record["rpn_reg_target"], \ 103 | input_record["rpn_reg_weight"] 104 | 105 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # modify from https://github.com/NVIDIA/TensorRT/blob/master/docker/ubuntu.Dockerfile 2 | ARG CUDA_VERSION=11.1 3 | ARG CUDNN_VERSION=8 4 | ARG OS_VERSION=16.04 5 | ARG NVCR_SUFFIX= 6 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${OS_VERSION}${NVCR_SUFFIX} 7 | 8 | LABEL maintainer="Simpledet" 9 | 10 | WORKDIR workspace 11 | 12 | # basic 13 | RUN apt-get update && \ 14 | apt-get install -y --no-install-recommends && \ 15 | apt-get install -y build-essential python-dev python3-dev && \ 16 | apt-get install -y git wget sudo curl openssh-server openssh-client bash-completion command-not-found \ 17 | vim htop tmux zsh rsync bzip2 zip unzip patch time make cmake locales locales-all libgtk2.0-dev libgl1-mesa-glx python3-tk 
\ 18 | ninja-build libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev llvm libncurses5-dev libncursesw5-dev \ 19 | xz-utils tk-dev libffi-dev liblzma-dev python-openssl libopenblas-dev && \ 20 | rm -rf /var/lib/apt/lists/* 21 | RUN ln -sfv /usr/bin/python3 /usr/bin/python 22 | 23 | # zsh and fzf 24 | RUN wget https://github.com/robbyrussell/oh-my-zsh/raw/master/tools/install.sh -O - | zsh || true && \ 25 | git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions && \ 26 | sed -i 's/robbyrussell/fishy/' ~/.zshrc && \ 27 | sed -i 's/(git)/(git zsh-autosuggestions)/' ~/.zshrc && \ 28 | sed -i 's/# DISABLE_AUTO_UPDATE/DISABLE_AUTO_UPDATE/' ~/.zshrc && \ 29 | git clone --depth 1 https://github.com/junegunn/fzf.git ~/.fzf && ~/.fzf/install --all 30 | 31 | # use pyenv to manage python version 32 | RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ 33 | echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.zshrc && \ 34 | echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.zshrc && \ 35 | echo 'export PYTHON_CONFIGURE_OPTS="--enable-shared"' >> ~/.zshrc && \ 36 | echo 'if command -v pyenv 1>/dev/null 2>&1; then\n eval "$(pyenv init -)"\nfi' >> ~/.zshrc 37 | 38 | RUN /usr/bin/zsh -c "source ~/.zshrc && \ 39 | pyenv install 3.6.8 && \ 40 | pyenv global 3.6.8 && \ 41 | eval zsh && \ 42 | pip install -U pipenv setuptools && \ 43 | pip install ipython numpy scipy scikit-learn tqdm graphviz easydict matplotlib pyarrow pyzmq pillow cython requests pytz opencv-python tensorboard && \ 44 | rm -rf ~/.cache" 45 | 46 | # build mxnet 47 | RUN /usr/bin/zsh -c "source ~/.zshrc && \ 48 | git clone --recursive https://github.com/apache/incubator-mxnet /tmp/mxnet -b 1.6.0 && \ 49 | git clone https://github.com/Tusimple/simpledet /tmp/simpledet && \ 50 | git clone https://github.com/RogerChern/cocoapi /tmp/cocoapi && \ 51 | cp -r /tmp/simpledet/operator_cxx/* /tmp/mxnet/src/operator && \ 52 | mkdir -p /tmp/mxnet/src/coco_api && \ 53 | cp -r /tmp/cocoapi/common /tmp/mxnet/src/coco_api && \ 54 | cd /tmp/mxnet && \ 55 | echo 'USE_SIGNAL_HANDLER = 1' >> ./config.mk && \ 56 | echo 'USE_OPENCV = 0' >> ./config.mk && \ 57 | echo 'USE_MKLDNN = 0' >> ./config.mk && \ 58 | echo 'USE_BLAS = openblas' >> ./config.mk && \ 59 | echo 'USE_CUDA = 1' >> ./config.mk && \ 60 | echo 'USE_CUDA_PATH = /usr/local/cuda' >> ./config.mk && \ 61 | echo 'USE_CUDNN = 1' >> ./config.mk && \ 62 | echo 'USE_NCCL = 1' >> ./config.mk && \ 63 | echo 'USE_DIST_KVSTORE = 1' >> ./config.mk && \ 64 | echo 'CUDA_ARCH = -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86' >> ./config.mk && \ 65 | rm /tmp/mxnet/src/operator/nn/group_norm* && \ 66 | make -j$((`nproc`-1)) && \ 67 | cd python && \ 68 | python3 setup.py install && \ 69 | rm -rf /tmp/mxnet /tmp/simpledet /tmp/cocoapi" 70 | 71 | # install pycocotools and mxnext 72 | RUN /usr/bin/zsh -c "source ~/.zshrc && \ 73 | pip install 'git+https://github.com/RogerChern/cocoapi.git#subdirectory=PythonAPI' && \ 74 | pip install 'git+https://github.com/RogerChern/mxnext#egg=mxnext'" 75 | 76 | # ssh 77 | RUN chsh -s /usr/bin/zsh root && \ 78 | mkdir /var/run/sshd && \ 79 | echo 'root:simpledet' | chpasswd && \ 80 | sed -i '/PermitRootLogin/s/prohibit-password/yes/' /etc/ssh/sshd_config 81 | EXPOSE 22 82 | 83 | # env 84 | RUN echo "export PATH=/usr/local/cuda/bin:\$PATH" >> 
~/.zshrc && \ 85 | echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64:\$LD_LIBRARY_PATH" >> ~/.zsrhc 86 | 87 | CMD ["/usr/sbin/sshd", "-D"] 88 | 89 | -------------------------------------------------------------------------------- /operator_cxx/contrib/nms-inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * \file nms-inl.h 22 | * \brief NMS Operator 23 | * \author Yanghao Li 24 | */ 25 | #ifndef MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 26 | #define MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "../operator_common.h" 39 | #include "../mshadow_op.h" 40 | 41 | namespace mxnet { 42 | namespace op { 43 | 44 | namespace nms { 45 | enum NMSOpInputs {kBBox}; 46 | enum NMSOpOutputs {kOut, kScore}; 47 | enum NMSForwardResource {kTempSpace}; 48 | } // nms 49 | 50 | struct NMSParam : public dmlc::Parameter { 51 | int rpn_pre_nms_top_n; 52 | int rpn_post_nms_top_n; 53 | float threshold; 54 | bool output_score; 55 | bool already_sorted; 56 | uint64_t workspace; 57 | 58 | DMLC_DECLARE_PARAMETER(NMSParam) { 59 | DMLC_DECLARE_FIELD(rpn_pre_nms_top_n).set_default(6000) 60 | .describe("Number of top scoring boxes to keep before applying NMS to RPN proposals"); 61 | DMLC_DECLARE_FIELD(rpn_post_nms_top_n).set_default(300) 62 | .describe("Number of top scoring boxes to keep after applying NMS to RPN proposals"); 63 | DMLC_DECLARE_FIELD(threshold).set_default(0.7) 64 | .describe("NMS value, below which to suppress."); 65 | DMLC_DECLARE_FIELD(output_score).set_default(false) 66 | .describe("Add score to outputs"); 67 | DMLC_DECLARE_FIELD(already_sorted).set_default(false) 68 | .describe("if input rois have been sorted by confidence"); 69 | DMLC_DECLARE_FIELD(workspace).set_default(256) 70 | .describe("Workspace for NMS in MB, default to 256"); 71 | } 72 | }; 73 | 74 | template 75 | Operator *CreateOp(NMSParam param); 76 | 77 | #if DMLC_USE_CXX11 78 | class NMSProp : public OperatorProperty { 79 | public: 80 | void Init(const std::vector >& kwargs) override { 81 | param_.Init(kwargs); 82 | } 83 | 84 | std::map GetParams() const override { 85 | return param_.__DICT__(); 86 | } 87 | 88 | bool InferShape(std::vector *in_shape, 89 | std::vector *out_shape, 90 | std::vector *aux_shape) const override { 91 | using namespace mshadow; 92 | CHECK_EQ(in_shape->size(), 1) << "Input:[bbox]"; 93 | const TShape &dshape = in_shape->at(nms::kBBox); 94 | if (dshape.ndim() == 0) return false; 95 | out_shape->clear(); 96 | // output 97 | out_shape->push_back(Shape3(dshape[0], 
param_.rpn_post_nms_top_n, 4)); 98 | // score 99 | out_shape->push_back(Shape3(dshape[0], param_.rpn_post_nms_top_n, 1)); 100 | return true; 101 | } 102 | 103 | OperatorProperty* Copy() const override { 104 | auto ptr = new NMSProp(); 105 | ptr->param_ = param_; 106 | return ptr; 107 | } 108 | 109 | std::string TypeString() const override { 110 | return "_contrib_NMS"; 111 | } 112 | 113 | std::vector ForwardResource( 114 | const std::vector &in_shape) const override { 115 | return {ResourceRequest::kTempSpace}; 116 | } 117 | 118 | std::vector DeclareBackwardDependency( 119 | const std::vector &out_grad, 120 | const std::vector &in_data, 121 | const std::vector &out_data) const override { 122 | return {}; 123 | } 124 | 125 | int NumVisibleOutputs() const override { 126 | if (param_.output_score) { 127 | return 2; 128 | } else { 129 | return 1; 130 | } 131 | } 132 | 133 | int NumOutputs() const override { 134 | return 2; 135 | } 136 | 137 | std::vector ListArguments() const override { 138 | return {"rois"}; 139 | } 140 | 141 | std::vector ListOutputs() const override { 142 | return {"output", "score"}; 143 | } 144 | 145 | Operator* CreateOperator(Context ctx) const override; 146 | 147 | private: 148 | NMSParam param_; 149 | }; // class NMSProp 150 | 151 | #endif // DMLC_USE_CXX11 152 | } // namespace op 153 | } // namespace mxnet 154 | 155 | #endif // MXNET_OPERATOR_CONTRIB_NMS_INL_H_ 156 | -------------------------------------------------------------------------------- /models/maskrcnn/bbox_post_processing.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | from operator_py.nms import py_nms_wrapper 5 | 6 | def multiclass_nms(nms, cls_score, bbox_xyxy, min_det_score, max_det_per_image): 7 | # remove background 8 | cls_score = cls_score[:, 1:] 9 | # TODO: the output shape of class_agnostic box is [n, 4], while class_aware box is [n, 4 * (1 + class)] 10 | bbox_xyxy = bbox_xyxy[:, 4:] if bbox_xyxy.shape[1] != 4 else bbox_xyxy 11 | num_class = cls_score.shape[1] 12 | 13 | cls_det = [np.empty((0, 6), dtype=np.float32) for _ in range(num_class)] # [x1, y1, x2, y2, score, cls] 14 | 15 | for cid in range(num_class): 16 | score = cls_score[:, cid] 17 | if bbox_xyxy.shape[1] != 4: 18 | _bbox_xyxy = bbox_xyxy[:, cid * 4:(cid + 1) * 4] 19 | else: 20 | _bbox_xyxy = bbox_xyxy 21 | valid_inds = np.where(score > min_det_score)[0] 22 | box = _bbox_xyxy[valid_inds] 23 | score = score[valid_inds] 24 | det = np.concatenate((box, score.reshape(-1, 1)), axis=1).astype(np.float32) 25 | det = nms(det) 26 | cls = np.full((det.shape[0], 1), cid, dtype=np.float32) 27 | cls_det[cid] = np.hstack((det, cls)) 28 | 29 | cls_det = np.vstack([det for det in cls_det]) 30 | scores = cls_det[:, -2] 31 | top_index = np.argsort(scores)[::-1][:max_det_per_image] 32 | return cls_det[top_index] 33 | 34 | 35 | class BboxPostProcessingOperator(mx.operator.CustomOp): 36 | def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr): 37 | super().__init__() 38 | self.max_det_per_image = max_det_per_image 39 | self.min_det_score = min_det_score 40 | self.nms_type = nms_type 41 | self.nms_thr = nms_thr 42 | 43 | def forward(self, is_train, req, in_data, out_data, aux): 44 | if self.nms_type == 'nms': 45 | nms = py_nms_wrapper(self.nms_thr) 46 | else: 47 | raise NotImplementedError 48 | 49 | cls_score = in_data[0].asnumpy() 50 | bbox_xyxy = in_data[1].asnumpy() 51 | 52 | cls_score_shape = cls_score.shape # (b, n, num_class_withbg) 53 | 
bbox_xyxy_shape = bbox_xyxy.shape # (b, n, 4) or (b, n, 4 * num_class_withbg) 54 | batch_image = cls_score_shape[0] 55 | num_bbox = cls_score_shape[1] 56 | num_class_withbg = cls_score_shape[2] 57 | 58 | post_score = np.zeros((batch_image, self.max_det_per_image, 1), dtype=np.float32) 59 | post_bbox_xyxy = np.zeros((batch_image, self.max_det_per_image, 4), dtype=np.float32) 60 | post_cls = np.full((batch_image, self.max_det_per_image, 1), -1, dtype=np.float32) 61 | 62 | for i, (per_image_cls_score, per_image_bbox_xyxy) in enumerate(zip(cls_score, bbox_xyxy)): 63 | cls_det = multiclass_nms(nms, per_image_cls_score, per_image_bbox_xyxy, \ 64 | self.min_det_score, self.max_det_per_image) 65 | num_det = cls_det.shape[0] 66 | post_bbox_xyxy[i, :num_det] = cls_det[:, :4] 67 | post_score[i, :num_det] = cls_det[:, -2][:, np.newaxis] # convert to (n, 1) 68 | post_cls[i, :num_det] = cls_det[:, -1][:, np.newaxis] # convert to (n, 1) 69 | 70 | self.assign(out_data[0], req[0], post_score) 71 | self.assign(out_data[1], req[1], post_bbox_xyxy) 72 | self.assign(out_data[2], req[2], post_cls) 73 | 74 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 75 | self.assign(in_grad[0], req[0], 0) 76 | self.assign(in_grad[1], req[1], 0) 77 | 78 | 79 | @mx.operator.register("BboxPostProcessing") 80 | class BboxPostProcessingProp(mx.operator.CustomOpProp): 81 | def __init__(self, max_det_per_image, min_det_score, nms_type, nms_thr): 82 | super().__init__(need_top_grad=False) 83 | self.max_det_per_image = int(max_det_per_image) 84 | self.min_det_score = float(min_det_score) 85 | self.nms_type = str(nms_type) 86 | self.nms_thr = float(nms_thr) 87 | 88 | def list_arguments(self): 89 | return ['cls_score', 'bbox_xyxy'] 90 | 91 | def list_outputs(self): 92 | return ['post_score', 'post_bbox_xyxy', 'post_cls'] 93 | 94 | def infer_shape(self, in_shape): 95 | cls_score_shape = in_shape[0] # (b, n, num_class_withbg) 96 | bbox_xyxy_shape = in_shape[1] # (b, n, 4) or (b, n, 4 * num_class_withbg) 97 | 98 | batch_image = cls_score_shape[0] 99 | 100 | post_score_shape = (batch_image, self.max_det_per_image, 1) 101 | post_bbox_xyxy_shape = (batch_image, self.max_det_per_image, 4) 102 | post_cls_shape = (batch_image, self.max_det_per_image, 1) 103 | 104 | return [cls_score_shape, bbox_xyxy_shape], \ 105 | [post_score_shape, post_bbox_xyxy_shape, post_cls_shape] 106 | 107 | def create_operator(self, ctx, shapes, dtypes): 108 | return BboxPostProcessingOperator(self.max_det_per_image, self.min_det_score, self.nms_type, self.nms_thr) 109 | 110 | def declare_backward_dependency(self, out_grad, in_data, out_data): 111 | return [] 112 | -------------------------------------------------------------------------------- /operator_cxx/contrib/global_average_pooling.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /*! 21 | * Copyright (c) 2018 by Contributors 22 | * \file global_average_pooling.cu 23 | * \brief port from https://github.com/hujie-frank/SENet 24 | * \author Chenxia Han 25 | */ 26 | #include 27 | #include 28 | #include "../mxnet_op.h" 29 | #include "../../common/cuda_utils.h" 30 | #include "./global_average_pooling-inl.h" 31 | 32 | #define GAP_CUDA_CHECK(condition) \ 33 | /* Code block avoids redefinition of cudaError_t error */ \ 34 | do { \ 35 | cudaError_t error = condition; \ 36 | CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 37 | } while (0) 38 | #define CUDA_KERNEL_LOOP(i, n) \ 39 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 40 | i < (n); \ 41 | i += blockDim.x * gridDim.x) 42 | 43 | constexpr int CAFFE_CUDA_NUM_THREADS = 512; 44 | constexpr int CAFFE_MAXIMUM_NUM_BLOCKS = 4096; 45 | 46 | inline int CAFFE_GET_BLOCKS(const int N) { 47 | return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS, 48 | CAFFE_MAXIMUM_NUM_BLOCKS); 49 | } 50 | 51 | namespace mshadow { 52 | namespace cuda { 53 | 54 | template 55 | __global__ void GlobalAvePoolForwardKernel(const int spatial_dim, 56 | const Dtype* bottom_data, Dtype* top_data) { 57 | __shared__ Dtype buffer[CAFFE_CUDA_NUM_THREADS]; 58 | unsigned int tid = threadIdx.x; 59 | buffer[tid] = 0; 60 | __syncthreads(); 61 | 62 | for (int j = tid; j < spatial_dim; j += blockDim.x) { 63 | buffer[tid] += bottom_data[blockIdx.x * spatial_dim + j]; 64 | } 65 | __syncthreads(); 66 | 67 | for (int i = blockDim.x / 2; i > 0; i >>= 1) { 68 | if (tid < i) { 69 | buffer[threadIdx.x] += buffer[threadIdx.x + i]; 70 | } 71 | __syncthreads(); 72 | } 73 | 74 | if (tid == 0) { 75 | top_data[blockIdx.x] = buffer[0] / spatial_dim; 76 | } 77 | } 78 | 79 | template 80 | inline void GAPForward(const Tensor &out, 81 | const Tensor &data) { 82 | const DType *bottom_data = data.dptr_; 83 | DType *top_data = out.dptr_; 84 | const int nblocks = data.shape_.ProdShape(0, 2); 85 | const int spatial_dim = data.shape_.ProdShape(2, 4); 86 | cudaStream_t stream = Stream::GetStream(out.stream_); 87 | GlobalAvePoolForwardKernel << > >(spatial_dim, bottom_data, top_data); 89 | GAP_CUDA_CHECK(cudaPeekAtLastError()); 90 | } 91 | 92 | template 93 | __global__ void GlobalAvePoolBackwardKernel(const int nthreads, const int spatial_dim, 94 | const Dtype* top_diff, Dtype* bottom_diff) { 95 | CUDA_KERNEL_LOOP(index, nthreads) { 96 | const int n = index / spatial_dim; 97 | bottom_diff[index] = top_diff[n] / spatial_dim; 98 | } 99 | } 100 | 101 | template 102 | inline void GAPBackward(const Tensor &in_grad, 103 | const Tensor &out_grad) { 104 | const DType *top_diff = out_grad.dptr_; 105 | DType *bottom_diff = in_grad.dptr_; 106 | const int count = in_grad.shape_.Size(); 107 | const int spatial_dim = in_grad.shape_.ProdShape(2, 4); 108 | cudaStream_t stream = Stream::GetStream(in_grad.stream_); 109 | GlobalAvePoolBackwardKernel << > >(count, spatial_dim, top_diff, bottom_diff); 111 | GAP_CUDA_CHECK(cudaPeekAtLastError()); 112 | } 113 | 114 | } // namespace cuda 115 | 116 | template 117 
| inline void GAPForward(const Tensor &out, 118 | const Tensor &data) { 119 | cuda::GAPForward(out, data); 120 | } 121 | 122 | template 123 | inline void GAPBackward(const Tensor &in_grad, 124 | const Tensor &out_grad) { 125 | cuda::GAPBackward(in_grad, out_grad); 126 | } 127 | 128 | } // namespace mshadow 129 | 130 | namespace mxnet { 131 | namespace op { 132 | template<> 133 | Operator *CreateOp(GAPParam param, int dtype) { 134 | Operator *op = NULL; 135 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 136 | op = new GAPOp(param); 137 | }); 138 | return op; 139 | } 140 | 141 | } // namespace op 142 | } // namespace mxnet 143 | 144 | --------------------------------------------------------------------------------