├── .gitignore ├── LICENSE ├── Poster.png ├── README.md ├── megengine_release ├── README.md ├── configs │ ├── __init__.py │ ├── atss_res101_coco_3x_800size.py │ ├── atss_res18_coco_3x_800size.py │ ├── atss_res34_coco_3x_800size.py │ ├── atss_res50_coco_3x_800size.py │ ├── atss_resx101_coco_2x_800size.py │ ├── faster_rcnn_res101_coco_3x_800size.py │ ├── faster_rcnn_res18_coco_3x_800size.py │ ├── faster_rcnn_res34_coco_3x_800size.py │ ├── faster_rcnn_res50_coco_3x_800size.py │ ├── faster_rcnn_resx101_coco_2x_800size.py │ ├── fcos_res101_coco_3x_800size.py │ ├── fcos_res18_coco_3x_800size.py │ ├── fcos_res34_coco_3x_800size.py │ ├── fcos_res50_coco_3x_800size.py │ ├── fcos_resx101_coco_2x_800size.py │ ├── freeanchor_res101_coco_3x_800size.py │ ├── freeanchor_res18_coco_3x_800size.py │ ├── freeanchor_res34_coco_3x_800size.py │ ├── freeanchor_res50_coco_3x_800size.py │ ├── freeanchor_resx101_coco_2x_800size.py │ ├── retinanet_res101_coco_3x_800size.py │ ├── retinanet_res18_coco_3x_800size.py │ ├── retinanet_res34_coco_3x_800size.py │ ├── retinanet_res50_coco_3x_800size.py │ └── retinanet_resx101_coco_2x_800size.py ├── distill_configs │ ├── ICD.py │ ├── ICD_rcnn.py │ ├── atss_res50_coco_1x_800size.py │ ├── coco_obj.json │ ├── fcos_res50_coco_1x_800size.py │ └── retinanet_res50_coco_1x_800size.py ├── layers │ ├── __init__.py │ ├── basic │ │ ├── __init__.py │ │ ├── functional.py │ │ ├── nn.py │ │ └── norm.py │ ├── det │ │ ├── __init__.py │ │ ├── anchor.py │ │ ├── box_head.py │ │ ├── box_utils.py │ │ ├── fpn.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── point_head.py │ │ ├── pooler.py │ │ ├── rcnn.py │ │ ├── rpn.py │ │ └── sampling.py │ └── tools │ │ ├── __init__.py │ │ ├── data_mapper.py │ │ ├── inference.py │ │ ├── nms.py │ │ └── utils.py ├── models │ ├── ICD │ │ ├── ICD.py │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── layers.py │ │ ├── transformer.py │ │ └── utility.py │ ├── __init__.py │ ├── atss.py │ ├── backbones │ │ ├── __init__.py │ │ └── resnet │ │ │ ├── __init__.py │ │ │ └── model.py │ ├── faster_rcnn.py │ ├── fcos.py │ ├── freeanchor.py │ └── retinanet.py ├── requirements.txt ├── test.py ├── train.py └── train_distill_icd.py └── pytorch_release ├── README.md ├── configs ├── Base-CondInst.yaml ├── Base-FCOS.yaml ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── Base-SOLOv2.yaml ├── COCO-Detection │ ├── FCOS_R_101_DCN_FPN_2x.yaml │ ├── FCOS_R_50_FPN_1x.yaml │ ├── FCOS_R_50_FPN_2x.yaml │ ├── POTO_R_50_FPN_2x.yaml │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_152_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_1x_bs8.yaml │ ├── faster_rcnn_R_50_FPN_2x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_152_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_1x_bs8.yaml │ ├── retinanet_R_50_FPN_2x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── retinanet_X101_32x8d_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ └── rpn_R_50_FPN_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.py │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── 
mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.py │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x_giou.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ ├── mask_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── mask_rcnn_regnetx_4gf_dds_fpn_1x.py │ └── mask_rcnn_regnety_4gf_dds_fpn_1x.py ├── Distillation-ICD │ ├── CondInst_R50_R101_icd.yaml │ ├── FCOS_R50_R101_icd.yaml │ ├── MaskRCNN_R_50_R101_icd_FPN_1x.yaml │ ├── RCNN_R_50_R101_icd_FPN_1x.yaml │ ├── SOLOv2_R_50_R101_icd_FPN_1x.yaml │ └── retinanet_R_50_R101_icd_FPN_1x.yaml ├── Teachers │ ├── CondIns_R101_3x_ms.yaml │ ├── FCOS_R101_2x_ms.yaml │ └── SOLOv2_R101_3x_ms.yaml └── coco_obj.json ├── models ├── distiller.py ├── layers │ └── transformer.py ├── models.py ├── teacher.py └── utils.py ├── requirements.txt ├── train_baseline.py ├── train_distill.py └── utils └── build.py /.gitignore: -------------------------------------------------------------------------------- 1 | *log*/ 2 | *.jpg 3 | *.png 4 | *output* 5 | *_model_zoo/ 6 | 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | -------------------------------------------------------------------------------- /Poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/ICD/acf27269648e4538a9d6d22171d1abbcd4eceed1/Poster.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Instance-Conditional Knowledge Distillation for Object Detection 2 | This is the official implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection", based on [MegEngine](./megengine_release/README.md) and [Pytorch](./pytorch_release/README.md). Go to the desired subfolders for more information and guidance! 3 | 4 | 5 |
6 | 7 |
8 | 9 | > [**Instance-Conditional Knowledge Distillation for Object Detection**](https://arxiv.org/abs/2110.12724), 10 | > Zijian Kang, Peizhen Zhang, Xiangyu Zhang, Jian Sun, Nanning Zheng 11 | > In Proc. of Advances in Neural Information Processing Systems (NeurIPS), 2021 12 | > [[arXiv](https://arxiv.org/abs/2110.12724)][[Citation](#citation)][[OpenReview](https://openreview.net/forum?id=k7aeAz4Vbb)] 13 | 14 | ## Usage 15 | You can find two implementations, for [MegEngine](./megengine_release/README.md) and [PyTorch](./pytorch_release/README.md), under the two sub-folders. We use the latter to report the performance in the paper. Switch to the corresponding subfolder for more information. 16 | 17 | ### Try it in a few lines: 18 | Taking the Detectron2 implementation as an example, you can train your model in a few lines: 19 | ``` 20 | cd pytorch_release 21 | 22 | # Install dependencies 23 | pip install pip --upgrade 24 | pip install -r requirements.txt 25 | pip install https://github.com/facebookresearch/detectron2/archive/refs/tags/v0.5.tar.gz 26 | pip install 'git+https://github.com/aim-uofa/AdelaiDet.git@7bf9d87' 27 | 28 | # Prepare the dataset according to https://github.com/facebookresearch/detectron2/tree/main/datasets 29 | 30 | # Train and distill a RetinaNet detector with ICD 31 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_retinanet 32 | ``` 33 | 34 | ## Performance 35 | For object detection on MS-COCO: 36 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | 37 | | --- | :---: | :---: | 38 | | Faster R-CNN | 37.9 | 40.9 (+3.0) | 39 | | RetinaNet | 37.4 | 40.7 (+3.3) | 40 | | FCOS | 39.4 | 42.9 (+3.5) | 41 | 42 | For instance segmentation on MS-COCO: 43 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | Baseline (MaskAP) | + Ours (MaskAP) | 44 | | --- | :---: | :---: | :---: | :---: | 45 | | Mask R-CNN | 38.6 | 41.2 (+2.6) | 35.2 | 37.4 (+2.2) | 46 | | SOLOv2 | - | - | 34.6 | 38.5 (+3.9) | 47 | | CondInst | 39.7 | 43.7 (+4.0) | 35.7 | 39.1 (+3.4) | 48 | 49 | ## Acknowledgement 50 | 51 | Some files are modified from [MegEngine Models](https://github.com/MegEngine/Models) and [Detectron2](https://github.com/facebookresearch/detectron2). We also refer to [PyTorch](https://github.com/pytorch/pytorch), [DETR](https://github.com/facebookresearch/detr) and [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) for some implementations. 52 | 53 | 54 | ## License 55 | 56 | This repo is licensed under the Apache License, Version 2.0 (the "License"). 57 | 58 | ## Citation 59 | You can use the following BibTeX entry for citation in your research. 60 | ``` 61 | @inproceedings{icd_neurips2021, 62 | author = {Kang, Zijian and Zhang, Peizhen and Zhang, Xiangyu and Sun, Jian and Zheng, Nanning}, 63 | booktitle = {Advances in Neural Information Processing Systems}, 64 | editor = {M. Ranzato and A. Beygelzimer and Y. Dauphin and P.S. Liang and J. 
Wortman Vaughan}, 65 | pages = {16468--16480}, 66 | publisher = {Curran Associates, Inc.}, 67 | title = {Instance-Conditional Knowledge Distillation for Object Detection}, 68 | url = {https://proceedings.neurips.cc/paper/2021/file/892c91e0a653ba19df81a90f89d99bcd-Paper.pdf}, 69 | volume = {34}, 70 | year = {2021} 71 | } 72 | ``` 73 | -------------------------------------------------------------------------------- /megengine_release/README.md: -------------------------------------------------------------------------------- 1 | # Instance-Conditional Knowledge Distillation for Object Detection 2 | This is a [MegEngine](https://github.com/MegEngine/MegEngine) implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection", based on [MegEngine Models](https://github.com/MegEngine/Models). 3 | 4 | ## Requirements 5 | 6 | ### Installation 7 | 8 | In order to run the code, please prepare a CUDA environment with: 9 | - Python 3 (3.6 is recommended) 10 | - [MegEngine](https://github.com/MegEngine/MegEngine) 11 | 12 | 13 | 1. Install dependencies. 14 | 15 | ``` 16 | pip3 install --upgrade pip 17 | pip3 install -r requirements.txt 18 | ``` 19 | 20 | 2. Prepare the [MS-COCO 2017 dataset](http://cocodataset.org/#download), and put it in a proper directory with the following structure: 21 | 22 | ``` 23 | /path/to/ 24 | |->coco 25 | | |annotations 26 | | |train2017 27 | | |val2017 28 | ``` 29 | 30 | 31 | [Microsoft COCO: Common Objects in Context](https://arxiv.org/abs/1405.0312) Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollár, and C Lawrence Zitnick. European Conference on Computer Vision (ECCV), 2014. 32 | 33 | ## Usage 34 | 35 | ### Train baseline models 36 | 37 | Following [MegEngine Models](https://github.com/MegEngine/Models): 38 | ```bash 39 | python3 train.py -f distill_configs/retinanet_res50_coco_1x_800size.py -n 8 \ 40 | -d /data/Datasets 41 | ``` 42 | 43 | `train.py` arguments: 44 | 45 | - `-f`, config file for the network. 46 | - `-n`, number of required devices (GPUs). 47 | - `-w`, pretrained backbone weights. 48 | - `-b`, training `batch size`, default is 2. 49 | - `-d`, dataset root, default is `/data/datasets`. 50 | 51 | 52 | ### Train with distillation 53 | 54 | ```bash 55 | python3 train_distill_icd.py -f distill_configs/retinanet_res50_coco_1x_800size.py \ 56 | -n 8 -l -d /data/Datasets -tf configs/retinanet_res101_coco_3x_800size.py \ 57 | -df distill_configs/ICD.py \ 58 | -tw _model_zoo/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl 59 | ``` 60 | 61 | `train_distill_icd.py` arguments: 62 | 63 | - `-f`, config file for the student network. 64 | - `-w`, pretrained backbone weights. 65 | - `-tf`, config file for the teacher network. 66 | - `-tw`, pretrained weights for the teacher. 67 | - `-df`, config file for the distillation module, `distill_configs/ICD.py` by default. 68 | - `-l`, use the inheriting strategy (load pretrained parameters). 69 | - `-n`, number of required devices (GPUs). 70 | - `-b`, training `batch size`, default is 2. 71 | - `-d`, dataset root, default is `/data/datasets`. 72 | 73 | Note that `backbone_pretrained` is set in the distill configs, so backbone weights are loaded automatically and `-w` can be omitted. Checkpoints will be saved to a `log-xxx` directory. 
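All of the `-f`, `-tf` and `-df` arguments point to config files that follow the same convention: each module exposes a `Net` callable and, for detector configs, a `Cfg` factory (see the files under `configs/` and `distill_configs/` below). As a minimal, illustrative sketch of how such a module can be loaded dynamically; note that `load_config_module` is a hypothetical helper, not the actual loader used by the training scripts:

```python
import importlib.util

def load_config_module(path, name="config"):
    # Execute a config file (e.g. distill_configs/ICD.py) as a module so
    # that its Net / Cfg attributes can be accessed directly.
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# Usage mirroring the -f and -df arguments:
student = load_config_module("distill_configs/retinanet_res50_coco_1x_800size.py")
model = student.Net(student.Cfg())   # student detector, e.g. models.RetinaNet
distill = load_config_module("distill_configs/ICD.py")
distiller = distill.Net()            # the ICD distillation module
```

This convention is what makes swapping detectors or distillers a one-flag change: any file defining `Net` (and `Cfg`) can be passed to the scripts.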
74 | 75 | ### Evaluate 76 | 77 | ``` 78 | python3 test.py -f distill_configs/retinanet_res50_coco_1x_800size.py -n 8 \ 79 | -w log-of-xxx/epoch_17.pkl -d /data/Datasets/ 80 | ``` 81 | 82 | `test.py` arguments: 83 | 84 | - `-f`, config file for the network. 85 | - `-n`, number of required devices (GPUs). 86 | - `-w`, pretrained weights. 87 | - `-d`, dataset root, default is `/data/datasets`. 88 | 89 | ## Examples and Results 90 | ### Steps 91 | 1. Download the pretrained teacher model to the `_model_zoo` directory. 92 | 2. Train a baseline or distill with ICD. 93 | 3. Evaluate checkpoints (the last checkpoint is used by default). 94 | 95 | ### Example of Common Detectors 96 | 97 | #### RetinaNet 98 | - [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár. IEEE International Conference on Computer Vision (ICCV), 2017. 99 | 100 | 101 | - Teacher RetinaNet-R101-3x: 102 | https://data.megengine.org.cn/models/weights/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl 103 | 104 | 105 | - Config: distill_configs/retinanet_res50_coco_1x_800size.py 106 | 107 | Command: 108 | ``` 109 | python3 train_distill_icd.py -f distill_configs/retinanet_res50_coco_1x_800size.py \ 110 | -n 8 -l -d /data/Datasets -tf configs/retinanet_res101_coco_3x_800size.py \ 111 | -df distill_configs/ICD.py \ 112 | -tw _model_zoo/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl 113 | ``` 114 | 115 | #### FCOS 116 | 117 | - [FCOS: Fully Convolutional One-Stage Object Detection](https://arxiv.org/abs/1904.01355) Zhi Tian, Chunhua Shen, Hao Chen, and Tong He. IEEE International Conference on Computer Vision (ICCV), 2019. 118 | 119 | - Teacher FCOS-R101-3x: 120 | https://data.megengine.org.cn/models/weights/fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl 121 | 122 | 123 | - Config: distill_configs/fcos_res50_coco_1x_800size.py 124 | 125 | Command: 126 | ``` 127 | python3 train_distill_icd.py -f distill_configs/fcos_res50_coco_1x_800size.py \ 128 | -n 8 -l -d /data/Datasets -tf configs/fcos_res101_coco_3x_800size.py \ 129 | -df distill_configs/ICD.py \ 130 | -tw _model_zoo/fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl 131 | ``` 132 | 133 | #### ATSS 134 | 135 | - [Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection](https://arxiv.org/abs/1912.02424) Shifeng Zhang, Cheng Chi, Yongqiang Yao, Zhen Lei, and Stan Z. Li. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2020. 136 | 137 | - Teacher ATSS-R101-3x: 138 | https://data.megengine.org.cn/models/weights/atss_res101_coco_3x_800size_44dot7_9181687e.pkl 139 | 140 | 141 | - Config: distill_configs/atss_res50_coco_1x_800size.py 142 | 143 | Command: 144 | ``` 145 | python3 train_distill_icd.py -f distill_configs/atss_res50_coco_1x_800size.py \ 146 | -n 8 -l -d /data/Datasets -tf configs/atss_res101_coco_3x_800size.py \ 147 | -df distill_configs/ICD.py \ 148 | -tw _model_zoo/atss_res101_coco_3x_800size_44dot7_9181687e.pkl 149 | ``` 150 | 151 | ### Results of AP on MS-COCO: 152 | 153 | | Model | Baseline | +ICD | 154 | | --- | :---: | :---: | 155 | | RetinaNet | 36.8 | 40.3 | 156 | | FCOS | 40.0 | 43.3 | 157 | | ATSS | 39.6 | 43.0 | 158 | 159 | 160 | ### Notice 161 | 162 | - Results of this implementation are mainly for demonstration; please refer to the Detectron2 version for reproduction. 163 | 164 | - We simply adopt the hyperparameters from the Detectron2 version; further tuning could be helpful. 165 | 166 | - There is a known CUDA memory issue related to MegEngine: the actual memory consumption will be much larger than the theoretical value due to memory fragmentation. This is expected to be fixed in a future version of MegEngine. -------------------------------------------------------------------------------- /megengine_release/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss_res18_coco_3x_800size import atss_res18_coco_3x_800size 2 | from .atss_res34_coco_3x_800size import atss_res34_coco_3x_800size 3 | from .atss_res50_coco_3x_800size import atss_res50_coco_3x_800size 4 | from .atss_res101_coco_3x_800size import atss_res101_coco_3x_800size 5 | from .atss_resx101_coco_2x_800size import atss_resx101_coco_2x_800size 6 | from .faster_rcnn_res18_coco_3x_800size import faster_rcnn_res18_coco_3x_800size 7 | from .faster_rcnn_res34_coco_3x_800size import faster_rcnn_res34_coco_3x_800size 8 | from .faster_rcnn_res50_coco_3x_800size import faster_rcnn_res50_coco_3x_800size 9 | from .faster_rcnn_res101_coco_3x_800size import faster_rcnn_res101_coco_3x_800size 10 | from .faster_rcnn_resx101_coco_2x_800size import faster_rcnn_resx101_coco_2x_800size 11 | from .fcos_res18_coco_3x_800size import fcos_res18_coco_3x_800size 12 | from .fcos_res34_coco_3x_800size import fcos_res34_coco_3x_800size 13 | from .fcos_res50_coco_3x_800size import fcos_res50_coco_3x_800size 14 | from .fcos_res101_coco_3x_800size import fcos_res101_coco_3x_800size 15 | from .fcos_resx101_coco_2x_800size import fcos_resx101_coco_2x_800size 16 | from .freeanchor_res18_coco_3x_800size import freeanchor_res18_coco_3x_800size 17 | from .freeanchor_res34_coco_3x_800size import freeanchor_res34_coco_3x_800size 18 | from .freeanchor_res50_coco_3x_800size import freeanchor_res50_coco_3x_800size 19 | from .freeanchor_res101_coco_3x_800size import freeanchor_res101_coco_3x_800size 20 | from .freeanchor_resx101_coco_2x_800size import freeanchor_resx101_coco_2x_800size 21 | from .retinanet_res18_coco_3x_800size import retinanet_res18_coco_3x_800size 22 | from .retinanet_res34_coco_3x_800size import retinanet_res34_coco_3x_800size 23 | from .retinanet_res50_coco_3x_800size import retinanet_res50_coco_3x_800size 24 | from .retinanet_res101_coco_3x_800size import retinanet_res101_coco_3x_800size 25 | from .retinanet_resx101_coco_2x_800size import retinanet_resx101_coco_2x_800size 26 | 27 | _EXCLUDE = {} 28 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 29 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
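A note on the config files listed below: each model-zoo entry is a plain function decorated with `hub.pretrained(url)`. The following is a minimal sketch of building a teacher programmatically, assuming MegEngine hub's usual convention that such decorated functions accept `pretrained=True` to download and load the listed weights (verify against your MegEngine version), and that it is run from the `megengine_release` directory:

```python
from configs import retinanet_res101_coco_3x_800size

# Build the RetinaNet-R101 teacher; pretrained=True is assumed to fetch the
# weights from the URL passed to @hub.pretrained in the config file.
teacher = retinanet_res101_coco_3x_800size(pretrained=True)
teacher.eval()  # teachers are used in inference mode during distillation
```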
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "atss_res101_coco_3x_800size_44dot7_9181687e.pkl" 24 | ) 25 | def atss_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | ATSS trained from COCO dataset. 28 | `"ATSS" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomATSSConfig() 33 | cfg.backbone_pretrained = False 34 | return models.ATSS(cfg, **kwargs) 35 | 36 | 37 | Net = models.ATSS 38 | Cfg = CustomATSSConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "atss_res18_coco_3x_800size_38dot3_58e249d5.pkl" 25 | ) 26 | def atss_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | ATSS trained from COCO dataset. 29 | `"ATSS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomATSSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.ATSS(cfg, **kwargs) 36 | 37 | 38 | Net = models.ATSS 39 | Cfg = CustomATSSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "atss_res34_coco_3x_800size_41dot5_ec16a67b.pkl" 25 | ) 26 | def atss_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | ATSS trained from COCO dataset. 
29 | `"ATSS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomATSSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.ATSS(cfg, **kwargs) 36 | 37 | 38 | Net = models.ATSS 39 | Cfg = CustomATSSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl" 17 | ) 18 | def atss_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | ATSS trained from COCO dataset. 21 | `"ATSS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.ATSSConfig() 26 | cfg.backbone_pretrained = False 27 | return models.ATSS(cfg, **kwargs) 28 | 29 | 30 | Net = models.ATSS 31 | Cfg = models.ATSSConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "atss_resx101_coco_2x_800size_45dot6_b3a91b36.pkl" 26 | ) 27 | def atss_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | ATSS trained from COCO dataset. 30 | `"ATSS" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomATSSConfig() 35 | cfg.backbone_pretrained = False 36 | return models.ATSS(cfg, **kwargs) 37 | 38 | 39 | Net = models.ATSS 40 | Cfg = CustomATSSConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "faster_rcnn_res101_coco_3x_800size_42dot6_2538b0ff.pkl" 24 | ) 25 | def faster_rcnn_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | Faster-RCNN FPN trained from COCO dataset. 28 | `"Faster-RCNN" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomFasterRCNNConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FasterRCNN(cfg, **kwargs) 35 | 36 | 37 | Net = models.FasterRCNN 38 | Cfg = CustomFasterRCNNConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [64, 128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "faster_rcnn_res18_coco_3x_800size_35dot7_a33835ca.pkl" 25 | ) 26 | def faster_rcnn_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | Faster-RCNN FPN trained from COCO dataset. 29 | `"Faster-RCNN" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFasterRCNNConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FasterRCNN(cfg, **kwargs) 36 | 37 | 38 | Net = models.FasterRCNN 39 | Cfg = CustomFasterRCNNConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [64, 128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "faster_rcnn_res34_coco_3x_800size_39dot6_11fca4d4.pkl" 25 | ) 26 | def faster_rcnn_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | Faster-RCNN FPN trained from COCO dataset. 
29 | `"Faster-RCNN" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFasterRCNNConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FasterRCNN(cfg, **kwargs) 36 | 37 | 38 | Net = models.FasterRCNN 39 | Cfg = CustomFasterRCNNConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "faster_rcnn_res50_coco_3x_800size_40dot1_8682ff1a.pkl" 17 | ) 18 | def faster_rcnn_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | Faster-RCNN FPN trained from COCO dataset. 21 | `"Faster-RCNN" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FasterRCNNConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FasterRCNN(cfg, **kwargs) 28 | 29 | 30 | Net = models.FasterRCNN 31 | Cfg = models.FasterRCNNConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "faster_rcnn_resx101_coco_2x_800size_44dot1_e5e0060b.pkl" 26 | ) 27 | def faster_rcnn_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | Faster-RCNN FPN trained from COCO dataset. 30 | `"Faster-RCNN" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomFasterRCNNConfig() 35 | cfg.backbone_pretrained = False 36 | return models.FasterRCNN(cfg, **kwargs) 37 | 38 | 39 | Net = models.FasterRCNN 40 | Cfg = CustomFasterRCNNConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl" 24 | ) 25 | def fcos_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | FCOS trained from COCO dataset. 28 | `"FCOS" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomFCOSConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FCOS(cfg, **kwargs) 35 | 36 | 37 | Net = models.FCOS 38 | Cfg = CustomFCOSConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "fcos_res18_coco_3x_800size_37dot6_adab0136.pkl" 25 | ) 26 | def fcos_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | FCOS trained from COCO dataset. 29 | `"FCOS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFCOSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FCOS(cfg, **kwargs) 36 | 37 | 38 | Net = models.FCOS 39 | Cfg = CustomFCOSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "fcos_res34_coco_3x_800size_41dot0_8ba4633f.pkl" 25 | ) 26 | def fcos_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | FCOS trained from COCO dataset. 
29 | `"FCOS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFCOSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FCOS(cfg, **kwargs) 36 | 37 | 38 | Net = models.FCOS 39 | Cfg = CustomFCOSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl" 17 | ) 18 | def fcos_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FCOS trained from COCO dataset. 21 | `"FCOS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FCOSConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FCOS(cfg, **kwargs) 28 | 29 | 30 | Net = models.FCOS 31 | Cfg = models.FCOSConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "fcos_resx101_coco_2x_800size_44dot8_42ac8e82.pkl" 26 | ) 27 | def fcos_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | FCOS trained from COCO dataset. 30 | `"FCOS" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomFCOSConfig() 35 | cfg.backbone_pretrained = False 36 | return models.FCOS(cfg, **kwargs) 37 | 38 | 39 | Net = models.FCOS 40 | Cfg = CustomFCOSConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "freeanchor_res101_coco_3x_800size_43dot9_8c707d7d.pkl" 24 | ) 25 | def freeanchor_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | FreeAnchor trained from COCO dataset. 28 | `"FreeAnchor" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = models.FreeAnchorConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FreeAnchor(cfg, **kwargs) 35 | 36 | 37 | Net = models.FreeAnchor 38 | Cfg = CustomFreeAnchorConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "freeanchor_res18_coco_3x_800size_38dot1_3d0559a8.pkl" 25 | ) 26 | def freeanchor_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | FreeAnchor trained from COCO dataset. 29 | `"FreeAnchor" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = models.FreeAnchorConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FreeAnchor(cfg, **kwargs) 36 | 37 | 38 | Net = models.FreeAnchor 39 | Cfg = CustomFreeAnchorConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "freeanchor_res34_coco_3x_800size_41dot1_3b03734e.pkl" 25 | ) 26 | def freeanchor_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | FreeAnchor trained from COCO dataset. 
29 | `"FreeAnchor" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = models.FreeAnchorConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FreeAnchor(cfg, **kwargs) 36 | 37 | 38 | Net = models.FreeAnchor 39 | Cfg = CustomFreeAnchorConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "freeanchor_res50_coco_3x_800size_42dot1_5c567f14.pkl" 17 | ) 18 | def freeanchor_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FreeAnchor trained from COCO dataset. 21 | `"FreeAnchor" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FreeAnchorConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FreeAnchor(cfg, **kwargs) 28 | 29 | 30 | Net = models.FreeAnchor 31 | Cfg = models.FreeAnchorConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "freeanchor_resx101_coco_2x_800size_44dot9_5a23fca7.pkl" 26 | ) 27 | def freeanchor_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | FreeAnchor trained from COCO dataset. 30 | `"FreeAnchor" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = models.FreeAnchorConfig() 35 | cfg.backbone_pretrained = False 36 | return models.FreeAnchor(cfg, **kwargs) 37 | 38 | 39 | Net = models.FreeAnchor 40 | Cfg = CustomFreeAnchorConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl" 24 | ) 25 | def retinanet_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | RetinaNet trained from COCO dataset. 28 | `"RetinaNet" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomRetinaNetConfig() 33 | cfg.backbone_pretrained = False 34 | return models.RetinaNet(cfg, **kwargs) 35 | 36 | 37 | Net = models.RetinaNet 38 | Cfg = CustomRetinaNetConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | self.fpn_top_in_channel = 512 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "retinanet_res18_coco_3x_800size_35dot3_0c4956c8.pkl" 26 | ) 27 | def retinanet_res18_coco_3x_800size(**kwargs): 28 | r""" 29 | RetinaNet trained from COCO dataset. 30 | `"RetinaNet" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomRetinaNetConfig() 35 | cfg.backbone_pretrained = False 36 | return models.RetinaNet(cfg, **kwargs) 37 | 38 | 39 | Net = models.RetinaNet 40 | Cfg = CustomRetinaNetConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | self.fpn_top_in_channel = 512 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "retinanet_res34_coco_3x_800size_38dot4_3485f9ec.pkl" 26 | ) 27 | def retinanet_res34_coco_3x_800size(**kwargs): 28 | r""" 29 | RetinaNet trained from COCO dataset. 
30 | `"RetinaNet" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomRetinaNetConfig() 35 | cfg.backbone_pretrained = False 36 | return models.RetinaNet(cfg, **kwargs) 37 | 38 | 39 | Net = models.RetinaNet 40 | Cfg = CustomRetinaNetConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl" 17 | ) 18 | def retinanet_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | RetinaNet trained from COCO dataset. 21 | `"RetinaNet" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.RetinaNetConfig() 26 | cfg.backbone_pretrained = False 27 | return models.RetinaNet(cfg, **kwargs) 28 | 29 | 30 | Net = models.RetinaNet 31 | Cfg = models.RetinaNetConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "retinanet_resx101_coco_2x_800size_42dot3_1502eace.pkl" 26 | ) 27 | def retinanet_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | RetinaNet trained from COCO dataset. 
30 | `"RetinaNet" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomRetinaNetConfig() 35 | cfg.backbone_pretrained = False 36 | return models.RetinaNet(cfg, **kwargs) 37 | 38 | 39 | Net = models.RetinaNet 40 | Cfg = CustomRetinaNetConfig 41 | -------------------------------------------------------------------------------- /megengine_release/distill_configs/ICD.py: -------------------------------------------------------------------------------- 1 | import megengine.module as M 2 | import megengine.functional as F 3 | from models.ICD.ICD import ICD 4 | from easydict import EasyDict as edict 5 | 6 | def get_distillator(): 7 | cfg = edict({ 8 | 'distiller': { 9 | 'FEAT_KEYS': ['p3', 'p4', 'p5', 'p6', 'p7'], 10 | 'WEIGHT_VALUE': 8.0, 11 | 'TEMP_VALUE': 1.0, 12 | 'NUM_SCALE_SPLITS': 5, 13 | 'HIDDEN_DIM': 256, 14 | 'NUM_CLASSES': 80, 15 | 'MAX_LABELS': 300, 16 | 'ATT_HEADS': 8, 17 | 'USE_POS_EMBEDDING': True, 18 | 'DECODER_POSEMB_ON_V': False, 19 | 20 | }, 21 | }) 22 | return ICD(256, cfg) 23 | 24 | Net = get_distillator -------------------------------------------------------------------------------- /megengine_release/distill_configs/ICD_rcnn.py: -------------------------------------------------------------------------------- 1 | import megengine.module as M 2 | import megengine.functional as F 3 | from models.ICD.ICD import ICD 4 | from easydict import EasyDict as edict 5 | 6 | def get_distillator(): 7 | cfg = edict({ 8 | 'distiller': { 9 | 'FEAT_KEYS': ['p2', 'p3', 'p4', 'p5', 'p6'], 10 | 'WEIGHT_VALUE': 3.0, 11 | 'TEMP_VALUE': 1.0, 12 | 'HIDDEN_DIM': 256, 13 | 'NUM_CLASSES': 80, 14 | 'MAX_LABELS': 300, 15 | 'ATT_HEADS': 8, 16 | 'USE_POS_EMBEDDING': True, 17 | 'DECODER_POSEMB_ON_V': False, 18 | 19 | }, 20 | }) 21 | return ICD(256, cfg) 22 | 23 | Net = get_distillator -------------------------------------------------------------------------------- /megengine_release/distill_configs/atss_res50_coco_1x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl" 17 | ) 18 | def atss_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | ATSS trained from COCO dataset. 
21 | `"ATSS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.ATSSConfig() 26 | cfg.backbone_pretrained = True 27 | cfg.max_epoch = 18 28 | cfg.lr_decay_stages = [12, 16] 29 | return models.ATSS(cfg, **kwargs) 30 | 31 | 32 | def get_cfg(): 33 | cfg = models.ATSSConfig() 34 | cfg.backbone_pretrained = True 35 | cfg.max_epoch = 18 36 | cfg.lr_decay_stages = [12, 16] 37 | 38 | return cfg 39 | 40 | 41 | Net = models.ATSS 42 | Cfg = get_cfg 43 | -------------------------------------------------------------------------------- /megengine_release/distill_configs/fcos_res50_coco_1x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl" 17 | ) 18 | def fcos_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FCOS trained from COCO dataset. 21 | `"FCOS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FCOSConfig() 26 | cfg.backbone_pretrained = True 27 | cfg.max_epoch = 18 28 | cfg.lr_decay_stages = [12, 16] 29 | return models.FCOS(cfg, **kwargs) 30 | 31 | 32 | def get_cfg(): 33 | cfg = models.FCOSConfig() 34 | cfg.backbone_pretrained = True 35 | cfg.max_epoch = 18 36 | cfg.lr_decay_stages = [12, 16] 37 | 38 | return cfg 39 | 40 | 41 | Net = models.FCOS 42 | Cfg = get_cfg 43 | -------------------------------------------------------------------------------- /megengine_release/distill_configs/retinanet_res50_coco_1x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl" 17 | ) 18 | def retinanet_res50_coco_1x_800size(**kwargs): 19 | r""" 20 | RetinaNet trained from COCO dataset. 
    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = models.RetinaNetConfig()
    cfg.backbone_pretrained = True
    cfg.max_epoch = 18
    cfg.lr_decay_stages = [12, 16]

    return models.RetinaNet(cfg, **kwargs)


def get_cfg():
    cfg = models.RetinaNetConfig()
    cfg.backbone_pretrained = True
    cfg.max_epoch = 18
    cfg.lr_decay_stages = [12, 16]

    return cfg

Net = models.RetinaNet
Cfg = get_cfg
--------------------------------------------------------------------------------
/megengine_release/layers/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .basic import *
from .det import *

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/megengine_release/layers/basic/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .functional import *
from .nn import *
from .norm import *

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/megengine_release/layers/basic/functional.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
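# A minimal usage sketch for the helpers below (illustrative only; the shapes
# and values are assumptions, not taken from the repo's tests):
#
#   import numpy as np
#   import megengine as mge
#
#   x = mge.tensor(np.zeros((2, 3, 37, 45), dtype="float32"))
#   y = get_padded_tensor(x, multiple_number=32)
#   # 37 -> ceil(37 / 32) * 32 = 64 and 45 -> 64, so y.shape == (2, 3, 64, 64)
#
#   s = safelog(mge.tensor([0.0, 1.0]))  # log with inputs clamped to >= eps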
from typing import Optional

import numpy as np

import megengine.distributed as dist
import megengine.functional as F
from megengine import Tensor


def get_padded_tensor(
    array: Tensor, multiple_number: int = 32, pad_value: float = 0
) -> Tensor:
    """pad the nd-array so that its height and width are multiples of the given number

    Args:
        array (Tensor):
            the tensor with the shape of [batch, channel, height, width]
        multiple_number (int):
            make the height and width divisible by multiple_number
        pad_value (float): the value to be padded

    Returns:
        padded_array (Tensor)
    """
    batch, chl, t_height, t_width = array.shape
    padded_height = (
        (t_height + multiple_number - 1) // multiple_number * multiple_number
    )
    padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number

    padded_array = F.full(
        (batch, chl, padded_height, padded_width), pad_value, dtype=array.dtype
    )

    ndim = array.ndim
    if ndim == 4:
        padded_array[:, :, :t_height, :t_width] = array
    elif ndim == 3:
        padded_array[:, :t_height, :t_width] = array
    else:
        raise Exception("Not supported tensor dim: %d" % ndim)
    return padded_array


def safelog(x, eps=None):
    if eps is None:
        eps = np.finfo(x.dtype).eps
    return F.log(F.maximum(x, eps))


def batched_nms(
    boxes: Tensor, scores: Tensor, idxs: Tensor, iou_thresh: float, max_output: Optional[int] = None
) -> Tensor:
    r"""
    Performs non-maximum suppression (NMS) on the boxes according to
    their intersection-over-union (IoU).

    :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on;
        each box is expected to be in `(x1, y1, x2, y2)` format.
    :param scores: tensor of shape `(N,)`, the score of boxes.
    :param idxs: tensor of shape `(N,)`, the class indices of boxes in the batch.
    :param iou_thresh: ``IoU`` threshold for overlapping.
    :return: indices of the elements that have been kept by NMS.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor

        x = np.zeros((100,4))
        np.random.seed(42)
        x[:,:2] = np.random.rand(100,2) * 20
        x[:,2:] = np.random.rand(100,2) * 20 + 100
        scores = tensor(np.random.rand(100))
        idxs = tensor(np.random.randint(0, 10, 100))
        inp = tensor(x)
        result = batched_nms(inp, scores, idxs, iou_thresh=0.6)
        print(result.numpy())

    Outputs:

    .. testoutput::

        [75 41 99 98 69 64 11 27 35 18]

    """
    assert (
        boxes.ndim == 2 and boxes.shape[1] == 4
    ), "the expected shape of boxes is (N, 4)"
    assert scores.ndim == 1, "the expected shape of scores is (N,)"
    assert idxs.ndim == 1, "the expected shape of idxs is (N,)"
    assert (
        boxes.shape[0] == scores.shape[0] == idxs.shape[0]
    ), "number of boxes, scores and idxs are not matched"

    idxs = idxs.detach()
    max_coordinate = boxes.max()
    offsets = idxs.astype("float32") * (max_coordinate + 1)
    boxes = boxes + offsets.reshape(-1, 1)
    return F.nn.nms(boxes, scores, iou_thresh, max_output)


def all_reduce_mean(array: Tensor) -> Tensor:
    if dist.get_world_size() > 1:
        array = dist.functional.all_reduce_sum(array) / dist.get_world_size()
    return array
--------------------------------------------------------------------------------
/megengine_release/layers/basic/nn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
from collections import namedtuple

import megengine.module as M


class Conv2d(M.Conv2d):
    """
    A wrapper around :class:`megengine.module.Conv2d`.
    """

    def __init__(self, *args, **kwargs):
        """
        Extra keyword arguments supported in addition to
        `megengine.module.Conv2d`.

        Args:
            norm (M.Module, optional): a normalization layer
            activation (callable(Tensor) -> Tensor): a callable activation
                function
        """
        norm = kwargs.pop("norm", None)
        activation = kwargs.pop("activation", None)
        super().__init__(*args, **kwargs)

        self.norm = norm
        self.activation = activation

    def forward(self, x):
        x = super().forward(x)
        if self.norm is not None:
            x = self.norm(x)
        if self.activation is not None:
            x = self.activation(x)
        return x


class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
    """
    A simple structure that contains basic shape specification about a tensor.
    Useful for getting a module's output channels when building the graph.
    """

    def __new__(cls, channels=None, height=None, width=None, stride=None):
        return super().__new__(cls, channels, height, width, stride)
--------------------------------------------------------------------------------
/megengine_release/layers/basic/norm.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
from functools import partial

import megengine.module as M
from megengine.module.normalization import GroupNorm, InstanceNorm, LayerNorm


def get_norm(norm):
    """
    Args:
        norm (str): currently support "BN", "SyncBN", "FrozenBN", "GN", "LN" and "IN"

    Returns:
        M.Module or None: the normalization layer
    """
    if norm is None:
        return None
    norm = {
        "BN": M.BatchNorm2d,
        "SyncBN": M.SyncBatchNorm,
        "FrozenBN": partial(M.BatchNorm2d, freeze=True),
        "GN": GroupNorm,
        "LN": LayerNorm,
        "IN": InstanceNorm,
    }[norm]
    return norm
--------------------------------------------------------------------------------
/megengine_release/layers/det/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .anchor import *
from .box_head import *
from .box_utils import *
from .fpn import *
from .loss import *
from .matcher import *
from .point_head import *
from .pooler import *
from .rcnn import *
from .rpn import *
from .sampling import *

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/megengine_release/layers/det/anchor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from abc import ABCMeta, abstractmethod
from typing import List

import numpy as np

import megengine.functional as F
from megengine import Tensor, tensor


def meshgrid(x, y):
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    mesh_shape = (y.shape[0], x.shape[0])
    mesh_x = F.broadcast_to(x, mesh_shape)
    mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)
    return mesh_x, mesh_y


def create_anchor_grid(featmap_size, offsets, stride, device):
    step_x, step_y = featmap_size
    shift = offsets * stride

    grid_x = F.arange(shift, step_x * stride + shift, step=stride, device=device)
    grid_y = F.arange(shift, step_y * stride + shift, step=stride, device=device)
    grids_x, grids_y = meshgrid(grid_y, grid_x)
    return grids_x.reshape(-1), grids_y.reshape(-1)


class BaseAnchorGenerator(metaclass=ABCMeta):
    """base class for anchor generator.
    """

    def __init__(self):
        pass

    @property
    @abstractmethod
    def anchor_dim(self):
        pass

    @abstractmethod
    def generate_anchors_by_features(self, sizes, device) -> List[Tensor]:
        pass

    def __call__(self, featmaps):
        feat_sizes = [fmap.shape[-2:] for fmap in featmaps]
        return self.generate_anchors_by_features(feat_sizes, featmaps[0].device)


class AnchorBoxGenerator(BaseAnchorGenerator):
    """default anchor box generator, usually used in anchor-based methods.
    This class generates anchors for each feature map level.
    Args:
        anchor_scales (list): anchor scales based on stride.
            The practical anchor scale is anchor_scale * stride
        anchor_ratios (list): anchor aspect ratios.
        strides (list): strides of inputs.
        offset (float): center point offset. default is 0.5.
    """

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        anchor_scales: list = [[32], [64], [128], [256], [512]],
        anchor_ratios: list = [[0.5, 1, 2]],
        strides: list = [4, 8, 16, 32, 64],
        offset: float = 0.5,
    ):
        super().__init__()
        self.anchor_scales = np.array(anchor_scales, dtype="float32")
        self.anchor_ratios = np.array(anchor_ratios, dtype="float32")
        self.strides = strides
        self.offset = offset
        self.num_features = len(strides)

        self.base_anchors = self._different_level_anchors(anchor_scales, anchor_ratios)

    @property
    def anchor_dim(self):
        return 4

    def _different_level_anchors(self, scales, ratios):
        if len(scales) == 1:
            scales *= self.num_features
        assert len(scales) == self.num_features

        if len(ratios) == 1:
            ratios *= self.num_features
        assert len(ratios) == self.num_features
        return [
            tensor(self.generate_base_anchors(scale, ratio))
            for scale, ratio in zip(scales, ratios)
        ]

    def generate_base_anchors(self, scales, ratios):
        base_anchors = []
        areas = [s ** 2.0 for s in scales]
        for area in areas:
            for ratio in ratios:
                w = math.sqrt(area / ratio)
                h = ratio * w
                # center-based anchor
                x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
                base_anchors.append([x0, y0, x1, y1])
        return base_anchors

    def generate_anchors_by_features(self, sizes, device):
        all_anchors = []
        assert len(sizes) == self.num_features, (
            "input features expected {}, got {}".format(self.num_features, len(sizes))
        )
        for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors):
            grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
            grids = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
            all_anchors.append(
                (F.expand_dims(grids, axis=1) + F.expand_dims(base_anchor, axis=0)).reshape(-1, 4)
            )
        return all_anchors


class AnchorPointGenerator(BaseAnchorGenerator):
    """default anchor point generator, usually used in anchor-free methods.
    This class generates anchors for each feature map level.
    Args:
        num_anchors (int): number of anchors per location
        strides (list): strides of inputs.
        offset (float): center point offset. default is 0.5.
    """

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        num_anchors: int = 1,
        strides: list = [4, 8, 16, 32, 64],
        offset: float = 0.5,
    ):
        super().__init__()
        self.num_anchors = num_anchors
        self.strides = strides
        self.offset = offset
        self.num_features = len(strides)

    @property
    def anchor_dim(self):
        return 2

    def generate_anchors_by_features(self, sizes, device):
        all_anchors = []
        assert len(sizes) == self.num_features, (
            "input features expected {}, got {}".format(self.num_features, len(sizes))
        )
        for size, stride in zip(sizes, self.strides):
            grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
            grids = F.stack([grid_x, grid_y], axis=1)
            all_anchors.append(
                F.broadcast_to(
                    F.expand_dims(grids, axis=1), (grids.shape[0], self.num_anchors, 2)
                ).reshape(-1, 2)
            )  # FIXME: need F.repeat
        return all_anchors
--------------------------------------------------------------------------------
/megengine_release/layers/det/box_head.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from typing import List

import megengine.module as M
from megengine import Tensor

import layers


class BoxHead(M.Module):
    """
    The head used when anchor boxes are adopted for object classification and box regression.
    """

    def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
        super().__init__()

        in_channels = input_shape[0].channels
        num_classes = cfg.num_classes
        num_convs = 4
        prior_prob = cfg.cls_prior_prob
        num_anchors = [
            len(cfg.anchor_scales[i]) * len(cfg.anchor_ratios[i])
            for i in range(len(input_shape))
        ]

        assert (
            len(set(num_anchors)) == 1
        ), "not support different number of anchors between levels"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            cls_subnet.append(M.ReLU())
            bbox_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            bbox_subnet.append(M.ReLU())

        self.cls_subnet = M.Sequential(*cls_subnet)
        self.bbox_subnet = M.Sequential(*bbox_subnet)
        self.cls_score = M.Conv2d(
            in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
        )
        self.bbox_pred = M.Conv2d(
            in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
        )

        # Initialization
        for modules in [
            self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, M.Conv2d):
                    M.init.normal_(layer.weight, mean=0, std=0.01)
                    M.init.fill_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        M.init.fill_(self.cls_score.bias, bias_value)

    def forward(self, features: List[Tensor]):
        logits, offsets = [], []
        for feature in features:
            logits.append(self.cls_score(self.cls_subnet(feature)))
            offsets.append(self.bbox_pred(self.bbox_subnet(feature)))
        return logits, offsets
--------------------------------------------------------------------------------
/megengine_release/layers/det/box_utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from abc import ABCMeta, abstractmethod

import numpy as np

import megengine.functional as F
from megengine import Tensor


class BoxCoderBase(metaclass=ABCMeta):
    """Boxcoder class.
    """

    def __init__(self):
        pass

    @abstractmethod
    def encode(self) -> Tensor:
        pass

    @abstractmethod
    def decode(self) -> Tensor:
        pass


class BoxCoder(BoxCoderBase, metaclass=ABCMeta):
    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        reg_mean=[0.0, 0.0, 0.0, 0.0],
        reg_std=[1.0, 1.0, 1.0, 1.0],
    ):
        """
        Args:
            reg_mean(np.ndarray): [dx_mean, dy_mean, dw_mean, dh_mean] or None
            reg_std(np.ndarray): [dx_std, dy_std, dw_std, dh_std] or None

        """
        self.reg_mean = np.array(reg_mean, dtype="float32")[None, :]
        self.reg_std = np.array(reg_std, dtype="float32")[None, :]
        super().__init__()

    @staticmethod
    def _box_ltrb_to_cs_opr(bbox, addaxis=None):
        """transform bounding boxes from the left-top right-bottom encoding
        to center and size encodings"""
        bbox_width = bbox[:, 2] - bbox[:, 0]
        bbox_height = bbox[:, 3] - bbox[:, 1]
        bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
        bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height
        if addaxis is None:
            return bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y
        else:
            return (
                F.expand_dims(bbox_width, addaxis),
                F.expand_dims(bbox_height, addaxis),
                F.expand_dims(bbox_ctr_x, addaxis),
                F.expand_dims(bbox_ctr_y, addaxis),
            )

    def encode(self, bbox: Tensor, gt: Tensor) -> Tensor:
        bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y = self._box_ltrb_to_cs_opr(bbox)
        gt_width, gt_height, gt_ctr_x, gt_ctr_y = self._box_ltrb_to_cs_opr(gt)

        target_dx = (gt_ctr_x - bbox_ctr_x) / bbox_width
        target_dy = (gt_ctr_y - bbox_ctr_y) / bbox_height
        target_dw = F.log(gt_width / bbox_width)
        target_dh = F.log(gt_height / bbox_height)
        target = F.stack([target_dx, target_dy, target_dw, target_dh], axis=1)

        target -= self.reg_mean
        target /= self.reg_std
        return target

    def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
        deltas *= self.reg_std
        deltas += self.reg_mean

        (
            anchor_width,
            anchor_height,
            anchor_ctr_x,
            anchor_ctr_y,
        ) = self._box_ltrb_to_cs_opr(anchors, 1)
        pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width
        pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height
        pred_width = anchor_width * F.exp(deltas[:, 2::4])
        pred_height = anchor_height * F.exp(deltas[:, 3::4])

        pred_x1 = pred_ctr_x - 0.5 * pred_width
        pred_y1 = pred_ctr_y - 0.5 * pred_height
        pred_x2 = pred_ctr_x + 0.5 * pred_width
        pred_y2 = pred_ctr_y + 0.5 * pred_height

        pred_box = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=2)
        pred_box = pred_box.reshape(pred_box.shape[0], -1)

        return pred_box


class PointCoder(BoxCoderBase, metaclass=ABCMeta):
    def encode(self, point: Tensor, gt: Tensor) -> Tensor:
        return F.concat([point - gt[..., :2], gt[..., 2:] - point], axis=-1)

    def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
        return F.stack([
            F.expand_dims(anchors[:, 0], axis=1) - deltas[:, 0::4],
            F.expand_dims(anchors[:, 1], axis=1) - deltas[:, 1::4],
            F.expand_dims(anchors[:, 0], axis=1) + deltas[:, 2::4],
            F.expand_dims(anchors[:, 1], axis=1) + deltas[:, 3::4],
        ], axis=2).reshape(deltas.shape)


def get_iou(boxes1: Tensor, boxes2: Tensor, return_ioa=False) -> Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): boxes tensor with shape (N, 4)
        boxes2 (Tensor): boxes tensor with shape (M, 4)
        return_ioa (bool): whether to also return the intersection over the
            area of boxes1 (IoA). Default: False

    Returns:
        iou (Tensor): IoU matrix, shape (N, M).
    """
    b_box1 = F.expand_dims(boxes1, axis=1)
    b_box2 = F.expand_dims(boxes2, axis=0)

    iw = F.minimum(b_box1[:, :, 2], b_box2[:, :, 2]) - F.maximum(
        b_box1[:, :, 0], b_box2[:, :, 0]
    )
    ih = F.minimum(b_box1[:, :, 3], b_box2[:, :, 3]) - F.maximum(
        b_box1[:, :, 1], b_box2[:, :, 1]
    )
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area_box2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    union = F.expand_dims(area_box1, axis=1) + F.expand_dims(area_box2, axis=0) - inter
    overlaps = F.maximum(inter / union, 0)

    if return_ioa:
        ioa = F.maximum(inter / area_box1, 0)
        return overlaps, ioa

    return overlaps


def get_clipped_boxes(boxes, hw):
    """Clip the boxes into the image region."""
    # x1 >= 0
    box_x1 = F.clip(boxes[:, 0::4], lower=0, upper=hw[1])
    # y1 >= 0
    box_y1 = F.clip(boxes[:, 1::4], lower=0, upper=hw[0])
    # x2 < im_info[1]
    box_x2 = F.clip(boxes[:, 2::4], lower=0, upper=hw[1])
    # y2 < im_info[0]
    box_y2 = F.clip(boxes[:, 3::4], lower=0, upper=hw[0])

    clip_box = F.concat([box_x1, box_y1, box_x2, box_y2], axis=1)

    return clip_box


def filter_boxes(boxes, size=0):
    width = boxes[:, 2] - boxes[:, 0]
    height = boxes[:, 3] - boxes[:, 1]
    keep = (width > size) & (height > size)
    return keep
--------------------------------------------------------------------------------
/megengine_release/layers/det/fpn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
import math
from typing import List

import megengine.functional as F
import megengine.module as M

import layers


class FPN(M.Module):
    """
    This module implements the Feature Pyramid Network.
    It creates pyramid features built on top of some input feature maps which
    are produced by backbone networks like ResNet.
    """

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        bottom_up: M.Module,
        in_features: List[str],
        out_channels: int = 256,
        norm: str = None,
        top_block: M.Module = None,
        strides: List[int] = [8, 16, 32],
        channels: List[int] = [512, 1024, 2048],
    ):
        """
        Args:
            bottom_up (M.Module): module representing the bottom up sub-network.
                it generates multi-scale feature maps which are formatted as a
                dict like {'res3': res3_feature, 'res4': res4_feature}
            in_features (list[str]): list of input feature map keys coming
                from the `bottom_up` which will be used in FPN.
                e.g. ['res3', 'res4', 'res5']
            out_channels (int): number of channels used in the output
                feature maps.
            norm (str): the normalization type.
            top_block (M.Module or None): the module built upon FPN layers.
        """
        super(FPN, self).__init__()

        in_strides = strides
        in_channels = channels
        norm = layers.get_norm(norm)

        use_bias = norm is None
        self.lateral_convs = list()
        self.output_convs = list()

        for idx, in_channels in enumerate(in_channels):
            lateral_norm = None if norm is None else norm(out_channels)
            output_norm = None if norm is None else norm(out_channels)

            lateral_conv = layers.Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                bias=use_bias,
                norm=lateral_norm,
            )
            output_conv = layers.Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            M.init.msra_normal_(lateral_conv.weight, mode="fan_in")
            M.init.msra_normal_(output_conv.weight, mode="fan_in")

            if use_bias:
                M.init.fill_(lateral_conv.bias, 0)
                M.init.fill_(output_conv.bias, 0)

            stage = int(math.log2(in_strides[idx]))

            setattr(self, "fpn_lateral{}".format(stage), lateral_conv)
            setattr(self, "fpn_output{}".format(stage), output_conv)
            self.lateral_convs.insert(0, lateral_conv)
            self.output_convs.insert(0, output_conv)

        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up

        # follow the common practices, FPN features are named to "p",
        # like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {
            "p{}".format(int(math.log2(s))): s for s in in_strides
        }

        # top block output feature maps.
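        # A worked example (illustrative, values assumed): with
        # strides=[8, 16, 32] the loop above ends with stage == 5 ("p5"),
        # so a two-level top block such as LastLevelP6P7 registers
        # "p6" -> stride 2**6 = 64 and "p7" -> stride 2**7 = 128 below.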
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

        self._out_features = list(sorted(self._out_feature_strides.keys()))
        self._out_feature_channels = {k: out_channels for k in self._out_features}

    def forward(self, x):
        bottom_up_features = self.bottom_up.extract_features(x)
        x = [bottom_up_features[f] for f in self.in_features[::-1]]

        results = []
        prev_features = self.lateral_convs[0](x[0])
        results.append(self.output_convs[0](prev_features))

        for features, lateral_conv, output_conv in zip(
            x[1:], self.lateral_convs[1:], self.output_convs[1:]
        ):
            top_down_features = F.nn.interpolate(
                prev_features, features.shape[2:], mode="BILINEAR"
            )
            lateral_features = lateral_conv(features)
            prev_features = lateral_features + top_down_features
            results.insert(0, output_conv(prev_features))

        if self.top_block is not None:
            top_block_in_feature = bottom_up_features.get(
                self.top_block.in_feature, None
            )
            if top_block_in_feature is None:
                top_block_in_feature = results[
                    self._out_features.index(self.top_block.in_feature)
                ]
            results.extend(self.top_block(top_block_in_feature))

        return dict(zip(self._out_features, results))

    def output_shape(self):
        return {
            name: layers.ShapeSpec(
                channels=self._out_feature_channels[name],
                stride=self._out_feature_strides[name],
            )
            for name in self._out_features
        }


class FPNP6(M.Module):
    """
    Used in FPN; generates a downsampled P6 feature from P5.
    """

    def __init__(self, in_feature="p5"):
        super().__init__()
        self.num_levels = 1
        self.in_feature = in_feature
        self.pool = M.MaxPool2d(kernel_size=1, stride=2, padding=0)

    def forward(self, x):
        return [self.pool(x)]


class LastLevelP6P7(M.Module):
    """
    This module is used in RetinaNet to generate the extra layers, P6 and P7,
    from the C5 feature.
    """

    def __init__(self, in_channels: int, out_channels: int, in_feature="res5"):
        super().__init__()
        self.num_levels = 2
        if in_feature == "p5":
            assert in_channels == out_channels
        self.in_feature = in_feature
        self.p6 = M.Conv2d(in_channels, out_channels, 3, 2, 1)
        self.p7 = M.Conv2d(out_channels, out_channels, 3, 2, 1)

    def forward(self, x):
        p6 = self.p6(x)
        p7 = self.p7(F.relu(p6))
        return [p6, p7]
--------------------------------------------------------------------------------
/megengine_release/layers/det/loss.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
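# A quick numeric sanity check for the losses below (a sketch; the numbers are
# illustrative and not taken from the repo's tests):
#
#   from megengine import tensor
#
#   logits = tensor([0.0])   # sigmoid(0.0) == 0.5
#   targets = tensor([1.0])
#   # binary_cross_entropy gives -log(0.5) ~= 0.6931
#   # sigmoid_focal_loss with alpha=0.25, gamma=2 scales it by
#   # 0.25 * (1 - 0.5) ** 2 == 0.0625, i.e. ~= 0.0433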
import megengine.functional as F
from megengine import Tensor


def binary_cross_entropy(logits: Tensor, targets: Tensor) -> Tensor:
    r"""Binary Cross Entropy

    Args:
        logits (Tensor):
            the predicted logits
        targets (Tensor):
            the assigned targets with the same shape as logits

    Returns:
        the calculated binary cross entropy.
    """
    return -(targets * F.logsigmoid(logits) + (1 - targets) * F.logsigmoid(-logits))


def sigmoid_focal_loss(
    logits: Tensor, targets: Tensor, alpha: float = -1, gamma: float = 0,
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor):
            the predicted logits
        targets (Tensor):
            the assigned targets with the same shape as logits
        alpha (float):
            parameter to mitigate class imbalance. Default: -1
        gamma (float):
            parameter to mitigate easy/hard loss imbalance. Default: 0

    Returns:
        the calculated focal loss.
    """
    scores = F.sigmoid(logits)
    loss = binary_cross_entropy(logits, targets)
    if gamma != 0:
        loss *= (targets * (1 - scores) + (1 - targets) * scores) ** gamma
    if alpha >= 0:
        loss *= targets * alpha + (1 - targets) * (1 - alpha)
    return loss


def smooth_l1_loss(pred: Tensor, target: Tensor, beta: float = 1.0) -> Tensor:
    r"""Smooth L1 Loss

    Args:
        pred (Tensor):
            the predictions
        target (Tensor):
            the assigned targets with the same shape as pred
        beta (float):
            the parameter of smooth l1 loss.

    Returns:
        the calculated smooth l1 loss.
    """
    x = pred - target
    abs_x = F.abs(x)
    if beta < 1e-5:
        loss = abs_x
    else:
        in_loss = 0.5 * x ** 2 / beta
        out_loss = abs_x - 0.5 * beta
        loss = F.where(abs_x < beta, in_loss, out_loss)
    return loss


def iou_loss(
    pred: Tensor, target: Tensor, box_mode: str = "xyxy", loss_type: str = "iou", eps: float = 1e-8,
) -> Tensor:
    if box_mode == "ltrb":
        pred = F.concat([-pred[..., :2], pred[..., 2:]], axis=-1)
        target = F.concat([-target[..., :2], target[..., 2:]], axis=-1)
    elif box_mode != "xyxy":
        raise NotImplementedError

    pred_area = F.maximum(pred[..., 2] - pred[..., 0], 0) * F.maximum(
        pred[..., 3] - pred[..., 1], 0
    )
    target_area = F.maximum(target[..., 2] - target[..., 0], 0) * F.maximum(
        target[..., 3] - target[..., 1], 0
    )

    w_intersect = F.maximum(
        F.minimum(pred[..., 2], target[..., 2]) - F.maximum(pred[..., 0], target[..., 0]), 0
    )
    h_intersect = F.maximum(
        F.minimum(pred[..., 3], target[..., 3]) - F.maximum(pred[..., 1], target[..., 1]), 0
    )

    area_intersect = w_intersect * h_intersect
    area_union = pred_area + target_area - area_intersect
    ious = area_intersect / F.maximum(area_union, eps)

    if loss_type == "iou":
        loss = -F.log(F.maximum(ious, eps))
    elif loss_type == "linear_iou":
        loss = 1 - ious
    elif loss_type == "giou":
        g_w_intersect = F.maximum(pred[..., 2], target[..., 2]) - F.minimum(
            pred[..., 0], target[..., 0]
        )
        g_h_intersect = F.maximum(pred[..., 3], target[..., 3]) - F.minimum(
            pred[..., 1], target[..., 1]
        )
        ac_union = g_w_intersect * g_h_intersect
        gious = ious - (ac_union - area_union) / F.maximum(ac_union, eps)
        loss = 1 - gious
    return loss
--------------------------------------------------------------------------------
/megengine_release/layers/det/matcher.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import megengine.functional as F


class Matcher:

    def __init__(self, thresholds, labels, allow_low_quality_matches=False):
        assert len(thresholds) + 1 == len(labels), "thresholds and labels are not matched"
        assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
        thresholds.append(float("inf"))
        thresholds.insert(0, -float("inf"))

        self.thresholds = thresholds
        self.labels = labels
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, matrix):
        """
        matrix(tensor): a two-dim tensor with shape (N, M), where N is the
            number of GT boxes and M is the number of anchors in detection.
        """
        assert len(matrix.shape) == 2
        max_scores = matrix.max(axis=0)
        match_indices = F.argmax(matrix, axis=0)

        # default ignore label: -1
        labels = F.full_like(match_indices, -1)

        for label, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
            mask = (max_scores >= low) & (max_scores < high)
            labels[mask] = label

        if self.allow_low_quality_matches:
            mask = (matrix == F.max(matrix, axis=1, keepdims=True)).sum(axis=0) > 0
            labels[mask] = 1

        return match_indices, labels
--------------------------------------------------------------------------------
/megengine_release/layers/det/point_head.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from typing import List

import numpy as np

import megengine as mge
import megengine.functional as F
import megengine.module as M
from megengine import Tensor
from megengine.module.normalization import GroupNorm

import layers


class PointHead(M.Module):
    """
    The head used when anchor points are adopted for object classification and box regression.
    """

    def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
        super().__init__()
        self.stride_list = cfg.stride

        in_channels = input_shape[0].channels
        num_classes = cfg.num_classes
        num_convs = 4
        prior_prob = cfg.cls_prior_prob
        num_anchors = [cfg.num_anchors] * len(input_shape)

        assert (
            len(set(num_anchors)) == 1
        ), "not support different number of anchors between levels"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            cls_subnet.append(GroupNorm(32, in_channels))
            cls_subnet.append(M.ReLU())
            bbox_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            bbox_subnet.append(GroupNorm(32, in_channels))
            bbox_subnet.append(M.ReLU())

        self.cls_subnet = M.Sequential(*cls_subnet)
        self.bbox_subnet = M.Sequential(*bbox_subnet)
        self.cls_score = M.Conv2d(
            in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
        )
        self.bbox_pred = M.Conv2d(
            in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
        )
        self.ctrness = M.Conv2d(
            in_channels, num_anchors * 1, kernel_size=3, stride=1, padding=1
        )

        # Initialization
        for modules in [
            self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred,
            self.ctrness
        ]:
            for layer in modules.modules():
                if isinstance(layer, M.Conv2d):
                    M.init.normal_(layer.weight, mean=0, std=0.01)
                    M.init.fill_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        M.init.fill_(self.cls_score.bias, bias_value)

        self.scale_list = mge.Parameter(np.ones(len(self.stride_list), dtype="float32"))

    def forward(self, features: List[Tensor]):
        logits, offsets, ctrness = [], [], []
        for feature, scale, stride in zip(features, self.scale_list, self.stride_list):
            logits.append(self.cls_score(self.cls_subnet(feature)))
            bbox_subnet = self.bbox_subnet(feature)
            offsets.append(F.relu(self.bbox_pred(bbox_subnet) * scale) * stride)
            ctrness.append(self.ctrness(bbox_subnet))
        return logits, offsets, ctrness
--------------------------------------------------------------------------------
/megengine_release/layers/det/pooler.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
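# How RoIs are assigned to FPN levels in roi_pool below (the FPN-paper rule):
#
#   level = floor(4 + log2(sqrt(box_area) / 224)), clipped to [min_level, max_level]
#
# A worked example with assumed strides [8, 16, 32] (min_level=3, max_level=5):
# a 112x112 RoI gives 4 + log2(112 / 224) = 3, so it pools from the stride-8 map.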
import math

import megengine.functional as F


def roi_pool(
    rpn_fms, rois, stride, pool_shape, pooler_type="roi_align",
):
    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(
        canonical_level + F.log(F.sqrt(box_area) / canonical_box_size) / math.log(2)
    ).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # avoid empty assignment
    assigned_level = F.concat(
        [assigned_level, F.arange(num_fms, dtype="int32", device=assigned_level.device)],
    )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i], level_rois, pool_shape, mode="max", scale=1.0 / stride[i]
            )
        elif pooler_type == "roi_align":
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
--------------------------------------------------------------------------------
/megengine_release/layers/det/rcnn.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
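# Shape bookkeeping for the R-CNN box head below (illustrative; the 80-class
# figures assume the COCO setting used elsewhere in this repo):
#
#   pred_logits:  (N, num_classes + 1)   # +1 for background at index 0
#   pred_offsets: (N, num_classes * 4)   # per-class deltas, reshaped to (N, 80, 4)
#   # at inference each roi is broadcast to (N * 80, 4) before decoding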
import megengine as mge
import megengine.functional as F
import megengine.module as M

import layers


class RCNN(M.Module):

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder(cfg.rcnn_reg_mean, cfg.rcnn_reg_std)

        # roi head
        self.in_features = cfg.rcnn_in_features
        self.stride = cfg.rcnn_stride
        self.pooling_method = cfg.pooling_method
        self.pooling_size = cfg.pooling_size

        self.fc1 = M.Linear(256 * self.pooling_size[0] * self.pooling_size[1], 1024)
        self.fc2 = M.Linear(1024, 1024)
        for l in [self.fc1, self.fc2]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)

        # box predictor
        self.pred_cls = M.Linear(1024, cfg.num_classes + 1)
        self.pred_delta = M.Linear(1024, cfg.num_classes * 4)
        M.init.normal_(self.pred_cls.weight, std=0.01)
        M.init.normal_(self.pred_delta.weight, std=0.001)
        for l in [self.pred_cls, self.pred_delta]:
            M.init.fill_(l.bias, 0)

    def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
        rcnn_rois, labels, bbox_targets = self.get_ground_truth(
            rcnn_rois, im_info, gt_boxes
        )

        fpn_fms = [fpn_fms[x] for x in self.in_features]
        pool_features = layers.roi_pool(
            fpn_fms, rcnn_rois, self.stride, self.pooling_size, self.pooling_method,
        )
        flatten_feature = F.flatten(pool_features, start_axis=1)
        roi_feature = F.relu(self.fc1(flatten_feature))
        roi_feature = F.relu(self.fc2(roi_feature))
        pred_logits = self.pred_cls(roi_feature)
        pred_offsets = self.pred_delta(roi_feature)

        if self.training:
            # loss for rcnn classification
            loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)
            # loss for rcnn regression
            pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
            num_samples = labels.shape[0]
            fg_mask = labels > 0
            loss_rcnn_bbox = layers.smooth_l1_loss(
                pred_offsets[fg_mask, labels[fg_mask] - 1],
                bbox_targets[fg_mask],
                self.cfg.rcnn_smooth_l1_beta,
            ).sum() / F.maximum(num_samples, mge.tensor(1))

            loss_dict = {
                "loss_rcnn_cls": loss_rcnn_cls,
                "loss_rcnn_bbox": loss_rcnn_bbox,
            }
            return loss_dict
        else:
            # slice 1 for removing background
            pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
            pred_offsets = pred_offsets.reshape(-1, 4)
            target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
            # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
            base_rois = F.broadcast_to(
                F.expand_dims(rcnn_rois[:, 1:5], axis=1), target_shape).reshape(-1, 4)
            pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
            return pred_bbox, pred_scores

    def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
        if not self.training:
            return rpn_rois, None, None

        return_rois = []
        return_labels = []
        return_bbox_targets = []

        # get per image proposals and gt_boxes
        for bid in range(gt_boxes.shape[0]):
            num_valid_boxes = im_info[bid, 4].astype("int32")
            gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
            batch_inds = F.full((gt_boxes_per_img.shape[0], 1), bid)
            gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
            batch_roi_mask = rpn_rois[:, 0] == bid
            # all_rois : [batch_id, x1, y1, x2, y2]
            all_rois = F.concat([rpn_rois[batch_roi_mask], gt_rois])

            overlaps = layers.get_iou(all_rois[:, 1:], gt_boxes_per_img)

            max_overlaps = overlaps.max(axis=1)
            gt_assignment = F.argmax(overlaps, axis=1).astype("int32")
            labels = gt_boxes_per_img[gt_assignment, 4]

            # ---------------- get the fg/bg labels for each roi ---------------#
            fg_mask = (max_overlaps >= self.cfg.fg_threshold) & (labels >= 0)
            bg_mask = (
                (max_overlaps >= self.cfg.bg_threshold_low)
                & (max_overlaps < self.cfg.bg_threshold_high)
            )

            num_fg_rois = int(self.cfg.num_rois * self.cfg.fg_ratio)
            fg_inds_mask = layers.sample_labels(fg_mask, num_fg_rois, True, False)
            num_bg_rois = int(self.cfg.num_rois - fg_inds_mask.sum())
            bg_inds_mask = layers.sample_labels(bg_mask, num_bg_rois, True, False)

            labels[bg_inds_mask] = 0

            keep_mask = fg_inds_mask | bg_inds_mask
            labels = labels[keep_mask].astype("int32")
            rois = all_rois[keep_mask]
            target_boxes = gt_boxes_per_img[gt_assignment[keep_mask], :4]
            bbox_targets = self.box_coder.encode(rois[:, 1:], target_boxes)
            bbox_targets = bbox_targets.reshape(-1, 4)

            return_rois.append(rois)
            return_labels.append(labels)
            return_bbox_targets.append(bbox_targets)

        return (
            F.concat(return_rois, axis=0).detach(),
            F.concat(return_labels, axis=0).detach(),
            F.concat(return_bbox_targets, axis=0).detach(),
        )
--------------------------------------------------------------------------------
/megengine_release/layers/det/rpn.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
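# Anchor bookkeeping for the RPN below (a sketch with assumed config values):
# with anchor_scales=[[8]] and anchor_ratios=[[0.5, 1, 2]], num_cell_anchors
# is 1 * 3 == 3, so rpn_cls_score emits 3 channels and rpn_bbox_offsets emits
# 3 * 4 = 12 channels per spatial location of each FPN level.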
import megengine.functional as F
import megengine.module as M

import layers


class RPN(M.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder(cfg.rpn_reg_mean, cfg.rpn_reg_std)

        # check anchor settings
        assert len(set(len(x) for x in cfg.anchor_scales)) == 1
        assert len(set(len(x) for x in cfg.anchor_ratios)) == 1
        self.num_cell_anchors = len(cfg.anchor_scales[0]) * len(cfg.anchor_ratios[0])

        rpn_channel = cfg.rpn_channel
        self.in_features = cfg.rpn_in_features

        self.anchor_generator = layers.AnchorBoxGenerator(
            anchor_scales=cfg.anchor_scales,
            anchor_ratios=cfg.anchor_ratios,
            strides=cfg.rpn_stride,
            offset=self.cfg.anchor_offset,
        )

        self.matcher = layers.Matcher(
            cfg.match_thresholds, cfg.match_labels, cfg.match_allow_low_quality
        )

        self.rpn_conv = M.Conv2d(256, rpn_channel, kernel_size=3, stride=1, padding=1)
        self.rpn_cls_score = M.Conv2d(
            rpn_channel, self.num_cell_anchors, kernel_size=1, stride=1
        )
        self.rpn_bbox_offsets = M.Conv2d(
            rpn_channel, self.num_cell_anchors * 4, kernel_size=1, stride=1
        )

        for l in [self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)

    def forward(self, features, im_info, boxes=None):
        # prediction
        features = [features[x] for x in self.in_features]

        # get anchors
        anchors_list = self.anchor_generator(features)

        pred_cls_logit_list = []
        pred_bbox_offset_list = []
        for x in features:
            t = F.relu(self.rpn_conv(x))
            scores = self.rpn_cls_score(t)
            pred_cls_logit_list.append(
                scores.reshape(
                    scores.shape[0],
                    self.num_cell_anchors,
                    scores.shape[2],
                    scores.shape[3],
                )
            )
            bbox_offsets = self.rpn_bbox_offsets(t)
            pred_bbox_offset_list.append(
                bbox_offsets.reshape(
                    bbox_offsets.shape[0],
                    self.num_cell_anchors,
                    4,
                    bbox_offsets.shape[2],
                    bbox_offsets.shape[3],
                )
            )
        # get rois from the predictions
        rpn_rois = self.find_top_rpn_proposals(
            pred_cls_logit_list, pred_bbox_offset_list, anchors_list, im_info
        )

        if self.training:
            rpn_labels, rpn_offsets = self.get_ground_truth(
                anchors_list, boxes, im_info[:, 4].astype("int32")
            )
            pred_cls_logits, pred_bbox_offsets = self.merge_rpn_score_box(
                pred_cls_logit_list, pred_bbox_offset_list
            )

            fg_mask = rpn_labels > 0
            valid_mask = rpn_labels >= 0
            num_valid = valid_mask.sum()

            # rpn classification loss
            loss_rpn_cls = F.loss.binary_cross_entropy(
                pred_cls_logits[valid_mask], rpn_labels[valid_mask]
            )

            # rpn regression loss
            loss_rpn_bbox = layers.smooth_l1_loss(
                pred_bbox_offsets[fg_mask],
                rpn_offsets[fg_mask],
                self.cfg.rpn_smooth_l1_beta,
            ).sum() / F.maximum(num_valid, 1)

            loss_dict = {"loss_rpn_cls": loss_rpn_cls, "loss_rpn_bbox": loss_rpn_bbox}
            return rpn_rois, loss_dict
        else:
            return rpn_rois

    def find_top_rpn_proposals(
        self, rpn_cls_score_list, rpn_bbox_offset_list, anchors_list, im_info
    ):
        prev_nms_top_n = (
            self.cfg.train_prev_nms_top_n
            if self.training
            else self.cfg.test_prev_nms_top_n
        )
        post_nms_top_n = (
            self.cfg.train_post_nms_top_n
            if self.training
            else self.cfg.test_post_nms_top_n
        )

        return_rois = []

        for bid in range(im_info.shape[0]):
            batch_proposal_list = []
            batch_score_list = []
            batch_level_list = []
            for l, (rpn_cls_score, rpn_bbox_offset, anchors) in enumerate(
                zip(rpn_cls_score_list, rpn_bbox_offset_list, anchors_list)
            ):
                # get proposals and scores
                offsets = rpn_bbox_offset[bid].transpose(2, 3, 0, 1).reshape(-1, 4)
                proposals = self.box_coder.decode(anchors, offsets)

                scores = rpn_cls_score[bid].transpose(1, 2, 0).flatten()
                scores = scores.detach()
                # prev nms top n
                scores, order = F.topk(scores, descending=True, k=prev_nms_top_n)
                proposals = proposals[order]

                batch_proposal_list.append(proposals)
                batch_score_list.append(scores)
                batch_level_list.append(F.full_like(scores, l))

            # gather proposals, scores, level
            proposals = F.concat(batch_proposal_list, axis=0)
            scores = F.concat(batch_score_list, axis=0)
            levels = F.concat(batch_level_list, axis=0)

            proposals = layers.get_clipped_boxes(proposals, im_info[bid])
            # filter invalid proposals and apply total level nms
            keep_mask = layers.filter_boxes(proposals)
            proposals = proposals[keep_mask]
            scores = scores[keep_mask]
            levels = levels[keep_mask]
            nms_keep_inds = layers.batched_nms(
                proposals, scores, levels, self.cfg.rpn_nms_threshold, post_nms_top_n
            )

            # generate rois to rcnn head, rois shape (N, 5), info [batch_id, x1, y1, x2, y2]
            rois = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
            rois = rois[nms_keep_inds]
            batch_inds = F.full((rois.shape[0], 1), bid)
            batch_rois = F.concat([batch_inds, rois[:, :4]], axis=1)
            return_rois.append(batch_rois)

        return_rois = F.concat(return_rois, axis=0)
        return return_rois.detach()

    def merge_rpn_score_box(self, rpn_cls_score_list, rpn_bbox_offset_list):
        final_rpn_cls_score_list = []
        final_rpn_bbox_offset_list = []

        for bid in range(rpn_cls_score_list[0].shape[0]):
            batch_rpn_cls_score_list = []
            batch_rpn_bbox_offset_list = []

            for i in range(len(self.in_features)):
                rpn_cls_scores = rpn_cls_score_list[i][bid].transpose(1, 2, 0).flatten()
                rpn_bbox_offsets = (
                    rpn_bbox_offset_list[i][bid].transpose(2, 3, 0, 1).reshape(-1, 4)
                )

                batch_rpn_cls_score_list.append(rpn_cls_scores)
                batch_rpn_bbox_offset_list.append(rpn_bbox_offsets)

            batch_rpn_cls_scores = F.concat(batch_rpn_cls_score_list, axis=0)
            batch_rpn_bbox_offsets = F.concat(batch_rpn_bbox_offset_list, axis=0)

            final_rpn_cls_score_list.append(batch_rpn_cls_scores)
            final_rpn_bbox_offset_list.append(batch_rpn_bbox_offsets)

        final_rpn_cls_scores = F.concat(final_rpn_cls_score_list, axis=0)
        final_rpn_bbox_offsets = F.concat(final_rpn_bbox_offset_list, axis=0)
        return final_rpn_cls_scores, final_rpn_bbox_offsets

    def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
        anchors = F.concat(anchors_list, axis=0)
        labels_list = []
        offsets_list = []

        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            overlaps = layers.get_iou(gt_boxes[:, :4], anchors)
            matched_indices, labels = self.matcher(overlaps)

            offsets = self.box_coder.encode(anchors, gt_boxes[matched_indices, :4])
self.box_coder.encode(anchors, gt_boxes[matched_indices, :4]) 217 | 218 | # sample positive labels 219 | num_positive = int(self.cfg.num_sample_anchors * self.cfg.positive_anchor_ratio) 220 | labels = layers.sample_labels(labels, num_positive, 1, -1) 221 | # sample negative labels 222 | num_positive = (labels == 1).sum().astype("int32") 223 | num_negative = self.cfg.num_sample_anchors - num_positive 224 | labels = layers.sample_labels(labels, num_negative, 0, -1) 225 | 226 | labels_list.append(labels) 227 | offsets_list.append(offsets) 228 | 229 | return ( 230 | F.concat(labels_list, axis=0).detach(), 231 | F.concat(offsets_list, axis=0).detach(), 232 | ) 233 | -------------------------------------------------------------------------------- /megengine_release/layers/det/sampling.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import megengine.functional as F 10 | from megengine.random import uniform 11 | 12 | 13 | def sample_labels(labels, num_samples, label_value, ignore_label=-1): 14 | """Sample at most `num_samples` labels whose value equals `label_value`; the surplus are reset to `ignore_label`. 15 | 16 | Args: 17 | labels(Tensor): shape of label is (N,) 18 | num_samples(int): max number of labels to keep with value `label_value` 19 | label_value(int): the label value to sample 20 | 21 | Returns: 22 | label(Tensor): label after sampling 23 | """ 24 | assert labels.ndim == 1, "Only tensor of dim 1 is supported." 25 | mask = (labels == label_value) 26 | num_valid = mask.sum() 27 | if num_valid <= num_samples: 28 | return labels 29 | 30 | random_tensor = F.zeros_like(labels).astype("float32") 31 | random_tensor[mask] = uniform(size=num_valid) 32 | _, invalid_inds = F.topk(random_tensor, k=num_samples - num_valid) 33 | 34 | labels[invalid_inds] = ignore_label 35 | return labels 36 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/data_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
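# Usage sketch: the `data_mapper` dict below maps the `name` field of a dataset
# config to a megengine dataset class, e.g. `data_mapper["coco"](root, ann_file, ...)`,
# which is how train.py (build_dataset) and test.py (build_dataloader) resolve datasets.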
9 | from megengine.data.dataset import COCO, Objects365, PascalVOC 10 | 11 | data_mapper = dict( 12 | coco=COCO, 13 | objects365=Objects365, 14 | voc=PascalVOC, 15 | ) 16 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import argparse 10 | 11 | import cv2 12 | 13 | import megengine as mge 14 | 15 | from layers.tools.data_mapper import data_mapper 16 | from layers.tools.utils import DetEvaluator, import_from_file 17 | 18 | logger = mge.get_logger(__name__) 19 | logger.setLevel("INFO") 20 | 21 | 22 | def make_parser(): 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument( 25 | "-f", "--file", default="net.py", type=str, help="net description file" 26 | ) 27 | parser.add_argument( 28 | "-w", "--weight_file", default=None, type=str, help="weights file", 29 | ) 30 | parser.add_argument("-i", "--image", type=str) 31 | return parser 32 | 33 | 34 | def main(): 35 | parser = make_parser() 36 | args = parser.parse_args() 37 | 38 | current_network = import_from_file(args.file) 39 | cfg = current_network.Cfg() 40 | cfg.backbone_pretrained = False 41 | model = current_network.Net(cfg) 42 | model.eval() 43 | 44 | state_dict = mge.load(args.weight_file) 45 | if "state_dict" in state_dict: 46 | state_dict = state_dict["state_dict"] 47 | model.load_state_dict(state_dict) 48 | 49 | evaluator = DetEvaluator(model) 50 | 51 | ori_img = cv2.imread(args.image) 52 | image, im_info = DetEvaluator.process_inputs( 53 | ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size, 54 | ) 55 | pred_res = evaluator.predict( 56 | image=mge.tensor(image), 57 | im_info=mge.tensor(im_info) 58 | ) 59 | res_img = DetEvaluator.vis_det( 60 | ori_img, 61 | pred_res, 62 | is_show_label=True, 63 | classes=data_mapper[cfg.test_dataset["name"]].class_names, 64 | ) 65 | cv2.imwrite("results.jpg", res_img) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
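# `py_cpu_nms` below is greedy NMS: repeatedly keep the highest-scoring box and
# suppress every remaining box whose IoU with it exceeds `thresh`.
# Minimal sketch with made-up [x1, y1, x2, y2, score] rows:
#   dets = np.array([[0., 0., 10., 10., 0.9], [1., 1., 9., 9., 0.8], [20., 20., 30., 30., 0.7]])
#   py_cpu_nms(dets, thresh=0.5)  # -> [0, 2] (box 1 has IoU 0.64 with box 0, so it is dropped)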
9 | import numpy as np 10 | 11 | 12 | def py_cpu_nms(dets, thresh): 13 | x1 = np.ascontiguousarray(dets[:, 0]) 14 | y1 = np.ascontiguousarray(dets[:, 1]) 15 | x2 = np.ascontiguousarray(dets[:, 2]) 16 | y2 = np.ascontiguousarray(dets[:, 3]) 17 | 18 | areas = (x2 - x1) * (y2 - y1) 19 | order = dets[:, 4].argsort()[::-1] 20 | keep = list() 21 | 22 | while order.size > 0: 23 | pick_idx = order[0] 24 | keep.append(pick_idx) 25 | order = order[1:] 26 | 27 | xx1 = np.maximum(x1[pick_idx], x1[order]) 28 | yy1 = np.maximum(y1[pick_idx], y1[order]) 29 | xx2 = np.minimum(x2[pick_idx], x2[order]) 30 | yy2 = np.minimum(y2[pick_idx], y2[order]) 31 | 32 | inter = np.maximum(xx2 - xx1, 0) * np.maximum(yy2 - yy1, 0) 33 | iou = inter / np.maximum(areas[pick_idx] + areas[order] - inter, 1e-5) 34 | 35 | order = order[iou <= thresh] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/ICD.py: -------------------------------------------------------------------------------- 1 | import megengine 2 | import megengine as mge 3 | from typing import Dict, List, Tuple 4 | import megengine.module as M 5 | import megengine.functional as F 6 | import numpy as np 7 | from .encoder import InstanceRegEncoder 8 | from .decoder import DecoderWrapper 9 | from .utility import PositionEmbeddingSine 10 | 11 | 12 | def mask_out_padding(fpn_features, images_sizes, images): 13 | # Modified from DETR: https://github.com/facebookresearch/detr/blob/main/LICENSE 14 | # NOTE: zeros for foreground 15 | image_sizes = [(images_sizes[i, 2], images_sizes[i, 3]) for i in range(images_sizes.shape[0])] 16 | device = images_sizes.device 17 | h_, w_ = images.shape[-2:] 18 | masks = {} 19 | #assert len(feature_shapes) == len(self.feature_strides) 20 | for k, feat in fpn_features.items(): 21 | # stride = 2 ** int(k[-1]) 22 | N, _, H, W = feat.shape 23 | masks_per_feature_level = F.ones( 24 | (N, H, W), dtype='bool', device=device) 25 | stride = (h_ / H + w_ / W) / 2 26 | for img_idx, (h, w) in enumerate(image_sizes): 27 | masks_per_feature_level[ 28 | img_idx, 29 | : int(np.ceil(float(h) / stride)), 30 | : int(np.ceil(float(w) / stride)), 31 | ] = 0 32 | masks[k] = F.expand_dims(masks_per_feature_level, 1) #masks_per_feature_level.unsqueeze(1) 33 | return masks 34 | 35 | 36 | class ICD(M.Module): 37 | def __init__(self, hidden_dim, cfg): 38 | super().__init__() 39 | self.pos_embedding = PositionEmbeddingSine( 40 | num_pos_feats=hidden_dim // 2, 41 | normalize=True) 42 | 43 | self.ins_encoder = InstanceRegEncoder(cfg) 44 | self.attention_module_aux = DecoderWrapper(cfg) 45 | self.attention_module_distill = DecoderWrapper(cfg) 46 | # NOTE(peizhen): 1e-05 is not large enough and empirically might cause sqrt(neg) nan 47 | self.distill_norm_ = M.LayerNorm( 48 | [hidden_dim // cfg.distiller.ATT_HEADS], eps=1e-04, affine=False) 49 | #self.distill_norm_ = LayerNorm([hidden_dim // cfg.distiller.ATT_HEADS]) 50 | self.feat_keys = cfg.distiller.FEAT_KEYS 51 | self.weight_value = cfg.distiller.WEIGHT_VALUE 52 | self.temp_value = cfg.distiller.TEMP_VALUE 53 | 54 | self.loss_keys = [] 55 | self.num_losses = 3 56 | 57 | def mimic_loss(self, svalue, tvalue, value_mask): 58 | return (F.loss.square_loss(svalue, tvalue, reduction='none').transpose(1, 2, 3, 0) 59 | * value_mask).mean(2).sum() / F.clip(value_mask.sum(), lower=1e-6) 60 | 61 | def forward(self, features_dict_tea, features_dict_stu, images, instances, image_info, distill_flag=0): 62 | ''' 63 | contain_box_mask: 1d float 
tensor, [1., 0., ...], denoting whether each image contains any objects 64 | nr_actual_boxes_per_img: list of int, the exact number of objects each image contains 65 | ''' 66 | nr_actual_boxes_per_img = [image_info[i, -1] for i in range(image_info.shape[0])] 67 | 68 | masks = mask_out_padding(features_dict_tea, image_info, images) 69 | 70 | pos_embs = {k: self.pos_embedding( 71 | features_dict_tea[k], masks[k]) for k in self.feat_keys} 72 | pos_emb = F.concat([F.transpose(F.flatten(pos_embs[k], 2), (2, 0, 1)) for k in self.feat_keys], 0).detach() # S, N, C 73 | masks = F.concat([F.squeeze(F.flatten(masks[k], 2), 1) 74 | for k in self.feat_keys], 1).detach() # N, S 75 | 76 | loss_aux_dict, aux_info_dict = self.forward_aux( 77 | instances, features_dict_tea, image_info, {'mask_out': masks, 'pos_emb': pos_emb}) 78 | loss_distill_dict = self.forward_distill( 79 | features_dict_stu, aux_info_dict, nr_actual_boxes_per_img, distill_flag, {'mask_out': masks, 'pos_emb': pos_emb}) 80 | loss_aux_dict.update(loss_distill_dict) 81 | self.loss_keys = list(loss_aux_dict.keys()) 82 | # print(self.loss_keys) 83 | return loss_aux_dict 84 | 85 | def forward_aux(self, instances, features_dict_tea, image_size, aux_input): 86 | # [S, N, C] 87 | feat = F.concat([F.flatten(features_dict_tea[k], start_axis=2).transpose(2, 0, 1) 88 | for k in self.feat_keys], 0).detach() 89 | 90 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K] 91 | # (0 for Fake Instance) in ins_mask 92 | 93 | # The four variables below, provided by the encoder forward, have already been detached before being passed here 94 | ins_feat, ins_mask, ins_mask_gt, pos_gt = self.ins_encoder( 95 | instances, pro_feats=features_dict_tea, image_size=image_size) 96 | decoded_feat, tmask, tvalue = self.attention_module_aux( 97 | ins_feat, 98 | feat, 99 | feat, 100 | query_mask=ins_mask, 101 | key_padding_mask=aux_input['mask_out'], 102 | pos_embedding=aux_input['pos_emb']) 103 | 104 | aux_info_dict = { 105 | 'encoded_ins': (ins_feat, ins_mask, ins_mask_gt), 106 | 'tmask': tmask, 107 | 'tvalue': tvalue, 108 | } 109 | 110 | loss_dict = dict() 111 | loss_dict = self.ins_encoder.loss( 112 | decoded_feat, ins_mask_gt, ins_mask, pos_gt) 113 | 114 | return loss_dict, aux_info_dict 115 | 116 | 117 | def forward_distill(self, features_dict_stu, aux_info_dict, nr_actual_boxes_per_img, distill_flag, aux_input): 118 | loss_dict = dict() 119 | 120 | assert set(self.feat_keys) == set(list(features_dict_stu.keys( 121 | ))), 'WARNING: Unequal keys for fpn and attention ! 
<%s> != <%s>' % (self.feat_keys, features_dict_stu.keys()) 122 | # [S, N, C] 123 | feat = F.concat([F.flatten(features_dict_stu[k], start_axis=2).transpose(2, 0, 1) 124 | for k in self.feat_keys], 0) 125 | 126 | if distill_flag == 0: 127 | feat = feat.detach() 128 | 129 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K] 130 | # (0 for Fake Instance) in ins_mask 131 | ins_feat, ins_mask, ins_mask_gt = aux_info_dict['encoded_ins'] 132 | max_ele = int(max(max(nr_actual_boxes_per_img), 1)) 133 | 134 | # Note that mask is not normalized by softmax 135 | # load state dict, therefore we share almost all parameters 136 | _, _, svalue = self.attention_module_distill( 137 | ins_feat[:max_ele, :, :], 138 | feat, 139 | feat, 140 | query_mask=ins_mask[:, :max_ele], 141 | key_padding_mask=aux_input['mask_out'], 142 | pos_embedding=aux_input['pos_emb'], 143 | proj_only=True) 144 | tvalue = aux_info_dict['tvalue'] 145 | tmask = aux_info_dict['tmask'] 146 | 147 | # [bsz, heads, ins, Seq] 148 | svalue = self.distill_norm_(svalue) 149 | # [Seq, bsz, heads, channel] 150 | tvalue = self.distill_norm_(tvalue) 151 | 152 | # cosine similarity between features; unreal instances are masked 153 | # feat holds compact features for each instance 154 | # value is weighted attention maps refactored as different heads 155 | # mask is q, k relation masks for distillation 156 | 157 | # [bsz, heads, 1, S] 158 | value_mask = (F.softmax(tmask / self.temp_value, axis=-1) 159 | * F.expand_dims(F.expand_dims(ins_mask_gt, axis=1), axis=-1) 160 | ).sum(2, keepdims=True).detach() 161 | # NOTE(peizhen): value_mask[j, ...] is ill-defined for any j-th image that contains no box; beforehand, we could use a pseudo box for images that have no box at all 162 | # the same should apply to ins_encoder's auxiliary task loss too (an image with no box should not contribute to the loss) 163 | 164 | # [bsz, heads, 1, num_seq] 165 | # value_mask = value_mask * contain_box_mask.reshape(-1, 1, 1, 1) 166 | loss_dict = {'distill': self.mimic_loss( 167 | svalue, tvalue.detach(), value_mask) * self.weight_value} 168 | return loss_dict 169 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/__init__.py: -------------------------------------------------------------------------------- 1 | from .ICD import ICD 2 | from .utility import get_instance_list 3 | 4 | __all__ = [key for key in globals().keys() if not key.startswith('_')] 5 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/decoder.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.module as M 3 | from megengine import functional as F 4 | import numpy as np 5 | from .transformer import MultiheadAttention 6 | #from .utility import has_nan_or_inf 7 | 8 | # mge.core.set_option('async_level', 0) 9 | 10 | class DecoderWrapper(M.Module): 11 | def __init__(self, cfg): 12 | super().__init__() 13 | channels = cfg.distiller.HIDDEN_DIM 14 | heads = cfg.distiller.ATT_HEADS 15 | 16 | # this is a local module derived from the official implementation; we modify the last modules 17 | self.matt = MultiheadAttention(channels, heads) 18 | 19 | self.pos_projector = M.Linear(in_features=channels, out_features=channels) 20 | self.use_pos = cfg.distiller.USE_POS_EMBEDDING 21 | self.pos_on_v = cfg.distiller.DECODER_POSEMB_ON_V 22 | 23 | def with_pos_embed(self, tensor, pos): 24 | ''' 25 | tensor: 
[S, N, C] 26 | pos: [S, N, C] or [S, 1, C] 27 | ''' 28 | if not self.use_pos or pos is None: 29 | return tensor 30 | 31 | pos = self.pos_projector(pos) 32 | return tensor + pos 33 | 34 | 35 | def forward(self, q, k, v, query_mask=None, key_padding_mask=None, pos_embedding=None, proj_only=False): 36 | # q, v: [sequence_len, batch_size, channels] 37 | k = self.with_pos_embed(k, pos_embedding) 38 | if self.pos_on_v: 39 | v = self.with_pos_embed(v, pos_embedding) 40 | att, mask, values = self.matt( 41 | q, k, v, key_padding_mask=key_padding_mask, proj_only=proj_only) 42 | return att, mask, values 43 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/layers.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.module as M 3 | from megengine import functional as F 4 | import numpy as np 5 | 6 | class MLP(M.Module): 7 | """ Very simple multi-layer perceptron (also called FFN)""" 8 | 9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers): 10 | super().__init__() 11 | self.num_layers = num_layers 12 | h = [hidden_dim] * (num_layers - 1) 13 | self.layers = [M.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])] 14 | 15 | def forward(self, x): 16 | for i, layer in enumerate(self.layers): 17 | x = F.nn.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 18 | return x 19 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/utility.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.module as M 3 | from megengine import functional as F 4 | import numpy as np 5 | import math 6 | # mge.core.set_option('async_level', 0) 7 | 8 | 9 | def safe_masked_fill(tensor: mge.Tensor, mask: mge.Tensor, val: float) -> mge.Tensor: 10 | ''' 11 | same behavior as torch.tensor.masked_fill_(mask, val) 12 | ''' 13 | assert mask.dtype == np.bool_ 14 | discard_mask = ~mask 15 | keep_mask = mask.astype('float32') 16 | # NOTE(peizhen): simply tensor * ~mask + value * mask could not handle the value=float('+inf'/'-inf') case, since inf*0 = nan 17 | new_tensor = tensor * ~mask + F.where(mask, F.ones_like(mask) * val, F.zeros_like(mask)) 18 | return new_tensor 19 | 20 | 21 | def has_nan_or_inf(inp): 22 | invalid_mask = F.logical_or(F.isnan(inp), F.isinf(inp)) 23 | return invalid_mask.sum().item() > 0 24 | 25 | 26 | class PositionEmbeddingSine(M.Module): 27 | """ 28 | This is a more standard version of the position embedding, very similar to the one 29 | used by the Attention is all you need paper, generalized to work on images. 
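Each spatial location receives sin/cos features of its cumulative (y, x) position over the unmasked region, at num_pos_feats frequencies per axis, so padded pixels never shift the encoding.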
30 | Modified from DETR: https://github.com/facebookresearch/detr/blob/main/LICENSE 31 | """ 32 | 33 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 34 | super().__init__() 35 | self.num_pos_feats = num_pos_feats 36 | self.temperature = temperature 37 | self.normalize = normalize 38 | if scale is not None and normalize is False: 39 | raise ValueError("normalize should be True if scale is passed") 40 | if scale is None: 41 | scale = 2 * math.pi 42 | self.scale = scale 43 | 44 | def forward(self, x, mask): 45 | assert mask is not None 46 | not_mask = F.squeeze(~mask, 1) # ~mask.squeeze(1) 47 | # import ipdb; ipdb.set_trace() 48 | y_embed = F.cumsum(not_mask.astype('int32'), 1) # .cumsum(1, dtype=torch.float32) 49 | x_embed = F.cumsum(not_mask.astype('int32'), 2) # .cumsum(2, dtype=torch.float32) 50 | if self.normalize: 51 | eps = 1e-6 52 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 53 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 54 | 55 | dim_t = F.arange(self.num_pos_feats, 56 | dtype="float32", device=x.device) 57 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 58 | 59 | pos_x = F.expand_dims(x_embed, -1) / dim_t 60 | pos_y = F.expand_dims(y_embed, -1) / dim_t 61 | pos_x = F.flatten(F.stack( 62 | (F.sin(pos_x[:, :, :, 0::2]), F.cos(pos_x[:, :, :, 1::2])), axis=4), start_axis=3) 63 | pos_y = F.flatten(F.stack( 64 | (F.sin(pos_y[:, :, :, 0::2]), F.cos(pos_y[:, :, :, 1::2])), axis=4), start_axis=3) 65 | pos = F.transpose(F.concat((pos_y, pos_x), axis=3), (0, 3, 1, 2)) 66 | return pos 67 | 68 | 69 | def get_valid_boxes(raw_boxes, terminate, ignore): 70 | ''' 71 | Input: 72 | raw_boxes: (B, MAXN, 4+1) 73 | terminate: label value that marks padded (invalid) box slots 74 | Return: 75 | boxes: list of (Nb, 4) 76 | labels: list of (Nb,) 77 | ''' 78 | # (B,) 79 | B = raw_boxes.shape[0] 80 | nr_valid_boxes = (1 - F.equal(raw_boxes[:, :, -1], terminate)).sum(axis=1).astype('int32') 81 | 82 | #print(f'nr_valid_boxes: {nr_valid_boxes}') 83 | 84 | # NOTE(peizhen): raw_boxes[i, :0, :4] will cause a bug since ':0' indexing is invalid in megengine 85 | #boxes = [raw_boxes[i, :nr_valid_boxes[i], :4] for i in range(B)] 86 | #labels = [raw_boxes[i, :nr_valid_boxes[i], 4] for i in range(B)] 87 | 88 | # B x (Nb, 4) and B x (Nb,) 89 | boxes = list() 90 | labels = list() 91 | for i in range(B): 92 | num_valid = nr_valid_boxes[i].item() 93 | if num_valid > 0: 94 | boxes.append(raw_boxes[i, :num_valid, :4]) 95 | labels.append(raw_boxes[i, :num_valid, 4]) 96 | else: 97 | boxes.append(F.zeros((0, 4), dtype=raw_boxes.dtype, device=raw_boxes.device)) 98 | labels.append(F.zeros((0,), dtype=raw_boxes.dtype, device=raw_boxes.device)) 99 | 100 | # TODO(peizhen): currently discards entries whose labels are -1. Need a better operation? 101 | # see backup/utility.py annotation part 102 | return boxes, labels 103 | 104 | 105 | def get_instance_list(image_size, gt_boxes_human, gt_boxes_car, terminate=-2, ignore=-1): 106 | ''' 107 | Input: 108 | gt_boxes_human: (B, MAXN, 4+1) 109 | gt_boxes_car: (B, MAXN, 4+1) 110 | ''' 111 | human_box_list, human_label_list = get_valid_boxes(gt_boxes_human, terminate, ignore) 112 | vehicle_box_list, vehicle_label_list = get_valid_boxes(gt_boxes_car, terminate, ignore) 113 | # (1) For `gt_boxes_human`, 1 denotes human. 
-2 denotes an invalid object (will be processed as 0) 114 | # (2) For `gt_boxes_car`, 1 & 2 denote different kinds of car, -2 denotes an invalid object (valid labels will still be 1 and 2) 115 | 116 | instances = list() 117 | contain_box_mask = list() 118 | nr_actual_boxes_per_img = list() 119 | for human_boxes, human_labels, vehicle_boxes, vehicle_labels in \ 120 | zip(human_box_list, human_label_list, vehicle_box_list, vehicle_label_list): 121 | # (k, 4) and (k,) 122 | gt_boxes = F.concat([human_boxes, vehicle_boxes], axis=0).astype("float32") 123 | # Map gt_boxes_human's labels from 1 to 0. Naturally, cars own labels 1 and 2 124 | gt_classes = F.concat([human_labels - 1, vehicle_labels], axis=0).astype("int32") 125 | 126 | contain_box_mask.append(gt_boxes.shape[0] > 0) 127 | assert gt_boxes.shape[0] == gt_classes.shape[0] 128 | 129 | # pad a box for images that contain no box at all, to work around a potential indexing bug (unlike in coco, an image in a business dataset might contain no box at all) 130 | nr_valid_objs = gt_boxes.shape[0] 131 | nr_actual_boxes_per_img.append(nr_valid_objs) 132 | if nr_valid_objs == 0: 133 | gt_boxes = F.zeros((1, 4), device=gt_boxes.device, dtype=gt_boxes.dtype) 134 | gt_classes = F.zeros((1,), device=gt_classes.device, dtype=gt_classes.dtype) 135 | 136 | instances.append({'image_size': image_size, 'gt_boxes': gt_boxes, 'gt_classes': gt_classes}) 137 | 138 | # (bsz,) 139 | contain_box_mask = mge.Tensor( 140 | contain_box_mask, device=instances[0]['gt_boxes'].device, dtype='float32').detach() 141 | 142 | return instances, contain_box_mask, nr_actual_boxes_per_img 143 | -------------------------------------------------------------------------------- /megengine_release/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .atss import * 10 | from .faster_rcnn import * 11 | from .fcos import * 12 | from .freeanchor import * 13 | from .retinanet import * 14 | 15 | _EXCLUDE = {} 16 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 17 | -------------------------------------------------------------------------------- /megengine_release/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /megengine_release/models/backbones/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /megengine_release/models/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import numpy as np 10 | 11 | import megengine.functional as F 12 | import megengine.module as M 13 | 14 | import models.backbones.resnet.model as resnet 15 | import layers 16 | 17 | 18 | class FasterRCNN(M.Module): 19 | """ 20 | Implement Faster R-CNN (https://arxiv.org/abs/1506.01497). 21 | """ 22 | 23 | def __init__(self, cfg): 24 | super().__init__() 25 | self.cfg = cfg 26 | # ----------------------- build backbone ------------------------ # 27 | bottom_up = getattr(resnet, cfg.backbone)( 28 | norm=layers.get_norm(cfg.backbone_norm), pretrained=cfg.backbone_pretrained 29 | ) 30 | del bottom_up.fc 31 | 32 | # ----------------------- build FPN ----------------------------- # 33 | self.backbone = layers.FPN( 34 | bottom_up=bottom_up, 35 | in_features=cfg.fpn_in_features, 36 | out_channels=cfg.fpn_out_channels, 37 | norm=cfg.fpn_norm, 38 | top_block=layers.FPNP6(), 39 | strides=cfg.fpn_in_strides, 40 | channels=cfg.fpn_in_channels, 41 | ) 42 | 43 | # ----------------------- build RPN ----------------------------- # 44 | self.rpn = layers.RPN(cfg) 45 | 46 | # ----------------------- build RCNN head ----------------------- # 47 | self.rcnn = layers.RCNN(cfg) 48 | 49 | def preprocess_image(self, image): 50 | padded_image = layers.get_padded_tensor(image, 32, 0.0) 51 | normed_image = ( 52 | padded_image 53 | - np.array(self.cfg.img_mean, dtype="float32")[None, :, None, None] 54 | ) / np.array(self.cfg.img_std, dtype="float32")[None, :, None, None] 55 | return normed_image 56 | 57 | def forward(self, image, im_info, gt_boxes=None): 58 | image = self.preprocess_image(image) 59 | features = self.backbone(image) 60 | 61 | if self.training: 62 | return self._forward_train(features, im_info, gt_boxes) 63 | else: 64 | return self.inference(features, im_info) 65 | 66 | def _forward_train(self, features, im_info, gt_boxes): 67 | rpn_rois, rpn_losses = self.rpn(features, im_info, gt_boxes) 68 | rcnn_losses = self.rcnn(features, rpn_rois, im_info, gt_boxes) 69 | 70 | loss_rpn_cls = rpn_losses["loss_rpn_cls"] 71 | loss_rpn_bbox = rpn_losses["loss_rpn_bbox"] 72 | loss_rcnn_cls = rcnn_losses["loss_rcnn_cls"] 73 | loss_rcnn_bbox = rcnn_losses["loss_rcnn_bbox"] 74 | total_loss = loss_rpn_cls + loss_rpn_bbox + loss_rcnn_cls + loss_rcnn_bbox 75 | 76 | loss_dict = { 77 | "total_loss": total_loss, 78 | "rpn_cls": loss_rpn_cls, 79 | "rpn_bbox": loss_rpn_bbox, 80 | "rcnn_cls": loss_rcnn_cls, 81 | "rcnn_bbox": loss_rcnn_bbox, 82 | } 83 | self.cfg.losses_keys = list(loss_dict.keys()) 84 | return loss_dict 85 | 86 | def inference(self, features, im_info): 87 | rpn_rois = self.rpn(features, im_info) 88 | pred_boxes, pred_score = self.rcnn(features, rpn_rois) 89 | pred_boxes = 
pred_boxes.reshape(-1, 4) 90 | 91 | scale_w = im_info[0, 1] / im_info[0, 3] 92 | scale_h = im_info[0, 0] / im_info[0, 2] 93 | pred_boxes = pred_boxes / F.concat([scale_w, scale_h, scale_w, scale_h], axis=0) 94 | clipped_boxes = layers.get_clipped_boxes( 95 | pred_boxes, im_info[0, 2:4] 96 | ).reshape(-1, self.cfg.num_classes, 4) 97 | return pred_score, clipped_boxes 98 | 99 | 100 | class FasterRCNNConfig: 101 | # pylint: disable=too-many-statements 102 | def __init__(self): 103 | self.backbone = "resnet50" 104 | self.backbone_pretrained = True 105 | self.backbone_norm = "FrozenBN" 106 | self.backbone_freeze_at = 2 107 | self.fpn_norm = None 108 | self.fpn_in_features = ["res2", "res3", "res4", "res5"] 109 | self.fpn_in_strides = [4, 8, 16, 32] 110 | self.fpn_in_channels = [256, 512, 1024, 2048] 111 | self.fpn_out_channels = 256 112 | 113 | # ------------------------ data cfg -------------------------- # 114 | self.train_dataset = dict( 115 | name="coco", 116 | root="train2017", 117 | ann_file="annotations/instances_train2017.json", 118 | remove_images_without_annotations=True, 119 | ) 120 | self.test_dataset = dict( 121 | name="coco", 122 | root="val2017", 123 | ann_file="annotations/instances_val2017.json", 124 | remove_images_without_annotations=False, 125 | ) 126 | self.num_classes = 80 127 | self.img_mean = [103.530, 116.280, 123.675] # BGR 128 | self.img_std = [57.375, 57.120, 58.395] 129 | 130 | # ----------------------- rpn cfg ------------------------- # 131 | self.rpn_stride = [4, 8, 16, 32, 64] 132 | self.rpn_in_features = ["p2", "p3", "p4", "p5", "p6"] 133 | self.rpn_channel = 256 134 | self.rpn_reg_mean = [0.0, 0.0, 0.0, 0.0] 135 | self.rpn_reg_std = [1.0, 1.0, 1.0, 1.0] 136 | 137 | self.anchor_scales = [[x] for x in [32, 64, 128, 256, 512]] 138 | self.anchor_ratios = [[0.5, 1, 2]] 139 | self.anchor_offset = 0.5 140 | 141 | self.match_thresholds = [0.3, 0.7] 142 | self.match_labels = [0, -1, 1] 143 | self.match_allow_low_quality = True 144 | self.rpn_nms_threshold = 0.7 145 | self.num_sample_anchors = 256 146 | self.positive_anchor_ratio = 0.5 147 | 148 | # ----------------------- rcnn cfg ------------------------- # 149 | self.rcnn_stride = [4, 8, 16, 32] 150 | self.rcnn_in_features = ["p2", "p3", "p4", "p5"] 151 | self.rcnn_reg_mean = [0.0, 0.0, 0.0, 0.0] 152 | self.rcnn_reg_std = [0.1, 0.1, 0.2, 0.2] 153 | 154 | self.pooling_method = "roi_align" 155 | self.pooling_size = (7, 7) 156 | 157 | self.num_rois = 512 158 | self.fg_ratio = 0.5 159 | self.fg_threshold = 0.5 160 | self.bg_threshold_high = 0.5 161 | self.bg_threshold_low = 0.0 162 | self.class_aware_box = True 163 | 164 | # ------------------------ loss cfg -------------------------- # 165 | self.rpn_smooth_l1_beta = 0 # use L1 loss 166 | self.rcnn_smooth_l1_beta = 0 # use L1 loss 167 | self.num_losses = 5 168 | 169 | # ------------------------ training cfg ---------------------- # 170 | self.train_image_short_size = (640, 672, 704, 736, 768, 800) 171 | self.train_image_max_size = 1333 172 | self.train_prev_nms_top_n = 2000 173 | self.train_post_nms_top_n = 1000 174 | 175 | self.basic_lr = 0.02 / 16 # The basic learning rate for single-image 176 | self.momentum = 0.9 177 | self.weight_decay = 1e-4 178 | self.log_interval = 20 179 | self.nr_images_epoch = 80000 180 | self.max_epoch = 54 181 | self.warm_iters = 500 182 | self.lr_decay_rate = 0.1 183 | self.lr_decay_stages = [42, 50] 184 | 185 | # ------------------------ testing cfg ----------------------- # 186 | self.test_image_short_size = 800 187 | 
self.test_image_max_size = 1333 188 | self.test_prev_nms_top_n = 1000 189 | self.test_post_nms_top_n = 1000 190 | self.test_max_boxes_per_image = 100 191 | self.test_vis_threshold = 0.3 192 | self.test_cls_threshold = 0.05 193 | self.test_nms = 0.5 194 | -------------------------------------------------------------------------------- /megengine_release/requirements.txt: -------------------------------------------------------------------------------- 1 | megengine 2 | numpy==1.19.5 3 | opencv-python==4.5.3.56 4 | tqdm==4.62.3 5 | tabulate==0.8.9 6 | -------------------------------------------------------------------------------- /megengine_release/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import argparse 10 | import json 11 | import os 12 | from tqdm import tqdm 13 | 14 | import megengine as mge 15 | import megengine.distributed as dist 16 | from megengine.data import DataLoader 17 | 18 | from layers.tools.data_mapper import data_mapper 19 | from layers.tools.utils import DetEvaluator, InferenceSampler, import_from_file 20 | 21 | logger = mge.get_logger(__name__) 22 | logger.setLevel("INFO") 23 | mge.device.set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4.0) 24 | 25 | 26 | def make_parser(): 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument( 29 | "-f", "--file", default="net.py", type=str, help="net description file" 30 | ) 31 | parser.add_argument( 32 | "-w", "--weight_file", default=None, type=str, help="weights file", 33 | ) 34 | parser.add_argument( 35 | "-n", "--devices", default=1, type=int, help="total number of gpus for testing", 36 | ) 37 | parser.add_argument( 38 | "-d", "--dataset_dir", default="/data/datasets", type=str, 39 | ) 40 | parser.add_argument("-se", "--start_epoch", default=-1, type=int) 41 | parser.add_argument("-ee", "--end_epoch", default=-1, type=int) 42 | return parser 43 | 44 | 45 | def main(): 46 | # pylint: disable=import-outside-toplevel,too-many-branches,too-many-statements 47 | from pycocotools.coco import COCO 48 | from pycocotools.cocoeval import COCOeval 49 | 50 | parser = make_parser() 51 | args = parser.parse_args() 52 | 53 | current_network = import_from_file(args.file) 54 | cfg = current_network.Cfg() 55 | 56 | if args.weight_file: 57 | args.start_epoch = args.end_epoch = -1 58 | else: 59 | if args.start_epoch == -1: 60 | args.start_epoch = cfg.max_epoch - 1 61 | if args.end_epoch == -1: 62 | args.end_epoch = args.start_epoch 63 | assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch 64 | 65 | for epoch_num in range(args.start_epoch, args.end_epoch + 1): 66 | if args.weight_file: 67 | weight_file = args.weight_file 68 | else: 69 | weight_file = "log-of-{}/epoch_{}.pkl".format( 70 | os.path.basename(args.file).split(".")[0], epoch_num 71 | ) 72 | 73 | if args.devices > 1: 74 | dist_worker = dist.launcher(n_gpus=args.devices)(worker) 75 | result_list = dist_worker(current_network, weight_file, args.dataset_dir) 76 | result_list = sum(result_list, []) 77 | else: 78 | result_list = worker(current_network, weight_file, args.dataset_dir) 79 | 80 | all_results = 
DetEvaluator.format(result_list, cfg) 81 | if args.weight_file: 82 | json_path = "{}_{}.json".format( 83 | os.path.basename(args.file).split(".")[0], 84 | os.path.basename(args.weight_file).split(".")[0], 85 | ) 86 | else: 87 | json_path = "log-of-{}/epoch_{}.json".format( 88 | os.path.basename(args.file).split(".")[0], epoch_num 89 | ) 90 | all_results = json.dumps(all_results) 91 | 92 | with open(json_path, "w") as fo: 93 | fo.write(all_results) 94 | logger.info("Saved results to %s, starting evaluation!", json_path) 95 | 96 | eval_gt = COCO( 97 | os.path.join( 98 | args.dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"] 99 | ) 100 | ) 101 | eval_dt = eval_gt.loadRes(json_path) 102 | cocoEval = COCOeval(eval_gt, eval_dt, iouType="bbox") 103 | cocoEval.evaluate() 104 | cocoEval.accumulate() 105 | cocoEval.summarize() 106 | metrics = [ 107 | "AP", 108 | "AP@0.5", 109 | "AP@0.75", 110 | "APs", 111 | "APm", 112 | "APl", 113 | "AR@1", 114 | "AR@10", 115 | "AR@100", 116 | "ARs", 117 | "ARm", 118 | "ARl", 119 | ] 120 | logger.info("mmAP".center(32, "-")) 121 | for i, m in enumerate(metrics): 122 | logger.info("|\t%s\t|\t%.03f\t|", m, cocoEval.stats[i]) 123 | logger.info("-" * 32) 124 | 125 | 126 | def worker(current_network, weight_file, dataset_dir): 127 | cfg = current_network.Cfg() 128 | cfg.backbone_pretrained = False 129 | 130 | model = current_network.Net(cfg) 131 | model.eval() 132 | 133 | state_dict = mge.load(weight_file) 134 | if "state_dict" in state_dict: 135 | state_dict = state_dict["state_dict"] 136 | model.load_state_dict(state_dict) 137 | 138 | evaluator = DetEvaluator(model) 139 | 140 | test_loader = build_dataloader(dataset_dir, model.cfg) 141 | if dist.get_rank() == 0: 142 | test_loader = tqdm(test_loader) 143 | 144 | result_list = [] 145 | for data in test_loader: 146 | image, im_info = DetEvaluator.process_inputs( 147 | data[0][0], 148 | model.cfg.test_image_short_size, 149 | model.cfg.test_image_max_size, 150 | ) 151 | pred_res = evaluator.predict( 152 | image=mge.tensor(image), 153 | im_info=mge.tensor(im_info) 154 | ) 155 | result = { 156 | "pred_boxes": pred_res, 157 | "image_id": int(data[1][2][0].split(".")[0].split("_")[-1]), 158 | } 159 | result_list.append(result) 160 | return result_list 161 | 162 | 163 | def build_dataloader(dataset_dir, cfg): 164 | val_dataset = data_mapper[cfg.test_dataset["name"]]( 165 | os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["root"]), 166 | os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]), 167 | order=["image", "info"], 168 | ) 169 | val_sampler = InferenceSampler(val_dataset, 1) 170 | val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2) 171 | return val_dataloader 172 | 173 | 174 | if __name__ == "__main__": 175 | main() 176 | -------------------------------------------------------------------------------- /megengine_release/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
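# Launch sketch (hypothetical paths; the flags are defined in make_parser below):
#   python3 train.py -f configs/faster_rcnn_res50_coco_3x_800size.py -n 8 -b 2 \
#       -d /data/datasets -w /path/to/backbone_weights.pkl
# Note: `-w` only initializes the backbone (bottom_up) weights; see worker().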
9 | import argparse 10 | import bisect 11 | import copy 12 | import os 13 | import time 14 | 15 | import megengine as mge 16 | import megengine.distributed as dist 17 | from megengine.autodiff import GradManager 18 | from megengine.data import DataLoader, Infinite, RandomSampler 19 | from megengine.data import transform as T 20 | from megengine.optimizer import SGD 21 | 22 | from layers.tools.data_mapper import data_mapper 23 | from layers.tools.utils import ( 24 | AverageMeter, 25 | DetectionPadCollator, 26 | GroupedRandomSampler, 27 | get_config_info, 28 | import_from_file 29 | ) 30 | 31 | logger = mge.get_logger(__name__) 32 | logger.setLevel("INFO") 33 | mge.device.set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4.0) 34 | 35 | 36 | def make_parser(): 37 | parser = argparse.ArgumentParser() 38 | parser.add_argument( 39 | "-f", "--file", default="net.py", type=str, help="net description file" 40 | ) 41 | parser.add_argument( 42 | "-w", "--weight_file", default=None, type=str, help="weights file", 43 | ) 44 | parser.add_argument( 45 | "-n", "--devices", default=1, type=int, help="total number of gpus for training", 46 | ) 47 | parser.add_argument( 48 | "-b", "--batch_size", default=2, type=int, help="batch size for training", 49 | ) 50 | parser.add_argument( 51 | "-d", "--dataset_dir", default="/data/datasets", type=str, 52 | ) 53 | 54 | return parser 55 | 56 | 57 | def main(): 58 | parser = make_parser() 59 | args = parser.parse_args() 60 | 61 | # ------------------------ begin training -------------------------- # 62 | logger.info("Device Count = %d", args.devices) 63 | 64 | log_dir = "log-of-{}".format(os.path.basename(args.file).split(".")[0]) 65 | if not os.path.isdir(log_dir): 66 | os.makedirs(log_dir) 67 | 68 | if args.devices > 1: 69 | trainer = dist.launcher(worker, n_gpus=args.devices) 70 | trainer(args) 71 | else: 72 | worker(args) 73 | 74 | 75 | def worker(args): 76 | current_network = import_from_file(args.file) 77 | 78 | model = current_network.Net(current_network.Cfg()) 79 | model.train() 80 | 81 | if dist.get_rank() == 0: 82 | logger.info(get_config_info(model.cfg)) 83 | logger.info(repr(model)) 84 | 85 | params_with_grad = [] 86 | for name, param in model.named_parameters(): 87 | if "bottom_up.conv1" in name and model.cfg.backbone_freeze_at >= 1: 88 | continue 89 | if "bottom_up.layer1" in name and model.cfg.backbone_freeze_at >= 2: 90 | continue 91 | params_with_grad.append(param) 92 | 93 | opt = SGD( 94 | params_with_grad, 95 | lr=model.cfg.basic_lr * args.batch_size * dist.get_world_size(), 96 | momentum=model.cfg.momentum, 97 | weight_decay=model.cfg.weight_decay, 98 | ) 99 | 100 | # print('BASE LR:', model.cfg.basic_lr * args.batch_size * dist.get_world_size()) 101 | 102 | gm = GradManager() 103 | if dist.get_world_size() > 1: 104 | gm.attach( 105 | params_with_grad, 106 | callbacks=[dist.make_allreduce_cb("mean", dist.WORLD)] 107 | ) 108 | else: 109 | gm.attach(params_with_grad) 110 | 111 | if args.weight_file is not None: 112 | weights = mge.load(args.weight_file) 113 | model.backbone.bottom_up.load_state_dict(weights, strict=False) 114 | if dist.get_world_size() > 1: 115 | dist.bcast_list_(model.parameters()) # sync parameters 116 | dist.bcast_list_(model.buffers()) # sync buffers 117 | 118 | if dist.get_rank() == 0: 119 | logger.info("Prepare dataset") 120 | train_loader = iter(build_dataloader(args.batch_size, args.dataset_dir, model.cfg)) 121 | 122 | for epoch in range(model.cfg.max_epoch): 123 | train_one_epoch(model, train_loader, opt, gm, epoch, args) 124 
| if dist.get_rank() == 0: 125 | save_path = "log-of-{}/epoch_{}.pkl".format( 126 | os.path.basename(args.file).split(".")[0], epoch 127 | ) 128 | mge.save( 129 | {"epoch": epoch, "state_dict": model.state_dict()}, save_path, 130 | ) 131 | logger.info("dump weights to %s", save_path) 132 | 133 | 134 | def train_one_epoch(model, data_queue, opt, gm, epoch, args): 135 | def train_func(image, im_info, gt_boxes): 136 | with gm: 137 | loss_dict = model(image=image, im_info=im_info, gt_boxes=gt_boxes) 138 | gm.backward(loss_dict["total_loss"]) 139 | loss_list = list(loss_dict.values()) 140 | opt.step().clear_grad() 141 | return loss_list 142 | 143 | meter = AverageMeter(record_len=model.cfg.num_losses) 144 | time_meter = AverageMeter(record_len=2) 145 | log_interval = model.cfg.log_interval 146 | tot_step = model.cfg.nr_images_epoch // (args.batch_size * dist.get_world_size()) 147 | for step in range(tot_step): 148 | adjust_learning_rate(opt, epoch, step, model.cfg, args) 149 | 150 | data_tik = time.time() 151 | mini_batch = next(data_queue) 152 | data_tok = time.time() 153 | 154 | tik = time.time() 155 | loss_list = train_func( 156 | image=mge.tensor(mini_batch["data"]), 157 | im_info=mge.tensor(mini_batch["im_info"]), 158 | gt_boxes=mge.tensor(mini_batch["gt_boxes"]) 159 | ) 160 | tok = time.time() 161 | 162 | time_meter.update([tok - tik, data_tok - data_tik]) 163 | 164 | if dist.get_rank() == 0: 165 | info_str = "e%d, %d/%d, lr:%f, " 166 | loss_str = ", ".join( 167 | ["{}:%f".format(loss) for loss in model.cfg.losses_keys] 168 | ) 169 | time_str = ", train_time:%.3fs, data_time:%.3fs" 170 | log_info_str = info_str + loss_str + time_str 171 | meter.update([loss.numpy() for loss in loss_list]) 172 | if step % log_interval == 0: 173 | logger.info( 174 | log_info_str, 175 | epoch, 176 | step, 177 | tot_step, 178 | opt.param_groups[0]["lr"], 179 | *meter.average(), 180 | *time_meter.average() 181 | ) 182 | meter.reset() 183 | time_meter.reset() 184 | 185 | 186 | def adjust_learning_rate(optimizer, epoch, step, cfg, args): 187 | base_lr = ( 188 | cfg.basic_lr * dist.get_world_size() * args.batch_size * ( 189 | cfg.lr_decay_rate 190 | ** bisect.bisect_right(cfg.lr_decay_stages, epoch) 191 | ) 192 | ) 193 | # print('UPDATE LR:', base_lr) 194 | # Warm up 195 | lr_factor = 1.0 196 | if epoch == 0 and step < cfg.warm_iters: 197 | lr_factor = (step + 1.0) / cfg.warm_iters 198 | for param_group in optimizer.param_groups: 199 | param_group["lr"] = base_lr * lr_factor 200 | 201 | 202 | def build_dataset(dataset_dir, cfg): 203 | data_cfg = copy.deepcopy(cfg.train_dataset) 204 | data_name = data_cfg.pop("name") 205 | 206 | data_cfg["root"] = os.path.join(dataset_dir, data_name, data_cfg["root"]) 207 | 208 | if "ann_file" in data_cfg: 209 | data_cfg["ann_file"] = os.path.join(dataset_dir, data_name, data_cfg["ann_file"]) 210 | 211 | data_cfg["order"] = ["image", "boxes", "boxes_category", "info"] 212 | 213 | return data_mapper[data_name](**data_cfg) 214 | 215 | 216 | # pylint: disable=dangerous-default-value 217 | def build_sampler(train_dataset, batch_size, aspect_grouping=[1]): 218 | def _compute_aspect_ratios(dataset): 219 | aspect_ratios = [] 220 | for i in range(len(dataset)): 221 | info = dataset.get_img_info(i) 222 | aspect_ratios.append(info["height"] / info["width"]) 223 | return aspect_ratios 224 | 225 | def _quantize(x, bins): 226 | return list(map(lambda y: bisect.bisect_right(sorted(bins), y), x)) 227 | 228 | if len(aspect_grouping) == 0: 229 | return Infinite(RandomSampler(train_dataset, 
batch_size, drop_last=True)) 230 | 231 | aspect_ratios = _compute_aspect_ratios(train_dataset) 232 | group_ids = _quantize(aspect_ratios, aspect_grouping) 233 | return Infinite(GroupedRandomSampler(train_dataset, batch_size, group_ids)) 234 | 235 | 236 | def build_dataloader(batch_size, dataset_dir, cfg): 237 | train_dataset = build_dataset(dataset_dir, cfg) 238 | train_sampler = build_sampler(train_dataset, batch_size) 239 | train_dataloader = DataLoader( 240 | train_dataset, 241 | sampler=train_sampler, 242 | transform=T.Compose( 243 | transforms=[ 244 | T.ShortestEdgeResize( 245 | cfg.train_image_short_size, 246 | cfg.train_image_max_size, 247 | sample_style="choice", 248 | ), 249 | T.RandomHorizontalFlip(), 250 | T.ToMode(), 251 | ], 252 | order=["image", "boxes", "boxes_category"], 253 | ), 254 | collator=DetectionPadCollator(), 255 | num_workers=2, 256 | ) 257 | return train_dataloader 258 | 259 | 260 | if __name__ == "__main__": 261 | main() 262 | -------------------------------------------------------------------------------- /pytorch_release/README.md: -------------------------------------------------------------------------------- 1 | # Instance-Conditional Knowledge Distillation for Object Detection 2 | This is an official implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection" in [Pytorch](https://pytorch.org); it supports various detectors from Detectron2 and AdelaiDet. 3 | 4 | 5 | # Requirements 6 | The project depends on the following libraries. You may need to install Detectron2 and AdelaiDet manually; please refer to their GitHub pages. 7 | - Python3 (3.8 recommended) 8 | - pytorch == 1.9.0 9 | - torchvision == 0.10.0 10 | - opencv-python == 4.5.4.58 11 | - [Detectron2](https://github.com/facebookresearch/detectron2) == 0.5.0 12 | - [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) == 7bf9d87 13 | 14 | (To avoid conflicts, we recommend using exactly the versions listed above.) 15 | 16 | Reference command for installation: 17 | ``` 18 | # Switch to this directory (and maybe create a virtual environment) 19 | pip install pip --upgrade 20 | pip install -r requirements.txt 21 | pip install https://github.com/facebookresearch/detectron2/archive/refs/tags/v0.5.tar.gz 22 | pip install 'git+https://github.com/aim-uofa/AdelaiDet.git@7bf9d87' 23 | ``` 24 | 25 | You will also need to prepare datasets according to [detectron2](https://github.com/facebookresearch/detectron2/tree/main/datasets), put your data under the following structure, and set the environment variable by `export DETECTRON2_DATASETS=/path/to/datasets`. 26 | ``` 27 | $DETECTRON2_DATASETS/ 28 | coco/ 29 | annotations/ 30 | instances_{train,val}2017.json 31 | {train,val}2017/ 32 | # image files 33 | ``` 34 | 35 | # Usage 36 | ## Train baseline models 37 | We use [train_baseline.py](./train_baseline.py) to train baseline models; it is very similar to [tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/main/tools/train_net.py). 38 | 39 | You can use any config file for detectron2 or AdelaiDet to specify a training setting. 40 | ``` 41 | usage: train_baseline.py [-h] [--config-file FILE] [--resume] [--eval-only] 42 | [--num-gpus NUM_GPUS] [--num-machines NUM_MACHINES] 43 | [--machine-rank MACHINE_RANK] [--dist-url DIST_URL] 44 | ... 45 | 46 | positional arguments: 47 | opts Modify config options at the end of the command. For 48 | Yacs configs, use space-separated "PATH.KEY VALUE" 49 | pairs. For python-based LazyConfig, use 50 | "path.key=value". 
51 | 52 | optional arguments: 53 | -h, --help show this help message and exit 54 | --config-file FILE path to config file 55 | --resume Whether to attempt to resume from the checkpoint 56 | directory. See documentation of 57 | `DefaultTrainer.resume_or_load()` for what it means. 58 | --eval-only perform evaluation only 59 | --num-gpus NUM_GPUS number of gpus *per machine* 60 | --num-machines NUM_MACHINES 61 | total number of machines 62 | --machine-rank MACHINE_RANK 63 | the rank of this machine (unique per machine) 64 | --dist-url DIST_URL initialization URL for pytorch distributed backend. 65 | See https://pytorch.org/docs/stable/distributed.html 66 | for details. 67 | ``` 68 | ### Examples: 69 | 70 | Train a RetinaNet baseline detector on a single machine: 71 | 72 | ``` 73 | train_baseline.py --num-gpus 8 --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml 74 | ``` 75 | 76 | Change some config options: 77 | 78 | ``` 79 | train_baseline.py --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 80 | ``` 81 | 82 | Run on multiple machines: 83 | ``` 84 | (machine0)$ train_baseline.py --machine-rank 0 --num-machines 2 --dist-url [--other-flags] 85 | (machine1)$ train_baseline.py --machine-rank 1 --num-machines 2 --dist-url [--other-flags] 86 | ``` 87 | 88 | ## Train and distill models 89 | We leave everything the same as above, except the entry point ([train_distill.py](./train_distill.py)) and the config. 90 | 91 | ### Examples: 92 | 93 | Train RetinaNet with distillation: 94 | 95 | ``` 96 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_retinanet 97 | ``` 98 | 99 | Train Faster R-CNN with distillation: 100 | 101 | ``` 102 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/RCNN_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_frcnn 103 | ``` 104 | 105 | Train CondInst with distillation: 106 | 107 | ``` 108 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/CondInst_R50_R101_icd.yaml OUTPUT_DIR output/icd_condinst 109 | ``` 110 | 111 | ### Write distillation configs: 112 | To show how to write a config for distillation, let's look at two examples: 113 | 114 | **If the teacher model is officially released by detectron2:** 115 | 116 | You can load the checkpoint through the detectron2 model_zoo API: set `MODEL_LOAD_OFFICIAL=True` and use the corresponding config file. You may also set `WEIGHT_VALUE` to the desired number. 117 | 118 | ``` 119 | MODEL: 120 | DISTILLER: 121 | MODEL_LOAD_OFFICIAL: True 122 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/retinanet_R_101_FPN_3x.yaml' 123 | 124 | INS_ATT_MIMIC: 125 | WEIGHT_VALUE: 8.0 126 | ``` 127 | 128 | Note: it also supports configs from the detectron2 new baselines, like [LSJ (large scale jitter) models](https://github.com/facebookresearch/detectron2/blob/main/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py), which could be helpful in practice. 129 | 130 | 131 | **If you want to use a standalone teacher trained by yourself:** 132 | 133 | If you train a teacher yourself, you may need to define a standalone config for the teacher. Set `MODEL_LOAD_OFFICIAL=False` and use a standalone config file. 
134 | 135 | ``` 136 | MODEL: 137 | DISTILLER: 138 | MODEL_LOAD_OFFICIAL: False 139 | MODEL_DISTILLER_CONFIG: 'Teachers/SOLOv2_R101_3x_ms.yaml' 140 | 141 | INS_ATT_MIMIC: 142 | WEIGHT_VALUE: 8.0 143 | ``` 144 | 145 | For the teacher's config, simply set the pretrained weights to a checkpoint file: 146 | ``` 147 | _BASE_: "../Base-SOLOv2.yaml" 148 | MODEL: 149 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9w7b3sjaXvqYQEQ" 150 | # This is the official release from AdelaiDet. 151 | RESNETS: 152 | DEPTH: 101 153 | ``` 154 | 155 | You can find more options in [utils/build.py](utils/build.py). 156 | 157 | # Results 158 | For object detection in MS-COCO: 159 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | 160 | | --- | :---: | :---: | 161 | | Faster R-CNN | 37.9 | 40.9 (+3.0) | 162 | | RetinaNet | 37.4 | 40.7 (+3.3) | 163 | | FCOS | 39.4 | 42.9 (+3.5) | 164 | 165 | For instance segmentation in MS-COCO: 166 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | Baseline (MaskAP) | + Ours (MaskAP) | 167 | | --- | :---: | :---: | :---: | :---: | 168 | | Mask R-CNN | 38.6 | 41.2 (+2.6) | 35.2 | 37.4 (+2.2) | 169 | | SOLOv2 | - | - | 34.6 | 38.5 (+3.9) | 170 | | CondInst | 39.7 | 43.7 (+4.0) | 35.7 | 39.1 (+3.4) | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-CondInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CondInst" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | FCOS: 13 | THRESH_WITH_CTR: True 14 | USE_SCALE: True 15 | CONDINST: 16 | MAX_PROPOSALS: 500 17 | DATASETS: 18 | TRAIN: ("coco_2017_train",) 19 | TEST: ("coco_2017_val",) 20 | SOLVER: 21 | IMS_PER_BATCH: 16 22 | BASE_LR: 0.01 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | INPUT: 26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -------------------------------------------------------------------------------- /pytorch_release/configs/Base-FCOS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "FCOS" 11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -------------------------------------------------------------------------------- /pytorch_release/configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 19 | -------------------------------------------------------------------------------- 
/pytorch_release/configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each input feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all input feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 10000 41 | TEST: 42 | EVAL_PERIOD: 10000 43 | INPUT: 44 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 45 | VERSION: 2 46 | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | CHECKPOINT_PERIOD: 10000 24 | INPUT: 25 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 26 | VERSION: 2 27 | TEST: 28 | EVAL_PERIOD: 10000 29 | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-SOLOv2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SOLOv2" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | TEST: ("coco_2017_val",) 13 | SOLVER: 14 | IMS_PER_BATCH: 16 15 | BASE_LR: 0.01 16 | WARMUP_FACTOR: 0.01 17 | WARMUP_ITERS: 1000 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | MASK_FORMAT: "bitmask" 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/FCOS_R_101_DCN_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: FCOSBase 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 5 | RESNETS: 6 | DEPTH: 101 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] 8 | DEFORM_MODULATED: True 9 | SOLVER: 10 | STEPS: (120000, 160000) 11 | MAX_ITER: 180000 12 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/FCOS_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: FCOS 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (60000, 80000) 9 | MAX_ITER: 90000 10 | 
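A side note on Base-RetinaNet.yaml above: its `ANCHOR_GENERATOR.SIZES` entry is a YAML `eval` tag rather than a literal list. A quick sketch of the concrete anchor sizes it expands to (three scales per octave, one octave per FPN level):

```
# Expansion of the !!python/object/apply:eval expression in Base-RetinaNet.yaml.
sizes = [[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]]
for row in sizes:
    print([round(s, 1) for s in row])
# [32, 40.3, 50.8]
# [64, 80.6, 101.6]
# [128, 161.3, 203.2]
# [256, 322.5, 406.4]
# [512, 645.1, 812.7]
```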
-------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/FCOS_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: FCOS 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (120000, 160000) 9 | MAX_ITER: 180000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/POTO_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: POTO 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | FCOS: 8 | NMS_THRESH_TEST: 1.0 9 | NMS_TYPE: 'null' 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_152_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-152.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 152 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x_bs8.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | LABEL_ENC: 8 | BYPASS_DISTILL: 80000 9 | 10 | SOLVER: 11 | IMS_PER_BATCH: 8 12 | BASE_LR: 0.01 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (120000, 160000) 9 | MAX_ITER: 180000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_152_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-152.pkl" 4 | RESNETS: 5 | DEPTH: 152 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_1x_bs8.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | 7 | LABEL_ENC: 8 | BYPASS_DISTILL: 80000 9 | 10 | SOLVER: 11 | IMS_PER_BATCH: 8 12 | BASE_LR: 0.005 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_X101_32x8d_FPN_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.train import train 2 | from ..common.optim import SGD as optimizer 3 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 4 | from ..common.data.coco import dataloader 5 | from ..common.models.mask_rcnn_c4 import model 6 | 7 | model.backbone.freeze_at = 2 8 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | model.backbone.bottom_up.freeze_at = 2 8 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | RPN: 8 | BBOX_REG_LOSS_TYPE: "giou" 9 | BBOX_REG_LOSS_WEIGHT: 2.0 10 | ROI_BOX_HEAD: 11 | BBOX_REG_LOSS_TYPE: "giou" 12 | BBOX_REG_LOSS_WEIGHT: 10.0 13 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | 
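The `mask_rcnn_R_50_FPN_1x_giou.yaml` variant above swaps the default smooth-L1 box regression for a generalized-IoU loss via `BBOX_REG_LOSS_TYPE: "giou"`. A minimal reference sketch of that loss for a single pair of `[x1, y1, x2, y2]` boxes (detectron2 ships its own batched implementation; this standalone version is for illustration only and assumes non-degenerate boxes):

```
def giou_loss(a, b):
    # Generalized IoU loss, 1 - GIoU, for two boxes in [x1, y1, x2, y2] form.
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    iou = inter / union
    # Area of the smallest axis-aligned box enclosing both inputs.
    cw = max(ax2, bx2) - min(ax1, bx1)
    ch = max(ay2, by2) - min(ay1, by1)
    giou = iou - (cw * ch - union) / (cw * ch)
    return 1.0 - giou  # ranges over [0, 2]
```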
-------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | 12 | # Replace default ResNet with RegNetX-4GF from the DDS paper. Config source: 13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=23, 19 | w_a=38.65, 20 | w_0=96, 21 | w_m=2.43, 22 | group_width=40, 23 | freeze_at=2, 24 | norm="FrozenBN", 25 | out_features=["s1", "s2", "s3", "s4"], 26 | ) 27 | model.pixel_std = [57.375, 57.120, 58.395] 28 | 29 | optimizer.weight_decay = 5e-5 30 | train.init_checkpoint = ( 31 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth" 32 | ) 33 | # RegNets benefit from enabling cudnn benchmark mode 34 | train.cudnn_benchmark = True 35 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | 12 | # Replace default ResNet with RegNetY-4GF from the DDS paper. 
Config source: 13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=22, 19 | w_a=31.41, 20 | w_0=96, 21 | w_m=2.24, 22 | group_width=64, 23 | se_ratio=0.25, 24 | freeze_at=2, 25 | norm="FrozenBN", 26 | out_features=["s1", "s2", "s3", "s4"], 27 | ) 28 | model.pixel_std = [57.375, 57.120, 58.395] 29 | 30 | optimizer.weight_decay = 5e-5 31 | train.init_checkpoint = ( 32 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth" 33 | ) 34 | # RegNets benefit from enabling cudnn benchmark mode 35 | train.cudnn_benchmark = True 36 | -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/CondInst_R50_R101_icd.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: False 8 | MODEL_DISTILLER_CONFIG: 'Teachers/CondIns_R101_3x_ms.yaml' 9 | 10 | INS_ATT_MIMIC: 11 | WEIGHT_VALUE: 8.0 12 | 13 | SOLVER: 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/FCOS_R50_R101_icd.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: False 8 | MODEL_DISTILLER_CONFIG: 'Teachers/FCOS_R101_2x_ms.yaml' 9 | # NOTE: FCOS only releases a 2x model; we use a 3x model trained by ourselves for the results reported in the paper.
10 | 11 | INS_ATT_MIMIC: 12 | WEIGHT_VALUE: 8.0 13 | 14 | SOLVER: 15 | STEPS: (60000, 80000) 16 | MAX_ITER: 90000 17 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/MaskRCNN_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DISTILLER: 8 | MODEL_LOAD_OFFICIAL: True 9 | MODEL_DISTILLER_CONFIG: 'COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml' 10 | 11 | INS: 12 | INPUT_FEATS: ['p2', 'p3', 'p4', 'p5', 'p6'] 13 | MAX_LABELS: 100 14 | 15 | INS_ATT_MIMIC: 16 | WEIGHT_VALUE: 3.0 17 | 18 | SOLVER: 19 | STEPS: (60000, 80000) 20 | MAX_ITER: 90000 21 | CLIP_GRADIENTS: {"ENABLED": True} 22 | -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/RCNN_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: True 8 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml' 9 | 10 | INS: 11 | INPUT_FEATS: ['p2', 'p3', 'p4', 'p5', 'p6'] 12 | MAX_LABELS: 100 13 | 14 | INS_ATT_MIMIC: 15 | WEIGHT_VALUE: 3.0 16 | 17 | SOLVER: 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | CLIP_GRADIENTS: {"ENABLED": True} 21 | -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/SOLOv2_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: False 8 | MODEL_DISTILLER_CONFIG: 'Teachers/SOLOv2_R101_3x_ms.yaml' 9 | 10 | INS_ATT_MIMIC: 11 | WEIGHT_VALUE: 8.0 12 | 13 | SOLVER: 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: True 8 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/retinanet_R_101_FPN_3x.yaml' 9 | 10 | INS_ATT_MIMIC: 11 | WEIGHT_VALUE: 8.0 12 | 13 | SOLVER: 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Teachers/CondIns_R101_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/M8nNxSR5iNP4qyO/download" 4 | RESNETS: 5 | DEPTH: 101 6 | 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/Teachers/FCOS_R101_2x_ms.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
"../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/M3UOT6JcyHy2QW1/download" 4 | RESNETS: 5 | DEPTH: 101 6 | 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/Teachers/SOLOv2_R101_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9w7b3sjaXvqYQEQ" 4 | RESNETS: 5 | DEPTH: 101 6 | 7 | -------------------------------------------------------------------------------- /pytorch_release/models/distiller.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | import torch.nn.functional as F 6 | from .utils import * 7 | 8 | 9 | from detectron2.utils.registry import Registry 10 | 11 | DISTILLER_REGISTRY = Registry("DISTILLER") # noqa F401 isort:skip 12 | DISTILLER_REGISTRY.__doc__ = """ 13 | Registry for meta-architectures, i.e. the whole model. 14 | 15 | The registered object will be called with `obj(cfg)` 16 | and expected to return a `nn.Module` object. 17 | """ 18 | 19 | 20 | def build_distiller(cfg, name, student, teacher): 21 | """ 22 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 23 | Note that it does not load any weights from ``cfg``. 24 | """ 25 | model = DISTILLER_REGISTRY.get(name)(cfg, student, teacher) 26 | model.to(torch.device(cfg.MODEL.DEVICE)) 27 | return model 28 | 29 | 30 | @DISTILLER_REGISTRY.register() 31 | class InstanceConditionalDistillation(nn.Module): 32 | """ 33 | Distillation with multi-head attention. Mimic attention and features. 34 | """ 35 | 36 | def __init__(self, cfg, student, teacher) -> None: 37 | super().__init__() 38 | self.cfg = cfg 39 | self.student = [student] 40 | 41 | self.cfg = cfg 42 | hidden_dim = cfg.MODEL.DISTILLER.INS.HIDDEN_DIM 43 | 44 | self.pos_embedding = PositionEmbeddingSine( 45 | hidden_dim // 2, normalize=True) 46 | 47 | self.teacher_ptr = [teacher] 48 | self.attention_module = build_decoder_module( 49 | cfg) 50 | 51 | self.feat_keys = cfg.MODEL.DISTILLER.INS.INPUT_FEATS 52 | 53 | self.weight_value = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.WEIGHT_VALUE 54 | self.temp_value = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_VALUE 55 | if self.temp_value < 0: 56 | self.temp_value = nn.Parameter(torch.ones([1]).mean()) 57 | 58 | self.distill_norm_type = cfg.MODEL.DISTILLER.INS.DISTILL_NORM 59 | 60 | self.distill_negative = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.DISTILL_NEGATIVE 61 | self.use_pos_embds = cfg.MODEL.DISTILLER.INS.USE_POS_EMBEDDING 62 | 63 | self.predictor = MLP(hidden_dim, hidden_dim, 1, 3) 64 | 65 | if self.distill_norm_type == 'ln': 66 | self.distill_norm_ = nn.LayerNorm( 67 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False) 68 | self.distill_norm_tea = nn.LayerNorm( 69 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False) 70 | elif self.distill_norm_type == 'tln': 71 | self.distill_norm_ = nn.Sequential() 72 | self.distill_norm_tea = nn.LayerNorm( 73 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False) 74 | else: 75 | self.distill_norm_ = nn.Sequential() 76 | self.distill_norm_tea = nn.Sequential() 77 | 78 | self.loss_form = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.LOSS_FORM 79 | 80 | def concate_multiscale_reps(self, feat, pos_emb, mask): 81 | # permute and concate features form multiscale to a 
tensor in the transformer layout 82 | keys = self.feat_keys 83 | 84 | feat = torch.cat([feat[k].flatten(2).permute(2, 0, 1) 85 | for k in keys], 0)  # S, N, C 86 | pos_emb = torch.cat([pos_emb[k].flatten(2).permute( 87 | 2, 0, 1) for k in keys], 0)  # S, N, C 88 | mask = torch.cat([mask[k].flatten(2).squeeze(1) 89 | for k in keys], 1)  # N, S 90 | return feat, pos_emb, mask 91 | 92 | def bce_identification_loss(self, feat_list, ins_mask, ins_mask_gt): 93 | # identification loss: predicts whether a given instance is real or fake 94 | positive_mask = (~ins_mask).float() 95 | 96 | loss_dict = {} 97 | for i, dfeat in enumerate(feat_list): 98 | f_pre = self.predictor(dfeat) 99 | 100 | loss = (F.binary_cross_entropy_with_logits(f_pre.squeeze(-1).T, ins_mask_gt, reduction='none') * 101 | positive_mask).sum() / positive_mask.sum() 102 | 103 | loss_dict['stu_bce.%s.loss' % i] = loss 104 | 105 | return loss_dict 106 | 107 | def mimic_loss(self, svalue, tvalue, value_mask): 108 | # value: num_seq, bsz, heads, channel 109 | # mask: [bsz, heads, 1, Seq] 110 | #value_mask = value_mask ** self.power_factor 111 | if self.loss_form in ['mse', 'MSE']: 112 | return ((F.mse_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 113 | * value_mask).sum(-1) / value_mask.sum(-1).clamp(min=1e-6)).mean() 114 | elif self.loss_form in ['l1', 'L1']: 115 | return (F.l1_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 116 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6) 117 | elif self.loss_form in ['smoothL1']: 118 | return (F.smooth_l1_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 119 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6) 120 | elif self.loss_form in ['L2', 'l2']: 121 | return ((F.mse_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 122 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6)) ** 0.5 123 | 124 | def forward(self, features_dict, features_dict_tea): 125 | if isinstance(self.temp_value, nn.Parameter): 126 | self.temp_value.data = self.temp_value.data.clamp(min=0.1, max=8) 127 | else: 128 | if self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_DECAY: 129 | decay_to = self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_DECAY_TO 130 | ratio = features_dict['iteration'] / self.cfg.SOLVER.MAX_ITER 131 | self.temp_value = ratio * decay_to + \ 132 | (1 - ratio) * self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_VALUE 133 | 134 | images = features_dict['images'] 135 | batched_inputs = features_dict['batched_inputs'] 136 | fpn_outputs = features_dict['fpn_feat'] 137 | 138 | # assert set(self.feat_keys) == set(list(fpn_outputs.keys( 139 | # ))), 'WARNING: Unequal keys for fpn and attention !
<%s> != <%s>' % (self.feat_keys, fpn_outputs.keys()) 140 | 141 | if features_dict['distill_flag'] == 0: 142 | fpn_outputs = {k: v.detach() for k, v in fpn_outputs.items()} 143 | 144 | # mask_out: zero for foreground, one for bg: BoolTensor(N, 1, H, W) 145 | mask_out = mask_out_padding(fpn_outputs, images) 146 | 147 | # fpn_outputs = self.scale_adapter(fpn_outputs) 148 | pos_embs = {k: self.pos_embedding( 149 | fpn_outputs[k], mask_out[k]) for k in self.feat_keys} 150 | # feat, pos: [S, N, C]; mask: [N, S] 151 | feat, pos_embs, mask_padding = self.concate_multiscale_reps( 152 | fpn_outputs, pos_embs, mask_out) 153 | 154 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K] 155 | # NOTE: (0 for Fake Instance) in ins_mask 156 | ins_feat, ins_mask, ins_mask_gt = features_dict_tea['aux_feat']['encoded_ins'] 157 | ins_feat = ins_feat.detach() 158 | 159 | if self.distill_negative: 160 | ins_mask_gt = (~ins_mask).detach().float() 161 | max_ele = None  # slice to the last element 162 | else: 163 | # calculate an element mask to reduce unnecessary computation 164 | max_ele = ins_mask_gt.long().sum(-1).max().item() 165 | 166 | # Note that mask is not normalized by softmax 167 | 168 | decoded_feat_list, att_mask_list, value_list = self.attention_module( 169 | ins_feat[:max_ele, :, :], feat, feat, query_mask=ins_mask[:, :max_ele], key_padding_mask=mask_padding, pos_embedding=pos_embs, proj_only=True) 170 | 171 | decoded_value_tea = features_dict_tea['aux_feat']['decoded_value'] 172 | decoded_mask_tea = features_dict_tea['aux_feat']['decoded_mask'] 173 | 174 | loss_value = torch.tensor([0.0], device=ins_mask_gt.device).mean() 175 | for i, (tmask, svalue, tvalue) in enumerate(zip(decoded_mask_tea, value_list, decoded_value_tea)): 176 | tmask = tmask.detach()  # bsz, heads, num_ins, num_seq 177 | 178 | # num_seq, bsz, heads, channel 179 | tvalue = self.distill_norm_tea(tvalue) 180 | tvalue = tvalue.detach() 181 | 182 | if self.weight_value > 0: 183 | with torch.no_grad(): 184 | value_mask = ((tmask / self.temp_value).softmax(-1) * 185 | ins_mask_gt.unsqueeze(1).unsqueeze(-1)).sum(2, keepdim=True) 186 | # [bsz, heads, ins, Seq] 187 | 188 | svalue = self.distill_norm_(svalue) 189 | loss_value += self.mimic_loss(svalue, 190 | tvalue, value_mask) * self.weight_value 191 | 192 | loss_dict = { 193 | 'matt.value': loss_value / len(decoded_feat_list), 194 | } 195 | 196 | if isinstance(self.temp_value, nn.Parameter): 197 | loss_dict['temp.value'] = self.temp_value.detach() 198 | 199 | return loss_dict 200 | -------------------------------------------------------------------------------- /pytorch_release/models/models.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | import torch 3 | from torch import nn 4 | 5 | from .utils import * 6 | from .distiller import build_distiller 7 | from .teacher import build_teacher 8 | 9 | from detectron2.utils.events import EventWriter, get_event_storage 10 | 11 | 12 | class Distillator(nn.Module): 13 | def __init__(self, cfg, student) -> None: 14 | super().__init__() 15 | self.cfg = cfg 16 | self.student_buffer = [student]  # as a pointer, not a registered submodule 17 | 18 | self.teacher = build_teacher(cfg, student) 19 | 20 | distillers = [] 21 | for dis_name in cfg.MODEL.DISTILLER.TYPES: 22 | distillers.append(build_distiller( 23 | cfg, dis_name, student, self.teacher)) 24 | 25 | self.distillers = nn.ModuleList(distillers) 26 | 27 | self.register_buffer(
28 | "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) 29 | self.register_buffer( 30 | "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) 31 | 32 | def forward(self, raw_output, forward_only=False, teacher_only=False): 33 | ''' 34 | Input: 35 | batched_inputs, images, r_features, features, gts 36 | Output: 37 | losses_tea : loss dict 38 | r_features_tea : features from backbone 39 | features_tea : features from FPN 40 | ''' 41 | if teacher_only: 42 | loss_dict, _ = self.teacher(raw_output, None, None, None) 43 | return loss_dict 44 | 45 | r_feats = raw_output['backbone_feat'] 46 | fpn_feats = raw_output['fpn_feat'] 47 | batched_inputs = raw_output['batched_inputs'] 48 | images = raw_output['images'] 49 | iteration = raw_output['iteration'] 50 | 51 | if iteration < self.cfg.MODEL.DISTILLER.BYPASS_DISTILL or iteration > self.cfg.MODEL.DISTILLER.BYPASS_DISTILL_AFTER: 52 | distill_flag = self.cfg.MODEL.DISTILLER.DISTILL_OFF 53 | else: 54 | distill_flag = self.cfg.MODEL.DISTILLER.DISTILL_ON 55 | 56 | raw_output['distill_flag'] = distill_flag 57 | 58 | storage = get_event_storage() 59 | storage.put_scalar('distill_flag', distill_flag, False) 60 | 61 | if forward_only: 62 | with torch.no_grad(): 63 | loss_dict, feat_dict_tea = self.teacher( 64 | batched_inputs, images, r_feats, fpn_feats) 65 | else: 66 | loss_dict, feat_dict_tea = self.teacher( 67 | batched_inputs, images, r_feats, fpn_feats) 68 | 69 | for i, distiller in enumerate(self.distillers): 70 | loss_d = distiller(raw_output, feat_dict_tea) 71 | loss_d = {'distill.%s.%s' % (i, k): v for k, v in loss_d.items()} 72 | loss_dict.update(loss_d) 73 | 74 | return loss_dict 75 | -------------------------------------------------------------------------------- /pytorch_release/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.9.0 2 | torchvision==0.10.0 3 | opencv-python==4.5.4.58 --------------------------------------------------------------------------------