├── .gitignore
├── LICENSE
├── Poster.png
├── README.md
├── megengine_release
│   ├── README.md
│   ├── configs
│   │   ├── __init__.py
│   │   ├── atss_res101_coco_3x_800size.py
│   │   ├── atss_res18_coco_3x_800size.py
│   │   ├── atss_res34_coco_3x_800size.py
│   │   ├── atss_res50_coco_3x_800size.py
│   │   ├── atss_resx101_coco_2x_800size.py
│   │   ├── faster_rcnn_res101_coco_3x_800size.py
│   │   ├── faster_rcnn_res18_coco_3x_800size.py
│   │   ├── faster_rcnn_res34_coco_3x_800size.py
│   │   ├── faster_rcnn_res50_coco_3x_800size.py
│   │   ├── faster_rcnn_resx101_coco_2x_800size.py
│   │   ├── fcos_res101_coco_3x_800size.py
│   │   ├── fcos_res18_coco_3x_800size.py
│   │   ├── fcos_res34_coco_3x_800size.py
│   │   ├── fcos_res50_coco_3x_800size.py
│   │   ├── fcos_resx101_coco_2x_800size.py
│   │   ├── freeanchor_res101_coco_3x_800size.py
│   │   ├── freeanchor_res18_coco_3x_800size.py
│   │   ├── freeanchor_res34_coco_3x_800size.py
│   │   ├── freeanchor_res50_coco_3x_800size.py
│   │   ├── freeanchor_resx101_coco_2x_800size.py
│   │   ├── retinanet_res101_coco_3x_800size.py
│   │   ├── retinanet_res18_coco_3x_800size.py
│   │   ├── retinanet_res34_coco_3x_800size.py
│   │   ├── retinanet_res50_coco_3x_800size.py
│   │   └── retinanet_resx101_coco_2x_800size.py
│   ├── distill_configs
│   │   ├── ICD.py
│   │   ├── ICD_rcnn.py
│   │   ├── atss_res50_coco_1x_800size.py
│   │   ├── coco_obj.json
│   │   ├── fcos_res50_coco_1x_800size.py
│   │   └── retinanet_res50_coco_1x_800size.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── basic
│   │   │   ├── __init__.py
│   │   │   ├── functional.py
│   │   │   ├── nn.py
│   │   │   └── norm.py
│   │   ├── det
│   │   │   ├── __init__.py
│   │   │   ├── anchor.py
│   │   │   ├── box_head.py
│   │   │   ├── box_utils.py
│   │   │   ├── fpn.py
│   │   │   ├── loss.py
│   │   │   ├── matcher.py
│   │   │   ├── point_head.py
│   │   │   ├── pooler.py
│   │   │   ├── rcnn.py
│   │   │   ├── rpn.py
│   │   │   └── sampling.py
│   │   └── tools
│   │       ├── __init__.py
│   │       ├── data_mapper.py
│   │       ├── inference.py
│   │       ├── nms.py
│   │       └── utils.py
│   ├── models
│   │   ├── ICD
│   │   │   ├── ICD.py
│   │   │   ├── __init__.py
│   │   │   ├── decoder.py
│   │   │   ├── encoder.py
│   │   │   ├── layers.py
│   │   │   ├── transformer.py
│   │   │   └── utility.py
│   │   ├── __init__.py
│   │   ├── atss.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   └── resnet
│   │   │       ├── __init__.py
│   │   │       └── model.py
│   │   ├── faster_rcnn.py
│   │   ├── fcos.py
│   │   ├── freeanchor.py
│   │   └── retinanet.py
│   ├── requirements.txt
│   ├── test.py
│   ├── train.py
│   └── train_distill_icd.py
└── pytorch_release
    ├── README.md
    ├── configs
    │   ├── Base-CondInst.yaml
    │   ├── Base-FCOS.yaml
    │   ├── Base-RCNN-C4.yaml
    │   ├── Base-RCNN-DilatedC5.yaml
    │   ├── Base-RCNN-FPN.yaml
    │   ├── Base-RetinaNet.yaml
    │   ├── Base-SOLOv2.yaml
    │   ├── COCO-Detection
    │   │   ├── FCOS_R_101_DCN_FPN_2x.yaml
    │   │   ├── FCOS_R_50_FPN_1x.yaml
    │   │   ├── FCOS_R_50_FPN_2x.yaml
    │   │   ├── POTO_R_50_FPN_2x.yaml
    │   │   ├── fast_rcnn_R_50_FPN_1x.yaml
    │   │   ├── faster_rcnn_R_101_C4_3x.yaml
    │   │   ├── faster_rcnn_R_101_DC5_3x.yaml
    │   │   ├── faster_rcnn_R_101_FPN_3x.yaml
    │   │   ├── faster_rcnn_R_152_FPN_3x.yaml
    │   │   ├── faster_rcnn_R_50_C4_1x.yaml
    │   │   ├── faster_rcnn_R_50_C4_3x.yaml
    │   │   ├── faster_rcnn_R_50_DC5_1x.yaml
    │   │   ├── faster_rcnn_R_50_DC5_3x.yaml
    │   │   ├── faster_rcnn_R_50_FPN_1x.yaml
    │   │   ├── faster_rcnn_R_50_FPN_1x_bs8.yaml
    │   │   ├── faster_rcnn_R_50_FPN_2x.yaml
    │   │   ├── faster_rcnn_R_50_FPN_3x.yaml
    │   │   ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml
    │   │   ├── retinanet_R_101_FPN_3x.yaml
    │   │   ├── retinanet_R_152_FPN_3x.yaml
    │   │   ├── retinanet_R_50_FPN_1x.yaml
    │   │   ├── retinanet_R_50_FPN_1x_bs8.yaml
    │   │   ├── retinanet_R_50_FPN_2x.yaml
    │   │   ├── retinanet_R_50_FPN_3x.yaml
    │   │   ├── retinanet_X101_32x8d_FPN_3x.yaml
    │   │   ├── rpn_R_50_C4_1x.yaml
    │   │   └── rpn_R_50_FPN_1x.yaml
    │   ├── COCO-InstanceSegmentation
    │   │   ├── mask_rcnn_R_101_C4_3x.yaml
    │   │   ├── mask_rcnn_R_101_DC5_3x.yaml
    │   │   ├── mask_rcnn_R_101_FPN_3x.yaml
    │   │   ├── mask_rcnn_R_50_C4_1x.py
    │   │   ├── mask_rcnn_R_50_C4_1x.yaml
    │   │   ├── mask_rcnn_R_50_C4_3x.yaml
    │   │   ├── mask_rcnn_R_50_DC5_1x.yaml
    │   │   ├── mask_rcnn_R_50_DC5_3x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_1x.py
    │   │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   │   ├── mask_rcnn_R_50_FPN_1x_giou.yaml
    │   │   ├── mask_rcnn_R_50_FPN_3x.yaml
    │   │   ├── mask_rcnn_X_101_32x8d_FPN_3x.yaml
    │   │   ├── mask_rcnn_regnetx_4gf_dds_fpn_1x.py
    │   │   └── mask_rcnn_regnety_4gf_dds_fpn_1x.py
    │   ├── Distillation-ICD
    │   │   ├── CondInst_R50_R101_icd.yaml
    │   │   ├── FCOS_R50_R101_icd.yaml
    │   │   ├── MaskRCNN_R_50_R101_icd_FPN_1x.yaml
    │   │   ├── RCNN_R_50_R101_icd_FPN_1x.yaml
    │   │   ├── SOLOv2_R_50_R101_icd_FPN_1x.yaml
    │   │   └── retinanet_R_50_R101_icd_FPN_1x.yaml
    │   ├── Teachers
    │   │   ├── CondIns_R101_3x_ms.yaml
    │   │   ├── FCOS_R101_2x_ms.yaml
    │   │   └── SOLOv2_R101_3x_ms.yaml
    │   └── coco_obj.json
    ├── models
    │   ├── distiller.py
    │   ├── layers
    │   │   └── transformer.py
    │   ├── models.py
    │   ├── teacher.py
    │   └── utils.py
    ├── requirements.txt
    ├── train_baseline.py
    ├── train_distill.py
    └── utils
        └── build.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *log*/
2 | *.jpg
3 | *.png
4 | *output*
5 | *_model_zoo/
6 |
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | *.py,cover
57 | .hypothesis/
58 | .pytest_cache/
59 | cover/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 | local_settings.py
68 | db.sqlite3
69 | db.sqlite3-journal
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | .pybuilder/
83 | target/
84 |
85 | # Jupyter Notebook
86 | .ipynb_checkpoints
87 |
88 | # IPython
89 | profile_default/
90 | ipython_config.py
91 |
92 | # pyenv
93 | # For a library or package, you might want to ignore these files since the code is
94 | # intended to run in multiple environments; otherwise, check them in:
95 | # .python-version
96 |
97 | # pipenv
98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | # install all needed dependencies.
102 | #Pipfile.lock
103 |
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 |
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 |
111 | # SageMath parsed files
112 | *.sage.py
113 |
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 |
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # mkdocs documentation
131 | /site
132 |
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 |
138 | # Pyre type checker
139 | .pyre/
140 |
141 | # pytype static type analyzer
142 | .pytype/
143 |
144 | # Cython debug symbols
145 | cython_debug/
146 |
--------------------------------------------------------------------------------
/Poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MegEngine/ICD/acf27269648e4538a9d6d22171d1abbcd4eceed1/Poster.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Instance-Conditional Knowledge Distillation for Object Detection
2 | This is the official implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection", based on [MegEngine](./megengine_release/README.md) and [PyTorch](./pytorch_release/README.md). Go to the desired subfolder for more information and guidance!
3 |
4 |
5 |
6 |
7 |
8 |
9 | > [**Instance-Conditional Knowledge Distillation for Object Detection**](https://arxiv.org/abs/2110.12724),
10 | > Zijian Kang, Peizhen Zhang, Xiangyu Zhang, Jian Sun, Nanning Zheng
11 | > In Proc. of Advances in Neural Information Processing Systems (NeurIPS), 2021
12 | > [[arXiv](https://arxiv.org/abs/2110.12724)][[Citation](#citation)][[OpenReview](https://openreview.net/forum?id=k7aeAz4Vbb)]
13 |
14 | ## Usage
15 | You can find two implementations, for [MegEngine](./megengine_release/README.md) and [PyTorch](./pytorch_release/README.md), under the two sub-folders. We use the latter to report the performance in the paper. Switch to the corresponding subfolder for more information.
16 |
17 | ### Try it in a few lines:
18 | Taking the Detectron2 implementation as an example, you can train a model in a few lines:
19 | ```
20 | cd pytorch_release
21 |
22 | # Install dependencies
23 | pip install pip --upgrade
24 | pip install -r requirements.txt
25 | pip install https://github.com/facebookresearch/detectron2/archive/refs/tags/v0.5.tar.gz
26 | pip install 'git+https://github.com/aim-uofa/AdelaiDet.git@7bf9d87'
27 |
28 | # Prepare dataset according to https://github.com/facebookresearch/detectron2/tree/main/datasets
29 |
30 | # Train and distill a retinanet detector with ICD
31 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_retinanet
32 | ```
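After training, the result can presumably be evaluated through the same script. This is a hedged sketch that assumes `train_distill.py` keeps Detectron2's standard launch flags: `--eval-only`, the `MODEL.WEIGHTS` override, and the default checkpoint name `model_final.pth` are Detectron2 conventions, not confirmed by this README:

```
# Hypothetical evaluation command (Detectron2 launch conventions assumed)
python3 train_distill.py --num-gpus 8 --eval-only \
    --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml \
    MODEL.WEIGHTS output/icd_retinanet/model_final.pth
```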
33 |
34 | ## Performance
35 | For object detection on MS-COCO:
36 | | Model | Baseline (BoxAP) | + Ours (BoxAP) |
37 | | --- | :---: | :---: |
38 | | Faster R-CNN | 37.9 | 40.9 (+3.0) |
39 | | RetinaNet | 37.4 | 40.7 (+3.3) |
40 | | FCOS | 39.4 | 42.9 (+3.5) |
41 |
42 | For instance segmentation on MS-COCO:
43 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | Baseline (MaskAP) | + Ours (MaskAP) |
44 | | --- | :---: | :---: | :---: | :---: |
45 | | Mask R-CNN | 38.6 | 41.2 (+2.6) | 35.2 | 37.4 (+2.2) |
46 | | SOLOv2 | - | - | 34.6 | 38.5 (+3.9) |
47 | | CondInst |39.7 | 43.7 (+4.0) | 35.7 | 39.1 (+3.4) |
48 |
49 | ## Acknowledgement
50 |
51 | Some files are modified from [MegEngine Models](https://github.com/MegEngine/Models) and [Detectron2](https://github.com/facebookresearch/detectron2). We also refer to [PyTorch](https://github.com/pytorch/pytorch), [DETR](https://github.com/facebookresearch/detr) and [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) for some implementations.
52 |
53 |
54 | ## License
55 |
56 | This repo is licensed under the Apache License, Version 2.0 (the "License").
57 |
58 | ## Citation
59 | You can use the following BibTeX entry to cite this work.
60 | ```
61 | @inproceedings{icd_neurips2021,
62 | author = {Kang, Zijian and Zhang, Peizhen and Zhang, Xiangyu and Sun, Jian and Zheng, Nanning},
63 | booktitle = {Advances in Neural Information Processing Systems},
64 | editor = {M. Ranzato and A. Beygelzimer and Y. Dauphin and P.S. Liang and J. Wortman Vaughan},
65 | pages = {16468--16480},
66 | publisher = {Curran Associates, Inc.},
67 | title = {Instance-Conditional Knowledge Distillation for Object Detection},
68 | url = {https://proceedings.neurips.cc/paper/2021/file/892c91e0a653ba19df81a90f89d99bcd-Paper.pdf},
69 | volume = {34},
70 | year = {2021}
71 | }
72 | ```
73 |
--------------------------------------------------------------------------------
/megengine_release/README.md:
--------------------------------------------------------------------------------
1 | # Instance-Conditional Knowledge Distillation for Object Detection
2 | This is a [MegEngine](https://github.com/MegEngine/MegEngine) implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection", based on [MegEngine Models](https://github.com/MegEngine/Models).
3 |
4 | ## Requirements
5 |
6 | ### Installation
7 |
8 | In order to run the code, please prepare a CUDA environment with:
9 | - Python 3 (3.6 is recommended)
10 | - [MegEngine](https://github.com/MegEngine/MegEngine)
11 |
12 |
13 | 1. Install dependencies.
14 |
15 | ```
16 | pip3 install --upgrade pip
17 | pip3 install -r requirements.txt
18 | ```
19 |
20 | 2. Prepare the [MS-COCO 2017 dataset](http://cocodataset.org/#download) and put it in a proper directory with the following structure:
21 |
22 | ```
23 | /path/to/
24 | |->coco
25 | | |annotations
26 | | |train2017
27 | | |val2017
28 | ```
29 |
30 |
31 | [Microsoft COCO: Common Objects in Context](https://arxiv.org/abs/1405.0312) Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollár, and C Lawrence Zitnick. European Conference on Computer Vision (ECCV), 2014.
32 |
33 | ## Usage
34 |
35 | ### Train baseline models
36 |
37 | Following [MegEngine Models](https://github.com/MegEngine/Models):
38 | ```bash
39 | python3 train.py -f distill_configs/retinanet_res50_coco_1x_800size.py -n 8 \
40 | -d /data/Datasets
41 | ```
42 |
43 | `train.py` arguments:
44 |
45 | - `-f`, config file for the network.
46 | - `-n`, number of required devices (GPUs).
47 | - `-w`, pretrained backbone weights.
48 | - `-b`, training batch size, default is 2.
49 | - `-d`, dataset root, default is `/data/datasets`.
50 |
51 |
52 | ### Train with distillation
53 |
54 | ```bash
55 | python3 train_distill_icd.py -f distill_configs/retinanet_res50_coco_1x_800size.py \
56 | -n 8 -l -d /data/Datasets -tf configs/retinanet_res101_coco_3x_800size.py \
57 | -df distill_configs/ICD.py \
58 | -tw _model_zoo/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl
59 | ```
60 |
61 | `train_distill_icd.py` arguments:
62 |
63 | - `-f`, config file for the student network.
64 | - `-w`, pretrained backbone weights.
65 | - `-tf`, config file for the teacher network.
66 | - `-tw`, pretrained weights for the teacher.
67 | - `-df`, config file for the distillation module, `distill_configs/ICD.py` by default.
68 | - `-l`, use the inheriting strategy (initialize the student with pretrained parameters).
69 | - `-n`, number of required devices (GPUs).
70 | - `-b`, training batch size, default is 2.
71 | - `-d`, dataset root, default is `/data/datasets`.
72 |
73 | Note that `backbone_pretrained` is set in the distill configs, so backbone weights are loaded automatically and `-w` can be omitted. Checkpoints will be saved to a `log-xxx` directory.
74 |
75 | ### Evaluate
76 |
77 | ```
78 | python3 test.py -f distill_configs/retinanet_res50_coco_1x_800size.py -n 8 \
79 | -w log-of-xxx/epoch_17.pkl -d /data/Datasets/
80 | ```
81 |
82 | `test.py` arguments:
83 |
84 | - `-f`, config file for the network.
85 | - `-n`, number of required devices (GPUs).
86 | - `-w`, pretrained weights.
87 | - `-d`, dataset root, default is `/data/datasets`.
88 |
89 | ## Examples and Results
90 | ### Steps
91 | 1. Download the pretrained teacher model to the `_model_zoo` directory.
92 | 2. Train baseline or distill with ICD.
93 | 3. Evaluate checkpoints (the last checkpoint is used by default); step 1 is shown concretely below.
94 |
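A minimal sketch for step 1 (assuming `wget`; any downloader works), using the RetinaNet teacher weights referenced in the section below:

```bash
mkdir -p _model_zoo
wget -P _model_zoo https://data.megengine.org.cn/models/weights/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl
```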
95 | ### Examples of Common Detectors
96 |
97 | #### RetinaNet
98 | - [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár. IEEE International Conference on Computer Vision (ICCV), 2017.
99 |
100 |
101 | - Teacher RetinaNet-R101-3x:
102 | https://data.megengine.org.cn/models/weights/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl
103 |
104 |
105 | - Config: distill_configs/retinanet_res50_coco_1x_800size.py
106 |
107 | Command:
108 | ```
109 | python3 train_distill_icd.py -f distill_configs/retinanet_res50_coco_1x_800size.py \
110 | -n 8 -l -d /data/Datasets -tf configs/retinanet_res101_coco_3x_800size.py \
111 | -df distill_configs/ICD.py \
112 | -tw _model_zoo/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl
113 | ```
114 |
115 | #### FCOS
116 |
117 | - [FCOS: Fully Convolutional One-Stage Object Detection](https://arxiv.org/abs/1904.01355) Zhi Tian, Chunhua Shen, Hao Chen, and Tong He. IEEE International Conference on Computer Vision (ICCV), 2019.
118 |
119 | - Teacher FCOS-R101-3x:
120 | https://data.megengine.org.cn/models/weights/fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl
121 |
122 |
123 | - Config: distill_configs/fcos_res50_coco_1x_800size.py
124 |
125 | Command:
126 | ```
127 | python3 train_distill_icd.py -f distill_configs/fcos_res50_coco_1x_800size.py \
128 | -n 8 -l -d /data/Datasets -tf configs/fcos_res101_coco_3x_800size.py \
129 | -df distill_configs/ICD.py \
130 | -tw _model_zoo/fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl
131 | ```
132 |
133 | #### ATSS
134 |
135 | - [Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection](https://arxiv.org/abs/1912.02424) Shifeng Zhang, Cheng Chi, Yongqiang Yao, Zhen Lei, and Stan Z. Li. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2020.
136 |
137 | - Teacher ATSS-R101-3x:
138 | https://data.megengine.org.cn/models/weights/atss_res101_coco_3x_800size_44dot7_9181687e.pkl
139 |
140 |
141 | - Config: distill_configs/atss_res50_coco_1x_800size.py
142 |
143 | Command:
144 | ```
145 | python3 train_distill_icd.py -f distill_configs/atss_res50_coco_1x_800size.py \
146 | -n 8 -l -d /data/Datasets -tf configs/atss_res101_coco_3x_800size.py \
147 | -df distill_configs/ICD.py \
148 | -tw _model_zoo/atss_res101_coco_3x_800size_44dot7_9181687e.pkl
149 | ```
150 |
151 | ### Results (AP) on MS-COCO
152 |
153 | | Model | Baseline | +ICD |
154 | | --- | :---: | :---: |
155 | | RetinaNet | 36.8 | 40.3 |
156 | | FCOS | 40.0 | 43.3 |
157 | | ATSS | 39.6 | 43.0 |
158 |
159 |
160 | ### Notice
161 |
162 | - Results of this implementation are mainly for demonstration; please refer to the Detectron2 version for reproduction.
163 |
164 | - We simply adopt the hyperparameters from the Detectron2 version; further tuning could be helpful.
165 |
166 | - There is a known CUDA memory issue related to MegEngine: actual memory consumption can be much larger than the theoretical value due to memory fragmentation. This is expected to be fixed in a future version of MegEngine.
--------------------------------------------------------------------------------
/megengine_release/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .atss_res18_coco_3x_800size import atss_res18_coco_3x_800size
2 | from .atss_res34_coco_3x_800size import atss_res34_coco_3x_800size
3 | from .atss_res50_coco_3x_800size import atss_res50_coco_3x_800size
4 | from .atss_res101_coco_3x_800size import atss_res101_coco_3x_800size
5 | from .atss_resx101_coco_2x_800size import atss_resx101_coco_2x_800size
6 | from .faster_rcnn_res18_coco_3x_800size import faster_rcnn_res18_coco_3x_800size
7 | from .faster_rcnn_res34_coco_3x_800size import faster_rcnn_res34_coco_3x_800size
8 | from .faster_rcnn_res50_coco_3x_800size import faster_rcnn_res50_coco_3x_800size
9 | from .faster_rcnn_res101_coco_3x_800size import faster_rcnn_res101_coco_3x_800size
10 | from .faster_rcnn_resx101_coco_2x_800size import faster_rcnn_resx101_coco_2x_800size
11 | from .fcos_res18_coco_3x_800size import fcos_res18_coco_3x_800size
12 | from .fcos_res34_coco_3x_800size import fcos_res34_coco_3x_800size
13 | from .fcos_res50_coco_3x_800size import fcos_res50_coco_3x_800size
14 | from .fcos_res101_coco_3x_800size import fcos_res101_coco_3x_800size
15 | from .fcos_resx101_coco_2x_800size import fcos_resx101_coco_2x_800size
16 | from .freeanchor_res18_coco_3x_800size import freeanchor_res18_coco_3x_800size
17 | from .freeanchor_res34_coco_3x_800size import freeanchor_res34_coco_3x_800size
18 | from .freeanchor_res50_coco_3x_800size import freeanchor_res50_coco_3x_800size
19 | from .freeanchor_res101_coco_3x_800size import freeanchor_res101_coco_3x_800size
20 | from .freeanchor_resx101_coco_2x_800size import freeanchor_resx101_coco_2x_800size
21 | from .retinanet_res18_coco_3x_800size import retinanet_res18_coco_3x_800size
22 | from .retinanet_res34_coco_3x_800size import retinanet_res34_coco_3x_800size
23 | from .retinanet_res50_coco_3x_800size import retinanet_res50_coco_3x_800size
24 | from .retinanet_res101_coco_3x_800size import retinanet_res101_coco_3x_800size
25 | from .retinanet_resx101_coco_2x_800size import retinanet_resx101_coco_2x_800size
26 |
27 | _EXCLUDE = {}
28 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
29 |
--------------------------------------------------------------------------------
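Each name re-exported above is a model factory decorated with `@hub.pretrained` (see the config files below). A hedged usage sketch, under the assumption that MegEngine's hub decorator accepts `pretrained=True` to download and load the linked weights; this kwarg is not confirmed by this repo:

```python
import configs  # megengine_release/configs

# Build ATSS-R50 and (assumption) fetch the released 3x checkpoint.
model = configs.atss_res50_coco_3x_800size(pretrained=True)
model.eval()  # switch to inference mode
```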
/megengine_release/configs/atss_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomATSSConfig(models.ATSSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet101"
19 |
20 |
21 | @hub.pretrained(
22 | "https://data.megengine.org.cn/models/weights/"
23 | "atss_res101_coco_3x_800size_44dot7_9181687e.pkl"
24 | )
25 | def atss_res101_coco_3x_800size(**kwargs):
26 | r"""
27 | ATSS trained on the COCO dataset.
28 | `"ATSS" <https://arxiv.org/abs/1912.02424>`_
29 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
30 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
31 | """
32 | cfg = CustomATSSConfig()
33 | cfg.backbone_pretrained = False
34 | return models.ATSS(cfg, **kwargs)
35 |
36 |
37 | Net = models.ATSS
38 | Cfg = CustomATSSConfig
39 |
--------------------------------------------------------------------------------
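Every config module in this folder follows the same contract: it exposes `Net` (the model class) and `Cfg` (the config class). A minimal sketch of how a driver such as `train.py` or `test.py` could resolve the `-f` argument; the loader below is a hypothetical illustration, not code from this repo:

```python
import importlib.util

def load_config(path):
    """Load a config module from a file path, e.g. one passed via -f."""
    spec = importlib.util.spec_from_file_location("net_config", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

cfg_module = load_config("configs/atss_res101_coco_3x_800size.py")
model = cfg_module.Net(cfg_module.Cfg())  # i.e. models.ATSS(CustomATSSConfig())
```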
/megengine_release/configs/atss_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomATSSConfig(models.ATSSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet18"
19 | self.fpn_in_channels = [128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "atss_res18_coco_3x_800size_38dot3_58e249d5.pkl"
25 | )
26 | def atss_res18_coco_3x_800size(**kwargs):
27 | r"""
28 | ATSS trained on the COCO dataset.
29 | `"ATSS" <https://arxiv.org/abs/1912.02424>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomATSSConfig()
34 | cfg.backbone_pretrained = False
35 | return models.ATSS(cfg, **kwargs)
36 |
37 |
38 | Net = models.ATSS
39 | Cfg = CustomATSSConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/atss_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomATSSConfig(models.ATSSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet34"
19 | self.fpn_in_channels = [128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "atss_res34_coco_3x_800size_41dot5_ec16a67b.pkl"
25 | )
26 | def atss_res34_coco_3x_800size(**kwargs):
27 | r"""
28 | ATSS trained on the COCO dataset.
29 | `"ATSS" <https://arxiv.org/abs/1912.02424>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomATSSConfig()
34 | cfg.backbone_pretrained = False
35 | return models.ATSS(cfg, **kwargs)
36 |
37 |
38 | Net = models.ATSS
39 | Cfg = CustomATSSConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/atss_res50_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl"
17 | )
18 | def atss_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | ATSS trained on the COCO dataset.
21 | `"ATSS" <https://arxiv.org/abs/1912.02424>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.ATSSConfig()
26 | cfg.backbone_pretrained = False
27 | return models.ATSS(cfg, **kwargs)
28 |
29 |
30 | Net = models.ATSS
31 | Cfg = models.ATSSConfig
32 |
--------------------------------------------------------------------------------
/megengine_release/configs/atss_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomATSSConfig(models.ATSSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnext101_32x8d"
19 | self.max_epoch = 36
20 | self.lr_decay_stages = [24, 32]
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "atss_resx101_coco_2x_800size_45dot6_b3a91b36.pkl"
26 | )
27 | def atss_resx101_coco_2x_800size(**kwargs):
28 | r"""
29 | ATSS trained on the COCO dataset.
30 | `"ATSS" <https://arxiv.org/abs/1912.02424>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomATSSConfig()
35 | cfg.backbone_pretrained = False
36 | return models.ATSS(cfg, **kwargs)
37 |
38 |
39 | Net = models.ATSS
40 | Cfg = CustomATSSConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/configs/faster_rcnn_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet101"
19 |
20 |
21 | @hub.pretrained(
22 | "https://data.megengine.org.cn/models/weights/"
23 | "faster_rcnn_res101_coco_3x_800size_42dot6_2538b0ff.pkl"
24 | )
25 | def faster_rcnn_res101_coco_3x_800size(**kwargs):
26 | r"""
27 | Faster-RCNN FPN trained on the COCO dataset.
28 | `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
29 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
30 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
31 | """
32 | cfg = CustomFasterRCNNConfig()
33 | cfg.backbone_pretrained = False
34 | return models.FasterRCNN(cfg, **kwargs)
35 |
36 |
37 | Net = models.FasterRCNN
38 | Cfg = CustomFasterRCNNConfig
39 |
--------------------------------------------------------------------------------
/megengine_release/configs/faster_rcnn_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet18"
19 | self.fpn_in_channels = [64, 128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "faster_rcnn_res18_coco_3x_800size_35dot7_a33835ca.pkl"
25 | )
26 | def faster_rcnn_res18_coco_3x_800size(**kwargs):
27 | r"""
28 | Faster-RCNN FPN trained on the COCO dataset.
29 | `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomFasterRCNNConfig()
34 | cfg.backbone_pretrained = False
35 | return models.FasterRCNN(cfg, **kwargs)
36 |
37 |
38 | Net = models.FasterRCNN
39 | Cfg = CustomFasterRCNNConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/faster_rcnn_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet34"
19 | self.fpn_in_channels = [64, 128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "faster_rcnn_res34_coco_3x_800size_39dot6_11fca4d4.pkl"
25 | )
26 | def faster_rcnn_res34_coco_3x_800size(**kwargs):
27 | r"""
28 | Faster-RCNN FPN trained on the COCO dataset.
29 | `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomFasterRCNNConfig()
34 | cfg.backbone_pretrained = False
35 | return models.FasterRCNN(cfg, **kwargs)
36 |
37 |
38 | Net = models.FasterRCNN
39 | Cfg = CustomFasterRCNNConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/faster_rcnn_res50_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "faster_rcnn_res50_coco_3x_800size_40dot1_8682ff1a.pkl"
17 | )
18 | def faster_rcnn_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | Faster-RCNN FPN trained on the COCO dataset.
21 | `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.FasterRCNNConfig()
26 | cfg.backbone_pretrained = False
27 | return models.FasterRCNN(cfg, **kwargs)
28 |
29 |
30 | Net = models.FasterRCNN
31 | Cfg = models.FasterRCNNConfig
32 |
--------------------------------------------------------------------------------
/megengine_release/configs/faster_rcnn_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnext101_32x8d"
19 | self.max_epoch = 36
20 | self.lr_decay_stages = [24, 32]
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "faster_rcnn_resx101_coco_2x_800size_44dot1_e5e0060b.pkl"
26 | )
27 | def faster_rcnn_resx101_coco_2x_800size(**kwargs):
28 | r"""
29 | Faster-RCNN FPN trained on the COCO dataset.
30 | `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomFasterRCNNConfig()
35 | cfg.backbone_pretrained = False
36 | return models.FasterRCNN(cfg, **kwargs)
37 |
38 |
39 | Net = models.FasterRCNN
40 | Cfg = CustomFasterRCNNConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/configs/fcos_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFCOSConfig(models.FCOSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet101"
19 |
20 |
21 | @hub.pretrained(
22 | "https://data.megengine.org.cn/models/weights/"
23 | "fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl"
24 | )
25 | def fcos_res101_coco_3x_800size(**kwargs):
26 | r"""
27 | FCOS trained on the COCO dataset.
28 | `"FCOS" <https://arxiv.org/abs/1904.01355>`_
29 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
30 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
31 | """
32 | cfg = CustomFCOSConfig()
33 | cfg.backbone_pretrained = False
34 | return models.FCOS(cfg, **kwargs)
35 |
36 |
37 | Net = models.FCOS
38 | Cfg = CustomFCOSConfig
39 |
--------------------------------------------------------------------------------
/megengine_release/configs/fcos_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFCOSConfig(models.FCOSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet18"
19 | self.fpn_in_channels = [128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "fcos_res18_coco_3x_800size_37dot6_adab0136.pkl"
25 | )
26 | def fcos_res18_coco_3x_800size(**kwargs):
27 | r"""
28 | FCOS trained on the COCO dataset.
29 | `"FCOS" <https://arxiv.org/abs/1904.01355>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomFCOSConfig()
34 | cfg.backbone_pretrained = False
35 | return models.FCOS(cfg, **kwargs)
36 |
37 |
38 | Net = models.FCOS
39 | Cfg = CustomFCOSConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/fcos_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFCOSConfig(models.FCOSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet34"
19 | self.fpn_in_channels = [128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "fcos_res34_coco_3x_800size_41dot0_8ba4633f.pkl"
25 | )
26 | def fcos_res34_coco_3x_800size(**kwargs):
27 | r"""
28 | FCOS trained on the COCO dataset.
29 | `"FCOS" <https://arxiv.org/abs/1904.01355>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomFCOSConfig()
34 | cfg.backbone_pretrained = False
35 | return models.FCOS(cfg, **kwargs)
36 |
37 |
38 | Net = models.FCOS
39 | Cfg = CustomFCOSConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/fcos_res50_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl"
17 | )
18 | def fcos_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | FCOS trained on the COCO dataset.
21 | `"FCOS" <https://arxiv.org/abs/1904.01355>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.FCOSConfig()
26 | cfg.backbone_pretrained = False
27 | return models.FCOS(cfg, **kwargs)
28 |
29 |
30 | Net = models.FCOS
31 | Cfg = models.FCOSConfig
32 |
--------------------------------------------------------------------------------
/megengine_release/configs/fcos_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFCOSConfig(models.FCOSConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnext101_32x8d"
19 | self.max_epoch = 36
20 | self.lr_decay_stages = [24, 32]
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "fcos_resx101_coco_2x_800size_44dot8_42ac8e82.pkl"
26 | )
27 | def fcos_resx101_coco_2x_800size(**kwargs):
28 | r"""
29 | FCOS trained on the COCO dataset.
30 | `"FCOS" <https://arxiv.org/abs/1904.01355>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomFCOSConfig()
35 | cfg.backbone_pretrained = False
36 | return models.FCOS(cfg, **kwargs)
37 |
38 |
39 | Net = models.FCOS
40 | Cfg = CustomFCOSConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/configs/freeanchor_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet101"
19 |
20 |
21 | @hub.pretrained(
22 | "https://data.megengine.org.cn/models/weights/"
23 | "freeanchor_res101_coco_3x_800size_43dot9_8c707d7d.pkl"
24 | )
25 | def freeanchor_res101_coco_3x_800size(**kwargs):
26 | r"""
27 | FreeAnchor trained on the COCO dataset.
28 | `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
29 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
30 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
31 | """
32 | cfg = CustomFreeAnchorConfig()
33 | cfg.backbone_pretrained = False
34 | return models.FreeAnchor(cfg, **kwargs)
35 |
36 |
37 | Net = models.FreeAnchor
38 | Cfg = CustomFreeAnchorConfig
39 |
--------------------------------------------------------------------------------
/megengine_release/configs/freeanchor_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet18"
19 | self.fpn_in_channels = [128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "freeanchor_res18_coco_3x_800size_38dot1_3d0559a8.pkl"
25 | )
26 | def freeanchor_res18_coco_3x_800size(**kwargs):
27 | r"""
28 | FreeAnchor trained on the COCO dataset.
29 | `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomFreeAnchorConfig()
34 | cfg.backbone_pretrained = False
35 | return models.FreeAnchor(cfg, **kwargs)
36 |
37 |
38 | Net = models.FreeAnchor
39 | Cfg = CustomFreeAnchorConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/freeanchor_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet34"
19 | self.fpn_in_channels = [128, 256, 512]
20 |
21 |
22 | @hub.pretrained(
23 | "https://data.megengine.org.cn/models/weights/"
24 | "freeanchor_res34_coco_3x_800size_41dot1_3b03734e.pkl"
25 | )
26 | def freeanchor_res34_coco_3x_800size(**kwargs):
27 | r"""
28 | FreeAnchor trained on the COCO dataset.
29 | `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
30 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
31 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
32 | """
33 | cfg = CustomFreeAnchorConfig()
34 | cfg.backbone_pretrained = False
35 | return models.FreeAnchor(cfg, **kwargs)
36 |
37 |
38 | Net = models.FreeAnchor
39 | Cfg = CustomFreeAnchorConfig
40 |
--------------------------------------------------------------------------------
/megengine_release/configs/freeanchor_res50_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "freeanchor_res50_coco_3x_800size_42dot1_5c567f14.pkl"
17 | )
18 | def freeanchor_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | FreeAnchor trained on the COCO dataset.
21 | `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.FreeAnchorConfig()
26 | cfg.backbone_pretrained = False
27 | return models.FreeAnchor(cfg, **kwargs)
28 |
29 |
30 | Net = models.FreeAnchor
31 | Cfg = models.FreeAnchorConfig
32 |
--------------------------------------------------------------------------------
/megengine_release/configs/freeanchor_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnext101_32x8d"
19 | self.max_epoch = 36
20 | self.lr_decay_stages = [24, 32]
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "freeanchor_resx101_coco_2x_800size_44dot9_5a23fca7.pkl"
26 | )
27 | def freeanchor_resx101_coco_2x_800size(**kwargs):
28 | r"""
29 | FreeAnchor trained on the COCO dataset.
30 | `"FreeAnchor" <https://arxiv.org/abs/1909.02466>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomFreeAnchorConfig()
35 | cfg.backbone_pretrained = False
36 | return models.FreeAnchor(cfg, **kwargs)
37 |
38 |
39 | Net = models.FreeAnchor
40 | Cfg = CustomFreeAnchorConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/configs/retinanet_res101_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomRetinaNetConfig(models.RetinaNetConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet101"
19 |
20 |
21 | @hub.pretrained(
22 | "https://data.megengine.org.cn/models/weights/"
23 | "retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl"
24 | )
25 | def retinanet_res101_coco_3x_800size(**kwargs):
26 | r"""
27 | RetinaNet trained on the COCO dataset.
28 | `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
29 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
30 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
31 | """
32 | cfg = CustomRetinaNetConfig()
33 | cfg.backbone_pretrained = False
34 | return models.RetinaNet(cfg, **kwargs)
35 |
36 |
37 | Net = models.RetinaNet
38 | Cfg = CustomRetinaNetConfig
39 |
--------------------------------------------------------------------------------
/megengine_release/configs/retinanet_res18_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomRetinaNetConfig(models.RetinaNetConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet18"
19 | self.fpn_in_channels = [128, 256, 512]
20 | self.fpn_top_in_channel = 512
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "retinanet_res18_coco_3x_800size_35dot3_0c4956c8.pkl"
26 | )
27 | def retinanet_res18_coco_3x_800size(**kwargs):
28 | r"""
29 | RetinaNet trained on the COCO dataset.
30 | `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomRetinaNetConfig()
35 | cfg.backbone_pretrained = False
36 | return models.RetinaNet(cfg, **kwargs)
37 |
38 |
39 | Net = models.RetinaNet
40 | Cfg = CustomRetinaNetConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/configs/retinanet_res34_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomRetinaNetConfig(models.RetinaNetConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnet34"
19 | self.fpn_in_channels = [128, 256, 512]
20 | self.fpn_top_in_channel = 512
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "retinanet_res34_coco_3x_800size_38dot4_3485f9ec.pkl"
26 | )
27 | def retinanet_res34_coco_3x_800size(**kwargs):
28 | r"""
29 | RetinaNet trained on the COCO dataset.
30 | `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomRetinaNetConfig()
35 | cfg.backbone_pretrained = False
36 | return models.RetinaNet(cfg, **kwargs)
37 |
38 |
39 | Net = models.RetinaNet
40 | Cfg = CustomRetinaNetConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/configs/retinanet_res50_coco_3x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl"
17 | )
18 | def retinanet_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | RetinaNet trained on the COCO dataset.
21 | `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.RetinaNetConfig()
26 | cfg.backbone_pretrained = False
27 | return models.RetinaNet(cfg, **kwargs)
28 |
29 |
30 | Net = models.RetinaNet
31 | Cfg = models.RetinaNetConfig
32 |
--------------------------------------------------------------------------------
/megengine_release/configs/retinanet_resx101_coco_2x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | class CustomRetinaNetConfig(models.RetinaNetConfig):
15 | def __init__(self):
16 | super().__init__()
17 |
18 | self.backbone = "resnext101_32x8d"
19 | self.max_epoch = 36
20 | self.lr_decay_stages = [24, 32]
21 |
22 |
23 | @hub.pretrained(
24 | "https://data.megengine.org.cn/models/weights/"
25 | "retinanet_resx101_coco_2x_800size_42dot3_1502eace.pkl"
26 | )
27 | def retinanet_resx101_coco_2x_800size(**kwargs):
28 | r"""
29 | RetinaNet trained on the COCO dataset.
30 | `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
31 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
32 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
33 | """
34 | cfg = CustomRetinaNetConfig()
35 | cfg.backbone_pretrained = False
36 | return models.RetinaNet(cfg, **kwargs)
37 |
38 |
39 | Net = models.RetinaNet
40 | Cfg = CustomRetinaNetConfig
41 |
--------------------------------------------------------------------------------
/megengine_release/distill_configs/ICD.py:
--------------------------------------------------------------------------------
1 | import megengine.module as M
2 | import megengine.functional as F
3 | from models.ICD.ICD import ICD
4 | from easydict import EasyDict as edict
5 |
6 | def get_distillator():
7 | cfg = edict({
8 | 'distiller': {
9 | 'FEAT_KEYS': ['p3', 'p4', 'p5', 'p6', 'p7'],
10 | 'WEIGHT_VALUE': 8.0,
11 | 'TEMP_VALUE': 1.0,
12 | 'NUM_SCALE_SPLITS': 5,
13 | 'HIDDEN_DIM': 256,
14 | 'NUM_CLASSES': 80,
15 | 'MAX_LABELS': 300,
16 | 'ATT_HEADS': 8,
17 | 'USE_POS_EMBEDDING': True,
18 | 'DECODER_POSEMB_ON_V': False,
19 |
20 | },
21 | })
22 | return ICD(256, cfg)
23 |
24 | Net = get_distillator
--------------------------------------------------------------------------------
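Distill configs expose the same `Net` hook, which `train_distill_icd.py` receives via `-df`. A minimal sketch, assuming the repo root is on `sys.path`:

```python
from distill_configs.ICD import Net

distiller = Net()  # get_distillator() -> ICD(256, cfg) with the dict above
```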
/megengine_release/distill_configs/ICD_rcnn.py:
--------------------------------------------------------------------------------
1 | import megengine.module as M
2 | import megengine.functional as F
3 | from models.ICD.ICD import ICD
4 | from easydict import EasyDict as edict
5 |
6 | def get_distillator():
7 | cfg = edict({
8 | 'distiller': {
9 | 'FEAT_KEYS': ['p2', 'p3', 'p4', 'p5', 'p6'],
10 | 'WEIGHT_VALUE': 3.0,
11 | 'TEMP_VALUE': 1.0,
12 | 'HIDDEN_DIM': 256,
13 | 'NUM_CLASSES': 80,
14 | 'MAX_LABELS': 300,
15 | 'ATT_HEADS': 8,
16 | 'USE_POS_EMBEDDING': True,
17 | 'DECODER_POSEMB_ON_V': False,
18 |
19 | },
20 | })
21 | return ICD(256, cfg)
22 |
23 | Net = get_distillator
--------------------------------------------------------------------------------
/megengine_release/distill_configs/atss_res50_coco_1x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl"
17 | )
18 | def atss_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | ATSS trained on the COCO dataset.
21 | `"ATSS" <https://arxiv.org/abs/1912.02424>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.ATSSConfig()
26 | cfg.backbone_pretrained = True
27 | cfg.max_epoch = 18
28 | cfg.lr_decay_stages = [12, 16]
29 | return models.ATSS(cfg, **kwargs)
30 |
31 |
32 | def get_cfg():
33 | cfg = models.ATSSConfig()
34 | cfg.backbone_pretrained = True
35 | cfg.max_epoch = 18
36 | cfg.lr_decay_stages = [12, 16]
37 |
38 | return cfg
39 |
40 |
41 | Net = models.ATSS
42 | Cfg = get_cfg
43 |
--------------------------------------------------------------------------------
/megengine_release/distill_configs/fcos_res50_coco_1x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl"
17 | )
18 | def fcos_res50_coco_3x_800size(**kwargs):
19 | r"""
20 | FCOS trained on the COCO dataset.
21 | `"FCOS" <https://arxiv.org/abs/1904.01355>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.FCOSConfig()
26 | cfg.backbone_pretrained = True
27 | cfg.max_epoch = 18
28 | cfg.lr_decay_stages = [12, 16]
29 | return models.FCOS(cfg, **kwargs)
30 |
31 |
32 | def get_cfg():
33 | cfg = models.FCOSConfig()
34 | cfg.backbone_pretrained = True
35 | cfg.max_epoch = 18
36 | cfg.lr_decay_stages = [12, 16]
37 |
38 | return cfg
39 |
40 |
41 | Net = models.FCOS
42 | Cfg = get_cfg
43 |
--------------------------------------------------------------------------------
/megengine_release/distill_configs/retinanet_res50_coco_1x_800size.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine import hub
10 |
11 | import models
12 |
13 |
14 | @hub.pretrained(
15 | "https://data.megengine.org.cn/models/weights/"
16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl"
17 | )
18 | def retinanet_res50_coco_1x_800size(**kwargs):
19 | r"""
20 | RetinaNet trained on the COCO dataset.
21 | `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
22 | `"FPN" <https://arxiv.org/abs/1612.03144>`_
23 | `"COCO" <https://arxiv.org/abs/1405.0312>`_
24 | """
25 | cfg = models.RetinaNetConfig()
26 | cfg.backbone_pretrained = True
27 | cfg.max_epoch = 18
28 | cfg.lr_decay_stages = [12, 16]
29 |
30 | return models.RetinaNet(cfg, **kwargs)
31 |
32 |
33 | def get_cfg():
34 | cfg = models.RetinaNetConfig()
35 | cfg.backbone_pretrained = True
36 | cfg.max_epoch = 18
37 | cfg.lr_decay_stages = [12, 16]
38 |
39 | return cfg
40 |
41 | Net = models.RetinaNet
42 | Cfg = get_cfg
43 |
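
All three detector distill configs above expose the same `Net`/`Cfg` pair. A minimal loading sketch, assuming the repo root is on PYTHONPATH (the `importlib` loader here is illustrative, not the repo's own code):

import importlib

# Load a distill config by module path and build the student model from it.
module = importlib.import_module("distill_configs.retinanet_res50_coco_1x_800size")
cfg = module.Cfg()       # RetinaNetConfig with max_epoch=18, decay at [12, 16]
model = module.Net(cfg)  # models.RetinaNet instance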
--------------------------------------------------------------------------------
/megengine_release/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from .basic import *
10 | from .det import *
11 |
12 | _EXCLUDE = {}
13 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
14 |
--------------------------------------------------------------------------------
/megengine_release/layers/basic/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from .functional import *
10 | from .nn import *
11 | from .norm import *
12 |
13 | _EXCLUDE = {}
14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
15 |
--------------------------------------------------------------------------------
/megengine_release/layers/basic/functional.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from typing import Optional
10 |
11 | import numpy as np
12 |
13 | import megengine.distributed as dist
14 | import megengine.functional as F
15 | from megengine import Tensor
16 |
17 |
18 | def get_padded_tensor(
19 | array: Tensor, multiple_number: int = 32, pad_value: float = 0
20 | ) -> Tensor:
21 | """ pad the nd-array to multiple stride of th e
22 |
23 | Args:
24 | array (Tensor):
25 | the tensor with the shape of [batch, channel, height, width]
26 | multiple_number (int):
27 | pad so that the height and width are divisible by multiple_number
28 | pad_value (float): the value used for padding
29 |
30 | Returns:
31 | padded_array (Tensor)
32 | """
33 | batch, chl, t_height, t_width = array.shape
34 | padded_height = (
35 | (t_height + multiple_number - 1) // multiple_number * multiple_number
36 | )
37 | padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number
38 |
39 | padded_array = F.full(
40 | (batch, chl, padded_height, padded_width), pad_value, dtype=array.dtype
41 | )
42 |
43 | ndim = array.ndim
44 | if ndim == 4:
45 | padded_array[:, :, :t_height, :t_width] = array
46 | elif ndim == 3:
47 | padded_array[:, :t_height, :t_width] = array
48 | else:
49 | raise ValueError("unsupported tensor ndim: %d" % ndim)
50 | return padded_array
51 |
52 |
53 | def safelog(x, eps=None):
54 | if eps is None:
55 | eps = np.finfo(x.dtype).eps
56 | return F.log(F.maximum(x, eps))
57 |
58 |
59 | def batched_nms(
60 | boxes: Tensor, scores: Tensor, idxs: Tensor, iou_thresh: float, max_output: Optional[int] = None
61 | ) -> Tensor:
62 | r"""
63 | Performs non-maximum suppression (NMS) on the boxes according to
64 | their intersection-over-union (IoU).
65 |
66 | :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on;
67 | each box is expected to be in `(x1, y1, x2, y2)` format.
68 | :param iou_thresh: ``IoU`` threshold for overlapping.
69 | :param idxs: tensor of shape `(N,)`, the class indices of the boxes in the batch.
70 | :param scores: tensor of shape `(N,)`, the score of boxes.
71 | :return: indices of the elements that have been kept by NMS.
72 |
73 | Examples:
74 |
75 | .. testcode::
76 |
77 | import numpy as np
78 | from megengine import tensor
79 |
80 | x = np.zeros((100,4))
81 | np.random.seed(42)
82 | x[:,:2] = np.random.rand(100,2) * 20
83 | x[:,2:] = np.random.rand(100,2) * 20 + 100
84 | scores = tensor(np.random.rand(100))
85 | idxs = tensor(np.random.randint(0, 10, 100))
86 | inp = tensor(x)
87 | result = batched_nms(inp, scores, idxs, iou_thresh=0.6)
88 | print(result.numpy())
89 |
90 | Outputs:
91 |
92 | .. testoutput::
93 |
94 | [75 41 99 98 69 64 11 27 35 18]
95 |
96 | """
97 | assert (
98 | boxes.ndim == 2 and boxes.shape[1] == 4
99 | ), "the expected shape of boxes is (N, 4)"
100 | assert scores.ndim == 1, "the expected shape of scores is (N,)"
101 | assert idxs.ndim == 1, "the expected shape of idxs is (N,)"
102 | assert (
103 | boxes.shape[0] == scores.shape[0] == idxs.shape[0]
104 | ), "number of boxes, scores and idxs are not matched"
105 |
106 | idxs = idxs.detach()
107 | max_coordinate = boxes.max()
108 | offsets = idxs.astype("float32") * (max_coordinate + 1)
109 | boxes = boxes + offsets.reshape(-1, 1)
110 | return F.nn.nms(boxes, scores, iou_thresh, max_output)
111 |
112 |
113 | def all_reduce_mean(array: Tensor) -> Tensor:
114 | if dist.get_world_size() > 1:
115 | array = dist.functional.all_reduce_sum(array) / dist.get_world_size()
116 | return array
117 |
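
A small usage sketch for get_padded_tensor (illustrative only; assumes MegEngine is installed):

import numpy as np
import megengine as mge

# Pad a batch whose spatial size is not a multiple of 32.
x = mge.tensor(np.random.rand(2, 3, 100, 150).astype("float32"))
padded = get_padded_tensor(x, multiple_number=32)
print(padded.shape)  # (2, 3, 128, 160): height and width rounded up to 32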
--------------------------------------------------------------------------------
/megengine_release/layers/basic/nn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ---------------------------------------------------------------------
16 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
17 | #
18 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
19 | #
20 | # Unless required by applicable law or agreed to in writing,
21 | # software distributed under the License is distributed on an
22 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | #
24 | # This file has been modified by Megvii ("Megvii Modifications").
25 | # All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
26 | # ---------------------------------------------------------------------
27 | from collections import namedtuple
28 |
29 | import megengine.module as M
30 |
31 |
32 | class Conv2d(M.Conv2d):
33 | """
34 | A wrapper around :class:`megengine.module.Conv2d`.
35 | """
36 |
37 | def __init__(self, *args, **kwargs):
38 | """
39 | Extra keyword arguments supported in addition to
40 | `megengine.module.Conv2d`.
41 |
42 | Args:
43 | norm (M.Module, optional): a normalization layer
44 | activation (callable(Tensor) -> Tensor): a callable activation
45 | function
46 | """
47 | norm = kwargs.pop("norm", None)
48 | activation = kwargs.pop("activation", None)
49 | super().__init__(*args, **kwargs)
50 |
51 | self.norm = norm
52 | self.activation = activation
53 |
54 | def forward(self, x):
55 | x = super().forward(x)
56 | if self.norm is not None:
57 | x = self.norm(x)
58 | if self.activation is not None:
59 | x = self.activation(x)
60 | return x
61 |
62 |
63 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
64 | """
65 | A simple structure that contains basic shape specification about a tensor.
66 | Useful for getting a module's output channels when building the graph.
67 | """
68 |
69 | def __new__(cls, channels=None, height=None, width=None, stride=None):
70 | return super().__new__(cls, channels, height, width, stride)
71 |
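
A usage sketch for the Conv2d wrapper above; the norm/activation choices are arbitrary illustrations:

import numpy as np
import megengine as mge
import megengine.functional as F

# conv -> BN -> ReLU fused into a single module call.
conv = Conv2d(
    16, 32, kernel_size=3, padding=1, bias=False,
    norm=M.BatchNorm2d(32), activation=F.relu,
)
y = conv(mge.tensor(np.ones((1, 16, 8, 8), dtype="float32")))
print(y.shape)  # (1, 32, 8, 8)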
--------------------------------------------------------------------------------
/megengine_release/layers/basic/norm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ---------------------------------------------------------------------
16 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
17 | #
18 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
19 | #
20 | # Unless required by applicable law or agreed to in writing,
21 | # software distributed under the License is distributed on an
22 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | #
24 | # This file has been modified by Megvii ("Megvii Modifications").
25 | # All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
26 | # ---------------------------------------------------------------------
27 | from functools import partial
28 |
29 | import megengine.module as M
30 | from megengine.module.normalization import GroupNorm, InstanceNorm, LayerNorm
31 |
32 |
33 | def get_norm(norm):
34 | """
35 | Args:
36 | norm (str): currently support "BN", "SyncBN", "FrozenBN", "GN", "LN" and "IN"
37 |
38 | Returns:
39 | M.Module or None: the normalization layer
40 | """
41 | if norm is None:
42 | return None
43 | norm = {
44 | "BN": M.BatchNorm2d,
45 | "SyncBN": M.SyncBatchNorm,
46 | "FrozenBN": partial(M.BatchNorm2d, freeze=True),
47 | "GN": GroupNorm,
48 | "LN": LayerNorm,
49 | "IN": InstanceNorm,
50 | }[norm]
51 | return norm
52 |
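
A sketch of how get_norm is used; note that GroupNorm still needs its group count at construction time:

# String key -> normalization factory.
frozen_bn = get_norm("FrozenBN")(256)  # BatchNorm2d(256, freeze=True)
gn = get_norm("GN")(32, 256)           # GroupNorm(num_groups=32, num_channels=256)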
--------------------------------------------------------------------------------
/megengine_release/layers/det/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from .anchor import *
10 | from .box_head import *
11 | from .box_utils import *
12 | from .fpn import *
13 | from .loss import *
14 | from .matcher import *
15 | from .point_head import *
16 | from .pooler import *
17 | from .rcnn import *
18 | from .rpn import *
19 | from .sampling import *
20 |
21 | _EXCLUDE = {}
22 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
23 |
--------------------------------------------------------------------------------
/megengine_release/layers/det/anchor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import math
10 | from abc import ABCMeta, abstractmethod
11 | from typing import List
12 |
13 | import numpy as np
14 |
15 | import megengine.functional as F
16 | from megengine import Tensor, tensor
17 |
18 |
19 | def meshgrid(x, y):
20 | assert len(x.shape) == 1
21 | assert len(y.shape) == 1
22 | mesh_shape = (y.shape[0], x.shape[0])
23 | mesh_x = F.broadcast_to(x, mesh_shape)
24 | mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)
25 | return mesh_x, mesh_y
26 |
27 |
28 | def create_anchor_grid(featmap_size, offsets, stride, device):
29 | step_x, step_y = featmap_size
30 | shift = offsets * stride
31 |
32 | grid_x = F.arange(shift, step_x * stride + shift, step=stride, device=device)
33 | grid_y = F.arange(shift, step_y * stride + shift, step=stride, device=device)
34 | grids_x, grids_y = meshgrid(grid_y, grid_x)
35 | return grids_x.reshape(-1), grids_y.reshape(-1)
36 |
37 |
38 | class BaseAnchorGenerator(metaclass=ABCMeta):
39 | """base class for anchor generator.
40 | """
41 |
42 | def __init__(self):
43 | pass
44 |
45 | @property
46 | @abstractmethod
47 | def anchor_dim(self):
48 | pass
49 |
50 | @abstractmethod
51 | def generate_anchors_by_features(self, sizes, device) -> List[Tensor]:
52 | pass
53 |
54 | def __call__(self, featmaps):
55 | feat_sizes = [fmap.shape[-2:] for fmap in featmaps]
56 | return self.generate_anchors_by_features(feat_sizes, featmaps[0].device)
57 |
58 |
59 | class AnchorBoxGenerator(BaseAnchorGenerator):
60 | """default anchor box generator, usually used in anchor-based methods.
61 | This class generates anchors for each feature map level.
62 | Args:
63 | anchor_scales (list): anchor scales based on stride.
64 | The practical anchor scale is anchor_scale * stride
65 | anchor_ratios (list): anchor aspect ratios.
66 | strides (list): strides of inputs.
67 | offset (float): center point offset. default is 0.5.
68 | """
69 |
70 | # pylint: disable=dangerous-default-value
71 | def __init__(
72 | self,
73 | anchor_scales: list = [[32], [64], [128], [256], [512]],
74 | anchor_ratios: list = [[0.5, 1, 2]],
75 | strides: list = [4, 8, 16, 32, 64],
76 | offset: float = 0.5,
77 | ):
78 | super().__init__()
79 | self.anchor_scales = np.array(anchor_scales, dtype="float32")
80 | self.anchor_ratios = np.array(anchor_ratios, dtype="float32")
81 | self.strides = strides
82 | self.offset = offset
83 | self.num_features = len(strides)
84 |
85 | self.base_anchors = self._different_level_anchors(anchor_scales, anchor_ratios)
86 |
87 | @property
88 | def anchor_dim(self):
89 | return 4
90 |
91 | def _different_level_anchors(self, scales, ratios):
92 | if len(scales) == 1:
93 | scales *= self.num_features
94 | assert len(scales) == self.num_features
95 |
96 | if len(ratios) == 1:
97 | ratios *= self.num_features
98 | assert len(ratios) == self.num_features
99 | return [
100 | tensor(self.generate_base_anchors(scale, ratio))
101 | for scale, ratio in zip(scales, ratios)
102 | ]
103 |
104 | def generate_base_anchors(self, scales, ratios):
105 | base_anchors = []
106 | areas = [s ** 2.0 for s in scales]
107 | for area in areas:
108 | for ratio in ratios:
109 | w = math.sqrt(area / ratio)
110 | h = ratio * w
111 | # center-based anchor
112 | x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
113 | base_anchors.append([x0, y0, x1, y1])
114 | return base_anchors
115 |
116 | def generate_anchors_by_features(self, sizes, device):
117 | all_anchors = []
118 | assert len(sizes) == self.num_features, (
119 | "input features expected {}, got {}".format(self.num_features, len(sizes))
120 | )
121 | for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors):
122 | grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
123 | grids = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
124 | all_anchors.append(
125 | (F.expand_dims(grids, axis=1) + F.expand_dims(base_anchor, axis=0)).reshape(-1, 4)
126 | )
127 | return all_anchors
128 |
129 |
130 | class AnchorPointGenerator(BaseAnchorGenerator):
131 | """default anchor point generator, usually used in anchor-free methods.
132 | This class generates anchors for each feature map level.
133 | Args:
134 | num_anchors (int): number of anchors per location
135 | strides (list): strides of inputs.
136 | offset (float): center point offset. default is 0.5.
137 | """
138 |
139 | # pylint: disable=dangerous-default-value
140 | def __init__(
141 | self,
142 | num_anchors: int = 1,
143 | strides: list = [4, 8, 16, 32, 64],
144 | offset: float = 0.5,
145 | ):
146 | super().__init__()
147 | self.num_anchors = num_anchors
148 | self.strides = strides
149 | self.offset = offset
150 | self.num_features = len(strides)
151 |
152 | @property
153 | def anchor_dim(self):
154 | return 2
155 |
156 | def generate_anchors_by_features(self, sizes, device):
157 | all_anchors = []
158 | assert len(sizes) == self.num_features, (
159 | "input features expected {}, got {}".format(self.num_features, len(sizes))
160 | )
161 | for size, stride in zip(sizes, self.strides):
162 | grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
163 | grids = F.stack([grid_x, grid_y], axis=1)
164 | all_anchors.append(
165 | F.broadcast_to(
166 | F.expand_dims(grids, axis=1), (grids.shape[0], self.num_anchors, 2)
167 | ).reshape(-1, 2)
168 | ) # FIXME: need F.repeat
169 | return all_anchors
170 |
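
A quick sketch of the box generator on two dummy FPN levels (shapes are illustrative):

import numpy as np
import megengine as mge

# One scale and three aspect ratios per location -> 3 anchors per cell.
gen = AnchorBoxGenerator(
    anchor_scales=[[32], [64]],
    anchor_ratios=[[0.5, 1, 2]],
    strides=[8, 16],
)
feats = [
    mge.tensor(np.zeros((1, 256, 100, 152), dtype="float32")),
    mge.tensor(np.zeros((1, 256, 50, 76), dtype="float32")),
]
anchors = gen(feats)
print(anchors[0].shape)  # (100 * 152 * 3, 4)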
--------------------------------------------------------------------------------
/megengine_release/layers/det/box_head.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import math
10 | from typing import List
11 |
12 | import megengine.module as M
13 | from megengine import Tensor
14 |
15 | import layers
16 |
17 |
18 | class BoxHead(M.Module):
19 | """
20 | The head used when anchor boxes are adopted for object classification and box regression.
21 | """
22 |
23 | def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
24 | super().__init__()
25 |
26 | in_channels = input_shape[0].channels
27 | num_classes = cfg.num_classes
28 | num_convs = 4
29 | prior_prob = cfg.cls_prior_prob
30 | num_anchors = [
31 | len(cfg.anchor_scales[i]) * len(cfg.anchor_ratios[i])
32 | for i in range(len(input_shape))
33 | ]
34 |
35 | assert (
36 | len(set(num_anchors)) == 1
37 | ), "not support different number of anchors between levels"
38 | num_anchors = num_anchors[0]
39 |
40 | cls_subnet = []
41 | bbox_subnet = []
42 | for _ in range(num_convs):
43 | cls_subnet.append(
44 | M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
45 | )
46 | cls_subnet.append(M.ReLU())
47 | bbox_subnet.append(
48 | M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
49 | )
50 | bbox_subnet.append(M.ReLU())
51 |
52 | self.cls_subnet = M.Sequential(*cls_subnet)
53 | self.bbox_subnet = M.Sequential(*bbox_subnet)
54 | self.cls_score = M.Conv2d(
55 | in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
56 | )
57 | self.bbox_pred = M.Conv2d(
58 | in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
59 | )
60 |
61 | # Initialization
62 | for modules in [
63 | self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred
64 | ]:
65 | for layer in modules.modules():
66 | if isinstance(layer, M.Conv2d):
67 | M.init.normal_(layer.weight, mean=0, std=0.01)
68 | M.init.fill_(layer.bias, 0)
69 |
70 | # Use prior in model initialization to improve stability
71 | bias_value = -math.log((1 - prior_prob) / prior_prob)
72 | M.init.fill_(self.cls_score.bias, bias_value)
73 |
74 | def forward(self, features: List[Tensor]):
75 | logits, offsets = [], []
76 | for feature in features:
77 | logits.append(self.cls_score(self.cls_subnet(feature)))
78 | offsets.append(self.bbox_pred(self.bbox_subnet(feature)))
79 | return logits, offsets
80 |
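
A shape-check sketch for BoxHead; the SimpleNamespace cfg below is a stand-in carrying only the fields the head reads, not a real config class from this repo:

from types import SimpleNamespace

import numpy as np
import megengine as mge

cfg = SimpleNamespace(
    num_classes=80,
    cls_prior_prob=0.01,
    anchor_scales=[[32], [64], [128], [256], [512]],
    anchor_ratios=[[0.5, 1, 2]] * 5,
)
shapes = [layers.ShapeSpec(channels=256)] * 5
head = BoxHead(cfg, shapes)
feats = [mge.tensor(np.zeros((1, 256, s, s), dtype="float32")) for s in (64, 32, 16, 8, 4)]
logits, offsets = head(feats)
print(logits[0].shape)  # (1, 240, 64, 64): 3 anchors x 80 classes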
--------------------------------------------------------------------------------
/megengine_release/layers/det/box_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from abc import ABCMeta, abstractmethod
10 |
11 | import numpy as np
12 |
13 | import megengine.functional as F
14 | from megengine import Tensor
15 |
16 |
17 | class BoxCoderBase(metaclass=ABCMeta):
18 | """Boxcoder class.
19 | """
20 |
21 | def __init__(self):
22 | pass
23 |
24 | @abstractmethod
25 | def encode(self) -> Tensor:
26 | pass
27 |
28 | @abstractmethod
29 | def decode(self) -> Tensor:
30 | pass
31 |
32 |
33 | class BoxCoder(BoxCoderBase, metaclass=ABCMeta):
34 | # pylint: disable=dangerous-default-value
35 | def __init__(
36 | self,
37 | reg_mean=[0.0, 0.0, 0.0, 0.0],
38 | reg_std=[1.0, 1.0, 1.0, 1.0],
39 | ):
40 | """
41 | Args:
42 | reg_mean (np.ndarray): [dx_mean, dy_mean, dw_mean, dh_mean] or None
43 | reg_std (np.ndarray): [dx_std, dy_std, dw_std, dh_std] or None
44 |
45 | """
46 | self.reg_mean = np.array(reg_mean, dtype="float32")[None, :]
47 | self.reg_std = np.array(reg_std, dtype="float32")[None, :]
48 | super().__init__()
49 |
50 | @staticmethod
51 | def _box_ltrb_to_cs_opr(bbox, addaxis=None):
52 | """ transform the left-top right-bottom encoding bounding boxes
53 | to center and size encodings"""
54 | bbox_width = bbox[:, 2] - bbox[:, 0]
55 | bbox_height = bbox[:, 3] - bbox[:, 1]
56 | bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
57 | bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height
58 | if addaxis is None:
59 | return bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y
60 | else:
61 | return (
62 | F.expand_dims(bbox_width, addaxis),
63 | F.expand_dims(bbox_height, addaxis),
64 | F.expand_dims(bbox_ctr_x, addaxis),
65 | F.expand_dims(bbox_ctr_y, addaxis),
66 | )
67 |
68 | def encode(self, bbox: Tensor, gt: Tensor) -> Tensor:
69 | bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y = self._box_ltrb_to_cs_opr(bbox)
70 | gt_width, gt_height, gt_ctr_x, gt_ctr_y = self._box_ltrb_to_cs_opr(gt)
71 |
72 | target_dx = (gt_ctr_x - bbox_ctr_x) / bbox_width
73 | target_dy = (gt_ctr_y - bbox_ctr_y) / bbox_height
74 | target_dw = F.log(gt_width / bbox_width)
75 | target_dh = F.log(gt_height / bbox_height)
76 | target = F.stack([target_dx, target_dy, target_dw, target_dh], axis=1)
77 |
78 | target -= self.reg_mean
79 | target /= self.reg_std
80 | return target
81 |
82 | def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
83 | deltas *= self.reg_std
84 | deltas += self.reg_mean
85 |
86 | (
87 | anchor_width,
88 | anchor_height,
89 | anchor_ctr_x,
90 | anchor_ctr_y,
91 | ) = self._box_ltrb_to_cs_opr(anchors, 1)
92 | pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width
93 | pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height
94 | pred_width = anchor_width * F.exp(deltas[:, 2::4])
95 | pred_height = anchor_height * F.exp(deltas[:, 3::4])
96 |
97 | pred_x1 = pred_ctr_x - 0.5 * pred_width
98 | pred_y1 = pred_ctr_y - 0.5 * pred_height
99 | pred_x2 = pred_ctr_x + 0.5 * pred_width
100 | pred_y2 = pred_ctr_y + 0.5 * pred_height
101 |
102 | pred_box = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=2)
103 | pred_box = pred_box.reshape(pred_box.shape[0], -1)
104 |
105 | return pred_box
106 |
107 |
108 | class PointCoder(BoxCoderBase, metaclass=ABCMeta):
109 | def encode(self, point: Tensor, gt: Tensor) -> Tensor:
110 | return F.concat([point - gt[..., :2], gt[..., 2:] - point], axis=-1)
111 |
112 | def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
113 | return F.stack([
114 | F.expand_dims(anchors[:, 0], axis=1) - deltas[:, 0::4],
115 | F.expand_dims(anchors[:, 1], axis=1) - deltas[:, 1::4],
116 | F.expand_dims(anchors[:, 0], axis=1) + deltas[:, 2::4],
117 | F.expand_dims(anchors[:, 1], axis=1) + deltas[:, 3::4],
118 | ], axis=2).reshape(deltas.shape)
119 |
120 |
121 | def get_iou(boxes1: Tensor, boxes2: Tensor, return_ioa=False) -> Tensor:
122 | """
123 | Given two lists of boxes of size N and M,
124 | compute the IoU (intersection over union)
125 | between __all__ N x M pairs of boxes.
126 | The box order must be (xmin, ymin, xmax, ymax).
127 |
128 | Args:
129 | boxes1 (Tensor): boxes tensor with shape (N, 4)
130 | boxes2 (Tensor): boxes tensor with shape (M, 4)
131 | return_ioa (bool): whether to also return the intersection over the area of boxes1, default: False
132 |
133 | Returns:
134 | iou (Tensor): IoU matrix, shape (N,M).
135 | """
136 | b_box1 = F.expand_dims(boxes1, axis=1)
137 | b_box2 = F.expand_dims(boxes2, axis=0)
138 |
139 | iw = F.minimum(b_box1[:, :, 2], b_box2[:, :, 2]) - F.maximum(
140 | b_box1[:, :, 0], b_box2[:, :, 0]
141 | )
142 | ih = F.minimum(b_box1[:, :, 3], b_box2[:, :, 3]) - F.maximum(
143 | b_box1[:, :, 1], b_box2[:, :, 1]
144 | )
145 | inter = F.maximum(iw, 0) * F.maximum(ih, 0)
146 |
147 | area_box1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
148 | area_box2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
149 |
150 | union = F.expand_dims(area_box1, axis=1) + F.expand_dims(area_box2, axis=0) - inter
151 | overlaps = F.maximum(inter / union, 0)
152 |
153 | if return_ioa:
154 | ioa = F.maximum(inter / area_box1, 0)
155 | return overlaps, ioa
156 |
157 | return overlaps
158 |
159 |
160 | def get_clipped_boxes(boxes, hw):
161 | """ Clip the boxes into the image region."""
162 | # x1 >=0
163 | box_x1 = F.clip(boxes[:, 0::4], lower=0, upper=hw[1])
164 | # y1 >=0
165 | box_y1 = F.clip(boxes[:, 1::4], lower=0, upper=hw[0])
166 | # x2 < im_info[1]
167 | box_x2 = F.clip(boxes[:, 2::4], lower=0, upper=hw[1])
168 | # y2 < im_info[0]
169 | box_y2 = F.clip(boxes[:, 3::4], lower=0, upper=hw[0])
170 |
171 | clip_box = F.concat([box_x1, box_y1, box_x2, box_y2], axis=1)
172 |
173 | return clip_box
174 |
175 |
176 | def filter_boxes(boxes, size=0):
177 | width = boxes[:, 2] - boxes[:, 0]
178 | height = boxes[:, 3] - boxes[:, 1]
179 | keep = (width > size) & (height > size)
180 | return keep
181 |
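
A round-trip sketch for BoxCoder: with the default zero mean and unit std, decode inverts encode up to floating-point error:

import numpy as np
import megengine as mge

coder = BoxCoder()
anchors = mge.tensor(np.array([[0.0, 0.0, 10.0, 10.0]], dtype="float32"))
gt = mge.tensor(np.array([[1.0, 1.0, 9.0, 11.0]], dtype="float32"))
deltas = coder.encode(anchors, gt)       # (dx, dy, dw, dh) regression targets
restored = coder.decode(anchors, deltas) # ~[[1. 1. 9. 11.]]
print(restored.numpy())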
--------------------------------------------------------------------------------
/megengine_release/layers/det/fpn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ---------------------------------------------------------------------
16 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
17 | #
18 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
19 | #
20 | # Unless required by applicable law or agreed to in writing,
21 | # software distributed under the License is distributed on an
22 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | #
24 | # This file has been modified by Megvii ("Megvii Modifications").
25 | # All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
26 | # ---------------------------------------------------------------------
27 | import math
28 | from typing import List
29 |
30 | import megengine.functional as F
31 | import megengine.module as M
32 |
33 | import layers
34 |
35 |
36 | class FPN(M.Module):
37 | """
38 | This module implements Feature Pyramid Network.
39 | It creates pyramid features built on top of some input feature maps which
40 | are produced by the backbone networks like ResNet.
41 | """
42 |
43 | # pylint: disable=dangerous-default-value
44 | def __init__(
45 | self,
46 | bottom_up: M.Module,
47 | in_features: List[str],
48 | out_channels: int = 256,
49 | norm: str = None,
50 | top_block: M.Module = None,
51 | strides: List[int] = [8, 16, 32],
52 | channels: List[int] = [512, 1024, 2048],
53 | ):
54 | """
55 | Args:
56 | bottom_up (M.Module): module representing the bottom up sub-network.
57 | it generates multi-scale feature maps which are formatted as a
58 | dict like {'res3': res3_feature, 'res4': res4_feature}
59 | in_features (list[str]): list of input feature maps keys coming
60 | from the `bottom_up` which will be used in FPN.
61 | e.g. ['res3', 'res4', 'res5']
62 | out_channels (int): number of channels used in the output
63 | feature maps.
64 | norm (str): the normalization type.
65 | top_block (M.Module or None): the module built on top of the FPN layers.
66 | """
67 | super(FPN, self).__init__()
68 |
69 | in_strides = strides
70 | in_channels = channels
71 | norm = layers.get_norm(norm)
72 |
73 | use_bias = norm is None
74 | self.lateral_convs = list()
75 | self.output_convs = list()
76 |
77 | for idx, in_channels in enumerate(in_channels):
78 | lateral_norm = None if norm is None else norm(out_channels)
79 | output_norm = None if norm is None else norm(out_channels)
80 |
81 | lateral_conv = layers.Conv2d(
82 | in_channels,
83 | out_channels,
84 | kernel_size=1,
85 | bias=use_bias,
86 | norm=lateral_norm,
87 | )
88 | output_conv = layers.Conv2d(
89 | out_channels,
90 | out_channels,
91 | kernel_size=3,
92 | stride=1,
93 | padding=1,
94 | bias=use_bias,
95 | norm=output_norm,
96 | )
97 | M.init.msra_normal_(lateral_conv.weight, mode="fan_in")
98 | M.init.msra_normal_(output_conv.weight, mode="fan_in")
99 |
100 | if use_bias:
101 | M.init.fill_(lateral_conv.bias, 0)
102 | M.init.fill_(output_conv.bias, 0)
103 |
104 | stage = int(math.log2(in_strides[idx]))
105 |
106 | setattr(self, "fpn_lateral{}".format(stage), lateral_conv)
107 | setattr(self, "fpn_output{}".format(stage), output_conv)
108 | self.lateral_convs.insert(0, lateral_conv)
109 | self.output_convs.insert(0, output_conv)
110 |
111 | self.top_block = top_block
112 | self.in_features = in_features
113 | self.bottom_up = bottom_up
114 |
115 | # following common practice, FPN features are named "p<stage>",
116 | # like ["p2", "p3", ..., "p6"]
117 | self._out_feature_strides = {
118 | "p{}".format(int(math.log2(s))): s for s in in_strides
119 | }
120 |
121 | # top block output feature maps.
122 | if self.top_block is not None:
123 | for s in range(stage, stage + self.top_block.num_levels):
124 | self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)
125 |
126 | self._out_features = list(sorted(self._out_feature_strides.keys()))
127 | self._out_feature_channels = {k: out_channels for k in self._out_features}
128 |
129 | def forward(self, x):
130 | bottom_up_features = self.bottom_up.extract_features(x)
131 | x = [bottom_up_features[f] for f in self.in_features[::-1]]
132 |
133 | results = []
134 | prev_features = self.lateral_convs[0](x[0])
135 | results.append(self.output_convs[0](prev_features))
136 |
137 | for features, lateral_conv, output_conv in zip(
138 | x[1:], self.lateral_convs[1:], self.output_convs[1:]
139 | ):
140 | top_down_features = F.nn.interpolate(
141 | prev_features, features.shape[2:], mode="BILINEAR"
142 | )
143 | lateral_features = lateral_conv(features)
144 | prev_features = lateral_features + top_down_features
145 | results.insert(0, output_conv(prev_features))
146 |
147 | if self.top_block is not None:
148 | top_block_in_feature = bottom_up_features.get(
149 | self.top_block.in_feature, None
150 | )
151 | if top_block_in_feature is None:
152 | top_block_in_feature = results[
153 | self._out_features.index(self.top_block.in_feature)
154 | ]
155 | results.extend(self.top_block(top_block_in_feature))
156 |
157 | return dict(zip(self._out_features, results))
158 |
159 | def output_shape(self):
160 | return {
161 | name: layers.ShapeSpec(
162 | channels=self._out_feature_channels[name],
163 | stride=self._out_feature_strides[name],
164 | )
165 | for name in self._out_features
166 | }
167 |
168 |
169 | class FPNP6(M.Module):
170 | """
171 | Used in FPN to generate a downsampled P6 feature from P5.
172 | """
173 |
174 | def __init__(self, in_feature="p5"):
175 | super().__init__()
176 | self.num_levels = 1
177 | self.in_feature = in_feature
178 | self.pool = M.MaxPool2d(kernel_size=1, stride=2, padding=0)
179 |
180 | def forward(self, x):
181 | return [self.pool(x)]
182 |
183 |
184 | class LastLevelP6P7(M.Module):
185 | """
186 | This module is used in RetinaNet to generate the extra P6 and P7 levels
187 | from the C5 feature.
188 | """
189 |
190 | def __init__(self, in_channels: int, out_channels: int, in_feature="res5"):
191 | super().__init__()
192 | self.num_levels = 2
193 | if in_feature == "p5":
194 | assert in_channels == out_channels
195 | self.in_feature = in_feature
196 | self.p6 = M.Conv2d(in_channels, out_channels, 3, 2, 1)
197 | self.p7 = M.Conv2d(out_channels, out_channels, 3, 2, 1)
198 |
199 | def forward(self, x):
200 | p6 = self.p6(x)
201 | p7 = self.p7(F.relu(p6))
202 | return [p6, p7]
203 |
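
A small sketch for LastLevelP6P7 (input size is illustrative):

import numpy as np
import megengine as mge

# P6/P7 from a dummy C5 feature map, as in RetinaNet.
top_block = LastLevelP6P7(in_channels=2048, out_channels=256)
c5 = mge.tensor(np.zeros((1, 2048, 25, 38), dtype="float32"))
p6, p7 = top_block(c5)
print(p6.shape, p7.shape)  # (1, 256, 13, 19) (1, 256, 7, 10)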
--------------------------------------------------------------------------------
/megengine_release/layers/det/loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import megengine.functional as F
10 | from megengine import Tensor
11 |
12 |
13 | def binary_cross_entropy(logits: Tensor, targets: Tensor) -> Tensor:
14 | r"""Binary Cross Entropy
15 |
16 | Args:
17 | logits (Tensor):
18 | the predicted logits
19 | targets (Tensor):
20 | the assigned targets with the same shape as logits
21 |
22 | Returns:
23 | the calculated binary cross entropy.
24 | """
25 | return -(targets * F.logsigmoid(logits) + (1 - targets) * F.logsigmoid(-logits))
26 |
27 |
28 | def sigmoid_focal_loss(
29 | logits: Tensor, targets: Tensor, alpha: float = -1, gamma: float = 0,
30 | ) -> Tensor:
31 | r"""Focal Loss for Dense Object Detection:
32 |
33 |
34 | .. math::
35 |
36 | FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)
37 |
38 | Args:
39 | logits (Tensor):
40 | the predicted logits
41 | targets (Tensor):
42 | the assigned targets with the same shape as logits
43 | alpha (float):
44 | parameter to mitigate class imbalance. Default: -1
45 | gamma (float):
46 | parameter to mitigate easy/hard loss imbalance. Default: 0
47 |
48 | Returns:
49 | the calculated focal loss.
50 | """
51 | scores = F.sigmoid(logits)
52 | loss = binary_cross_entropy(logits, targets)
53 | if gamma != 0:
54 | loss *= (targets * (1 - scores) + (1 - targets) * scores) ** gamma
55 | if alpha >= 0:
56 | loss *= targets * alpha + (1 - targets) * (1 - alpha)
57 | return loss
58 |
59 |
60 | def smooth_l1_loss(pred: Tensor, target: Tensor, beta: float = 1.0) -> Tensor:
61 | r"""Smooth L1 Loss
62 |
63 | Args:
64 | pred (Tensor):
65 | the predictions
66 | target (Tensor):
67 | the assigned targets with the same shape as pred
68 | beta (int):
69 | the parameter of smooth l1 loss.
70 |
71 | Returns:
72 | the calculated smooth l1 loss.
73 | """
74 | x = pred - target
75 | abs_x = F.abs(x)
76 | if beta < 1e-5:
77 | loss = abs_x
78 | else:
79 | in_loss = 0.5 * x ** 2 / beta
80 | out_loss = abs_x - 0.5 * beta
81 | loss = F.where(abs_x < beta, in_loss, out_loss)
82 | return loss
83 |
84 |
85 | def iou_loss(
86 | pred: Tensor, target: Tensor, box_mode: str = "xyxy", loss_type: str = "iou", eps: float = 1e-8,
87 | ) -> Tensor:
88 | if box_mode == "ltrb":
89 | pred = F.concat([-pred[..., :2], pred[..., 2:]], axis=-1)
90 | target = F.concat([-target[..., :2], target[..., 2:]], axis=-1)
91 | elif box_mode != "xyxy":
92 | raise NotImplementedError
93 |
94 | pred_area = F.maximum(pred[..., 2] - pred[..., 0], 0) * F.maximum(
95 | pred[..., 3] - pred[..., 1], 0
96 | )
97 | target_area = F.maximum(target[..., 2] - target[..., 0], 0) * F.maximum(
98 | target[..., 3] - target[..., 1], 0
99 | )
100 |
101 | w_intersect = F.maximum(
102 | F.minimum(pred[..., 2], target[..., 2]) - F.maximum(pred[..., 0], target[..., 0]), 0
103 | )
104 | h_intersect = F.maximum(
105 | F.minimum(pred[..., 3], target[..., 3]) - F.maximum(pred[..., 1], target[..., 1]), 0
106 | )
107 |
108 | area_intersect = w_intersect * h_intersect
109 | area_union = pred_area + target_area - area_intersect
110 | ious = area_intersect / F.maximum(area_union, eps)
111 |
112 | if loss_type == "iou":
113 | loss = -F.log(F.maximum(ious, eps))
114 | elif loss_type == "linear_iou":
115 | loss = 1 - ious
116 | elif loss_type == "giou":
117 | g_w_intersect = F.maximum(pred[..., 2], target[..., 2]) - F.minimum(
118 | pred[..., 0], target[..., 0]
119 | )
120 | g_h_intersect = F.maximum(pred[..., 3], target[..., 3]) - F.minimum(
121 | pred[..., 1], target[..., 1]
122 | )
123 | ac_union = g_w_intersect * g_h_intersect
124 | gious = ious - (ac_union - area_union) / F.maximum(ac_union, eps)
125 | loss = 1 - gious
126 | return loss
127 |
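
A usage sketch for sigmoid_focal_loss with the standard RetinaNet settings (alpha=0.25, gamma=2); the loss is returned element-wise, so the caller reduces it:

import numpy as np
import megengine as mge

logits = mge.tensor(np.array([[2.0, -1.0], [0.5, 0.0]], dtype="float32"))
targets = mge.tensor(np.array([[1.0, 0.0], [0.0, 1.0]], dtype="float32"))
loss = sigmoid_focal_loss(logits, targets, alpha=0.25, gamma=2.0)
print(loss.sum())  # reduce (sum, or mean over positives) as the caller sees fit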
--------------------------------------------------------------------------------
/megengine_release/layers/det/matcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import megengine.functional as F
10 |
11 |
12 | class Matcher:
13 |
14 | def __init__(self, thresholds, labels, allow_low_quality_matches=False):
15 | assert len(thresholds) + 1 == len(labels), "thresholds and labels are not matched"
16 | assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
17 | thresholds.append(float("inf"))
18 | thresholds.insert(0, -float("inf"))
19 |
20 | self.thresholds = thresholds
21 | self.labels = labels
22 | self.allow_low_quality_matches = allow_low_quality_matches
23 |
24 | def __call__(self, matrix):
25 | """
26 | matrix (Tensor): a 2-D tensor of shape (N, M), where N is the number of
27 | GT boxes and M is the number of anchors.
28 | """
29 | assert len(matrix.shape) == 2
30 | max_scores = matrix.max(axis=0)
31 | match_indices = F.argmax(matrix, axis=0)
32 |
33 | # default ignore label: -1
34 | labels = F.full_like(match_indices, -1)
35 |
36 | for label, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
37 | mask = (max_scores >= low) & (max_scores < high)
38 | labels[mask] = label
39 |
40 | if self.allow_low_quality_matches:
41 | mask = (matrix == F.max(matrix, axis=1, keepdims=True)).sum(axis=0) > 0
42 | labels[mask] = 1
43 |
44 | return match_indices, labels
45 |
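
A worked sketch of Matcher with the usual RPN-style bands: IoU < 0.3 is background (0), [0.3, 0.7) is ignored (-1), >= 0.7 is foreground (1):

import numpy as np
import megengine as mge

# Two GT boxes (rows) vs. four anchors (columns); entries are IoUs.
iou = mge.tensor(np.array([
    [0.9, 0.4, 0.1, 0.0],
    [0.2, 0.6, 0.3, 0.0],
], dtype="float32"))
matcher = Matcher([0.3, 0.7], [0, -1, 1], allow_low_quality_matches=False)
match_indices, labels = matcher(iou)
print(match_indices.numpy())  # [0 1 1 0]: best GT per anchor
print(labels.numpy())         # [ 1 -1 -1  0]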
--------------------------------------------------------------------------------
/megengine_release/layers/det/point_head.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import math
10 | from typing import List
11 |
12 | import numpy as np
13 |
14 | import megengine as mge
15 | import megengine.functional as F
16 | import megengine.module as M
17 | from megengine import Tensor
18 | from megengine.module.normalization import GroupNorm
19 |
20 | import layers
21 |
22 |
23 | class PointHead(M.Module):
24 | """
25 | The head used when anchor points are adopted for object classification and box regression.
26 | """
27 |
28 | def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
29 | super().__init__()
30 | self.stride_list = cfg.stride
31 |
32 | in_channels = input_shape[0].channels
33 | num_classes = cfg.num_classes
34 | num_convs = 4
35 | prior_prob = cfg.cls_prior_prob
36 | num_anchors = [cfg.num_anchors] * len(input_shape)
37 |
38 | assert (
39 | len(set(num_anchors)) == 1
40 | ), "not support different number of anchors between levels"
41 | num_anchors = num_anchors[0]
42 |
43 | cls_subnet = []
44 | bbox_subnet = []
45 | for _ in range(num_convs):
46 | cls_subnet.append(
47 | M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
48 | )
49 | cls_subnet.append(GroupNorm(32, in_channels))
50 | cls_subnet.append(M.ReLU())
51 | bbox_subnet.append(
52 | M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
53 | )
54 | bbox_subnet.append(GroupNorm(32, in_channels))
55 | bbox_subnet.append(M.ReLU())
56 |
57 | self.cls_subnet = M.Sequential(*cls_subnet)
58 | self.bbox_subnet = M.Sequential(*bbox_subnet)
59 | self.cls_score = M.Conv2d(
60 | in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
61 | )
62 | self.bbox_pred = M.Conv2d(
63 | in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
64 | )
65 | self.ctrness = M.Conv2d(
66 | in_channels, num_anchors * 1, kernel_size=3, stride=1, padding=1
67 | )
68 |
69 | # Initialization
70 | for modules in [
71 | self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred,
72 | self.ctrness
73 | ]:
74 | for layer in modules.modules():
75 | if isinstance(layer, M.Conv2d):
76 | M.init.normal_(layer.weight, mean=0, std=0.01)
77 | M.init.fill_(layer.bias, 0)
78 |
79 | # Use prior in model initialization to improve stability
80 | bias_value = -math.log((1 - prior_prob) / prior_prob)
81 | M.init.fill_(self.cls_score.bias, bias_value)
82 |
83 | self.scale_list = mge.Parameter(np.ones(len(self.stride_list), dtype="float32"))
84 |
85 | def forward(self, features: List[Tensor]):
86 | logits, offsets, ctrness = [], [], []
87 | for feature, scale, stride in zip(features, self.scale_list, self.stride_list):
88 | logits.append(self.cls_score(self.cls_subnet(feature)))
89 | bbox_subnet = self.bbox_subnet(feature)
90 | offsets.append(F.relu(self.bbox_pred(bbox_subnet) * scale) * stride)
91 | ctrness.append(self.ctrness(bbox_subnet))
92 | return logits, offsets, ctrness
93 |
--------------------------------------------------------------------------------
/megengine_release/layers/det/pooler.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import math
10 |
11 | import megengine.functional as F
12 |
13 |
14 | def roi_pool(
15 | rpn_fms, rois, stride, pool_shape, pooler_type="roi_align",
16 | ):
17 | rois = rois.detach()
18 | assert len(stride) == len(rpn_fms)
19 | canonical_level = 4
20 | canonical_box_size = 224
21 | min_level = int(math.log2(stride[0]))
22 | max_level = int(math.log2(stride[-1]))
23 |
24 | num_fms = len(rpn_fms)
25 | box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
26 | assigned_level = F.floor(
27 | canonical_level + F.log(F.sqrt(box_area) / canonical_box_size) / math.log(2)
28 | ).astype("int32")
29 | assigned_level = F.minimum(assigned_level, max_level)
30 | assigned_level = F.maximum(assigned_level, min_level)
31 | assigned_level = assigned_level - min_level
32 |
33 | # avoid empty assignment
34 | assigned_level = F.concat(
35 | [assigned_level, F.arange(num_fms, dtype="int32", device=assigned_level.device)],
36 | )
37 | rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])
38 |
39 | pool_list, inds_list = [], []
40 | for i in range(num_fms):
41 | _, inds = F.cond_take(assigned_level == i, assigned_level)
42 | level_rois = rois[inds]
43 |
44 | if pooler_type == "roi_pool":
45 | pool_fm = F.nn.roi_pooling(
46 | rpn_fms[i], level_rois, pool_shape, mode="max", scale=1.0 / stride[i]
47 | )
48 | elif pooler_type == "roi_align":
49 | pool_fm = F.nn.roi_align(
50 | rpn_fms[i],
51 | level_rois,
52 | pool_shape,
53 | mode="average",
54 | spatial_scale=1.0 / stride[i],
55 | sample_points=2,
56 | aligned=True,
57 | )
58 | pool_list.append(pool_fm)
59 | inds_list.append(inds)
60 |
61 | fm_order = F.argsort(F.concat(inds_list, axis=0))
62 | pool_feature = F.concat(pool_list, axis=0)
63 | pool_feature = pool_feature[fm_order][:-num_fms]
64 |
65 | return pool_feature
66 |
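
A shape sketch for roi_pool on two dummy FPN levels; rois are (batch_id, x1, y1, x2, y2), and the sizes below are illustrative:

import numpy as np
import megengine as mge

fms = [
    mge.tensor(np.random.rand(1, 256, 56, 56).astype("float32")),  # stride 4
    mge.tensor(np.random.rand(1, 256, 28, 28).astype("float32")),  # stride 8
]
rois = mge.tensor(np.array([[0, 8, 8, 72, 72]], dtype="float32"))
feat = roi_pool(fms, rois, stride=[4, 8], pool_shape=(7, 7))
print(feat.shape)  # (1, 256, 7, 7)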
--------------------------------------------------------------------------------
/megengine_release/layers/det/rcnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import megengine as mge
10 | import megengine.functional as F
11 | import megengine.module as M
12 |
13 | import layers
14 |
15 |
16 | class RCNN(M.Module):
17 |
18 | def __init__(self, cfg):
19 | super().__init__()
20 | self.cfg = cfg
21 | self.box_coder = layers.BoxCoder(cfg.rcnn_reg_mean, cfg.rcnn_reg_std)
22 |
23 | # roi head
24 | self.in_features = cfg.rcnn_in_features
25 | self.stride = cfg.rcnn_stride
26 | self.pooling_method = cfg.pooling_method
27 | self.pooling_size = cfg.pooling_size
28 |
29 | self.fc1 = M.Linear(256 * self.pooling_size[0] * self.pooling_size[1], 1024)
30 | self.fc2 = M.Linear(1024, 1024)
31 | for l in [self.fc1, self.fc2]:
32 | M.init.normal_(l.weight, std=0.01)
33 | M.init.fill_(l.bias, 0)
34 |
35 | # box predictor
36 | self.pred_cls = M.Linear(1024, cfg.num_classes + 1)
37 | self.pred_delta = M.Linear(1024, cfg.num_classes * 4)
38 | M.init.normal_(self.pred_cls.weight, std=0.01)
39 | M.init.normal_(self.pred_delta.weight, std=0.001)
40 | for l in [self.pred_cls, self.pred_delta]:
41 | M.init.fill_(l.bias, 0)
42 |
43 | def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
44 | rcnn_rois, labels, bbox_targets = self.get_ground_truth(
45 | rcnn_rois, im_info, gt_boxes
46 | )
47 |
48 | fpn_fms = [fpn_fms[x] for x in self.in_features]
49 | pool_features = layers.roi_pool(
50 | fpn_fms, rcnn_rois, self.stride, self.pooling_size, self.pooling_method,
51 | )
52 | flatten_feature = F.flatten(pool_features, start_axis=1)
53 | roi_feature = F.relu(self.fc1(flatten_feature))
54 | roi_feature = F.relu(self.fc2(roi_feature))
55 | pred_logits = self.pred_cls(roi_feature)
56 | pred_offsets = self.pred_delta(roi_feature)
57 |
58 | if self.training:
59 | # loss for rcnn classification
60 | loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)
61 | # loss for rcnn regression
62 | pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
63 | num_samples = labels.shape[0]
64 | fg_mask = labels > 0
65 | loss_rcnn_bbox = layers.smooth_l1_loss(
66 | pred_offsets[fg_mask, labels[fg_mask] - 1],
67 | bbox_targets[fg_mask],
68 | self.cfg.rcnn_smooth_l1_beta,
69 | ).sum() / F.maximum(num_samples, mge.tensor(1))
70 |
71 | loss_dict = {
72 | "loss_rcnn_cls": loss_rcnn_cls,
73 | "loss_rcnn_bbox": loss_rcnn_bbox,
74 | }
75 | return loss_dict
76 | else:
77 | # slice 1 for removing background
78 | pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
79 | pred_offsets = pred_offsets.reshape(-1, 4)
80 | target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
81 | # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
82 | base_rois = F.broadcast_to(
83 | F.expand_dims(rcnn_rois[:, 1:5], axis=1), target_shape).reshape(-1, 4)
84 | pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
85 | return pred_bbox, pred_scores
86 |
87 | def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
88 | if not self.training:
89 | return rpn_rois, None, None
90 |
91 | return_rois = []
92 | return_labels = []
93 | return_bbox_targets = []
94 |
95 | # get per image proposals and gt_boxes
96 | for bid in range(gt_boxes.shape[0]):
97 | num_valid_boxes = im_info[bid, 4].astype("int32")
98 | gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
99 | batch_inds = F.full((gt_boxes_per_img.shape[0], 1), bid)
100 | gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
101 | batch_roi_mask = rpn_rois[:, 0] == bid
102 | # all_rois : [batch_id, x1, y1, x2, y2]
103 | all_rois = F.concat([rpn_rois[batch_roi_mask], gt_rois])
104 |
105 | overlaps = layers.get_iou(all_rois[:, 1:], gt_boxes_per_img)
106 |
107 | max_overlaps = overlaps.max(axis=1)
108 | gt_assignment = F.argmax(overlaps, axis=1).astype("int32")
109 | labels = gt_boxes_per_img[gt_assignment, 4]
110 |
111 | # ---------------- get the fg/bg labels for each roi ---------------#
112 | fg_mask = (max_overlaps >= self.cfg.fg_threshold) & (labels >= 0)
113 | bg_mask = (
114 | (max_overlaps >= self.cfg.bg_threshold_low)
115 | & (max_overlaps < self.cfg.bg_threshold_high)
116 | )
117 |
118 | num_fg_rois = int(self.cfg.num_rois * self.cfg.fg_ratio)
119 | fg_inds_mask = layers.sample_labels(fg_mask, num_fg_rois, True, False)
120 | num_bg_rois = int(self.cfg.num_rois - fg_inds_mask.sum())
121 | bg_inds_mask = layers.sample_labels(bg_mask, num_bg_rois, True, False)
122 |
123 | labels[bg_inds_mask] = 0
124 |
125 | keep_mask = fg_inds_mask | bg_inds_mask
126 | labels = labels[keep_mask].astype("int32")
127 | rois = all_rois[keep_mask]
128 | target_boxes = gt_boxes_per_img[gt_assignment[keep_mask], :4]
129 | bbox_targets = self.box_coder.encode(rois[:, 1:], target_boxes)
130 | bbox_targets = bbox_targets.reshape(-1, 4)
131 |
132 | return_rois.append(rois)
133 | return_labels.append(labels)
134 | return_bbox_targets.append(bbox_targets)
135 |
136 | return (
137 | F.concat(return_rois, axis=0).detach(),
138 | F.concat(return_labels, axis=0).detach(),
139 | F.concat(return_bbox_targets, axis=0).detach(),
140 | )
141 |
--------------------------------------------------------------------------------
/megengine_release/layers/det/rpn.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import megengine.functional as F
10 | import megengine.module as M
11 |
12 | import layers
13 |
14 |
15 | class RPN(M.Module):
16 | def __init__(self, cfg):
17 | super().__init__()
18 | self.cfg = cfg
19 | self.box_coder = layers.BoxCoder(cfg.rpn_reg_mean, cfg.rpn_reg_std)
20 |
21 | # check anchor settings
22 | assert len(set(len(x) for x in cfg.anchor_scales)) == 1
23 | assert len(set(len(x) for x in cfg.anchor_ratios)) == 1
24 | self.num_cell_anchors = len(cfg.anchor_scales[0]) * len(cfg.anchor_ratios[0])
25 |
26 | rpn_channel = cfg.rpn_channel
27 | self.in_features = cfg.rpn_in_features
28 |
29 | self.anchor_generator = layers.AnchorBoxGenerator(
30 | anchor_scales=cfg.anchor_scales,
31 | anchor_ratios=cfg.anchor_ratios,
32 | strides=cfg.rpn_stride,
33 | offset=self.cfg.anchor_offset,
34 | )
35 |
36 | self.matcher = layers.Matcher(
37 | cfg.match_thresholds, cfg.match_labels, cfg.match_allow_low_quality
38 | )
39 |
40 | self.rpn_conv = M.Conv2d(256, rpn_channel, kernel_size=3, stride=1, padding=1)
41 | self.rpn_cls_score = M.Conv2d(
42 | rpn_channel, self.num_cell_anchors, kernel_size=1, stride=1
43 | )
44 | self.rpn_bbox_offsets = M.Conv2d(
45 | rpn_channel, self.num_cell_anchors * 4, kernel_size=1, stride=1
46 | )
47 |
48 | for l in [self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets]:
49 | M.init.normal_(l.weight, std=0.01)
50 | M.init.fill_(l.bias, 0)
51 |
52 | def forward(self, features, im_info, boxes=None):
53 | # prediction
54 | features = [features[x] for x in self.in_features]
55 |
56 | # get anchors
57 | anchors_list = self.anchor_generator(features)
58 |
59 | pred_cls_logit_list = []
60 | pred_bbox_offset_list = []
61 | for x in features:
62 | t = F.relu(self.rpn_conv(x))
63 | scores = self.rpn_cls_score(t)
64 | pred_cls_logit_list.append(
65 | scores.reshape(
66 | scores.shape[0],
67 | self.num_cell_anchors,
68 | scores.shape[2],
69 | scores.shape[3],
70 | )
71 | )
72 | bbox_offsets = self.rpn_bbox_offsets(t)
73 | pred_bbox_offset_list.append(
74 | bbox_offsets.reshape(
75 | bbox_offsets.shape[0],
76 | self.num_cell_anchors,
77 | 4,
78 | bbox_offsets.shape[2],
79 | bbox_offsets.shape[3],
80 | )
81 | )
82 | # get rois from the predictions
83 | rpn_rois = self.find_top_rpn_proposals(
84 | pred_cls_logit_list, pred_bbox_offset_list, anchors_list, im_info
85 | )
86 |
87 | if self.training:
88 | rpn_labels, rpn_offsets = self.get_ground_truth(
89 | anchors_list, boxes, im_info[:, 4].astype("int32")
90 | )
91 | pred_cls_logits, pred_bbox_offsets = self.merge_rpn_score_box(
92 | pred_cls_logit_list, pred_bbox_offset_list
93 | )
94 |
95 | fg_mask = rpn_labels > 0
96 | valid_mask = rpn_labels >= 0
97 | num_valid = valid_mask.sum()
98 |
99 | # rpn classification loss
100 | loss_rpn_cls = F.loss.binary_cross_entropy(
101 | pred_cls_logits[valid_mask], rpn_labels[valid_mask]
102 | )
103 |
104 | # rpn regression loss
105 | loss_rpn_bbox = layers.smooth_l1_loss(
106 | pred_bbox_offsets[fg_mask],
107 | rpn_offsets[fg_mask],
108 | self.cfg.rpn_smooth_l1_beta,
109 | ).sum() / F.maximum(num_valid, 1)
110 |
111 | loss_dict = {"loss_rpn_cls": loss_rpn_cls, "loss_rpn_bbox": loss_rpn_bbox}
112 | return rpn_rois, loss_dict
113 | else:
114 | return rpn_rois
115 |
116 | def find_top_rpn_proposals(
117 | self, rpn_cls_score_list, rpn_bbox_offset_list, anchors_list, im_info
118 | ):
119 | prev_nms_top_n = (
120 | self.cfg.train_prev_nms_top_n
121 | if self.training
122 | else self.cfg.test_prev_nms_top_n
123 | )
124 | post_nms_top_n = (
125 | self.cfg.train_post_nms_top_n
126 | if self.training
127 | else self.cfg.test_post_nms_top_n
128 | )
129 |
130 | return_rois = []
131 |
132 | for bid in range(im_info.shape[0]):
133 | batch_proposal_list = []
134 | batch_score_list = []
135 | batch_level_list = []
136 | for l, (rpn_cls_score, rpn_bbox_offset, anchors) in enumerate(
137 | zip(rpn_cls_score_list, rpn_bbox_offset_list, anchors_list)
138 | ):
139 | # get proposals and scores
140 | offsets = rpn_bbox_offset[bid].transpose(2, 3, 0, 1).reshape(-1, 4)
141 | proposals = self.box_coder.decode(anchors, offsets)
142 |
143 | scores = rpn_cls_score[bid].transpose(1, 2, 0).flatten()
144 | scores = scores.detach()  # detach() is not in-place; rebind to stop gradients through scores
145 | # prev nms top n
146 | scores, order = F.topk(scores, descending=True, k=prev_nms_top_n)
147 | proposals = proposals[order]
148 |
149 | batch_proposal_list.append(proposals)
150 | batch_score_list.append(scores)
151 | batch_level_list.append(F.full_like(scores, l))
152 |
153 | # gather proposals, scores, level
154 | proposals = F.concat(batch_proposal_list, axis=0)
155 | scores = F.concat(batch_score_list, axis=0)
156 | levels = F.concat(batch_level_list, axis=0)
157 |
158 | proposals = layers.get_clipped_boxes(proposals, im_info[bid])
159 | # filter invalid proposals and apply total level nms
160 | keep_mask = layers.filter_boxes(proposals)
161 | proposals = proposals[keep_mask]
162 | scores = scores[keep_mask]
163 | levels = levels[keep_mask]
164 | nms_keep_inds = layers.batched_nms(
165 | proposals, scores, levels, self.cfg.rpn_nms_threshold, post_nms_top_n
166 | )
167 |
168 | # generate rois to rcnn head, rois shape (N, 5), info [batch_id, x1, y1, x2, y2]
169 | rois = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
170 | rois = rois[nms_keep_inds]
171 | batch_inds = F.full((rois.shape[0], 1), bid)
172 | batch_rois = F.concat([batch_inds, rois[:, :4]], axis=1)
173 | return_rois.append(batch_rois)
174 |
175 | return_rois = F.concat(return_rois, axis=0)
176 | return return_rois.detach()
177 |
178 | def merge_rpn_score_box(self, rpn_cls_score_list, rpn_bbox_offset_list):
179 | final_rpn_cls_score_list = []
180 | final_rpn_bbox_offset_list = []
181 |
182 | for bid in range(rpn_cls_score_list[0].shape[0]):
183 | batch_rpn_cls_score_list = []
184 | batch_rpn_bbox_offset_list = []
185 |
186 | for i in range(len(self.in_features)):
187 | rpn_cls_scores = rpn_cls_score_list[i][bid].transpose(1, 2, 0).flatten()
188 | rpn_bbox_offsets = (
189 | rpn_bbox_offset_list[i][bid].transpose(2, 3, 0, 1).reshape(-1, 4)
190 | )
191 |
192 | batch_rpn_cls_score_list.append(rpn_cls_scores)
193 | batch_rpn_bbox_offset_list.append(rpn_bbox_offsets)
194 |
195 | batch_rpn_cls_scores = F.concat(batch_rpn_cls_score_list, axis=0)
196 | batch_rpn_bbox_offsets = F.concat(batch_rpn_bbox_offset_list, axis=0)
197 |
198 | final_rpn_cls_score_list.append(batch_rpn_cls_scores)
199 | final_rpn_bbox_offset_list.append(batch_rpn_bbox_offsets)
200 |
201 | final_rpn_cls_scores = F.concat(final_rpn_cls_score_list, axis=0)
202 | final_rpn_bbox_offsets = F.concat(final_rpn_bbox_offset_list, axis=0)
203 | return final_rpn_cls_scores, final_rpn_bbox_offsets
204 |
205 | def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
206 | anchors = F.concat(anchors_list, axis=0)
207 | labels_list = []
208 | offsets_list = []
209 |
210 | for bid in range(batched_gt_boxes.shape[0]):
211 | gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]
212 |
213 | overlaps = layers.get_iou(gt_boxes[:, :4], anchors)
214 | matched_indices, labels = self.matcher(overlaps)
215 |
216 | offsets = self.box_coder.encode(anchors, gt_boxes[matched_indices, :4])
217 |
218 | # sample positive labels
219 | num_positive = int(self.cfg.num_sample_anchors * self.cfg.positive_anchor_ratio)
220 | labels = layers.sample_labels(labels, num_positive, 1, -1)
221 | # sample negative labels
222 | num_positive = (labels == 1).sum().astype("int32")
223 | num_negative = self.cfg.num_sample_anchors - num_positive
224 | labels = layers.sample_labels(labels, num_negative, 0, -1)
225 |
226 | labels_list.append(labels)
227 | offsets_list.append(offsets)
228 |
229 | return (
230 | F.concat(labels_list, axis=0).detach(),
231 | F.concat(offsets_list, axis=0).detach(),
232 | )
233 |
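For reference, a small sketch of the anchor bookkeeping that the constructor asserts on; the values are the Faster R-CNN defaults from `faster_rcnn.py` later in this dump:

```
# Why the per-level scale/ratio lists must share a length: every spatial cell
# then owns the same number of anchors across FPN levels.
anchor_scales = [[x] for x in [32, 64, 128, 256, 512]]  # one scale per level
anchor_ratios = [[0.5, 1, 2]]                           # shared ratio list

assert len(set(len(x) for x in anchor_scales)) == 1
assert len(set(len(x) for x in anchor_ratios)) == 1
num_cell_anchors = len(anchor_scales[0]) * len(anchor_ratios[0])
print(num_cell_anchors)  # 3 anchors per cell
```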
--------------------------------------------------------------------------------
/megengine_release/layers/det/sampling.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import megengine.functional as F
10 | from megengine.random import uniform
11 |
12 |
13 | def sample_labels(labels, num_samples, label_value, ignore_label=-1):
14 | """sample N labels with label value = sample_labels
15 |
16 | Args:
17 | labels(Tensor): shape of label is (N,)
18 | num_samples(int):
19 | label_value(int):
20 |
21 | Returns:
22 | label(Tensor): label after sampling
23 | """
24 | assert labels.ndim == 1, "Only tensor of dim 1 is supported."
25 | mask = (labels == label_value)
26 | num_valid = mask.sum()
27 | if num_valid <= num_samples:
28 | return labels
29 |
30 | random_tensor = F.zeros_like(labels).astype("float32")
31 | random_tensor[mask] = uniform(size=num_valid)
32 | _, invalid_inds = F.topk(random_tensor, k=num_samples - num_valid)  # negative k takes the k largest (MegEngine convention), i.e. the surplus entries to ignore
33 |
34 | labels[invalid_inds] = ignore_label
35 | return labels
36 |
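The MegEngine code above is terse; a plain NumPy re-statement of the same sampling semantics (an illustration, not the library implementation) may help:

```
import numpy as np

def sample_labels_np(labels, num_samples, label_value, ignore_label=-1):
    # keep at most `num_samples` entries equal to `label_value`; set a random
    # surplus to `ignore_label` (mirrors the MegEngine version above)
    inds = np.flatnonzero(labels == label_value)
    if inds.size <= num_samples:
        return labels
    drop = np.random.choice(inds, size=inds.size - num_samples, replace=False)
    labels[drop] = ignore_label
    return labels

labels = np.array([1, 1, 1, 0, 1, 0], dtype=np.int32)
print(sample_labels_np(labels, num_samples=2, label_value=1))  # exactly two 1s survive
```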
--------------------------------------------------------------------------------
/megengine_release/layers/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 |
--------------------------------------------------------------------------------
/megengine_release/layers/tools/data_mapper.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from megengine.data.dataset import COCO, Objects365, PascalVOC
10 |
11 | data_mapper = dict(
12 | coco=COCO,
13 | objects365=Objects365,
14 | voc=PascalVOC,
15 | )
16 |
--------------------------------------------------------------------------------
/megengine_release/layers/tools/inference.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import argparse
10 |
11 | import cv2
12 |
13 | import megengine as mge
14 |
15 | from layers.tools.data_mapper import data_mapper
16 | from layers.tools.utils import DetEvaluator, import_from_file
17 |
18 | logger = mge.get_logger(__name__)
19 | logger.setLevel("INFO")
20 |
21 |
22 | def make_parser():
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument(
25 | "-f", "--file", default="net.py", type=str, help="net description file"
26 | )
27 | parser.add_argument(
28 | "-w", "--weight_file", default=None, type=str, help="weights file",
29 | )
30 | parser.add_argument("-i", "--image", type=str)
31 | return parser
32 |
33 |
34 | def main():
35 | parser = make_parser()
36 | args = parser.parse_args()
37 |
38 | current_network = import_from_file(args.file)
39 | cfg = current_network.Cfg()
40 | cfg.backbone_pretrained = False
41 | model = current_network.Net(cfg)
42 | model.eval()
43 |
44 | state_dict = mge.load(args.weight_file)
45 | if "state_dict" in state_dict:
46 | state_dict = state_dict["state_dict"]
47 | model.load_state_dict(state_dict)
48 |
49 | evaluator = DetEvaluator(model)
50 |
51 | ori_img = cv2.imread(args.image)
52 | image, im_info = DetEvaluator.process_inputs(
53 | ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size,
54 | )
55 | pred_res = evaluator.predict(
56 | image=mge.tensor(image),
57 | im_info=mge.tensor(im_info)
58 | )
59 | res_img = DetEvaluator.vis_det(
60 | ori_img,
61 | pred_res,
62 | is_show_label=True,
63 | classes=data_mapper[cfg.test_dataset["name"]].class_names,
64 | )
65 | cv2.imwrite("results.jpg", res_img)
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/megengine_release/layers/tools/nms.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import numpy as np
10 |
11 |
12 | def py_cpu_nms(dets, thresh):
13 | x1 = np.ascontiguousarray(dets[:, 0])
14 | y1 = np.ascontiguousarray(dets[:, 1])
15 | x2 = np.ascontiguousarray(dets[:, 2])
16 | y2 = np.ascontiguousarray(dets[:, 3])
17 |
18 | areas = (x2 - x1) * (y2 - y1)
19 | order = dets[:, 4].argsort()[::-1]
20 | keep = list()
21 |
22 | while order.size > 0:
23 | pick_idx = order[0]
24 | keep.append(pick_idx)
25 | order = order[1:]
26 |
27 | xx1 = np.maximum(x1[pick_idx], x1[order])
28 | yy1 = np.maximum(y1[pick_idx], y1[order])
29 | xx2 = np.minimum(x2[pick_idx], x2[order])
30 | yy2 = np.minimum(y2[pick_idx], y2[order])
31 |
32 | inter = np.maximum(xx2 - xx1, 0) * np.maximum(yy2 - yy1, 0)
33 | iou = inter / np.maximum(areas[pick_idx] + areas[order] - inter, 1e-5)
34 |
35 | order = order[iou <= thresh]
36 |
37 | return keep
38 |
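A quick usage sketch (assuming `py_cpu_nms` from this module is in scope):

```
import numpy as np

dets = np.array([
    [0.0, 0.0, 10.0, 10.0, 0.9],    # highest score -> kept
    [1.0, 1.0, 9.0, 9.0, 0.8],      # IoU 0.64 with the first box -> suppressed
    [20.0, 20.0, 30.0, 30.0, 0.7],  # disjoint -> kept
])
print(py_cpu_nms(dets, thresh=0.5))  # [0, 2]
```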
--------------------------------------------------------------------------------
/megengine_release/models/ICD/ICD.py:
--------------------------------------------------------------------------------
1 | import megengine
2 | import megengine as mge
3 | from typing import Dict, List, Tuple
4 | import megengine.module as M
5 | import megengine.functional as F
6 | import numpy as np
7 | from .encoder import InstanceRegEncoder
8 | from .decoder import DecoderWrapper
9 | from .utility import PositionEmbeddingSine
10 |
11 |
12 | def mask_out_padding(fpn_features, images_sizes, images):
13 | # Modified from DETR: https://github.com/facebookresearch/detr/blob/main/LICENSE
14 | # NOTE: zeros for foreground
15 | image_sizes = [(images_sizes[i, 2], images_sizes[i, 3]) for i in range(images_sizes.shape[0])]
16 | device = images_sizes.device
17 | h_, w_ = images.shape[-2:]
18 | masks = {}
19 | #assert len(feature_shapes) == len(self.feature_strides)
20 | for k, feat in fpn_features.items():
21 | # stride = 2 ** int(k[-1])
22 | N, _, H, W = feat.shape
23 | masks_per_feature_level = F.ones(
24 | (N, H, W), dtype='bool', device=device)
25 | stride = (h_ / H + w_ / W) / 2
26 | for img_idx, (h, w) in enumerate(image_sizes):
27 | masks_per_feature_level[
28 | img_idx,
29 | : int(np.ceil(float(h) / stride)),
30 | : int(np.ceil(float(w) / stride)),
31 | ] = 0
32 | masks[k] = F.expand_dims(masks_per_feature_level, 1) #masks_per_feature_level.unsqueeze(1)
33 | return masks
34 |
35 |
36 | class ICD(M.Module):
37 | def __init__(self, hidden_dim, cfg):
38 | super().__init__()
39 | self.pos_embedding = PositionEmbeddingSine(
40 | num_pos_feats=hidden_dim // 2,
41 | normalize=True)
42 |
43 | self.ins_encoder = InstanceRegEncoder(cfg)
44 | self.attention_module_aux = DecoderWrapper(cfg)
45 | self.attention_module_distill = DecoderWrapper(cfg)
46 | # NOTE(peizhen): 1e-05 is not large enough and empirically might cause sqrt(neg) nan
47 | self.distill_norm_ = M.LayerNorm(
48 | [hidden_dim // cfg.distiller.ATT_HEADS], eps=1e-04, affine=False)
49 | #self.distill_norm_ = LayerNorm([hidden_dim // cfg.distiller.ATT_HEADS])
50 | self.feat_keys = cfg.distiller.FEAT_KEYS
51 | self.weight_value = cfg.distiller.WEIGHT_VALUE
52 | self.temp_value = cfg.distiller.TEMP_VALUE
53 |
54 | self.loss_keys = []
55 | self.num_losses = 3
56 |
57 | def mimic_loss(self, svalue, tvalue, value_mask):
58 | return (F.loss.square_loss(svalue, tvalue, reduction='none').transpose(1, 2, 3, 0)
59 | * value_mask).mean(2).sum() / F.clip(value_mask.sum(), lower=1e-6)
60 |
61 | def forward(self, features_dict_tea, features_dict_stu, images, instances, image_info, distill_flag=0):
62 | '''
63 | contain_box_mask: 1d float tensor, [1., 0., ...], denoting whether each image contains any objects
64 | nr_actual_boxes_per_img: list of int, the exact number of objects each image contains
65 | '''
66 | nr_actual_boxes_per_img = [image_info[i, -1] for i in range(image_info.shape[0])]
67 |
68 | masks = mask_out_padding(features_dict_tea, image_info, images)
69 |
70 | pos_embs = {k: self.pos_embedding(
71 | features_dict_tea[k], masks[k]) for k in self.feat_keys}
72 | pos_emb = F.concat([F.transpose(F.flatten(pos_embs[k], 2), (2, 0, 1)) for k in self.feat_keys], 0).detach() # S, N, C
73 | masks = F.concat([F.squeeze(F.flatten(masks[k], 2), 1)
74 | for k in self.feat_keys], 1).detach() # N, S
75 |
76 | loss_aux_dict, aux_info_dict = self.forward_aux(
77 | instances, features_dict_tea, image_info, {'mask_out': masks, 'pos_emb': pos_emb})
78 | loss_distill_dict = self.forward_distill(
79 | features_dict_stu, aux_info_dict, nr_actual_boxes_per_img, distill_flag, {'mask_out': masks, 'pos_emb': pos_emb})
80 | loss_aux_dict.update(loss_distill_dict)
81 | self.loss_keys = list(loss_aux_dict.keys())
82 | # print(self.loss_keys)
83 | return loss_aux_dict
84 |
85 | def forward_aux(self, instances, features_dict_tea, image_size, aux_input):
86 | # [S, N, C]
87 | feat = F.concat([F.flatten(features_dict_tea[k], start_axis=2).transpose(2, 0, 1)
88 | for k in self.feat_keys], 0).detach()
89 |
90 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K]
91 | # (0 for Fake Instance) in ins_mask
92 |
93 | # Below four variables provided by encoder forward have been detached before passing to here
94 | ins_feat, ins_mask, ins_mask_gt, pos_gt = self.ins_encoder(
95 | instances, pro_feats=features_dict_tea, image_size=image_size)
96 | decoded_feat, tmask, tvalue = self.attention_module_aux(
97 | ins_feat,
98 | feat,
99 | feat,
100 | query_mask=ins_mask,
101 | key_padding_mask=aux_input['mask_out'],
102 | pos_embedding=aux_input['pos_emb'])
103 |
104 | aux_info_dict = {
105 | 'encoded_ins': (ins_feat, ins_mask, ins_mask_gt),
106 | 'tmask': tmask,
107 | 'tvalue': tvalue,
108 | }
109 |
110 | loss_dict = dict()
111 | loss_dict = self.ins_encoder.loss(
112 | decoded_feat, ins_mask_gt, ins_mask, pos_gt)
113 |
114 | return loss_dict, aux_info_dict
115 |
116 |
117 | def forward_distill(self, features_dict_stu, aux_info_dict, nr_actual_boxes_per_img, distill_flag, aux_input):
118 | loss_dict = dict()
119 |
120 | assert set(self.feat_keys) == set(list(features_dict_stu.keys(
121 | ))), 'WARNING: Unequal keys for fpn and attention ! <%s> != <%s>' % (self.feat_keys, features_dict_stu.keys())
122 | # [S, N, C]
123 | feat = F.concat([F.flatten(features_dict_stu[k], start_axis=2).transpose(2, 0, 1)
124 | for k in self.feat_keys], 0)
125 |
126 | if distill_flag == 0:
127 | feat = feat.detach()
128 |
129 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K]
130 | # (0 for Fake Instance) in ins_mask
131 | ins_feat, ins_mask, ins_mask_gt = aux_info_dict['encoded_ins']
132 | max_ele = int(max(max(nr_actual_boxes_per_img), 1))
133 |
134 | # Note that the mask is not normalized by softmax; the distill module
135 | # loads the aux module's state dict, so almost all parameters are shared
136 | _, _, svalue = self.attention_module_distill(
137 | ins_feat[:max_ele, :, :],
138 | feat,
139 | feat,
140 | query_mask=ins_mask[:, :max_ele],
141 | key_padding_mask=aux_input['mask_out'],
142 | pos_embedding=aux_input['pos_emb'],
143 | proj_only=True)
144 | tvalue = aux_info_dict['tvalue']
145 | tmask = aux_info_dict['tmask']
146 |
147 | # [bsz, heads, ins, Seq]
148 | svalue = self.distill_norm_(svalue)
149 | # [Seq, bsz, heads, channel]
150 | tvalue = self.distill_norm_(tvalue)
151 |
152 | # cosine similarity between features; unreal instances are masked out
153 | # feat holds compact features for each instance
154 | # value is the weighted attention map refactored into different heads
155 | # mask holds the q-k relation masks for distillation
156 |
157 | # [bsz, heads, 1, S]
158 | value_mask = (F.softmax(tmask / self.temp_value, axis=-1)
159 | * F.expand_dims(F.expand_dims(ins_mask_gt, axis=1), axis=-1)
160 | ).sum(2, keepdims=True).detach()
161 | # NOTE(peizhen): value_mask[j, ...] is all-zero for the j-th image if it contains no objects; beforehand, we pad a pseudo box for images without any box
162 | # the same applies to ins_encoder's auxiliary task loss (an image without boxes should not contribute to the loss)
163 |
164 | # [bsz, heads, 1, num_seq]
165 | # value_mask = value_mask * contain_box_mask.reshape(-1, 1, 1, 1)
166 | loss_dict = {'distill': self.mimic_loss(
167 | svalue, tvalue.detach(), value_mask) * self.weight_value}
168 | return loss_dict
169 |
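To make the masked `mimic_loss` easier to follow, here is a standalone NumPy sketch with hypothetical shapes; it mirrors the transpose/broadcast structure above without the MegEngine specifics:

```
import numpy as np

S, N, H, C = 6, 2, 4, 8                  # sequence, batch, heads, channels (hypothetical)
svalue = np.random.rand(S, N, H, C)      # student values
tvalue = np.random.rand(S, N, H, C)      # teacher values (treated as constants)
value_mask = np.random.rand(N, H, 1, S)  # instance-conditional weights, [bsz, heads, 1, S]

se = ((svalue - tvalue) ** 2).transpose(1, 2, 3, 0)  # -> [N, H, C, S]
loss = (se * value_mask).mean(2).sum() / max(value_mask.sum(), 1e-6)
```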
--------------------------------------------------------------------------------
/megengine_release/models/ICD/__init__.py:
--------------------------------------------------------------------------------
1 | from .ICD import ICD
2 | from .utility import get_instance_list
3 |
4 | __all__ = [key for key in globals().keys() if not key.startswith('_')]
5 |
--------------------------------------------------------------------------------
/megengine_release/models/ICD/decoder.py:
--------------------------------------------------------------------------------
1 | import megengine as mge
2 | import megengine.module as M
3 | from megengine import functional as F
4 | import numpy as np
5 | from .transformer import MultiheadAttention
6 | #from .utility import has_nan_or_inf
7 |
8 | # mge.core.set_option('async_level', 0)
9 |
10 | class DecoderWrapper(M.Module):
11 | def __init__(self, cfg):
12 | super().__init__()
13 | channels = cfg.distiller.HIDDEN_DIM
14 | heads = cfg.distiller.ATT_HEADS
15 |
16 | # this is a local module derived from the official implementation; we modify the final modules
17 | self.matt = MultiheadAttention(channels, heads)
18 |
19 | self.pos_projector = M.Linear(in_features=channels, out_features=channels)
20 | self.use_pos = cfg.distiller.USE_POS_EMBEDDING
21 | self.pos_on_v = cfg.distiller.DECODER_POSEMB_ON_V
22 |
23 | def with_pos_embed(self, tensor, pos):
24 | '''
25 | tensor: [S, N, C]
26 | pos: [S, N, C] or [S, 1, C]
27 | '''
28 | if not self.use_pos:
29 | return tensor
30 |
31 | pos = self.pos_projector(pos)
32 | return tensor if pos is None else tensor + pos
33 |
34 |
35 | def forward(self, q, k, v, query_mask=None, key_padding_mask=None, pos_embedding=None, proj_only=False):
36 | # q, v: [sequence_len, batch_size, channels]
37 | k = self.with_pos_embed(k, pos_embedding)
38 | if self.pos_on_v:
39 | v = self.with_pos_embed(v, pos_embedding)
40 | att, mask, values = self.matt(
41 | q, k, v, key_padding_mask=key_padding_mask, proj_only=proj_only)
42 | return att, mask, values
43 |
--------------------------------------------------------------------------------
/megengine_release/models/ICD/layers.py:
--------------------------------------------------------------------------------
1 | import megengine as mge
2 | import megengine.module as M
3 | from megengine import functional as F
4 | import numpy as np
5 |
6 | class MLP(M.Module):
7 | """ Very simple multi-layer perceptron (also called FFN)"""
8 |
9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
10 | super().__init__()
11 | self.num_layers = num_layers
12 | h = [hidden_dim] * (num_layers - 1)
13 | self.layers = [M.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])]
14 |
15 | def forward(self, x):
16 | for i, layer in enumerate(self.layers):
17 | x = F.nn.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
18 | return x
19 |
--------------------------------------------------------------------------------
/megengine_release/models/ICD/utility.py:
--------------------------------------------------------------------------------
1 | import megengine as mge
2 | import megengine.module as M
3 | from megengine import functional as F
4 | import numpy as np
5 | import math
6 | # mge.core.set_option('async_level', 0)
7 |
8 |
9 | def safe_masked_fill(tensor: mge.Tensor, mask: mge.Tensor, val: float) -> mge.Tensor:
10 | '''
11 | same behavior as torch.Tensor.masked_fill(mask, val) (returns a new tensor)
12 | '''
13 | assert mask.dtype == np.bool_
14 | # NOTE(peizhen): simply tensor * ~mask + value * mask could not handle the
15 | # value=float('+inf'/'-inf') case, since inf*0 = nan
16 | new_tensor = tensor * ~mask + F.where(mask, F.ones_like(mask) * val, F.zeros_like(mask))
17 | return new_tensor
18 |
19 |
20 |
21 | def has_nan_or_inf(inp):
22 | invalid_mask = F.logical_or(F.isnan(inp), F.isinf(inp))
23 | return invalid_mask.sum().item() > 0
24 |
25 |
26 | class PositionEmbeddingSine(M.Module):
27 | """
28 | This is a more standard version of the position embedding, very similar to the one
29 | used by the Attention is all you need paper, generalized to work on images.
30 | Modified from DETR: https://github.com/facebookresearch/detr/blob/main/LICENSE
31 | """
32 |
33 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
34 | super().__init__()
35 | self.num_pos_feats = num_pos_feats
36 | self.temperature = temperature
37 | self.normalize = normalize
38 | if scale is not None and normalize is False:
39 | raise ValueError("normalize should be True if scale is passed")
40 | if scale is None:
41 | scale = 2 * math.pi
42 | self.scale = scale
43 |
44 | def forward(self, x, mask):
45 | assert mask is not None
46 | not_mask = F.squeeze(~mask, 1) # ~mask.squeeze(1)
47 |
48 | y_embed = F.cumsum(not_mask.astype('int32'), 1) # .cumsum(1, dtype=torch.float32)
49 | x_embed = F.cumsum(not_mask.astype('int32'), 2) # .cumsum(2, dtype=torch.float32)
50 | if self.normalize:
51 | eps = 1e-6
52 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
53 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
54 |
55 | dim_t = F.arange(self.num_pos_feats,
56 | dtype="float32", device=x.device)
57 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
58 |
59 | pos_x = F.expand_dims(x_embed, -1) / dim_t
60 | pos_y = F.expand_dims(y_embed, -1) / dim_t
61 | pos_x = F.flatten(F.stack(
62 | (F.sin(pos_x[:, :, :, 0::2]), F.cos(pos_x[:, :, :, 1::2])), axis=4), start_axis=3)
63 | pos_y = F.flatten(F.stack(
64 | (F.sin(pos_y[:, :, :, 0::2]), F.cos(pos_y[:, :, :, 1::2])), axis=4), start_axis=3)
65 | pos = F.transpose(F.concat((pos_y, pos_x), axis=3), (0, 3, 1, 2))
66 | return pos
67 |
68 |
69 | def get_valid_boxes(raw_boxes, terminate, ignore):
70 | '''
71 | Input:
72 | raw_boxes: (B, MAXN, 4+1)
73 | terminate: label value that marks padded/invalid boxes
74 | Return:
75 | boxes: list of (Nb, 4)
76 | labels: list of (Nb,)
77 | '''
78 | # (B,)
79 | B = raw_boxes.shape[0]
80 | nr_valid_boxes = (1 - F.equal(raw_boxes[:, :, -1], terminate)).sum(axis=1).astype('int32')
81 |
82 | #print(f'nr_valid_boxes: {nr_valid_boxes}')
83 |
84 | # NOTE(peizhen): raw_boxes[i, :0, :4] will cause bug since ':0' indexing is invalid in megengine
85 | #boxes = [raw_boxes[i, :nr_valid_boxes[i], :4] for i in range(B)]
86 | #labels = [raw_boxes[i, :nr_valid_boxes[i], 4] for i in range(B)]
87 |
88 | # B x (Nb, 4) and B x (Nb,)
89 | boxes = list()
90 | labels = list()
91 | for i in range(B):
92 | num_valid = nr_valid_boxes[i].item()
93 | if num_valid > 0:
94 | boxes.append(raw_boxes[i, :num_valid, :4])
95 | labels.append(raw_boxes[i, :num_valid, 4])
96 | else:
97 | boxes.append(F.zeros((0, 4), dtype=raw_boxes.dtype, device=raw_boxes.device))
98 | labels.append(F.zeros((0,), dtype=raw_boxes.dtype, device=raw_boxes.device))
99 |
100 | # TODO(peizhen): currently discard those terms whose labels are -1. Need better operation ?
101 | # see backup/utility.py annotation part
102 | return boxes, labels
103 |
104 |
105 | def get_instance_list(image_size, gt_boxes_human, gt_boxes_car, terminate=-2, ignore=-1):
106 | '''
107 | Input:
108 | gt_boxes_human: (B, MAXN, 4+1)
109 | gt_boxes_car: (B, MAXN, 4+1)
110 | '''
111 | human_box_list, human_label_list = get_valid_boxes(gt_boxes_human, terminate, ignore)
112 | vehicle_box_list, vehicle_label_list = get_valid_boxes(gt_boxes_car, terminate, ignore)
113 | # (1) For `gt_boxes_human`, 1 denotes human; -2 denotes an invalid object (will be processed as 0)
114 | # (2) For `gt_boxes_car`, 1 & 2 denote different kinds of car; -2 denotes an invalid object (labels will still be 1 and 2)
115 |
116 | instances = list()
117 | contain_box_mask = list()
118 | nr_actual_boxes_per_img = list()
119 | for human_boxes, human_labels, vehicle_boxes, vehicle_labels in \
120 | zip(human_box_list, human_label_list, vehicle_box_list, vehicle_label_list):
121 | # (k, 4) and (k,)
122 | gt_boxes = F.concat([human_boxes, vehicle_boxes], axis=0).astype("float32")
123 | # Process gt_boxes_human's labels from 1 to 0. Naturally, car owns label 1 and 2
124 | gt_classes = F.concat([human_labels - 1, vehicle_labels], axis=0).astype("int32")
125 |
126 | contain_box_mask.append(gt_boxes.shape[0] > 0)
127 | assert gt_boxes.shape[0] == gt_classes.shape[0]
128 |
129 | # pad a box for images that contain no boxes, to work around a potential indexing bug (unlike in coco, an image in a business dataset might contain no boxes at all)
130 | nr_valid_objs = gt_boxes.shape[0]
131 | nr_actual_boxes_per_img.append(nr_valid_objs)
132 | if nr_valid_objs == 0:
133 | gt_boxes = F.zeros((1, 4), device=gt_boxes.device, dtype=gt_boxes.dtype)
134 | gt_classes = F.zeros((1,), device=gt_classes.device, dtype=gt_classes.dtype)
135 |
136 | instances.append({'image_size': image_size, 'gt_boxes': gt_boxes, 'gt_classes': gt_classes})
137 |
138 | # (bsz,)
139 | contain_box_mask = mge.Tensor(
140 | contain_box_mask, device=instances[0]['gt_boxes'].device, dtype='float32').detach()
141 |
142 | return instances, contain_box_mask, nr_actual_boxes_per_img
143 |
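A usage sketch for `safe_masked_fill` (illustration only, assuming the function above is in scope): filling with `-inf` through plain mask arithmetic would yield `inf * 0 = nan`, which the `F.where` path avoids.

```
import numpy as np
import megengine as mge

t = mge.Tensor([1.0, 2.0, 3.0])
m = mge.Tensor(np.array([False, True, False]))
out = safe_masked_fill(t, m, float("-inf"))
print(out.numpy())  # [1., -inf, 3.]
```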
--------------------------------------------------------------------------------
/megengine_release/models/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | from .atss import *
10 | from .faster_rcnn import *
11 | from .fcos import *
12 | from .freeanchor import *
13 | from .retinanet import *
14 |
15 | _EXCLUDE = {}
16 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
17 |
--------------------------------------------------------------------------------
/megengine_release/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 |
--------------------------------------------------------------------------------
/megengine_release/models/backbones/resnet/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 |
--------------------------------------------------------------------------------
/megengine_release/models/faster_rcnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import numpy as np
10 |
11 | import megengine.functional as F
12 | import megengine.module as M
13 |
14 | import models.backbones.resnet.model as resnet
15 | import layers
16 |
17 |
18 | class FasterRCNN(M.Module):
19 | """
20 | Implement Faster R-CNN (https://arxiv.org/abs/1506.01497).
21 | """
22 |
23 | def __init__(self, cfg):
24 | super().__init__()
25 | self.cfg = cfg
26 | # ----------------------- build backbone ------------------------ #
27 | bottom_up = getattr(resnet, cfg.backbone)(
28 | norm=layers.get_norm(cfg.backbone_norm), pretrained=cfg.backbone_pretrained
29 | )
30 | del bottom_up.fc
31 |
32 | # ----------------------- build FPN ----------------------------- #
33 | self.backbone = layers.FPN(
34 | bottom_up=bottom_up,
35 | in_features=cfg.fpn_in_features,
36 | out_channels=cfg.fpn_out_channels,
37 | norm=cfg.fpn_norm,
38 | top_block=layers.FPNP6(),
39 | strides=cfg.fpn_in_strides,
40 | channels=cfg.fpn_in_channels,
41 | )
42 |
43 | # ----------------------- build RPN ----------------------------- #
44 | self.rpn = layers.RPN(cfg)
45 |
46 | # ----------------------- build RCNN head ----------------------- #
47 | self.rcnn = layers.RCNN(cfg)
48 |
49 | def preprocess_image(self, image):
50 | padded_image = layers.get_padded_tensor(image, 32, 0.0)
51 | normed_image = (
52 | padded_image
53 | - np.array(self.cfg.img_mean, dtype="float32")[None, :, None, None]
54 | ) / np.array(self.cfg.img_std, dtype="float32")[None, :, None, None]
55 | return normed_image
56 |
57 | def forward(self, image, im_info, gt_boxes=None):
58 | image = self.preprocess_image(image)
59 | features = self.backbone(image)
60 |
61 | if self.training:
62 | return self._forward_train(features, im_info, gt_boxes)
63 | else:
64 | return self.inference(features, im_info)
65 |
66 | def _forward_train(self, features, im_info, gt_boxes):
67 | rpn_rois, rpn_losses = self.rpn(features, im_info, gt_boxes)
68 | rcnn_losses = self.rcnn(features, rpn_rois, im_info, gt_boxes)
69 |
70 | loss_rpn_cls = rpn_losses["loss_rpn_cls"]
71 | loss_rpn_bbox = rpn_losses["loss_rpn_bbox"]
72 | loss_rcnn_cls = rcnn_losses["loss_rcnn_cls"]
73 | loss_rcnn_bbox = rcnn_losses["loss_rcnn_bbox"]
74 | total_loss = loss_rpn_cls + loss_rpn_bbox + loss_rcnn_cls + loss_rcnn_bbox
75 |
76 | loss_dict = {
77 | "total_loss": total_loss,
78 | "rpn_cls": loss_rpn_cls,
79 | "rpn_bbox": loss_rpn_bbox,
80 | "rcnn_cls": loss_rcnn_cls,
81 | "rcnn_bbox": loss_rcnn_bbox,
82 | }
83 | self.cfg.losses_keys = list(loss_dict.keys())
84 | return loss_dict
85 |
86 | def inference(self, features, im_info):
87 | rpn_rois = self.rpn(features, im_info)
88 | pred_boxes, pred_score = self.rcnn(features, rpn_rois)
89 | pred_boxes = pred_boxes.reshape(-1, 4)
90 |
91 | scale_w = im_info[0, 1] / im_info[0, 3]
92 | scale_h = im_info[0, 0] / im_info[0, 2]
93 | pred_boxes = pred_boxes / F.concat([scale_w, scale_h, scale_w, scale_h], axis=0)
94 | clipped_boxes = layers.get_clipped_boxes(
95 | pred_boxes, im_info[0, 2:4]
96 | ).reshape(-1, self.cfg.num_classes, 4)
97 | return pred_score, clipped_boxes
98 |
99 |
100 | class FasterRCNNConfig:
101 | # pylint: disable=too-many-statements
102 | def __init__(self):
103 | self.backbone = "resnet50"
104 | self.backbone_pretrained = True
105 | self.backbone_norm = "FrozenBN"
106 | self.backbone_freeze_at = 2
107 | self.fpn_norm = None
108 | self.fpn_in_features = ["res2", "res3", "res4", "res5"]
109 | self.fpn_in_strides = [4, 8, 16, 32]
110 | self.fpn_in_channels = [256, 512, 1024, 2048]
111 | self.fpn_out_channels = 256
112 |
113 | # ------------------------ data cfg -------------------------- #
114 | self.train_dataset = dict(
115 | name="coco",
116 | root="train2017",
117 | ann_file="annotations/instances_train2017.json",
118 | remove_images_without_annotations=True,
119 | )
120 | self.test_dataset = dict(
121 | name="coco",
122 | root="val2017",
123 | ann_file="annotations/instances_val2017.json",
124 | remove_images_without_annotations=False,
125 | )
126 | self.num_classes = 80
127 | self.img_mean = [103.530, 116.280, 123.675] # BGR
128 | self.img_std = [57.375, 57.120, 58.395]
129 |
130 | # ----------------------- rpn cfg ------------------------- #
131 | self.rpn_stride = [4, 8, 16, 32, 64]
132 | self.rpn_in_features = ["p2", "p3", "p4", "p5", "p6"]
133 | self.rpn_channel = 256
134 | self.rpn_reg_mean = [0.0, 0.0, 0.0, 0.0]
135 | self.rpn_reg_std = [1.0, 1.0, 1.0, 1.0]
136 |
137 | self.anchor_scales = [[x] for x in [32, 64, 128, 256, 512]]
138 | self.anchor_ratios = [[0.5, 1, 2]]
139 | self.anchor_offset = 0.5
140 |
141 | self.match_thresholds = [0.3, 0.7]
142 | self.match_labels = [0, -1, 1]
143 | self.match_allow_low_quality = True
144 | self.rpn_nms_threshold = 0.7
145 | self.num_sample_anchors = 256
146 | self.positive_anchor_ratio = 0.5
147 |
148 | # ----------------------- rcnn cfg ------------------------- #
149 | self.rcnn_stride = [4, 8, 16, 32]
150 | self.rcnn_in_features = ["p2", "p3", "p4", "p5"]
151 | self.rcnn_reg_mean = [0.0, 0.0, 0.0, 0.0]
152 | self.rcnn_reg_std = [0.1, 0.1, 0.2, 0.2]
153 |
154 | self.pooling_method = "roi_align"
155 | self.pooling_size = (7, 7)
156 |
157 | self.num_rois = 512
158 | self.fg_ratio = 0.5
159 | self.fg_threshold = 0.5
160 | self.bg_threshold_high = 0.5
161 | self.bg_threshold_low = 0.0
162 | self.class_aware_box = True
163 |
164 | # ------------------------ loss cfg -------------------------- #
165 | self.rpn_smooth_l1_beta = 0 # use L1 loss
166 | self.rcnn_smooth_l1_beta = 0 # use L1 loss
167 | self.num_losses = 5
168 |
169 | # ------------------------ training cfg ---------------------- #
170 | self.train_image_short_size = (640, 672, 704, 736, 768, 800)
171 | self.train_image_max_size = 1333
172 | self.train_prev_nms_top_n = 2000
173 | self.train_post_nms_top_n = 1000
174 |
175 | self.basic_lr = 0.02 / 16 # The basic learning rate for a single image
176 | self.momentum = 0.9
177 | self.weight_decay = 1e-4
178 | self.log_interval = 20
179 | self.nr_images_epoch = 80000
180 | self.max_epoch = 54
181 | self.warm_iters = 500
182 | self.lr_decay_rate = 0.1
183 | self.lr_decay_stages = [42, 50]
184 |
185 | # ------------------------ testing cfg ----------------------- #
186 | self.test_image_short_size = 800
187 | self.test_image_max_size = 1333
188 | self.test_prev_nms_top_n = 1000
189 | self.test_post_nms_top_n = 1000
190 | self.test_max_boxes_per_image = 100
191 | self.test_vis_threshold = 0.3
192 | self.test_cls_threshold = 0.05
193 | self.test_nms = 0.5
194 |
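Note how the schedule fields above combine at train time (see `train.py` in this dump): `basic_lr` is a per-image rate, so the effective learning rate scales with the global batch and decays at `lr_decay_stages`. A hedged sketch with a hypothetical 8-GPU, 2-images-per-GPU setup:

```
import bisect

basic_lr = 0.02 / 16           # per-image learning rate from the config
batch_size, world_size = 2, 8  # hypothetical training setup

for epoch in (0, 42, 50):
    lr = basic_lr * batch_size * world_size * (0.1 ** bisect.bisect_right([42, 50], epoch))
    print(epoch, lr)  # 0.02 at start, 0.002 from epoch 42, 0.0002 from epoch 50
```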
--------------------------------------------------------------------------------
/megengine_release/requirements.txt:
--------------------------------------------------------------------------------
1 | megengine
2 | numpy==1.19.5
3 | opencv-python==4.5.3.56
4 | tqdm==4.62.3
5 | tabulate==0.8.9
6 |
--------------------------------------------------------------------------------
/megengine_release/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import argparse
10 | import json
11 | import os
12 | from tqdm import tqdm
13 |
14 | import megengine as mge
15 | import megengine.distributed as dist
16 | from megengine.data import DataLoader
17 |
18 | from layers.tools.data_mapper import data_mapper
19 | from layers.tools.utils import DetEvaluator, InferenceSampler, import_from_file
20 |
21 | logger = mge.get_logger(__name__)
22 | logger.setLevel("INFO")
23 | mge.device.set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4.0)
24 |
25 |
26 | def make_parser():
27 | parser = argparse.ArgumentParser()
28 | parser.add_argument(
29 | "-f", "--file", default="net.py", type=str, help="net description file"
30 | )
31 | parser.add_argument(
32 | "-w", "--weight_file", default=None, type=str, help="weights file",
33 | )
34 | parser.add_argument(
35 | "-n", "--devices", default=1, type=int, help="total number of gpus for testing",
36 | )
37 | parser.add_argument(
38 | "-d", "--dataset_dir", default="/data/datasets", type=str,
39 | )
40 | parser.add_argument("-se", "--start_epoch", default=-1, type=int)
41 | parser.add_argument("-ee", "--end_epoch", default=-1, type=int)
42 | return parser
43 |
44 |
45 | def main():
46 | # pylint: disable=import-outside-toplevel,too-many-branches,too-many-statements
47 | from pycocotools.coco import COCO
48 | from pycocotools.cocoeval import COCOeval
49 |
50 | parser = make_parser()
51 | args = parser.parse_args()
52 |
53 | current_network = import_from_file(args.file)
54 | cfg = current_network.Cfg()
55 |
56 | if args.weight_file:
57 | args.start_epoch = args.end_epoch = -1
58 | else:
59 | if args.start_epoch == -1:
60 | args.start_epoch = cfg.max_epoch - 1
61 | if args.end_epoch == -1:
62 | args.end_epoch = args.start_epoch
63 | assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch
64 |
65 | for epoch_num in range(args.start_epoch, args.end_epoch + 1):
66 | if args.weight_file:
67 | weight_file = args.weight_file
68 | else:
69 | weight_file = "log-of-{}/epoch_{}.pkl".format(
70 | os.path.basename(args.file).split(".")[0], epoch_num
71 | )
72 |
73 | if args.devices > 1:
74 | dist_worker = dist.launcher(n_gpus=args.devices)(worker)
75 | result_list = dist_worker(current_network, weight_file, args.dataset_dir)
76 | result_list = sum(result_list, [])
77 | else:
78 | result_list = worker(current_network, weight_file, args.dataset_dir)
79 |
80 | all_results = DetEvaluator.format(result_list, cfg)
81 | if args.weight_file:
82 | json_path = "{}_{}.json".format(
83 | os.path.basename(args.file).split(".")[0],
84 | os.path.basename(args.weight_file).split(".")[0],
85 | )
86 | else:
87 | json_path = "log-of-{}/epoch_{}.json".format(
88 | os.path.basename(args.file).split(".")[0], epoch_num
89 | )
90 | all_results = json.dumps(all_results)
91 |
92 | with open(json_path, "w") as fo:
93 | fo.write(all_results)
94 | logger.info("Save results to %s, start evaluation!", json_path)
95 |
96 | eval_gt = COCO(
97 | os.path.join(
98 | args.dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]
99 | )
100 | )
101 | eval_dt = eval_gt.loadRes(json_path)
102 | cocoEval = COCOeval(eval_gt, eval_dt, iouType="bbox")
103 | cocoEval.evaluate()
104 | cocoEval.accumulate()
105 | cocoEval.summarize()
106 | metrics = [
107 | "AP",
108 | "AP@0.5",
109 | "AP@0.75",
110 | "APs",
111 | "APm",
112 | "APl",
113 | "AR@1",
114 | "AR@10",
115 | "AR@100",
116 | "ARs",
117 | "ARm",
118 | "ARl",
119 | ]
120 | logger.info("mmAP".center(32, "-"))
121 | for i, m in enumerate(metrics):
122 | logger.info("|\t%s\t|\t%.03f\t|", m, cocoEval.stats[i])
123 | logger.info("-" * 32)
124 |
125 |
126 | def worker(current_network, weight_file, dataset_dir):
127 | cfg = current_network.Cfg()
128 | cfg.backbone_pretrained = False
129 |
130 | model = current_network.Net(cfg)
131 | model.eval()
132 |
133 | state_dict = mge.load(weight_file)
134 | if "state_dict" in state_dict:
135 | state_dict = state_dict["state_dict"]
136 | model.load_state_dict(state_dict)
137 |
138 | evaluator = DetEvaluator(model)
139 |
140 | test_loader = build_dataloader(dataset_dir, model.cfg)
141 | if dist.get_rank() == 0:
142 | test_loader = tqdm(test_loader)
143 |
144 | result_list = []
145 | for data in test_loader:
146 | image, im_info = DetEvaluator.process_inputs(
147 | data[0][0],
148 | model.cfg.test_image_short_size,
149 | model.cfg.test_image_max_size,
150 | )
151 | pred_res = evaluator.predict(
152 | image=mge.tensor(image),
153 | im_info=mge.tensor(im_info)
154 | )
155 | result = {
156 | "pred_boxes": pred_res,
157 | "image_id": int(data[1][2][0].split(".")[0].split("_")[-1]),
158 | }
159 | result_list.append(result)
160 | return result_list
161 |
162 |
163 | def build_dataloader(dataset_dir, cfg):
164 | val_dataset = data_mapper[cfg.test_dataset["name"]](
165 | os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["root"]),
166 | os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]),
167 | order=["image", "info"],
168 | )
169 | val_sampler = InferenceSampler(val_dataset, 1)
170 | val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2)
171 | return val_dataloader
172 |
173 |
174 | if __name__ == "__main__":
175 | main()
176 |
--------------------------------------------------------------------------------
/megengine_release/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This repo is licensed under the Apache License, Version 2.0 (the "License")
3 | #
4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 | #
6 | # Unless required by applicable law or agreed to in writing,
7 | # software distributed under the License is distributed on an
8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | import argparse
10 | import bisect
11 | import copy
12 | import os
13 | import time
14 |
15 | import megengine as mge
16 | import megengine.distributed as dist
17 | from megengine.autodiff import GradManager
18 | from megengine.data import DataLoader, Infinite, RandomSampler
19 | from megengine.data import transform as T
20 | from megengine.optimizer import SGD
21 |
22 | from layers.tools.data_mapper import data_mapper
23 | from layers.tools.utils import (
24 | AverageMeter,
25 | DetectionPadCollator,
26 | GroupedRandomSampler,
27 | get_config_info,
28 | import_from_file
29 | )
30 |
31 | logger = mge.get_logger(__name__)
32 | logger.setLevel("INFO")
33 | mge.device.set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4.0)
34 |
35 |
36 | def make_parser():
37 | parser = argparse.ArgumentParser()
38 | parser.add_argument(
39 | "-f", "--file", default="net.py", type=str, help="net description file"
40 | )
41 | parser.add_argument(
42 | "-w", "--weight_file", default=None, type=str, help="weights file",
43 | )
44 | parser.add_argument(
45 | "-n", "--devices", default=1, type=int, help="total number of gpus for training",
46 | )
47 | parser.add_argument(
48 | "-b", "--batch_size", default=2, type=int, help="batch size for training",
49 | )
50 | parser.add_argument(
51 | "-d", "--dataset_dir", default="/data/datasets", type=str,
52 | )
53 |
54 | return parser
55 |
56 |
57 | def main():
58 | parser = make_parser()
59 | args = parser.parse_args()
60 |
61 | # ------------------------ begin training -------------------------- #
62 | logger.info("Device Count = %d", args.devices)
63 |
64 | log_dir = "log-of-{}".format(os.path.basename(args.file).split(".")[0])
65 | if not os.path.isdir(log_dir):
66 | os.makedirs(log_dir)
67 |
68 | if args.devices > 1:
69 | trainer = dist.launcher(worker, n_gpus=args.devices)
70 | trainer(args)
71 | else:
72 | worker(args)
73 |
74 |
75 | def worker(args):
76 | current_network = import_from_file(args.file)
77 |
78 | model = current_network.Net(current_network.Cfg())
79 | model.train()
80 |
81 | if dist.get_rank() == 0:
82 | logger.info(get_config_info(model.cfg))
83 | logger.info(repr(model))
84 |
85 | params_with_grad = []
86 | for name, param in model.named_parameters():
87 | if "bottom_up.conv1" in name and model.cfg.backbone_freeze_at >= 1:
88 | continue
89 | if "bottom_up.layer1" in name and model.cfg.backbone_freeze_at >= 2:
90 | continue
91 | params_with_grad.append(param)
92 |
93 | opt = SGD(
94 | params_with_grad,
95 | lr=model.cfg.basic_lr * args.batch_size * dist.get_world_size(),
96 | momentum=model.cfg.momentum,
97 | weight_decay=model.cfg.weight_decay,
98 | )
99 |
100 | # print('BASE LR:', model.cfg.basic_lr * args.batch_size * dist.get_world_size())
101 |
102 | gm = GradManager()
103 | if dist.get_world_size() > 1:
104 | gm.attach(
105 | params_with_grad,
106 | callbacks=[dist.make_allreduce_cb("mean", dist.WORLD)]
107 | )
108 | else:
109 | gm.attach(params_with_grad)
110 |
111 | if args.weight_file is not None:
112 | weights = mge.load(args.weight_file)
113 | model.backbone.bottom_up.load_state_dict(weights, strict=False)
114 | if dist.get_world_size() > 1:
115 | dist.bcast_list_(model.parameters()) # sync parameters
116 | dist.bcast_list_(model.buffers()) # sync buffers
117 |
118 | if dist.get_rank() == 0:
119 | logger.info("Prepare dataset")
120 | train_loader = iter(build_dataloader(args.batch_size, args.dataset_dir, model.cfg))
121 |
122 | for epoch in range(model.cfg.max_epoch):
123 | train_one_epoch(model, train_loader, opt, gm, epoch, args)
124 | if dist.get_rank() == 0:
125 | save_path = "log-of-{}/epoch_{}.pkl".format(
126 | os.path.basename(args.file).split(".")[0], epoch
127 | )
128 | mge.save(
129 | {"epoch": epoch, "state_dict": model.state_dict()}, save_path,
130 | )
131 | logger.info("dump weights to %s", save_path)
132 |
133 |
134 | def train_one_epoch(model, data_queue, opt, gm, epoch, args):
135 | def train_func(image, im_info, gt_boxes):
136 | with gm:
137 | loss_dict = model(image=image, im_info=im_info, gt_boxes=gt_boxes)
138 | gm.backward(loss_dict["total_loss"])
139 | loss_list = list(loss_dict.values())
140 | opt.step().clear_grad()
141 | return loss_list
142 |
143 | meter = AverageMeter(record_len=model.cfg.num_losses)
144 | time_meter = AverageMeter(record_len=2)
145 | log_interval = model.cfg.log_interval
146 | tot_step = model.cfg.nr_images_epoch // (args.batch_size * dist.get_world_size())
147 | for step in range(tot_step):
148 | adjust_learning_rate(opt, epoch, step, model.cfg, args)
149 |
150 | data_tik = time.time()
151 | mini_batch = next(data_queue)
152 | data_tok = time.time()
153 |
154 | tik = time.time()
155 | loss_list = train_func(
156 | image=mge.tensor(mini_batch["data"]),
157 | im_info=mge.tensor(mini_batch["im_info"]),
158 | gt_boxes=mge.tensor(mini_batch["gt_boxes"])
159 | )
160 | tok = time.time()
161 |
162 | time_meter.update([tok - tik, data_tok - data_tik])
163 |
164 | if dist.get_rank() == 0:
165 | info_str = "e%d, %d/%d, lr:%f, "
166 | loss_str = ", ".join(
167 | ["{}:%f".format(loss) for loss in model.cfg.losses_keys]
168 | )
169 | time_str = ", train_time:%.3fs, data_time:%.3fs"
170 | log_info_str = info_str + loss_str + time_str
171 | meter.update([loss.numpy() for loss in loss_list])
172 | if step % log_interval == 0:
173 | logger.info(
174 | log_info_str,
175 | epoch,
176 | step,
177 | tot_step,
178 | opt.param_groups[0]["lr"],
179 | *meter.average(),
180 | *time_meter.average()
181 | )
182 | meter.reset()
183 | time_meter.reset()
184 |
185 |
186 | def adjust_learning_rate(optimizer, epoch, step, cfg, args):
187 | base_lr = (
188 | cfg.basic_lr * dist.get_world_size() * args.batch_size * (
189 | cfg.lr_decay_rate
190 | ** bisect.bisect_right(cfg.lr_decay_stages, epoch)
191 | )
192 | )
193 | # print('UPDATE LR:', base_lr)
194 | # Warm up
195 | lr_factor = 1.0
196 | if epoch == 0 and step < cfg.warm_iters:
197 | lr_factor = (step + 1.0) / cfg.warm_iters
198 | for param_group in optimizer.param_groups:
199 | param_group["lr"] = base_lr * lr_factor
200 |
201 |
202 | def build_dataset(dataset_dir, cfg):
203 | data_cfg = copy.deepcopy(cfg.train_dataset)
204 | data_name = data_cfg.pop("name")
205 |
206 | data_cfg["root"] = os.path.join(dataset_dir, data_name, data_cfg["root"])
207 |
208 | if "ann_file" in data_cfg:
209 | data_cfg["ann_file"] = os.path.join(dataset_dir, data_name, data_cfg["ann_file"])
210 |
211 | data_cfg["order"] = ["image", "boxes", "boxes_category", "info"]
212 |
213 | return data_mapper[data_name](**data_cfg)
214 |
215 |
216 | # pylint: disable=dangerous-default-value
217 | def build_sampler(train_dataset, batch_size, aspect_grouping=[1]):
218 | def _compute_aspect_ratios(dataset):
219 | aspect_ratios = []
220 | for i in range(len(dataset)):
221 | info = dataset.get_img_info(i)
222 | aspect_ratios.append(info["height"] / info["width"])
223 | return aspect_ratios
224 |
225 | def _quantize(x, bins):
226 | return list(map(lambda y: bisect.bisect_right(sorted(bins), y), x))
227 |
228 | if len(aspect_grouping) == 0:
229 | return Infinite(RandomSampler(train_dataset, batch_size, drop_last=True))
230 |
231 | aspect_ratios = _compute_aspect_ratios(train_dataset)
232 | group_ids = _quantize(aspect_ratios, aspect_grouping)
233 | return Infinite(GroupedRandomSampler(train_dataset, batch_size, group_ids))
234 |
235 |
236 | def build_dataloader(batch_size, dataset_dir, cfg):
237 | train_dataset = build_dataset(dataset_dir, cfg)
238 | train_sampler = build_sampler(train_dataset, batch_size)
239 | train_dataloader = DataLoader(
240 | train_dataset,
241 | sampler=train_sampler,
242 | transform=T.Compose(
243 | transforms=[
244 | T.ShortestEdgeResize(
245 | cfg.train_image_short_size,
246 | cfg.train_image_max_size,
247 | sample_style="choice",
248 | ),
249 | T.RandomHorizontalFlip(),
250 | T.ToMode(),
251 | ],
252 | order=["image", "boxes", "boxes_category"],
253 | ),
254 | collator=DetectionPadCollator(),
255 | num_workers=2,
256 | )
257 | return train_dataloader
258 |
259 |
260 | if __name__ == "__main__":
261 | main()
262 |
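For clarity, the aspect-ratio grouping in `build_sampler` reduces to a one-dimensional bucketing; a small sketch with hypothetical ratios:

```
import bisect

aspect_grouping = [1]                   # default bin edge used above
aspect_ratios = [0.75, 1.33, 0.5, 2.0]  # hypothetical height/width values
group_ids = [bisect.bisect_right(sorted(aspect_grouping), r) for r in aspect_ratios]
print(group_ids)  # [0, 1, 0, 1] -> portrait and landscape images batched separately
```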
--------------------------------------------------------------------------------
/pytorch_release/README.md:
--------------------------------------------------------------------------------
1 | # Instance-Conditional Knowledge Distillation for Object Detection
2 | This is an official implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection" in [PyTorch](https://pytorch.org); it supports various detectors from Detectron2 and AdelaiDet.
3 |
4 |
5 | # Requirements
6 | The project depends on the following libraries. You may need to install Detectron2 and AdelaiDet manually; please refer to their GitHub pages.
7 | - Python3 (3.8 recommended)
8 | - pytorch == 1.9.0
9 | - torchvision == 0.10.0
10 | - opencv-python == 4.5.4.58
11 | - [Detectron2](https://github.com/facebookresearch/detectron2) == 0.5.0
12 | - [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) == 7bf9d87
13 |
14 | (To avoid conflicts, we recommend using exactly the versions above.)
15 |
16 | Reference command for installation:
17 | ```
18 | # Switch to this directory (and maybe create a virtual environment)
19 | pip install pip --upgrade
20 | pip install -r requirements.txt
21 | pip install https://github.com/facebookresearch/detectron2/archive/refs/tags/v0.5.tar.gz
22 | pip install 'git+https://github.com/aim-uofa/AdelaiDet.git@7bf9d87'
23 | ```
24 |
25 | You will also need to prepare datasets according to [detectron2](https://github.com/facebookresearch/detectron2/tree/main/datasets), put your data under the following structure, and set the environment variable with `export DETECTRON2_DATASETS=/path/to/datasets`.
26 | ```
27 | $DETECTRON2_DATASETS/
28 | coco/
29 | annotations/
30 | instances_{train,val}2017.json
31 | {train,val}2017/
32 | # image files
33 | ```
34 |
35 | # Usage
36 | ## Train baseline models
37 | We use [train_baseline.py](./train_baseline.py) to train baseline models; it is very similar to [tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/main/tools/train_net.py).
38 |
39 | You can use any config files for detectron2 or AdelaiDet to specify a training setting.
40 | ```
41 | usage: train_baseline.py [-h] [--config-file FILE] [--resume] [--eval-only]
42 | [--num-gpus NUM_GPUS] [--num-machines NUM_MACHINES]
43 | [--machine-rank MACHINE_RANK] [--dist-url DIST_URL]
44 | ...
45 |
46 | positional arguments:
47 | opts Modify config options at the end of the command. For
48 | Yacs configs, use space-separated "PATH.KEY VALUE"
49 | pairs. For python-based LazyConfig, use
50 | "path.key=value".
51 |
52 | optional arguments:
53 | -h, --help show this help message and exit
54 | --config-file FILE path to config file
55 | --resume Whether to attempt to resume from the checkpoint
56 | directory. See documentation of
57 | `DefaultTrainer.resume_or_load()` for what it means.
58 | --eval-only perform evaluation only
59 | --num-gpus NUM_GPUS number of gpus *per machine*
60 | --num-machines NUM_MACHINES
61 | total number of machines
62 | --machine-rank MACHINE_RANK
63 | the rank of this machine (unique per machine)
64 | --dist-url DIST_URL initialization URL for pytorch distributed backend.
65 | See https://pytorch.org/docs/stable/distributed.html
66 | for details.
67 | ```
68 | ### Examples:
69 |
70 | Train a retinanet baseline detector on a single machine:
71 |
72 | ```
73 | train_baseline.py --num-gpus 8 --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
74 | ```
75 |
76 | Change some config options:
77 |
78 | ```
79 | train_baseline.py --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001
80 | ```
81 |
82 | Run on multiple machines:
83 | ```
84 | (machine0)$ python3 train_baseline.py --machine-rank 0 --num-machines 2 --dist-url <URL> [--other-flags]
85 | (machine1)$ python3 train_baseline.py --machine-rank 1 --num-machines 2 --dist-url <URL> [--other-flags]
86 | ```
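For example, with two 8-GPU machines (the host and port are placeholders; use any address of machine0 that machine1 can reach):
```
(machine0)$ python3 train_baseline.py --machine-rank 0 --num-machines 2 --num-gpus 8 --dist-url tcp://192.168.0.1:29500 --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
(machine1)$ python3 train_baseline.py --machine-rank 1 --num-machines 2 --num-gpus 8 --dist-url tcp://192.168.0.1:29500 --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml
```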
87 |
88 | ## Train and distill models
89 | Everything stays the same as above, except the entry point ([train_distill.py](./train_distill.py)) and the config.
90 |
91 | ### Examples:
92 |
93 | Train RetinaNet with distillation:
94 |
95 | ```
96 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_retinanet
97 | ```
98 |
99 | Train Faster R-CNN with distillation:
100 |
101 | ```
102 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/RCNN_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_frcnn
103 | ```
104 |
105 | Train CondInst with distillation:
106 |
107 | ```
108 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/CondInst_R50_R101_icd.yaml OUTPUT_DIR output/icd_condinst
109 | ```
110 |
111 | ### Write distillation configs:
112 | To show how to write a config for distillation, let's look at two examples:
113 |
114 | **If teacher model is released by detectron2 officially:**
115 |
116 | You can load the checkpoint through the detectron2 model_zoo API: set `MODEL_LOAD_OFFICIAL: True` and use the corresponding config file. You may also set `WEIGHT_VALUE` to the desired loss weight.
117 |
118 | ```
119 | MODEL:
120 | DISTILLER:
121 | MODEL_LOAD_OFFICIAL: True
122 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/retinanet_R_101_FPN_3x.yaml'
123 |
124 | INS_ATT_MIMIC:
125 | WEIGHT_VALUE: 8.0
126 | ```
127 |
128 | Note: it also supports configs from detectron2 new baselines, like [LSJ (large scale jitter) models](https://github.com/facebookresearch/detectron2/blob/main/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py), which could be helpful in practice.
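For instance, a distiller section pointing at one of those LazyConfig teachers might look like the sketch below (the path is an illustration mirroring the YAML example above, not a tested config):
```
MODEL:
  DISTILLER:
    MODEL_LOAD_OFFICIAL: True
    MODEL_DISTILLER_CONFIG: 'new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py'
```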
129 |
130 |
131 | **If you want to use a standalone teacher trained by yourself:**
132 |
133 | If you trained the teacher yourself, you need to define a standalone config for it. Set `MODEL_LOAD_OFFICIAL: False` and use a standalone config file.
134 |
135 | ```
136 | MODEL:
137 | DISTILLER:
138 | MODEL_LOAD_OFFICIAL: False
139 | MODEL_DISTILLER_CONFIG: 'Teachers/SOLOv2_R101_3x_ms.yaml'
140 |
141 | INS_ATT_MIMIC:
142 | WEIGHT_VALUE: 8.0
143 | ```
144 |
145 | In the teacher's config, simply set the pretrained weights to a checkpoint file:
146 | ```
147 | _BASE_: "../Base-SOLOv2.yaml"
148 | MODEL:
149 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9w7b3sjaXvqYQEQ"
150 | # This is the official release from AdelaiDet.
151 | RESNETS:
152 | DEPTH: 101
153 | ```
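A locally trained checkpoint works the same way; point `WEIGHTS` to a filesystem path instead of a URL (the path below is a placeholder):
```
_BASE_: "../Base-SOLOv2.yaml"
MODEL:
  WEIGHTS: "/path/to/your/teacher_checkpoint.pth"
  RESNETS:
    DEPTH: 101
```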
154 |
155 | You can find more options in [utils/build.py](utils/build.py).
156 |
157 | # Results
158 | For object detection in MS-COCO:
159 | | Model | Baseline (BoxAP) | + Ours (BoxAP) |
160 | | --- | :---: | :---: |
161 | | Faster R-CNN | 37.9 | 40.9 (+3.0) |
162 | | RetinaNet | 37.4 | 40.7 (+3.3) |
163 | | FCOS | 39.4 | 42.9 (+3.5) |
164 |
165 | For instance segmentation in MS-COCO:
166 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | Baseline (MaskAP) | + Ours (MaskAP) |
167 | | --- | :---: | :---: | :---: | :---: |
168 | | Mask R-CNN | 38.6 | 41.2 (+2.6) | 35.2 | 37.4 (+2.2) |
169 | | SOLOv2 | - | - | 34.6 | 38.5 (+3.9) |
170 | | CondInst | 39.7 | 43.7 (+4.0) | 35.7 | 39.1 (+3.4) |
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-CondInst.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "CondInst"
3 | MASK_ON: True
4 | BACKBONE:
5 | NAME: "build_fcos_resnet_fpn_backbone"
6 | RESNETS:
7 | OUT_FEATURES: ["res3", "res4", "res5"]
8 | FPN:
9 | IN_FEATURES: ["res3", "res4", "res5"]
10 | PROPOSAL_GENERATOR:
11 | NAME: "FCOS"
12 | FCOS:
13 | THRESH_WITH_CTR: True
14 | USE_SCALE: True
15 | CONDINST:
16 | MAX_PROPOSALS: 500
17 | DATASETS:
18 | TRAIN: ("coco_2017_train",)
19 | TEST: ("coco_2017_val",)
20 | SOLVER:
21 | IMS_PER_BATCH: 16
22 | BASE_LR: 0.01
23 | STEPS: (60000, 80000)
24 | MAX_ITER: 90000
25 | INPUT:
26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-FCOS.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "OneStageDetector"
3 | BACKBONE:
4 | NAME: "build_fcos_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res3", "res4", "res5"]
9 | PROPOSAL_GENERATOR:
10 | NAME: "FCOS"
11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717]
12 | DATASETS:
13 | TRAIN: ("coco_2017_train",)
14 | TEST: ("coco_2017_val",)
15 | SOLVER:
16 | IMS_PER_BATCH: 16
17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
18 | STEPS: (60000, 80000)
19 | MAX_ITER: 90000
20 | INPUT:
21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-RCNN-C4.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | RPN:
4 | PRE_NMS_TOPK_TEST: 6000
5 | POST_NMS_TOPK_TEST: 1000
6 | ROI_HEADS:
7 | NAME: "Res5ROIHeads"
8 | DATASETS:
9 | TRAIN: ("coco_2017_train",)
10 | TEST: ("coco_2017_val",)
11 | SOLVER:
12 | IMS_PER_BATCH: 16
13 | BASE_LR: 0.02
14 | STEPS: (60000, 80000)
15 | MAX_ITER: 90000
16 | INPUT:
17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
18 | VERSION: 2
19 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-RCNN-DilatedC5.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | RESNETS:
4 | OUT_FEATURES: ["res5"]
5 | RES5_DILATION: 2
6 | RPN:
7 | IN_FEATURES: ["res5"]
8 | PRE_NMS_TOPK_TEST: 6000
9 | POST_NMS_TOPK_TEST: 1000
10 | ROI_HEADS:
11 | NAME: "StandardROIHeads"
12 | IN_FEATURES: ["res5"]
13 | ROI_BOX_HEAD:
14 | NAME: "FastRCNNConvFCHead"
15 | NUM_FC: 2
16 | POOLER_RESOLUTION: 7
17 | ROI_MASK_HEAD:
18 | NAME: "MaskRCNNConvUpsampleHead"
19 | NUM_CONV: 4
20 | POOLER_RESOLUTION: 14
21 | DATASETS:
22 | TRAIN: ("coco_2017_train",)
23 | TEST: ("coco_2017_val",)
24 | SOLVER:
25 | IMS_PER_BATCH: 16
26 | BASE_LR: 0.02
27 | STEPS: (60000, 80000)
28 | MAX_ITER: 90000
29 | INPUT:
30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
31 | VERSION: 2
32 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | NAME: "build_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7 | FPN:
8 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
9 | ANCHOR_GENERATOR:
10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
12 | RPN:
13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level
16 | # Detectron1 uses 2000 proposals per-batch,
17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000
20 | POST_NMS_TOPK_TEST: 1000
21 | ROI_HEADS:
22 | NAME: "StandardROIHeads"
23 | IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 | ROI_BOX_HEAD:
25 | NAME: "FastRCNNConvFCHead"
26 | NUM_FC: 2
27 | POOLER_RESOLUTION: 7
28 | ROI_MASK_HEAD:
29 | NAME: "MaskRCNNConvUpsampleHead"
30 | NUM_CONV: 4
31 | POOLER_RESOLUTION: 14
32 | DATASETS:
33 | TRAIN: ("coco_2017_train",)
34 | TEST: ("coco_2017_val",)
35 | SOLVER:
36 | IMS_PER_BATCH: 16
37 | BASE_LR: 0.02
38 | STEPS: (60000, 80000)
39 | MAX_ITER: 90000
40 | CHECKPOINT_PERIOD: 10000
41 | TEST:
42 | EVAL_PERIOD: 10000
43 | INPUT:
44 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
45 | VERSION: 2
46 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-RetinaNet.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "RetinaNet"
3 | BACKBONE:
4 | NAME: "build_retinanet_resnet_fpn_backbone"
5 | RESNETS:
6 | OUT_FEATURES: ["res3", "res4", "res5"]
7 | ANCHOR_GENERATOR:
8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
9 | FPN:
10 | IN_FEATURES: ["res3", "res4", "res5"]
11 | RETINANET:
12 | IOU_THRESHOLDS: [0.4, 0.5]
13 | IOU_LABELS: [0, -1, 1]
14 | SMOOTH_L1_LOSS_BETA: 0.0
15 | DATASETS:
16 | TRAIN: ("coco_2017_train",)
17 | TEST: ("coco_2017_val",)
18 | SOLVER:
19 | IMS_PER_BATCH: 16
20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
21 | STEPS: (60000, 80000)
22 | MAX_ITER: 90000
23 | CHECKPOINT_PERIOD: 10000
24 | INPUT:
25 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
26 | VERSION: 2
27 | TEST:
28 | EVAL_PERIOD: 10000
29 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Base-SOLOv2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "SOLOv2"
3 | MASK_ON: True
4 | BACKBONE:
5 | NAME: "build_resnet_fpn_backbone"
6 | RESNETS:
7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"]
8 | FPN:
9 | IN_FEATURES: ["res2", "res3", "res4", "res5"]
10 | DATASETS:
11 | TRAIN: ("coco_2017_train",)
12 | TEST: ("coco_2017_val",)
13 | SOLVER:
14 | IMS_PER_BATCH: 16
15 | BASE_LR: 0.01
16 | WARMUP_FACTOR: 0.01
17 | WARMUP_ITERS: 1000
18 | STEPS: (60000, 80000)
19 | MAX_ITER: 90000
20 | INPUT:
21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
22 | MASK_FORMAT: "bitmask"
23 | VERSION: 2
24 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/FCOS_R_101_DCN_FPN_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | META_ARCHITECTURE: FCOSBase
4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
5 | RESNETS:
6 | DEPTH: 101
7 | DEFORM_ON_PER_STAGE: [False, True, True, True]
8 | DEFORM_MODULATED: True
9 | SOLVER:
10 | STEPS: (120000, 160000)
11 | MAX_ITER: 180000
12 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/FCOS_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | META_ARCHITECTURE: FCOS
4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (60000, 80000)
9 | MAX_ITER: 90000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/FCOS_R_50_FPN_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | META_ARCHITECTURE: FCOS
4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (120000, 160000)
9 | MAX_ITER: 180000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/POTO_R_50_FPN_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | META_ARCHITECTURE: POTO
4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
5 | RESNETS:
6 | DEPTH: 50
7 | FCOS:
8 | NMS_THRESH_TEST: 1.0
9 | NMS_TYPE: 'null'
10 | SOLVER:
11 | STEPS: (120000, 160000)
12 | MAX_ITER: 180000
13 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | LOAD_PROPOSALS: True
6 | RESNETS:
7 | DEPTH: 50
8 | PROPOSAL_GENERATOR:
9 | NAME: "PrecomputedProposals"
10 | DATASETS:
11 | TRAIN: ("coco_2017_train",)
12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
13 | TEST: ("coco_2017_val",)
14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
15 | DATALOADER:
16 | # proposals are part of the dataset_dicts, and take a lot of RAM
17 | NUM_WORKERS: 2
18 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_152_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-152.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 152
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x_bs8.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | LABEL_ENC:
8 | BYPASS_DISTILL: 80000
9 |
10 | SOLVER:
11 | IMS_PER_BATCH: 8
12 | BASE_LR: 0.01
13 | STEPS: (120000, 160000)
14 | MAX_ITER: 180000
15 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (120000, 160000)
9 | MAX_ITER: 180000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: False
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | MASK_ON: False
4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
5 | PIXEL_STD: [57.375, 57.120, 58.395]
6 | RESNETS:
7 | STRIDE_IN_1X1: False # this is a C2 model
8 | NUM_GROUPS: 32
9 | WIDTH_PER_GROUP: 8
10 | DEPTH: 101
11 | SOLVER:
12 | STEPS: (210000, 250000)
13 | MAX_ITER: 270000
14 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | RESNETS:
5 | DEPTH: 101
6 | SOLVER:
7 | STEPS: (210000, 250000)
8 | MAX_ITER: 270000
9 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_R_152_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-152.pkl"
4 | RESNETS:
5 | DEPTH: 152
6 | SOLVER:
7 | STEPS: (210000, 250000)
8 | MAX_ITER: 270000
9 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_1x_bs8.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 |
7 | LABEL_ENC:
8 | BYPASS_DISTILL: 80000
9 |
10 | SOLVER:
11 | IMS_PER_BATCH: 8
12 | BASE_LR: 0.005
13 | STEPS: (120000, 160000)
14 | MAX_ITER: 180000
15 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_2x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | SOLVER:
7 | STEPS: (120000, 160000)
8 | MAX_ITER: 180000
9 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | SOLVER:
7 | STEPS: (210000, 250000)
8 | MAX_ITER: 270000
9 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/retinanet_X101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | MASK_ON: False
4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
5 | PIXEL_STD: [57.375, 57.120, 58.395]
6 | RESNETS:
7 | STRIDE_IN_1X1: False # this is a C2 model
8 | NUM_GROUPS: 32
9 | WIDTH_PER_GROUP: 8
10 | DEPTH: 101
11 | SOLVER:
12 | STEPS: (210000, 250000)
13 | MAX_ITER: 270000
14 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | META_ARCHITECTURE: "ProposalNetwork"
4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
5 | MASK_ON: False
6 | RESNETS:
7 | DEPTH: 50
8 | RPN:
9 | PRE_NMS_TOPK_TEST: 12000
10 | POST_NMS_TOPK_TEST: 2000
11 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | META_ARCHITECTURE: "ProposalNetwork"
4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
5 | MASK_ON: False
6 | RESNETS:
7 | DEPTH: 50
8 | RPN:
9 | POST_NMS_TOPK_TEST: 2000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 101
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.train import train
2 | from ..common.optim import SGD as optimizer
3 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
4 | from ..common.data.coco import dataloader
5 | from ..common.models.mask_rcnn_c4 import model
6 |
7 | model.backbone.freeze_at = 2
8 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.optim import SGD as optimizer
2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
3 | from ..common.data.coco import dataloader
4 | from ..common.models.mask_rcnn_fpn import model
5 | from ..common.train import train
6 |
7 | model.backbone.bottom_up.freeze_at = 2
8 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | RPN:
8 | BBOX_REG_LOSS_TYPE: "giou"
9 | BBOX_REG_LOSS_WEIGHT: 2.0
10 | ROI_BOX_HEAD:
11 | BBOX_REG_LOSS_TYPE: "giou"
12 | BBOX_REG_LOSS_WEIGHT: 10.0
13 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | SOLVER:
8 | STEPS: (210000, 250000)
9 | MAX_ITER: 270000
10 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | MASK_ON: True
4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
5 | PIXEL_STD: [57.375, 57.120, 58.395]
6 | RESNETS:
7 | STRIDE_IN_1X1: False # this is a C2 model
8 | NUM_GROUPS: 32
9 | WIDTH_PER_GROUP: 8
10 | DEPTH: 101
11 | SOLVER:
12 | STEPS: (210000, 250000)
13 | MAX_ITER: 270000
14 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.optim import SGD as optimizer
2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
3 | from ..common.data.coco import dataloader
4 | from ..common.models.mask_rcnn_fpn import model
5 | from ..common.train import train
6 |
7 | from detectron2.config import LazyCall as L
8 | from detectron2.modeling.backbone import RegNet
9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
10 |
11 |
12 | # Replace default ResNet with RegNetX-4GF from the DDS paper. Config source:
13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa
14 | model.backbone.bottom_up = L(RegNet)(
15 | stem_class=SimpleStem,
16 | stem_width=32,
17 | block_class=ResBottleneckBlock,
18 | depth=23,
19 | w_a=38.65,
20 | w_0=96,
21 | w_m=2.43,
22 | group_width=40,
23 | freeze_at=2,
24 | norm="FrozenBN",
25 | out_features=["s1", "s2", "s3", "s4"],
26 | )
27 | model.pixel_std = [57.375, 57.120, 58.395]
28 |
29 | optimizer.weight_decay = 5e-5
30 | train.init_checkpoint = (
31 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth"
32 | )
33 | # RegNets benefit from enabling cudnn benchmark mode
34 | train.cudnn_benchmark = True
35 |
--------------------------------------------------------------------------------
/pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py:
--------------------------------------------------------------------------------
1 | from ..common.optim import SGD as optimizer
2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
3 | from ..common.data.coco import dataloader
4 | from ..common.models.mask_rcnn_fpn import model
5 | from ..common.train import train
6 |
7 | from detectron2.config import LazyCall as L
8 | from detectron2.modeling.backbone import RegNet
9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock
10 |
11 |
12 | # Replace default ResNet with RegNetY-4GF from the DDS paper. Config source:
13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa
14 | model.backbone.bottom_up = L(RegNet)(
15 | stem_class=SimpleStem,
16 | stem_width=32,
17 | block_class=ResBottleneckBlock,
18 | depth=22,
19 | w_a=31.41,
20 | w_0=96,
21 | w_m=2.24,
22 | group_width=64,
23 | se_ratio=0.25,
24 | freeze_at=2,
25 | norm="FrozenBN",
26 | out_features=["s1", "s2", "s3", "s4"],
27 | )
28 | model.pixel_std = [57.375, 57.120, 58.395]
29 |
30 | optimizer.weight_decay = 5e-5
31 | train.init_checkpoint = (
32 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth"
33 | )
34 | # RegNets benefit from enabling cudnn benchmark mode
35 | train.cudnn_benchmark = True
36 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Distillation-ICD/CondInst_R50_R101_icd.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-CondInst.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | DISTILLER:
7 | MODEL_LOAD_OFFICIAL: False
8 | MODEL_DISTILLER_CONFIG: 'Teachers/CondIns_R101_3x_ms.yaml'
9 |
10 | INS_ATT_MIMIC:
11 | WEIGHT_VALUE: 8.0
12 |
13 | SOLVER:
14 | STEPS: (60000, 80000)
15 | MAX_ITER: 90000
16 | CLIP_GRADIENTS: {"ENABLED": True}
--------------------------------------------------------------------------------
/pytorch_release/configs/Distillation-ICD/FCOS_R50_R101_icd.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | DISTILLER:
7 | MODEL_LOAD_OFFICIAL: False
8 | MODEL_DISTILLER_CONFIG: 'Teachers/FCOS_R101_2x_ms.yaml'
9 |     # NOTE: FCOS only releases a 2x model; we use another 3x model trained by ourselves to report results in the paper.
10 |
11 | INS_ATT_MIMIC:
12 | WEIGHT_VALUE: 8.0
13 |
14 | SOLVER:
15 | STEPS: (60000, 80000)
16 | MAX_ITER: 90000
17 | CLIP_GRADIENTS: {"ENABLED": True}
--------------------------------------------------------------------------------
/pytorch_release/configs/Distillation-ICD/MaskRCNN_R_50_R101_icd_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | MASK_ON: True
5 | RESNETS:
6 | DEPTH: 50
7 | DISTILLER:
8 | MODEL_LOAD_OFFICIAL: True
9 | MODEL_DISTILLER_CONFIG: 'COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml'
10 |
11 | INS:
12 | INPUT_FEATS: ['p2', 'p3', 'p4', 'p5', 'p6']
13 | MAX_LABELS: 100
14 |
15 | INS_ATT_MIMIC:
16 | WEIGHT_VALUE: 3.0
17 |
18 | SOLVER:
19 | STEPS: (60000, 80000)
20 | MAX_ITER: 90000
21 | CLIP_GRADIENTS: {"ENABLED": True}
22 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Distillation-ICD/RCNN_R_50_R101_icd_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | DISTILLER:
7 | MODEL_LOAD_OFFICIAL: True
8 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml'
9 |
10 | INS:
11 | INPUT_FEATS: ['p2', 'p3', 'p4', 'p5', 'p6']
12 | MAX_LABELS: 100
13 |
14 | INS_ATT_MIMIC:
15 | WEIGHT_VALUE: 3.0
16 |
17 | SOLVER:
18 | STEPS: (60000, 80000)
19 | MAX_ITER: 90000
20 | CLIP_GRADIENTS: {"ENABLED": True}
21 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Distillation-ICD/SOLOv2_R_50_R101_icd_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-SOLOv2.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | DISTILLER:
7 | MODEL_LOAD_OFFICIAL: False
8 | MODEL_DISTILLER_CONFIG: 'Teachers/SOLOv2_R101_3x_ms.yaml'
9 |
10 | INS_ATT_MIMIC:
11 | WEIGHT_VALUE: 8.0
12 |
13 | SOLVER:
14 | STEPS: (60000, 80000)
15 | MAX_ITER: 90000
16 | CLIP_GRADIENTS: {"ENABLED": True}
--------------------------------------------------------------------------------
/pytorch_release/configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 | RESNETS:
5 | DEPTH: 50
6 | DISTILLER:
7 | MODEL_LOAD_OFFICIAL: True
8 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/retinanet_R_101_FPN_3x.yaml'
9 |
10 | INS_ATT_MIMIC:
11 | WEIGHT_VALUE: 8.0
12 |
13 | SOLVER:
14 | STEPS: (60000, 80000)
15 | MAX_ITER: 90000
16 | CLIP_GRADIENTS: {"ENABLED": True}
--------------------------------------------------------------------------------
/pytorch_release/configs/Teachers/CondIns_R101_3x_ms.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-CondInst.yaml"
2 | MODEL:
3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/M8nNxSR5iNP4qyO/download"
4 | RESNETS:
5 | DEPTH: 101
6 |
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Teachers/FCOS_R101_2x_ms.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-FCOS.yaml"
2 | MODEL:
3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/M3UOT6JcyHy2QW1/download"
4 | RESNETS:
5 | DEPTH: 101
6 |
7 |
--------------------------------------------------------------------------------
/pytorch_release/configs/Teachers/SOLOv2_R101_3x_ms.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-SOLOv2.yaml"
2 | MODEL:
3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9w7b3sjaXvqYQEQ"
4 | RESNETS:
5 | DEPTH: 101
6 |
7 |
--------------------------------------------------------------------------------
/pytorch_release/models/distiller.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 |
5 | 
6 | from .utils import *
7 |
8 |
9 | from detectron2.utils.registry import Registry
10 |
11 | DISTILLER_REGISTRY = Registry("DISTILLER") # noqa F401 isort:skip
12 | DISTILLER_REGISTRY.__doc__ = """
13 | Registry for distillers, i.e. modules that transfer knowledge from a teacher to a student.
14 | 
15 | The registered object will be called with `obj(cfg, student, teacher)`
16 | and expected to return a `nn.Module` object.
17 | """
18 |
19 |
20 | def build_distiller(cfg, name, student, teacher):
21 | """
22 |     Build a distiller registered in ``DISTILLER_REGISTRY`` under ``name``.
23 | Note that it does not load any weights from ``cfg``.
24 | """
25 | model = DISTILLER_REGISTRY.get(name)(cfg, student, teacher)
26 | model.to(torch.device(cfg.MODEL.DEVICE))
27 | return model
28 |
29 |
30 | @DISTILLER_REGISTRY.register()
31 | class InstanceConditionalDistillation(nn.Module):
32 | """
33 | Distillation with multi-head attention. Mimic attention and features.
34 | """
35 |
36 | def __init__(self, cfg, student, teacher) -> None:
37 | super().__init__()
38 | self.cfg = cfg
39 |         self.student = [student]  # kept in a list so it is a pointer, not a registered submodule
40 |
41 | 
42 | hidden_dim = cfg.MODEL.DISTILLER.INS.HIDDEN_DIM
43 |
44 | self.pos_embedding = PositionEmbeddingSine(
45 | hidden_dim // 2, normalize=True)
46 |
47 | self.teacher_ptr = [teacher]
48 | self.attention_module = build_decoder_module(
49 | cfg)
50 |
51 | self.feat_keys = cfg.MODEL.DISTILLER.INS.INPUT_FEATS
52 |
53 | self.weight_value = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.WEIGHT_VALUE
54 | self.temp_value = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_VALUE
55 |         if self.temp_value < 0:  # a negative config value means the temperature is learnable
56 | self.temp_value = nn.Parameter(torch.ones([1]).mean())
57 |
58 | self.distill_norm_type = cfg.MODEL.DISTILLER.INS.DISTILL_NORM
59 |
60 | self.distill_negative = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.DISTILL_NEGATIVE
61 | self.use_pos_embds = cfg.MODEL.DISTILLER.INS.USE_POS_EMBEDDING
62 |
63 | self.predictor = MLP(hidden_dim, hidden_dim, 1, 3)
64 |
65 | if self.distill_norm_type == 'ln':
66 | self.distill_norm_ = nn.LayerNorm(
67 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False)
68 | self.distill_norm_tea = nn.LayerNorm(
69 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False)
70 | elif self.distill_norm_type == 'tln':
71 | self.distill_norm_ = nn.Sequential()
72 | self.distill_norm_tea = nn.LayerNorm(
73 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False)
74 | else:
75 | self.distill_norm_ = nn.Sequential()
76 | self.distill_norm_tea = nn.Sequential()
77 |
78 | self.loss_form = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.LOSS_FORM
79 |
80 | def concate_multiscale_reps(self, feat, pos_emb, mask):
81 |         # permute and concatenate multi-scale features into single tensors in the transformer layout
82 | keys = self.feat_keys
83 |
84 | feat = torch.cat([feat[k].flatten(2).permute(2, 0, 1)
85 | for k in keys], 0) # S, N, C
86 | pos_emb = torch.cat([pos_emb[k].flatten(2).permute(
87 | 2, 0, 1) for k in keys], 0) # S, N, C
88 | mask = torch.cat([mask[k].flatten(2).squeeze(1)
89 | for k in keys], 1) # N, S
90 | return feat, pos_emb, mask
91 |
92 | def bce_identification_loss(self, feat_list, ins_mask, ins_mask_gt):
93 |         # identification loss: predict whether a given instance is real or fake
94 | positive_mask = (~ins_mask).float()
95 |
96 | loss_dict = {}
97 | for i, dfeat in enumerate(feat_list):
98 | f_pre = self.predictor(dfeat)
99 |
100 | loss = (F.binary_cross_entropy_with_logits(f_pre.squeeze(-1).T, ins_mask_gt, reduction='none') *
101 | positive_mask).sum() / positive_mask.sum()
102 |
103 | loss_dict['stu_bce.%s.loss' % i] = loss
104 |
105 | return loss_dict
106 |
107 | def mimic_loss(self, svalue, tvalue, value_mask):
108 | # value: num_seq, bsz, heads, channel
109 | # mask: [bsz, heads, 1, Seq]
110 | #value_mask = value_mask ** self.power_factor
111 | if self.loss_form in ['mse', 'MSE']:
112 | return ((F.mse_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0)
113 | * value_mask).sum(-1) / value_mask.sum(-1).clamp(min=1e-6)).mean()
114 | elif self.loss_form in ['l1', 'L1']:
115 | return (F.l1_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0)
116 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6)
117 | elif self.loss_form in ['smoothL1']:
118 | return (F.smooth_l1_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0)
119 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6)
120 | elif self.loss_form in ['L2', 'l2']:
121 | return ((F.mse_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0)
122 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6)) ** 0.5
123 |
124 | def forward(self, features_dict, features_dict_tea):
125 | if isinstance(self.temp_value, nn.Parameter):
126 | self.temp_value.data = self.temp_value.data.clamp(min=0.1, max=8)
127 | else:
128 | if self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_DECAY:
129 | decay_to = self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_DECAY_TO
130 | ratio = features_dict['iteration'] / self.cfg.SOLVER.MAX_ITER
131 | self.temp_value = ratio * decay_to + \
132 | (1 - ratio) * self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_VALUE
133 |
134 | images = features_dict['images']
135 | batched_inputs = features_dict['batched_inputs']
136 | fpn_outputs = features_dict['fpn_feat']
137 |
138 | # assert set(self.feat_keys) == set(list(fpn_outputs.keys(
139 | # ))), 'WARNING: Unequal keys for fpn and attention ! <%s> != <%s>' % (self.feat_keys, fpn_outputs.keys())
140 |
141 | if features_dict['distill_flag'] == 0:
142 | fpn_outputs = {k: v.detach() for k, v in fpn_outputs.items()}
143 |
144 | # mask_out: zero for foreground, one for bg: BoolTensor(N, 1, H, W)
145 | mask_out = mask_out_padding(fpn_outputs, images)
146 |
147 | # fpn_outputs = self.scale_adapter(fpn_outputs)
148 | pos_embs = {k: self.pos_embedding(
149 | fpn_outputs[k], mask_out[k]) for k in self.feat_keys}
150 | # feat, pos: [S, N, C]; mask: [N, S]
151 | feat, pos_embs, mask_padding = self.concate_multiscale_reps(
152 | fpn_outputs, pos_embs, mask_out)
153 |
154 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K]
155 | # NOTE: (0 for Fake Instance) in ins_mask
156 | ins_feat, ins_mask, ins_mask_gt = features_dict_tea['aux_feat']['encoded_ins']
157 | ins_feat = ins_feat.detach()
158 |
159 | if self.distill_negative:
160 | ins_mask_gt = (~ins_mask).detach().float()
161 |             max_ele = None  # an open-ended slice keeps all instances
162 | else:
163 |             # calculate an element mask to reduce unnecessary computation
164 | max_ele = ins_mask_gt.long().sum(-1).max().item()
165 |
166 | # Note that mask is not normalized by softmax
167 |
168 | decoded_feat_list, att_mask_list, value_list = self.attention_module(
169 | ins_feat[:max_ele, :, :], feat, feat, query_mask=ins_mask[:, :max_ele], key_padding_mask=mask_padding, pos_embedding=pos_embs, proj_only=True)
170 |
171 | decoded_value_tea = features_dict_tea['aux_feat']['decoded_value']
172 | decoded_mask_tea = features_dict_tea['aux_feat']['decoded_mask']
173 |
174 | loss_value = torch.tensor([0.0], device=ins_mask_gt.device).mean()
175 | for i, (tmask, svalue, tvalue) in enumerate(zip(decoded_mask_tea, value_list, decoded_value_tea)):
176 | tmask = tmask.detach() # bsz, heads, num_ins, num_seq
177 |
178 | # num_seq, bsz, heads, channel
179 | tvalue = self.distill_norm_tea(tvalue)
180 | tvalue = tvalue.detach()
181 |
182 | if self.weight_value > 0:
183 | with torch.no_grad():
184 | value_mask = ((tmask / self.temp_value).softmax(-1) *
185 | ins_mask_gt.unsqueeze(1).unsqueeze(-1)).sum(2, keepdim=True)
186 | # [bsz, heads, ins, Seq]
187 |
188 | svalue = self.distill_norm_(svalue)
189 | loss_value += self.mimic_loss(svalue,
190 | tvalue, value_mask) * self.weight_value
191 |
192 | loss_dict = {
193 | 'matt.value': loss_value / len(decoded_feat_list),
194 | }
195 |
196 | if isinstance(self.temp_value, nn.Parameter):
197 | loss_dict['temp.value'] = self.temp_value.detach()
198 |
199 | return loss_dict
200 |
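201 | # Usage sketch (hypothetical, for illustration only): build the distiller through the
202 | # registry and call it with the student/teacher feature dicts expected by forward():
203 | #   distiller = build_distiller(cfg, "InstanceConditionalDistillation", student, teacher)
204 | #   loss_dict = distiller(features_dict, features_dict_tea)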
--------------------------------------------------------------------------------
/pytorch_release/models/models.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | import torch
3 | from torch import nn
4 | from detectron2.utils.events import get_event_storage
5 | from .utils import *
6 | from .distiller import build_distiller
7 | from .teacher import build_teacher
8 |
9 | from detectron2.utils.events import EventWriter
10 |
11 |
12 | class Distillator(nn.Module):
13 | def __init__(self, cfg, student) -> None:
14 | super().__init__()
15 | self.cfg = cfg
16 |         self.student_buffer = [student]  # kept in a list as a pointer, not a registered submodule
17 |
18 | self.teacher = build_teacher(cfg, student)
19 |
20 | distillers = []
21 | for dis_name in cfg.MODEL.DISTILLER.TYPES:
22 | distillers.append(build_distiller(
23 | cfg, dis_name, student, self.teacher))
24 |
25 | self.distillers = nn.ModuleList(distillers)
26 |
27 | self.register_buffer(
28 | "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
29 | self.register_buffer(
30 | "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
31 |
32 | def forward(self, raw_output, forward_only=False, teacher_only=False):
33 | '''
34 |         Input:
35 |             raw_output : dict with batched_inputs, images, backbone/FPN features and the current iteration
36 | Output:
37 | losses_tea : loss dict
38 | r_features_tea : features from backbone
39 | features_tea : features from FPN
40 | '''
41 | if teacher_only:
42 | loss_dict, _ = self.teacher(raw_output, None, None, None)
43 | return loss_dict
44 |
45 | r_feats = raw_output['backbone_feat']
46 | fpn_feats = raw_output['fpn_feat']
47 | batched_inputs = raw_output['batched_inputs']
48 | images = raw_output['images']
49 | iteration = raw_output['iteration']
50 |
51 | if iteration < self.cfg.MODEL.DISTILLER.BYPASS_DISTILL or iteration > self.cfg.MODEL.DISTILLER.BYPASS_DISTILL_AFTER:
52 |             distill_flag = self.cfg.MODEL.DISTILLER.DISTILL_OFF  # outside the distillation window
53 | else:
54 | distill_flag = self.cfg.MODEL.DISTILLER.DISTILL_ON
55 |
56 | raw_output['distill_flag'] = distill_flag
57 |
58 | storage = get_event_storage()
59 | storage.put_scalar('distill_flag', distill_flag, False)
60 |
61 | if forward_only:
62 | with torch.no_grad():
63 | loss_dict, feat_dict_tea = self.teacher(
64 | batched_inputs, images, r_feats, fpn_feats)
65 | else:
66 | loss_dict, feat_dict_tea = self.teacher(
67 | batched_inputs, images, r_feats, fpn_feats)
68 |
69 | for i, distiller in enumerate(self.distillers):
70 | loss_d = distiller(raw_output, feat_dict_tea)
71 | loss_d = {'distill.%s.%s' % (i, k): v for k, v in loss_d.items()}
72 | loss_dict.update(loss_d)
73 |
74 | return loss_dict
75 |
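76 | # Usage sketch (hypothetical, for illustration only): wrap a student detector and call
77 | # with the raw_output dict produced by the training loop (keys read in forward() above):
78 | #   distillator = Distillator(cfg, student)
79 | #   losses = distillator(raw_output)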
--------------------------------------------------------------------------------
/pytorch_release/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.9.0
2 | torchvision==0.10.0
3 | opencv-python==4.5.4.58
--------------------------------------------------------------------------------