├── .gitignore ├── LICENSE ├── Poster.png ├── README.md ├── megengine_release ├── README.md ├── configs │ ├── __init__.py │ ├── atss_res101_coco_3x_800size.py │ ├── atss_res18_coco_3x_800size.py │ ├── atss_res34_coco_3x_800size.py │ ├── atss_res50_coco_3x_800size.py │ ├── atss_resx101_coco_2x_800size.py │ ├── faster_rcnn_res101_coco_3x_800size.py │ ├── faster_rcnn_res18_coco_3x_800size.py │ ├── faster_rcnn_res34_coco_3x_800size.py │ ├── faster_rcnn_res50_coco_3x_800size.py │ ├── faster_rcnn_resx101_coco_2x_800size.py │ ├── fcos_res101_coco_3x_800size.py │ ├── fcos_res18_coco_3x_800size.py │ ├── fcos_res34_coco_3x_800size.py │ ├── fcos_res50_coco_3x_800size.py │ ├── fcos_resx101_coco_2x_800size.py │ ├── freeanchor_res101_coco_3x_800size.py │ ├── freeanchor_res18_coco_3x_800size.py │ ├── freeanchor_res34_coco_3x_800size.py │ ├── freeanchor_res50_coco_3x_800size.py │ ├── freeanchor_resx101_coco_2x_800size.py │ ├── retinanet_res101_coco_3x_800size.py │ ├── retinanet_res18_coco_3x_800size.py │ ├── retinanet_res34_coco_3x_800size.py │ ├── retinanet_res50_coco_3x_800size.py │ └── retinanet_resx101_coco_2x_800size.py ├── distill_configs │ ├── ICD.py │ ├── ICD_rcnn.py │ ├── atss_res50_coco_1x_800size.py │ ├── coco_obj.json │ ├── fcos_res50_coco_1x_800size.py │ └── retinanet_res50_coco_1x_800size.py ├── layers │ ├── __init__.py │ ├── basic │ │ ├── __init__.py │ │ ├── functional.py │ │ ├── nn.py │ │ └── norm.py │ ├── det │ │ ├── __init__.py │ │ ├── anchor.py │ │ ├── box_head.py │ │ ├── box_utils.py │ │ ├── fpn.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── point_head.py │ │ ├── pooler.py │ │ ├── rcnn.py │ │ ├── rpn.py │ │ └── sampling.py │ └── tools │ │ ├── __init__.py │ │ ├── data_mapper.py │ │ ├── inference.py │ │ ├── nms.py │ │ └── utils.py ├── models │ ├── ICD │ │ ├── ICD.py │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── layers.py │ │ ├── transformer.py │ │ └── utility.py │ ├── __init__.py │ ├── atss.py │ ├── backbones │ │ ├── __init__.py │ │ └── resnet │ │ │ ├── __init__.py │ │ │ └── model.py │ ├── faster_rcnn.py │ ├── fcos.py │ ├── freeanchor.py │ └── retinanet.py ├── requirements.txt ├── test.py ├── train.py └── train_distill_icd.py └── pytorch_release ├── README.md ├── configs ├── Base-CondInst.yaml ├── Base-FCOS.yaml ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── Base-SOLOv2.yaml ├── COCO-Detection │ ├── FCOS_R_101_DCN_FPN_2x.yaml │ ├── FCOS_R_50_FPN_1x.yaml │ ├── FCOS_R_50_FPN_2x.yaml │ ├── POTO_R_50_FPN_2x.yaml │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_152_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_1x_bs8.yaml │ ├── faster_rcnn_R_50_FPN_2x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_152_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_1x_bs8.yaml │ ├── retinanet_R_50_FPN_2x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── retinanet_X101_32x8d_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ └── rpn_R_50_FPN_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.py │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── 
mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.py │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x_giou.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ ├── mask_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── mask_rcnn_regnetx_4gf_dds_fpn_1x.py │ └── mask_rcnn_regnety_4gf_dds_fpn_1x.py ├── Distillation-ICD │ ├── CondInst_R50_R101_icd.yaml │ ├── FCOS_R50_R101_icd.yaml │ ├── MaskRCNN_R_50_R101_icd_FPN_1x.yaml │ ├── RCNN_R_50_R101_icd_FPN_1x.yaml │ ├── SOLOv2_R_50_R101_icd_FPN_1x.yaml │ └── retinanet_R_50_R101_icd_FPN_1x.yaml ├── Teachers │ ├── CondIns_R101_3x_ms.yaml │ ├── FCOS_R101_2x_ms.yaml │ └── SOLOv2_R101_3x_ms.yaml └── coco_obj.json ├── models ├── distiller.py ├── layers │ └── transformer.py ├── models.py ├── teacher.py └── utils.py ├── requirements.txt ├── train_baseline.py ├── train_distill.py └── utils └── build.py /.gitignore: -------------------------------------------------------------------------------- 1 | *log*/ 2 | *.jpg 3 | *.png 4 | *output* 5 | *_model_zoo/ 6 | 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | -------------------------------------------------------------------------------- /Poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MegEngine/ICD/acf27269648e4538a9d6d22171d1abbcd4eceed1/Poster.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Instance-Conditional Knowledge Distillation for Object Detection 2 | This is the official implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection", based on [MegEngine](./megengine_release/README.md) and [Pytorch](./pytorch_release/README.md). Go to the desired subfolders for more information and guidance! 3 | 4 | 5 |
6 | 7 |
8 | 9 | > [**Instance-Conditional Knowledge Distillation for Object Detection**](https://arxiv.org/abs/2110.12724), 10 | > Zijian Kang, Peizhen Zhang, Xiangyu Zhang, Jian Sun, Nanning Zheng 11 | > In Proc. of Advances in Neural Information Processing Systems (NeurIPS), 2021 12 | > [[arXiv](https://arxiv.org/abs/2110.12724)][[Citation](#citation)][[OpenReview](https://openreview.net/forum?id=k7aeAz4Vbb)] 13 | 14 | ## Usage 15 | You can find two implementations, for [MegEngine](./megengine_release/README.md) and [PyTorch](./pytorch_release/README.md), under the two sub-folders. We use the latter to report the performance in the paper. Switch to the corresponding subfolder for more information. 16 | 17 | ### Try it in a few lines: 18 | Taking the Detectron2 implementation as an example, you can train your model in a few lines: 19 | ``` 20 | cd pytorch_release 21 | 22 | # Install dependencies 23 | pip install pip --upgrade 24 | pip install -r requirements.txt 25 | pip install https://github.com/facebookresearch/detectron2/archive/refs/tags/v0.5.tar.gz 26 | pip install 'git+https://github.com/aim-uofa/AdelaiDet.git@7bf9d87' 27 | 28 | # Prepare the dataset according to https://github.com/facebookresearch/detectron2/tree/main/datasets 29 | 30 | # Train and distill a RetinaNet detector with ICD 31 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_retinanet 32 | ``` 33 | 34 | ## Performance 35 | For object detection on MS-COCO: 36 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | 37 | | --- | :---: | :---: | 38 | | Faster R-CNN | 37.9 | 40.9 (+3.0) | 39 | | RetinaNet | 37.4 | 40.7 (+3.3) | 40 | | FCOS | 39.4 | 42.9 (+3.5) | 41 | 42 | For instance segmentation on MS-COCO: 43 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | Baseline (MaskAP) | + Ours (MaskAP) | 44 | | --- | :---: | :---: | :---: | :---: | 45 | | Mask R-CNN | 38.6 | 41.2 (+2.6) | 35.2 | 37.4 (+2.2) | 46 | | SOLOv2 | - | - | 34.6 | 38.5 (+3.9) | 47 | | CondInst | 39.7 | 43.7 (+4.0) | 35.7 | 39.1 (+3.4) | 48 | 49 | ## Acknowledgement 50 | 51 | Some files are modified from [MegEngine Models](https://github.com/MegEngine/Models) and [Detectron2](https://github.com/facebookresearch/detectron2). We also refer to [PyTorch](https://github.com/pytorch/pytorch), [DETR](https://github.com/facebookresearch/detr) and [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) for some implementations. 52 | 53 | 54 | ## License 55 | 56 | This repo is licensed under the Apache License, Version 2.0 (the "License"). 57 | 58 | ## Citation 59 | You can use the following BibTeX entry for citation in your research. 60 | ``` 61 | @inproceedings{icd_neurips2021, 62 | author = {Kang, Zijian and Zhang, Peizhen and Zhang, Xiangyu and Sun, Jian and Zheng, Nanning}, 63 | booktitle = {Advances in Neural Information Processing Systems}, 64 | editor = {M. Ranzato and A. Beygelzimer and Y. Dauphin and P.S. Liang and J. 
Wortman Vaughan}, 65 | pages = {16468--16480}, 66 | publisher = {Curran Associates, Inc.}, 67 | title = {Instance-Conditional Knowledge Distillation for Object Detection}, 68 | url = {https://proceedings.neurips.cc/paper/2021/file/892c91e0a653ba19df81a90f89d99bcd-Paper.pdf}, 69 | volume = {34}, 70 | year = {2021} 71 | } 72 | ``` 73 | -------------------------------------------------------------------------------- /megengine_release/README.md: -------------------------------------------------------------------------------- 1 | # Instance-Conditional Knowledge Distillation for Object Detection 2 | This is a [MegEngine](https://github.com/MegEngine/MegEngine) implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection", based on [MegEngine Models](https://github.com/MegEngine/Models). 3 | 4 | ## Requirements 5 | 6 | ### Installation 7 | 8 | In order to run the code, please prepare a CUDA environment with: 9 | - Python 3 (3.6 is recommended) 10 | - [MegEngine](https://github.com/MegEngine/MegEngine) 11 | 12 | 13 | 1. Install dependencies. 14 | 15 | ``` 16 | pip3 install --upgrade pip 17 | pip3 install -r requirements.txt 18 | ``` 19 | 20 | 2. Prepare the [MS-COCO 2017 dataset](http://cocodataset.org/#download), and put it in a proper directory with the following structure: 21 | 22 | ``` 23 | /path/to/ 24 | |->coco 25 | | |annotations 26 | | |train2017 27 | | |val2017 28 | ``` 29 | 30 | 31 | [Microsoft COCO: Common Objects in Context](https://arxiv.org/abs/1405.0312) Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Dollár, and C Lawrence Zitnick. European Conference on Computer Vision (ECCV), 2014. 32 | 33 | ## Usage 34 | 35 | ### Train baseline models 36 | 37 | Following [MegEngine Models](https://github.com/MegEngine/Models): 38 | ```bash 39 | python3 train.py -f distill_configs/retinanet_res50_coco_1x_800size.py -n 8 \ 40 | -d /data/Datasets 41 | ``` 42 | 43 | `train.py` arguments: 44 | 45 | - `-f`, config file for the network. 46 | - `-n`, number of required devices (GPUs). 47 | - `-w`, pretrained backbone weights. 48 | - `-b`, training `batch size`, default is 2. 49 | - `-d`, dataset root, default is `/data/datasets`. 50 | 51 | 52 | ### Train with distillation 53 | 54 | ```bash 55 | python3 train_distill_icd.py -f distill_configs/retinanet_res50_coco_1x_800size.py \ 56 | -n 8 -l -d /data/Datasets -tf configs/retinanet_res101_coco_3x_800size.py \ 57 | -df distill_configs/ICD.py \ 58 | -tw _model_zoo/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl 59 | ``` 60 | 61 | `train_distill_icd.py` arguments: 62 | 63 | - `-f`, config file for the student network. 64 | - `-w`, pretrained backbone weights. 65 | - `-tf`, config file for the teacher network. 66 | - `-tw`, pretrained weights for the teacher. 67 | - `-df`, config file for the distillation module, `distill_configs/ICD.py` by default. 68 | - `-l`, use the inheriting strategy (load pretrained parameters). 69 | - `-n`, number of required devices (GPUs). 70 | - `-b`, training `batch size`, default is 2. 71 | - `-d`, dataset root, default is `/data/datasets`. 72 | 73 | Note that `backbone_pretrained` is set in the distill configs, so backbone weights are loaded automatically and `-w` can be omitted. Checkpoints will be saved to a `log-xxx` directory. 
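All of the `-f`, `-tf` and `-df` arguments point to config files that follow the same convention: each module exposes a `Net` callable and, for detector configs, a `Cfg` factory (see the files under `configs/` and `distill_configs/` below). As a minimal, illustrative sketch of how such a module can be loaded dynamically; note that `load_config_module` is a hypothetical helper, not the actual loader used by the training scripts:

```python
import importlib.util

def load_config_module(path, name="config"):
    # Execute a config file (e.g. distill_configs/ICD.py) as a module so
    # that its Net / Cfg attributes can be accessed directly.
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# Usage mirroring the -f and -df arguments:
student = load_config_module("distill_configs/retinanet_res50_coco_1x_800size.py")
model = student.Net(student.Cfg())   # student detector, e.g. models.RetinaNet
distill = load_config_module("distill_configs/ICD.py")
distiller = distill.Net()            # the ICD distillation module
```

This convention is what makes swapping detectors or distillers a one-flag change: any file defining `Net` (and `Cfg`) can be passed to the scripts.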
74 | 75 | ### Evaluate 76 | 77 | ``` 78 | python3 test.py -f distill_configs/retinanet_res50_coco_1x_800size.py -n 8 \ 79 | -w log-of-xxx/epoch_17.pkl -d /data/Datasets/ 80 | ``` 81 | 82 | `test.py` arguments: 83 | 84 | - `-f`, config file for the network. 85 | - `-n`, number of required devices (GPUs). 86 | - `-w`, pretrained weights. 87 | - `-d`, dataset root, default is `/data/datasets`. 88 | 89 | ## Examples and Results 90 | ### Steps 91 | 1. Download the pretrained teacher model to the `_model_zoo` directory. 92 | 2. Train a baseline or distill with ICD. 93 | 3. Evaluate checkpoints (the last checkpoint is used by default). 94 | 95 | ### Example of Common Detectors 96 | 97 | #### RetinaNet 98 | - [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár. IEEE International Conference on Computer Vision (ICCV), 2017. 99 | 100 | 101 | - Teacher RetinaNet-R101-3x: 102 | https://data.megengine.org.cn/models/weights/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl 103 | 104 | 105 | - Config: distill_configs/retinanet_res50_coco_1x_800size.py 106 | 107 | Command: 108 | ``` 109 | python3 train_distill_icd.py -f distill_configs/retinanet_res50_coco_1x_800size.py \ 110 | -n 8 -l -d /data/Datasets -tf configs/retinanet_res101_coco_3x_800size.py \ 111 | -df distill_configs/ICD.py \ 112 | -tw _model_zoo/retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl 113 | ``` 114 | 115 | #### FCOS 116 | 117 | - [FCOS: Fully Convolutional One-Stage Object Detection](https://arxiv.org/abs/1904.01355) Zhi Tian, Chunhua Shen, Hao Chen, and Tong He. IEEE International Conference on Computer Vision (ICCV), 2019. 118 | 119 | - Teacher FCOS-R101-3x: 120 | https://data.megengine.org.cn/models/weights/fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl 121 | 122 | 123 | - Config: distill_configs/fcos_res50_coco_1x_800size.py 124 | 125 | Command: 126 | ``` 127 | python3 train_distill_icd.py -f distill_configs/fcos_res50_coco_1x_800size.py \ 128 | -n 8 -l -d /data/Datasets -tf configs/fcos_res101_coco_3x_800size.py \ 129 | -df distill_configs/ICD.py \ 130 | -tw _model_zoo/fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl 131 | ``` 132 | 133 | #### ATSS 134 | 135 | - [Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection](https://arxiv.org/abs/1912.02424) Shifeng Zhang, Cheng Chi, Yongqiang Yao, Zhen Lei, and Stan Z. Li. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2020. 136 | 137 | - Teacher ATSS-R101-3x: 138 | https://data.megengine.org.cn/models/weights/atss_res101_coco_3x_800size_44dot7_9181687e.pkl 139 | 140 | 141 | - Config: distill_configs/atss_res50_coco_1x_800size.py 142 | 143 | Command: 144 | ``` 145 | python3 train_distill_icd.py -f distill_configs/atss_res50_coco_1x_800size.py \ 146 | -n 8 -l -d /data/Datasets -tf configs/atss_res101_coco_3x_800size.py \ 147 | -df distill_configs/ICD.py \ 148 | -tw _model_zoo/atss_res101_coco_3x_800size_44dot7_9181687e.pkl 149 | ``` 150 | 151 | ### Results of AP on MS-COCO: 152 | 153 | | Model | Baseline | +ICD | 154 | | --- | :---: | :---: | 155 | | RetinaNet | 36.8 | 40.3 | 156 | | FCOS | 40.0 | 43.3 | 157 | | ATSS | 39.6 | 43.0 | 158 | 159 | 160 | ### Notice 161 | 162 | - Results of this implementation are mainly for demonstration; please refer to the Detectron2 version for reproduction. 163 | 164 | - We simply adopt the hyperparameters from the Detectron2 version; further tuning could be helpful. 165 | 166 | - There is a known CUDA memory issue related to MegEngine: the actual memory consumption will be much larger than the theoretical value due to memory fragmentation. This is expected to be fixed in a future version of MegEngine. -------------------------------------------------------------------------------- /megengine_release/configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss_res18_coco_3x_800size import atss_res18_coco_3x_800size 2 | from .atss_res34_coco_3x_800size import atss_res34_coco_3x_800size 3 | from .atss_res50_coco_3x_800size import atss_res50_coco_3x_800size 4 | from .atss_res101_coco_3x_800size import atss_res101_coco_3x_800size 5 | from .atss_resx101_coco_2x_800size import atss_resx101_coco_2x_800size 6 | from .faster_rcnn_res18_coco_3x_800size import faster_rcnn_res18_coco_3x_800size 7 | from .faster_rcnn_res34_coco_3x_800size import faster_rcnn_res34_coco_3x_800size 8 | from .faster_rcnn_res50_coco_3x_800size import faster_rcnn_res50_coco_3x_800size 9 | from .faster_rcnn_res101_coco_3x_800size import faster_rcnn_res101_coco_3x_800size 10 | from .faster_rcnn_resx101_coco_2x_800size import faster_rcnn_resx101_coco_2x_800size 11 | from .fcos_res18_coco_3x_800size import fcos_res18_coco_3x_800size 12 | from .fcos_res34_coco_3x_800size import fcos_res34_coco_3x_800size 13 | from .fcos_res50_coco_3x_800size import fcos_res50_coco_3x_800size 14 | from .fcos_res101_coco_3x_800size import fcos_res101_coco_3x_800size 15 | from .fcos_resx101_coco_2x_800size import fcos_resx101_coco_2x_800size 16 | from .freeanchor_res18_coco_3x_800size import freeanchor_res18_coco_3x_800size 17 | from .freeanchor_res34_coco_3x_800size import freeanchor_res34_coco_3x_800size 18 | from .freeanchor_res50_coco_3x_800size import freeanchor_res50_coco_3x_800size 19 | from .freeanchor_res101_coco_3x_800size import freeanchor_res101_coco_3x_800size 20 | from .freeanchor_resx101_coco_2x_800size import freeanchor_resx101_coco_2x_800size 21 | from .retinanet_res18_coco_3x_800size import retinanet_res18_coco_3x_800size 22 | from .retinanet_res34_coco_3x_800size import retinanet_res34_coco_3x_800size 23 | from .retinanet_res50_coco_3x_800size import retinanet_res50_coco_3x_800size 24 | from .retinanet_res101_coco_3x_800size import retinanet_res101_coco_3x_800size 25 | from .retinanet_resx101_coco_2x_800size import retinanet_resx101_coco_2x_800size 26 | 27 | _EXCLUDE = {} 28 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 29 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
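A note on the config files listed below: each model-zoo entry is a plain function decorated with `hub.pretrained(url)`. The following is a minimal sketch of building a teacher programmatically, assuming MegEngine hub's usual convention that such decorated functions accept `pretrained=True` to download and load the listed weights (verify against your MegEngine version), and that it is run from the `megengine_release` directory:

```python
from configs import retinanet_res101_coco_3x_800size

# Build the RetinaNet-R101 teacher; pretrained=True is assumed to fetch the
# weights from the URL passed to @hub.pretrained in the config file.
teacher = retinanet_res101_coco_3x_800size(pretrained=True)
teacher.eval()  # teachers are used in inference mode during distillation
```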
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "atss_res101_coco_3x_800size_44dot7_9181687e.pkl" 24 | ) 25 | def atss_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | ATSS trained from COCO dataset. 28 | `"ATSS" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomATSSConfig() 33 | cfg.backbone_pretrained = False 34 | return models.ATSS(cfg, **kwargs) 35 | 36 | 37 | Net = models.ATSS 38 | Cfg = CustomATSSConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "atss_res18_coco_3x_800size_38dot3_58e249d5.pkl" 25 | ) 26 | def atss_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | ATSS trained from COCO dataset. 29 | `"ATSS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomATSSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.ATSS(cfg, **kwargs) 36 | 37 | 38 | Net = models.ATSS 39 | Cfg = CustomATSSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "atss_res34_coco_3x_800size_41dot5_ec16a67b.pkl" 25 | ) 26 | def atss_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | ATSS trained from COCO dataset. 
29 | `"ATSS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomATSSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.ATSS(cfg, **kwargs) 36 | 37 | 38 | Net = models.ATSS 39 | Cfg = CustomATSSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl" 17 | ) 18 | def atss_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | ATSS trained from COCO dataset. 21 | `"ATSS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.ATSSConfig() 26 | cfg.backbone_pretrained = False 27 | return models.ATSS(cfg, **kwargs) 28 | 29 | 30 | Net = models.ATSS 31 | Cfg = models.ATSSConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/atss_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomATSSConfig(models.ATSSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "atss_resx101_coco_2x_800size_45dot6_b3a91b36.pkl" 26 | ) 27 | def atss_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | ATSS trained from COCO dataset. 30 | `"ATSS" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomATSSConfig() 35 | cfg.backbone_pretrained = False 36 | return models.ATSS(cfg, **kwargs) 37 | 38 | 39 | Net = models.ATSS 40 | Cfg = CustomATSSConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "faster_rcnn_res101_coco_3x_800size_42dot6_2538b0ff.pkl" 24 | ) 25 | def faster_rcnn_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | Faster-RCNN FPN trained from COCO dataset. 28 | `"Faster-RCNN" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomFasterRCNNConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FasterRCNN(cfg, **kwargs) 35 | 36 | 37 | Net = models.FasterRCNN 38 | Cfg = CustomFasterRCNNConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [64, 128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "faster_rcnn_res18_coco_3x_800size_35dot7_a33835ca.pkl" 25 | ) 26 | def faster_rcnn_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | Faster-RCNN FPN trained from COCO dataset. 29 | `"Faster-RCNN" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFasterRCNNConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FasterRCNN(cfg, **kwargs) 36 | 37 | 38 | Net = models.FasterRCNN 39 | Cfg = CustomFasterRCNNConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [64, 128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "faster_rcnn_res34_coco_3x_800size_39dot6_11fca4d4.pkl" 25 | ) 26 | def faster_rcnn_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | Faster-RCNN FPN trained from COCO dataset. 
29 | `"Faster-RCNN" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFasterRCNNConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FasterRCNN(cfg, **kwargs) 36 | 37 | 38 | Net = models.FasterRCNN 39 | Cfg = CustomFasterRCNNConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "faster_rcnn_res50_coco_3x_800size_40dot1_8682ff1a.pkl" 17 | ) 18 | def faster_rcnn_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | Faster-RCNN FPN trained from COCO dataset. 21 | `"Faster-RCNN" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FasterRCNNConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FasterRCNN(cfg, **kwargs) 28 | 29 | 30 | Net = models.FasterRCNN 31 | Cfg = models.FasterRCNNConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/faster_rcnn_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFasterRCNNConfig(models.FasterRCNNConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "faster_rcnn_resx101_coco_2x_800size_44dot1_e5e0060b.pkl" 26 | ) 27 | def faster_rcnn_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | Faster-RCNN FPN trained from COCO dataset. 30 | `"Faster-RCNN" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomFasterRCNNConfig() 35 | cfg.backbone_pretrained = False 36 | return models.FasterRCNN(cfg, **kwargs) 37 | 38 | 39 | Net = models.FasterRCNN 40 | Cfg = CustomFasterRCNNConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "fcos_res101_coco_3x_800size_44dot3_f38e8df1.pkl" 24 | ) 25 | def fcos_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | FCOS trained from COCO dataset. 28 | `"FCOS" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomFCOSConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FCOS(cfg, **kwargs) 35 | 36 | 37 | Net = models.FCOS 38 | Cfg = CustomFCOSConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "fcos_res18_coco_3x_800size_37dot6_adab0136.pkl" 25 | ) 26 | def fcos_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | FCOS trained from COCO dataset. 29 | `"FCOS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFCOSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FCOS(cfg, **kwargs) 36 | 37 | 38 | Net = models.FCOS 39 | Cfg = CustomFCOSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "fcos_res34_coco_3x_800size_41dot0_8ba4633f.pkl" 25 | ) 26 | def fcos_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | FCOS trained from COCO dataset. 
29 | `"FCOS" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = CustomFCOSConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FCOS(cfg, **kwargs) 36 | 37 | 38 | Net = models.FCOS 39 | Cfg = CustomFCOSConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl" 17 | ) 18 | def fcos_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FCOS trained from COCO dataset. 21 | `"FCOS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FCOSConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FCOS(cfg, **kwargs) 28 | 29 | 30 | Net = models.FCOS 31 | Cfg = models.FCOSConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/fcos_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFCOSConfig(models.FCOSConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "fcos_resx101_coco_2x_800size_44dot8_42ac8e82.pkl" 26 | ) 27 | def fcos_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | FCOS trained from COCO dataset. 30 | `"FCOS" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomFCOSConfig() 35 | cfg.backbone_pretrained = False 36 | return models.FCOS(cfg, **kwargs) 37 | 38 | 39 | Net = models.FCOS 40 | Cfg = CustomFCOSConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "freeanchor_res101_coco_3x_800size_43dot9_8c707d7d.pkl" 24 | ) 25 | def freeanchor_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | FreeAnchor trained from COCO dataset. 28 | `"FreeAnchor" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = models.FreeAnchorConfig() 33 | cfg.backbone_pretrained = False 34 | return models.FreeAnchor(cfg, **kwargs) 35 | 36 | 37 | Net = models.FreeAnchor 38 | Cfg = CustomFreeAnchorConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "freeanchor_res18_coco_3x_800size_38dot1_3d0559a8.pkl" 25 | ) 26 | def freeanchor_res18_coco_3x_800size(**kwargs): 27 | r""" 28 | FreeAnchor trained from COCO dataset. 29 | `"FreeAnchor" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = models.FreeAnchorConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FreeAnchor(cfg, **kwargs) 36 | 37 | 38 | Net = models.FreeAnchor 39 | Cfg = CustomFreeAnchorConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | 21 | 22 | @hub.pretrained( 23 | "https://data.megengine.org.cn/models/weights/" 24 | "freeanchor_res34_coco_3x_800size_41dot1_3b03734e.pkl" 25 | ) 26 | def freeanchor_res34_coco_3x_800size(**kwargs): 27 | r""" 28 | FreeAnchor trained from COCO dataset. 
29 | `"FreeAnchor" `_ 30 | `"FPN" `_ 31 | `"COCO" `_ 32 | """ 33 | cfg = models.FreeAnchorConfig() 34 | cfg.backbone_pretrained = False 35 | return models.FreeAnchor(cfg, **kwargs) 36 | 37 | 38 | Net = models.FreeAnchor 39 | Cfg = CustomFreeAnchorConfig 40 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "freeanchor_res50_coco_3x_800size_42dot1_5c567f14.pkl" 17 | ) 18 | def freeanchor_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FreeAnchor trained from COCO dataset. 21 | `"FreeAnchor" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FreeAnchorConfig() 26 | cfg.backbone_pretrained = False 27 | return models.FreeAnchor(cfg, **kwargs) 28 | 29 | 30 | Net = models.FreeAnchor 31 | Cfg = models.FreeAnchorConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/freeanchor_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomFreeAnchorConfig(models.FreeAnchorConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "freeanchor_resx101_coco_2x_800size_44dot9_5a23fca7.pkl" 26 | ) 27 | def freeanchor_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | FreeAnchor trained from COCO dataset. 30 | `"FreeAnchor" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = models.FreeAnchorConfig() 35 | cfg.backbone_pretrained = False 36 | return models.FreeAnchor(cfg, **kwargs) 37 | 38 | 39 | Net = models.FreeAnchor 40 | Cfg = CustomFreeAnchorConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res101_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet101" 19 | 20 | 21 | @hub.pretrained( 22 | "https://data.megengine.org.cn/models/weights/" 23 | "retinanet_res101_coco_3x_800size_41dot4_73b01887.pkl" 24 | ) 25 | def retinanet_res101_coco_3x_800size(**kwargs): 26 | r""" 27 | RetinaNet trained from COCO dataset. 28 | `"RetinaNet" `_ 29 | `"FPN" `_ 30 | `"COCO" `_ 31 | """ 32 | cfg = CustomRetinaNetConfig() 33 | cfg.backbone_pretrained = False 34 | return models.RetinaNet(cfg, **kwargs) 35 | 36 | 37 | Net = models.RetinaNet 38 | Cfg = CustomRetinaNetConfig 39 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res18_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet18" 19 | self.fpn_in_channels = [128, 256, 512] 20 | self.fpn_top_in_channel = 512 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "retinanet_res18_coco_3x_800size_35dot3_0c4956c8.pkl" 26 | ) 27 | def retinanet_res18_coco_3x_800size(**kwargs): 28 | r""" 29 | RetinaNet trained from COCO dataset. 30 | `"RetinaNet" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomRetinaNetConfig() 35 | cfg.backbone_pretrained = False 36 | return models.RetinaNet(cfg, **kwargs) 37 | 38 | 39 | Net = models.RetinaNet 40 | Cfg = CustomRetinaNetConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res34_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnet34" 19 | self.fpn_in_channels = [128, 256, 512] 20 | self.fpn_top_in_channel = 512 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "retinanet_res34_coco_3x_800size_38dot4_3485f9ec.pkl" 26 | ) 27 | def retinanet_res34_coco_3x_800size(**kwargs): 28 | r""" 29 | RetinaNet trained from COCO dataset. 
30 | `"RetinaNet" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomRetinaNetConfig() 35 | cfg.backbone_pretrained = False 36 | return models.RetinaNet(cfg, **kwargs) 37 | 38 | 39 | Net = models.RetinaNet 40 | Cfg = CustomRetinaNetConfig 41 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_res50_coco_3x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl" 17 | ) 18 | def retinanet_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | RetinaNet trained from COCO dataset. 21 | `"RetinaNet" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.RetinaNetConfig() 26 | cfg.backbone_pretrained = False 27 | return models.RetinaNet(cfg, **kwargs) 28 | 29 | 30 | Net = models.RetinaNet 31 | Cfg = models.RetinaNetConfig 32 | -------------------------------------------------------------------------------- /megengine_release/configs/retinanet_resx101_coco_2x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | class CustomRetinaNetConfig(models.RetinaNetConfig): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | self.backbone = "resnext101_32x8d" 19 | self.max_epoch = 36 20 | self.lr_decay_stages = [24, 32] 21 | 22 | 23 | @hub.pretrained( 24 | "https://data.megengine.org.cn/models/weights/" 25 | "retinanet_resx101_coco_2x_800size_42dot3_1502eace.pkl" 26 | ) 27 | def retinanet_resx101_coco_2x_800size(**kwargs): 28 | r""" 29 | RetinaNet trained from COCO dataset. 
30 | `"RetinaNet" `_ 31 | `"FPN" `_ 32 | `"COCO" `_ 33 | """ 34 | cfg = CustomRetinaNetConfig() 35 | cfg.backbone_pretrained = False 36 | return models.RetinaNet(cfg, **kwargs) 37 | 38 | 39 | Net = models.RetinaNet 40 | Cfg = CustomRetinaNetConfig 41 | -------------------------------------------------------------------------------- /megengine_release/distill_configs/ICD.py: -------------------------------------------------------------------------------- 1 | import megengine.module as M 2 | import megengine.functional as F 3 | from models.ICD.ICD import ICD 4 | from easydict import EasyDict as edict 5 | 6 | def get_distillator(): 7 | cfg = edict({ 8 | 'distiller': { 9 | 'FEAT_KEYS': ['p3', 'p4', 'p5', 'p6', 'p7'], 10 | 'WEIGHT_VALUE': 8.0, 11 | 'TEMP_VALUE': 1.0, 12 | 'NUM_SCALE_SPLITS': 5, 13 | 'HIDDEN_DIM': 256, 14 | 'NUM_CLASSES': 80, 15 | 'MAX_LABELS': 300, 16 | 'ATT_HEADS': 8, 17 | 'USE_POS_EMBEDDING': True, 18 | 'DECODER_POSEMB_ON_V': False, 19 | 20 | }, 21 | }) 22 | return ICD(256, cfg) 23 | 24 | Net = get_distillator -------------------------------------------------------------------------------- /megengine_release/distill_configs/ICD_rcnn.py: -------------------------------------------------------------------------------- 1 | import megengine.module as M 2 | import megengine.functional as F 3 | from models.ICD.ICD import ICD 4 | from easydict import EasyDict as edict 5 | 6 | def get_distillator(): 7 | cfg = edict({ 8 | 'distiller': { 9 | 'FEAT_KEYS': ['p2', 'p3', 'p4', 'p5', 'p6'], 10 | 'WEIGHT_VALUE': 3.0, 11 | 'TEMP_VALUE': 1.0, 12 | 'HIDDEN_DIM': 256, 13 | 'NUM_CLASSES': 80, 14 | 'MAX_LABELS': 300, 15 | 'ATT_HEADS': 8, 16 | 'USE_POS_EMBEDDING': True, 17 | 'DECODER_POSEMB_ON_V': False, 18 | 19 | }, 20 | }) 21 | return ICD(256, cfg) 22 | 23 | Net = get_distillator -------------------------------------------------------------------------------- /megengine_release/distill_configs/atss_res50_coco_1x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "atss_res50_coco_3x_800size_42dot6_9a92ed8c.pkl" 17 | ) 18 | def atss_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | ATSS trained from COCO dataset. 
21 | `"ATSS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.ATSSConfig() 26 | cfg.backbone_pretrained = True 27 | cfg.max_epoch = 18 28 | cfg.lr_decay_stages = [12, 16] 29 | return models.ATSS(cfg, **kwargs) 30 | 31 | 32 | def get_cfg(): 33 | cfg = models.ATSSConfig() 34 | cfg.backbone_pretrained = True 35 | cfg.max_epoch = 18 36 | cfg.lr_decay_stages = [12, 16] 37 | 38 | return cfg 39 | 40 | 41 | Net = models.ATSS 42 | Cfg = get_cfg 43 | -------------------------------------------------------------------------------- /megengine_release/distill_configs/fcos_res50_coco_1x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "fcos_res50_coco_3x_800size_42dot2_b16f9c8b.pkl" 17 | ) 18 | def fcos_res50_coco_3x_800size(**kwargs): 19 | r""" 20 | FCOS trained from COCO dataset. 21 | `"FCOS" `_ 22 | `"FPN" `_ 23 | `"COCO" `_ 24 | """ 25 | cfg = models.FCOSConfig() 26 | cfg.backbone_pretrained = True 27 | cfg.max_epoch = 18 28 | cfg.lr_decay_stages = [12, 16] 29 | return models.FCOS(cfg, **kwargs) 30 | 31 | 32 | def get_cfg(): 33 | cfg = models.FCOSConfig() 34 | cfg.backbone_pretrained = True 35 | cfg.max_epoch = 18 36 | cfg.lr_decay_stages = [12, 16] 37 | 38 | return cfg 39 | 40 | 41 | Net = models.FCOS 42 | Cfg = get_cfg 43 | -------------------------------------------------------------------------------- /megengine_release/distill_configs/retinanet_res50_coco_1x_800size.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from megengine import hub 10 | 11 | import models 12 | 13 | 14 | @hub.pretrained( 15 | "https://data.megengine.org.cn/models/weights/" 16 | "retinanet_res50_coco_3x_800size_39dot3_8eaec532.pkl" 17 | ) 18 | def retinanet_res50_coco_1x_800size(**kwargs): 19 | r""" 20 | RetinaNet trained from COCO dataset. 
    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
    `"FPN" <https://arxiv.org/abs/1612.03144>`_
    `"COCO" <https://arxiv.org/abs/1405.0312>`_
    """
    cfg = models.RetinaNetConfig()
    cfg.backbone_pretrained = True
    cfg.max_epoch = 18
    cfg.lr_decay_stages = [12, 16]

    return models.RetinaNet(cfg, **kwargs)


def get_cfg():
    cfg = models.RetinaNetConfig()
    cfg.backbone_pretrained = True
    cfg.max_epoch = 18
    cfg.lr_decay_stages = [12, 16]

    return cfg

Net = models.RetinaNet
Cfg = get_cfg
--------------------------------------------------------------------------------
/megengine_release/layers/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .basic import *
from .det import *

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/megengine_release/layers/basic/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .functional import *
from .nn import *
from .norm import *

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/megengine_release/layers/basic/functional.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
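# A minimal usage sketch for the helpers below (illustrative only; the shapes
# and values are assumptions, not taken from the repo's tests):
#
#   import numpy as np
#   import megengine as mge
#
#   x = mge.tensor(np.zeros((2, 3, 37, 45), dtype="float32"))
#   y = get_padded_tensor(x, multiple_number=32)
#   # 37 -> ceil(37 / 32) * 32 = 64 and 45 -> 64, so y.shape == (2, 3, 64, 64)
#
#   s = safelog(mge.tensor([0.0, 1.0]))  # log with inputs clamped to >= eps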
from typing import Optional

import numpy as np

import megengine.distributed as dist
import megengine.functional as F
from megengine import Tensor


def get_padded_tensor(
    array: Tensor, multiple_number: int = 32, pad_value: float = 0
) -> Tensor:
    """pad the nd-array so that its height and width are multiples of the given number

    Args:
        array (Tensor):
            the tensor with the shape of [batch, channel, height, width]
        multiple_number (int):
            make the height and width divisible by multiple_number
        pad_value (float): the value to be padded

    Returns:
        padded_array (Tensor)
    """
    batch, chl, t_height, t_width = array.shape
    padded_height = (
        (t_height + multiple_number - 1) // multiple_number * multiple_number
    )
    padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number

    padded_array = F.full(
        (batch, chl, padded_height, padded_width), pad_value, dtype=array.dtype
    )

    ndim = array.ndim
    if ndim == 4:
        padded_array[:, :, :t_height, :t_width] = array
    elif ndim == 3:
        padded_array[:, :t_height, :t_width] = array
    else:
        raise Exception("Not supported tensor dim: %d" % ndim)
    return padded_array


def safelog(x, eps=None):
    if eps is None:
        eps = np.finfo(x.dtype).eps
    return F.log(F.maximum(x, eps))


def batched_nms(
    boxes: Tensor, scores: Tensor, idxs: Tensor, iou_thresh: float, max_output: Optional[int] = None
) -> Tensor:
    r"""
    Performs non-maximum suppression (NMS) on the boxes according to
    their intersection-over-union (IoU).

    :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on;
        each box is expected to be in `(x1, y1, x2, y2)` format.
    :param scores: tensor of shape `(N,)`, the score of boxes.
    :param idxs: tensor of shape `(N,)`, the class indices of boxes in the batch.
    :param iou_thresh: ``IoU`` threshold for overlapping.
    :return: indices of the elements that have been kept by NMS.

    Examples:

    .. testcode::

        import numpy as np
        from megengine import tensor

        x = np.zeros((100,4))
        np.random.seed(42)
        x[:,:2] = np.random.rand(100,2) * 20
        x[:,2:] = np.random.rand(100,2) * 20 + 100
        scores = tensor(np.random.rand(100))
        idxs = tensor(np.random.randint(0, 10, 100))
        inp = tensor(x)
        result = batched_nms(inp, scores, idxs, iou_thresh=0.6)
        print(result.numpy())

    Outputs:

    .. testoutput::

        [75 41 99 98 69 64 11 27 35 18]

    """
    assert (
        boxes.ndim == 2 and boxes.shape[1] == 4
    ), "the expected shape of boxes is (N, 4)"
    assert scores.ndim == 1, "the expected shape of scores is (N,)"
    assert idxs.ndim == 1, "the expected shape of idxs is (N,)"
    assert (
        boxes.shape[0] == scores.shape[0] == idxs.shape[0]
    ), "number of boxes, scores and idxs are not matched"

    idxs = idxs.detach()
    max_coordinate = boxes.max()
    offsets = idxs.astype("float32") * (max_coordinate + 1)
    boxes = boxes + offsets.reshape(-1, 1)
    return F.nn.nms(boxes, scores, iou_thresh, max_output)


def all_reduce_mean(array: Tensor) -> Tensor:
    if dist.get_world_size() > 1:
        array = dist.functional.all_reduce_sum(array) / dist.get_world_size()
    return array
--------------------------------------------------------------------------------
/megengine_release/layers/basic/nn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
from collections import namedtuple

import megengine.module as M


class Conv2d(M.Conv2d):
    """
    A wrapper around :class:`megengine.module.Conv2d`.
    """

    def __init__(self, *args, **kwargs):
        """
        Extra keyword arguments supported in addition to
        `megengine.module.Conv2d`.

        Args:
            norm (M.Module, optional): a normalization layer
            activation (callable(Tensor) -> Tensor): a callable activation
                function
        """
        norm = kwargs.pop("norm", None)
        activation = kwargs.pop("activation", None)
        super().__init__(*args, **kwargs)

        self.norm = norm
        self.activation = activation

    def forward(self, x):
        x = super().forward(x)
        if self.norm is not None:
            x = self.norm(x)
        if self.activation is not None:
            x = self.activation(x)
        return x


class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
    """
    A simple structure that contains basic shape specification about a tensor.
    Useful for getting a module's output channels when building the graph.
    """

    def __new__(cls, channels=None, height=None, width=None, stride=None):
        return super().__new__(cls, channels, height, width, stride)
--------------------------------------------------------------------------------
/megengine_release/layers/basic/norm.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
from functools import partial

import megengine.module as M
from megengine.module.normalization import GroupNorm, InstanceNorm, LayerNorm


def get_norm(norm):
    """
    Args:
        norm (str): currently support "BN", "SyncBN", "FrozenBN", "GN", "LN" and "IN"

    Returns:
        M.Module or None: the normalization layer
    """
    if norm is None:
        return None
    norm = {
        "BN": M.BatchNorm2d,
        "SyncBN": M.SyncBatchNorm,
        "FrozenBN": partial(M.BatchNorm2d, freeze=True),
        "GN": GroupNorm,
        "LN": LayerNorm,
        "IN": InstanceNorm,
    }[norm]
    return norm
--------------------------------------------------------------------------------
/megengine_release/layers/det/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .anchor import *
from .box_head import *
from .box_utils import *
from .fpn import *
from .loss import *
from .matcher import *
from .point_head import *
from .pooler import *
from .rcnn import *
from .rpn import *
from .sampling import *

_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
--------------------------------------------------------------------------------
/megengine_release/layers/det/anchor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from abc import ABCMeta, abstractmethod
from typing import List

import numpy as np

import megengine.functional as F
from megengine import Tensor, tensor


def meshgrid(x, y):
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    mesh_shape = (y.shape[0], x.shape[0])
    mesh_x = F.broadcast_to(x, mesh_shape)
    mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)
    return mesh_x, mesh_y


def create_anchor_grid(featmap_size, offsets, stride, device):
    step_x, step_y = featmap_size
    shift = offsets * stride

    grid_x = F.arange(shift, step_x * stride + shift, step=stride, device=device)
    grid_y = F.arange(shift, step_y * stride + shift, step=stride, device=device)
    grids_x, grids_y = meshgrid(grid_y, grid_x)
    return grids_x.reshape(-1), grids_y.reshape(-1)


class BaseAnchorGenerator(metaclass=ABCMeta):
    """base class for anchor generator.
    """

    def __init__(self):
        pass

    @property
    @abstractmethod
    def anchor_dim(self):
        pass

    @abstractmethod
    def generate_anchors_by_features(self, sizes, device) -> List[Tensor]:
        pass

    def __call__(self, featmaps):
        feat_sizes = [fmap.shape[-2:] for fmap in featmaps]
        return self.generate_anchors_by_features(feat_sizes, featmaps[0].device)


class AnchorBoxGenerator(BaseAnchorGenerator):
    """default anchor box generator, usually used in anchor-based methods.
    This class generates anchors for each feature map level.
    Args:
        anchor_scales (list): anchor scales based on stride.
            The practical anchor scale is anchor_scale * stride
        anchor_ratios (list): anchor aspect ratios.
        strides (list): strides of inputs.
        offset (float): center point offset. default is 0.5.
    """

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        anchor_scales: list = [[32], [64], [128], [256], [512]],
        anchor_ratios: list = [[0.5, 1, 2]],
        strides: list = [4, 8, 16, 32, 64],
        offset: float = 0.5,
    ):
        super().__init__()
        self.anchor_scales = np.array(anchor_scales, dtype="float32")
        self.anchor_ratios = np.array(anchor_ratios, dtype="float32")
        self.strides = strides
        self.offset = offset
        self.num_features = len(strides)

        self.base_anchors = self._different_level_anchors(anchor_scales, anchor_ratios)

    @property
    def anchor_dim(self):
        return 4

    def _different_level_anchors(self, scales, ratios):
        if len(scales) == 1:
            scales *= self.num_features
        assert len(scales) == self.num_features

        if len(ratios) == 1:
            ratios *= self.num_features
        assert len(ratios) == self.num_features
        return [
            tensor(self.generate_base_anchors(scale, ratio))
            for scale, ratio in zip(scales, ratios)
        ]

    def generate_base_anchors(self, scales, ratios):
        base_anchors = []
        areas = [s ** 2.0 for s in scales]
        for area in areas:
            for ratio in ratios:
                w = math.sqrt(area / ratio)
                h = ratio * w
                # center-based anchor
                x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
                base_anchors.append([x0, y0, x1, y1])
        return base_anchors

    def generate_anchors_by_features(self, sizes, device):
        all_anchors = []
        assert len(sizes) == self.num_features, (
            "input features expected {}, got {}".format(self.num_features, len(sizes))
        )
        for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors):
            grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
            grids = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
            all_anchors.append(
                (F.expand_dims(grids, axis=1) + F.expand_dims(base_anchor, axis=0)).reshape(-1, 4)
            )
        return all_anchors


class AnchorPointGenerator(BaseAnchorGenerator):
    """default anchor point generator, usually used in anchor-free methods.
    This class generates anchors for each feature map level.
    Args:
        num_anchors (int): number of anchors per location
        strides (list): strides of inputs.
        offset (float): center point offset. default is 0.5.
    """

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        num_anchors: int = 1,
        strides: list = [4, 8, 16, 32, 64],
        offset: float = 0.5,
    ):
        super().__init__()
        self.num_anchors = num_anchors
        self.strides = strides
        self.offset = offset
        self.num_features = len(strides)

    @property
    def anchor_dim(self):
        return 2

    def generate_anchors_by_features(self, sizes, device):
        all_anchors = []
        assert len(sizes) == self.num_features, (
            "input features expected {}, got {}".format(self.num_features, len(sizes))
        )
        for size, stride in zip(sizes, self.strides):
            grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
            grids = F.stack([grid_x, grid_y], axis=1)
            all_anchors.append(
                F.broadcast_to(
                    F.expand_dims(grids, axis=1), (grids.shape[0], self.num_anchors, 2)
                ).reshape(-1, 2)
            )  # FIXME: need F.repeat
        return all_anchors
--------------------------------------------------------------------------------
/megengine_release/layers/det/box_head.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from typing import List

import megengine.module as M
from megengine import Tensor

import layers


class BoxHead(M.Module):
    """
    The head used when anchor boxes are adopted for object classification and box regression.
    """

    def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
        super().__init__()

        in_channels = input_shape[0].channels
        num_classes = cfg.num_classes
        num_convs = 4
        prior_prob = cfg.cls_prior_prob
        num_anchors = [
            len(cfg.anchor_scales[i]) * len(cfg.anchor_ratios[i])
            for i in range(len(input_shape))
        ]

        assert (
            len(set(num_anchors)) == 1
        ), "not support different number of anchors between levels"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            cls_subnet.append(M.ReLU())
            bbox_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            bbox_subnet.append(M.ReLU())

        self.cls_subnet = M.Sequential(*cls_subnet)
        self.bbox_subnet = M.Sequential(*bbox_subnet)
        self.cls_score = M.Conv2d(
            in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
        )
        self.bbox_pred = M.Conv2d(
            in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
        )

        # Initialization
        for modules in [
            self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, M.Conv2d):
                    M.init.normal_(layer.weight, mean=0, std=0.01)
                    M.init.fill_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        M.init.fill_(self.cls_score.bias, bias_value)

    def forward(self, features: List[Tensor]):
        logits, offsets = [], []
        for feature in features:
            logits.append(self.cls_score(self.cls_subnet(feature)))
            offsets.append(self.bbox_pred(self.bbox_subnet(feature)))
        return logits, offsets
--------------------------------------------------------------------------------
/megengine_release/layers/det/box_utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from abc import ABCMeta, abstractmethod

import numpy as np

import megengine.functional as F
from megengine import Tensor


class BoxCoderBase(metaclass=ABCMeta):
    """Boxcoder class.
    """

    def __init__(self):
        pass

    @abstractmethod
    def encode(self) -> Tensor:
        pass

    @abstractmethod
    def decode(self) -> Tensor:
        pass


class BoxCoder(BoxCoderBase, metaclass=ABCMeta):
    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        reg_mean=[0.0, 0.0, 0.0, 0.0],
        reg_std=[1.0, 1.0, 1.0, 1.0],
    ):
        """
        Args:
            reg_mean(np.ndarray): [dx_mean, dy_mean, dw_mean, dh_mean] or None
            reg_std(np.ndarray): [dx_std, dy_std, dw_std, dh_std] or None

        """
        self.reg_mean = np.array(reg_mean, dtype="float32")[None, :]
        self.reg_std = np.array(reg_std, dtype="float32")[None, :]
        super().__init__()

    @staticmethod
    def _box_ltrb_to_cs_opr(bbox, addaxis=None):
        """transform bounding boxes from the left-top right-bottom encoding
        to center and size encodings"""
        bbox_width = bbox[:, 2] - bbox[:, 0]
        bbox_height = bbox[:, 3] - bbox[:, 1]
        bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
        bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height
        if addaxis is None:
            return bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y
        else:
            return (
                F.expand_dims(bbox_width, addaxis),
                F.expand_dims(bbox_height, addaxis),
                F.expand_dims(bbox_ctr_x, addaxis),
                F.expand_dims(bbox_ctr_y, addaxis),
            )

    def encode(self, bbox: Tensor, gt: Tensor) -> Tensor:
        bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y = self._box_ltrb_to_cs_opr(bbox)
        gt_width, gt_height, gt_ctr_x, gt_ctr_y = self._box_ltrb_to_cs_opr(gt)

        target_dx = (gt_ctr_x - bbox_ctr_x) / bbox_width
        target_dy = (gt_ctr_y - bbox_ctr_y) / bbox_height
        target_dw = F.log(gt_width / bbox_width)
        target_dh = F.log(gt_height / bbox_height)
        target = F.stack([target_dx, target_dy, target_dw, target_dh], axis=1)

        target -= self.reg_mean
        target /= self.reg_std
        return target

    def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
        deltas *= self.reg_std
        deltas += self.reg_mean

        (
            anchor_width,
            anchor_height,
            anchor_ctr_x,
            anchor_ctr_y,
        ) = self._box_ltrb_to_cs_opr(anchors, 1)
        pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width
        pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height
        pred_width = anchor_width * F.exp(deltas[:, 2::4])
        pred_height = anchor_height * F.exp(deltas[:, 3::4])

        pred_x1 = pred_ctr_x - 0.5 * pred_width
        pred_y1 = pred_ctr_y - 0.5 * pred_height
        pred_x2 = pred_ctr_x + 0.5 * pred_width
        pred_y2 = pred_ctr_y + 0.5 * pred_height

        pred_box = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=2)
        pred_box = pred_box.reshape(pred_box.shape[0], -1)

        return pred_box


class PointCoder(BoxCoderBase, metaclass=ABCMeta):
    def encode(self, point: Tensor, gt: Tensor) -> Tensor:
        return F.concat([point - gt[..., :2], gt[..., 2:] - point], axis=-1)

    def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
        return F.stack([
            F.expand_dims(anchors[:, 0], axis=1) - deltas[:, 0::4],
            F.expand_dims(anchors[:, 1], axis=1) - deltas[:, 1::4],
            F.expand_dims(anchors[:, 0], axis=1) + deltas[:, 2::4],
            F.expand_dims(anchors[:, 1], axis=1) + deltas[:, 3::4],
        ], axis=2).reshape(deltas.shape)


def get_iou(boxes1: Tensor, boxes2: Tensor, return_ioa=False) -> Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): boxes tensor with shape (N, 4)
        boxes2 (Tensor): boxes tensor with shape (M, 4)
        return_ioa (bool): whether to also return the intersection over the
            area of boxes1 (IoA). Default: False

    Returns:
        iou (Tensor): IoU matrix, shape (N, M).
    """
    b_box1 = F.expand_dims(boxes1, axis=1)
    b_box2 = F.expand_dims(boxes2, axis=0)

    iw = F.minimum(b_box1[:, :, 2], b_box2[:, :, 2]) - F.maximum(
        b_box1[:, :, 0], b_box2[:, :, 0]
    )
    ih = F.minimum(b_box1[:, :, 3], b_box2[:, :, 3]) - F.maximum(
        b_box1[:, :, 1], b_box2[:, :, 1]
    )
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area_box2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    union = F.expand_dims(area_box1, axis=1) + F.expand_dims(area_box2, axis=0) - inter
    overlaps = F.maximum(inter / union, 0)

    if return_ioa:
        ioa = F.maximum(inter / area_box1, 0)
        return overlaps, ioa

    return overlaps


def get_clipped_boxes(boxes, hw):
    """Clip the boxes into the image region."""
    # x1 >= 0
    box_x1 = F.clip(boxes[:, 0::4], lower=0, upper=hw[1])
    # y1 >= 0
    box_y1 = F.clip(boxes[:, 1::4], lower=0, upper=hw[0])
    # x2 < im_info[1]
    box_x2 = F.clip(boxes[:, 2::4], lower=0, upper=hw[1])
    # y2 < im_info[0]
    box_y2 = F.clip(boxes[:, 3::4], lower=0, upper=hw[0])

    clip_box = F.concat([box_x1, box_y1, box_x2, box_y2], axis=1)

    return clip_box


def filter_boxes(boxes, size=0):
    width = boxes[:, 2] - boxes[:, 0]
    height = boxes[:, 3] - boxes[:, 1]
    keep = (width > size) & (height > size)
    return keep
--------------------------------------------------------------------------------
/megengine_release/layers/det/fpn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Codes contributed by Facebook: Copyright 2019 - present, Facebook, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (C) 2014-2021 Megvii Inc. All rights reserved.
# ---------------------------------------------------------------------
import math
from typing import List

import megengine.functional as F
import megengine.module as M

import layers


class FPN(M.Module):
    """
    This module implements the Feature Pyramid Network.
    It creates pyramid features built on top of some input feature maps which
    are produced by backbone networks like ResNet.
    """

    # pylint: disable=dangerous-default-value
    def __init__(
        self,
        bottom_up: M.Module,
        in_features: List[str],
        out_channels: int = 256,
        norm: str = None,
        top_block: M.Module = None,
        strides: List[int] = [8, 16, 32],
        channels: List[int] = [512, 1024, 2048],
    ):
        """
        Args:
            bottom_up (M.Module): module representing the bottom up sub-network.
                it generates multi-scale feature maps which are formatted as a
                dict like {'res3': res3_feature, 'res4': res4_feature}
            in_features (list[str]): list of input feature map keys coming
                from the `bottom_up` which will be used in FPN.
                e.g. ['res3', 'res4', 'res5']
            out_channels (int): number of channels used in the output
                feature maps.
            norm (str): the normalization type.
            top_block (M.Module or None): the module built upon FPN layers.
        """
        super(FPN, self).__init__()

        in_strides = strides
        in_channels = channels
        norm = layers.get_norm(norm)

        use_bias = norm is None
        self.lateral_convs = list()
        self.output_convs = list()

        for idx, in_channels in enumerate(in_channels):
            lateral_norm = None if norm is None else norm(out_channels)
            output_norm = None if norm is None else norm(out_channels)

            lateral_conv = layers.Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                bias=use_bias,
                norm=lateral_norm,
            )
            output_conv = layers.Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            M.init.msra_normal_(lateral_conv.weight, mode="fan_in")
            M.init.msra_normal_(output_conv.weight, mode="fan_in")

            if use_bias:
                M.init.fill_(lateral_conv.bias, 0)
                M.init.fill_(output_conv.bias, 0)

            stage = int(math.log2(in_strides[idx]))

            setattr(self, "fpn_lateral{}".format(stage), lateral_conv)
            setattr(self, "fpn_output{}".format(stage), output_conv)
            self.lateral_convs.insert(0, lateral_conv)
            self.output_convs.insert(0, output_conv)

        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up

        # follow the common practices, FPN features are named to "p",
        # like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {
            "p{}".format(int(math.log2(s))): s for s in in_strides
        }

        # top block output feature maps.
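        # A worked example (illustrative, values assumed): with
        # strides=[8, 16, 32] the loop above ends with stage == 5 ("p5"),
        # so a two-level top block such as LastLevelP6P7 registers
        # "p6" -> stride 2**6 = 64 and "p7" -> stride 2**7 = 128 below.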
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

        self._out_features = list(sorted(self._out_feature_strides.keys()))
        self._out_feature_channels = {k: out_channels for k in self._out_features}

    def forward(self, x):
        bottom_up_features = self.bottom_up.extract_features(x)
        x = [bottom_up_features[f] for f in self.in_features[::-1]]

        results = []
        prev_features = self.lateral_convs[0](x[0])
        results.append(self.output_convs[0](prev_features))

        for features, lateral_conv, output_conv in zip(
            x[1:], self.lateral_convs[1:], self.output_convs[1:]
        ):
            top_down_features = F.nn.interpolate(
                prev_features, features.shape[2:], mode="BILINEAR"
            )
            lateral_features = lateral_conv(features)
            prev_features = lateral_features + top_down_features
            results.insert(0, output_conv(prev_features))

        if self.top_block is not None:
            top_block_in_feature = bottom_up_features.get(
                self.top_block.in_feature, None
            )
            if top_block_in_feature is None:
                top_block_in_feature = results[
                    self._out_features.index(self.top_block.in_feature)
                ]
            results.extend(self.top_block(top_block_in_feature))

        return dict(zip(self._out_features, results))

    def output_shape(self):
        return {
            name: layers.ShapeSpec(
                channels=self._out_feature_channels[name],
                stride=self._out_feature_strides[name],
            )
            for name in self._out_features
        }


class FPNP6(M.Module):
    """
    Used in FPN; generates a downsampled P6 feature from P5.
    """

    def __init__(self, in_feature="p5"):
        super().__init__()
        self.num_levels = 1
        self.in_feature = in_feature
        self.pool = M.MaxPool2d(kernel_size=1, stride=2, padding=0)

    def forward(self, x):
        return [self.pool(x)]


class LastLevelP6P7(M.Module):
    """
    This module is used in RetinaNet to generate the extra layers, P6 and P7,
    from the C5 feature.
    """

    def __init__(self, in_channels: int, out_channels: int, in_feature="res5"):
        super().__init__()
        self.num_levels = 2
        if in_feature == "p5":
            assert in_channels == out_channels
        self.in_feature = in_feature
        self.p6 = M.Conv2d(in_channels, out_channels, 3, 2, 1)
        self.p7 = M.Conv2d(out_channels, out_channels, 3, 2, 1)

    def forward(self, x):
        p6 = self.p6(x)
        p7 = self.p7(F.relu(p6))
        return [p6, p7]
--------------------------------------------------------------------------------
/megengine_release/layers/det/loss.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
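# A quick numeric sanity check for the losses below (a sketch; the numbers are
# illustrative and not taken from the repo's tests):
#
#   from megengine import tensor
#
#   logits = tensor([0.0])   # sigmoid(0.0) == 0.5
#   targets = tensor([1.0])
#   # binary_cross_entropy gives -log(0.5) ~= 0.6931
#   # sigmoid_focal_loss with alpha=0.25, gamma=2 scales it by
#   # 0.25 * (1 - 0.5) ** 2 == 0.0625, i.e. ~= 0.0433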
import megengine.functional as F
from megengine import Tensor


def binary_cross_entropy(logits: Tensor, targets: Tensor) -> Tensor:
    r"""Binary Cross Entropy

    Args:
        logits (Tensor):
            the predicted logits
        targets (Tensor):
            the assigned targets with the same shape as logits

    Returns:
        the calculated binary cross entropy.
    """
    return -(targets * F.logsigmoid(logits) + (1 - targets) * F.logsigmoid(-logits))


def sigmoid_focal_loss(
    logits: Tensor, targets: Tensor, alpha: float = -1, gamma: float = 0,
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor):
            the predicted logits
        targets (Tensor):
            the assigned targets with the same shape as logits
        alpha (float):
            parameter to mitigate class imbalance. Default: -1
        gamma (float):
            parameter to mitigate easy/hard loss imbalance. Default: 0

    Returns:
        the calculated focal loss.
    """
    scores = F.sigmoid(logits)
    loss = binary_cross_entropy(logits, targets)
    if gamma != 0:
        loss *= (targets * (1 - scores) + (1 - targets) * scores) ** gamma
    if alpha >= 0:
        loss *= targets * alpha + (1 - targets) * (1 - alpha)
    return loss


def smooth_l1_loss(pred: Tensor, target: Tensor, beta: float = 1.0) -> Tensor:
    r"""Smooth L1 Loss

    Args:
        pred (Tensor):
            the predictions
        target (Tensor):
            the assigned targets with the same shape as pred
        beta (float):
            the parameter of smooth l1 loss.

    Returns:
        the calculated smooth l1 loss.
    """
    x = pred - target
    abs_x = F.abs(x)
    if beta < 1e-5:
        loss = abs_x
    else:
        in_loss = 0.5 * x ** 2 / beta
        out_loss = abs_x - 0.5 * beta
        loss = F.where(abs_x < beta, in_loss, out_loss)
    return loss


def iou_loss(
    pred: Tensor, target: Tensor, box_mode: str = "xyxy", loss_type: str = "iou", eps: float = 1e-8,
) -> Tensor:
    if box_mode == "ltrb":
        pred = F.concat([-pred[..., :2], pred[..., 2:]], axis=-1)
        target = F.concat([-target[..., :2], target[..., 2:]], axis=-1)
    elif box_mode != "xyxy":
        raise NotImplementedError

    pred_area = F.maximum(pred[..., 2] - pred[..., 0], 0) * F.maximum(
        pred[..., 3] - pred[..., 1], 0
    )
    target_area = F.maximum(target[..., 2] - target[..., 0], 0) * F.maximum(
        target[..., 3] - target[..., 1], 0
    )

    w_intersect = F.maximum(
        F.minimum(pred[..., 2], target[..., 2]) - F.maximum(pred[..., 0], target[..., 0]), 0
    )
    h_intersect = F.maximum(
        F.minimum(pred[..., 3], target[..., 3]) - F.maximum(pred[..., 1], target[..., 1]), 0
    )

    area_intersect = w_intersect * h_intersect
    area_union = pred_area + target_area - area_intersect
    ious = area_intersect / F.maximum(area_union, eps)

    if loss_type == "iou":
        loss = -F.log(F.maximum(ious, eps))
    elif loss_type == "linear_iou":
        loss = 1 - ious
    elif loss_type == "giou":
        g_w_intersect = F.maximum(pred[..., 2], target[..., 2]) - F.minimum(
            pred[..., 0], target[..., 0]
        )
        g_h_intersect = F.maximum(pred[..., 3], target[..., 3]) - F.minimum(
            pred[..., 1], target[..., 1]
        )
        ac_union = g_w_intersect * g_h_intersect
        gious = ious - (ac_union - area_union) / F.maximum(ac_union, eps)
        loss = 1 - gious
    return loss
--------------------------------------------------------------------------------
/megengine_release/layers/det/matcher.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import megengine.functional as F


class Matcher:

    def __init__(self, thresholds, labels, allow_low_quality_matches=False):
        assert len(thresholds) + 1 == len(labels), "thresholds and labels are not matched"
        assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
        thresholds.append(float("inf"))
        thresholds.insert(0, -float("inf"))

        self.thresholds = thresholds
        self.labels = labels
        self.allow_low_quality_matches = allow_low_quality_matches

    def __call__(self, matrix):
        """
        matrix(tensor): a two-dim tensor with shape (N, M), where N is the
            number of GT boxes and M is the number of anchors in detection.
        """
        assert len(matrix.shape) == 2
        max_scores = matrix.max(axis=0)
        match_indices = F.argmax(matrix, axis=0)

        # default ignore label: -1
        labels = F.full_like(match_indices, -1)

        for label, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
            mask = (max_scores >= low) & (max_scores < high)
            labels[mask] = label

        if self.allow_low_quality_matches:
            mask = (matrix == F.max(matrix, axis=1, keepdims=True)).sum(axis=0) > 0
            labels[mask] = 1

        return match_indices, labels
--------------------------------------------------------------------------------
/megengine_release/layers/det/point_head.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import math
from typing import List

import numpy as np

import megengine as mge
import megengine.functional as F
import megengine.module as M
from megengine import Tensor
from megengine.module.normalization import GroupNorm

import layers


class PointHead(M.Module):
    """
    The head used when anchor points are adopted for object classification and box regression.
    """

    def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
        super().__init__()
        self.stride_list = cfg.stride

        in_channels = input_shape[0].channels
        num_classes = cfg.num_classes
        num_convs = 4
        prior_prob = cfg.cls_prior_prob
        num_anchors = [cfg.num_anchors] * len(input_shape)

        assert (
            len(set(num_anchors)) == 1
        ), "not support different number of anchors between levels"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            cls_subnet.append(GroupNorm(32, in_channels))
            cls_subnet.append(M.ReLU())
            bbox_subnet.append(
                M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            bbox_subnet.append(GroupNorm(32, in_channels))
            bbox_subnet.append(M.ReLU())

        self.cls_subnet = M.Sequential(*cls_subnet)
        self.bbox_subnet = M.Sequential(*bbox_subnet)
        self.cls_score = M.Conv2d(
            in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
        )
        self.bbox_pred = M.Conv2d(
            in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
        )
        self.ctrness = M.Conv2d(
            in_channels, num_anchors * 1, kernel_size=3, stride=1, padding=1
        )

        # Initialization
        for modules in [
            self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred,
            self.ctrness
        ]:
            for layer in modules.modules():
                if isinstance(layer, M.Conv2d):
                    M.init.normal_(layer.weight, mean=0, std=0.01)
                    M.init.fill_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        M.init.fill_(self.cls_score.bias, bias_value)

        self.scale_list = mge.Parameter(np.ones(len(self.stride_list), dtype="float32"))

    def forward(self, features: List[Tensor]):
        logits, offsets, ctrness = [], [], []
        for feature, scale, stride in zip(features, self.scale_list, self.stride_list):
            logits.append(self.cls_score(self.cls_subnet(feature)))
            bbox_subnet = self.bbox_subnet(feature)
            offsets.append(F.relu(self.bbox_pred(bbox_subnet) * scale) * stride)
            ctrness.append(self.ctrness(bbox_subnet))
        return logits, offsets, ctrness
--------------------------------------------------------------------------------
/megengine_release/layers/det/pooler.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
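# How RoIs are assigned to FPN levels in roi_pool below (the FPN-paper rule):
#
#   level = floor(4 + log2(sqrt(box_area) / 224)), clipped to [min_level, max_level]
#
# A worked example with assumed strides [8, 16, 32] (min_level=3, max_level=5):
# a 112x112 RoI gives 4 + log2(112 / 224) = 3, so it pools from the stride-8 map.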
import math

import megengine.functional as F


def roi_pool(
    rpn_fms, rois, stride, pool_shape, pooler_type="roi_align",
):
    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(
        canonical_level + F.log(F.sqrt(box_area) / canonical_box_size) / math.log(2)
    ).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # avoid empty assignment
    assigned_level = F.concat(
        [assigned_level, F.arange(num_fms, dtype="int32", device=assigned_level.device)],
    )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i], level_rois, pool_shape, mode="max", scale=1.0 / stride[i]
            )
        elif pooler_type == "roi_align":
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
--------------------------------------------------------------------------------
/megengine_release/layers/det/rcnn.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
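# Shape bookkeeping for the R-CNN box head below (illustrative; the 80-class
# figures assume the COCO setting used elsewhere in this repo):
#
#   pred_logits:  (N, num_classes + 1)   # +1 for background at index 0
#   pred_offsets: (N, num_classes * 4)   # per-class deltas, reshaped to (N, 80, 4)
#   # at inference each roi is broadcast to (N * 80, 4) before decoding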
import megengine as mge
import megengine.functional as F
import megengine.module as M

import layers


class RCNN(M.Module):

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder(cfg.rcnn_reg_mean, cfg.rcnn_reg_std)

        # roi head
        self.in_features = cfg.rcnn_in_features
        self.stride = cfg.rcnn_stride
        self.pooling_method = cfg.pooling_method
        self.pooling_size = cfg.pooling_size

        self.fc1 = M.Linear(256 * self.pooling_size[0] * self.pooling_size[1], 1024)
        self.fc2 = M.Linear(1024, 1024)
        for l in [self.fc1, self.fc2]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)

        # box predictor
        self.pred_cls = M.Linear(1024, cfg.num_classes + 1)
        self.pred_delta = M.Linear(1024, cfg.num_classes * 4)
        M.init.normal_(self.pred_cls.weight, std=0.01)
        M.init.normal_(self.pred_delta.weight, std=0.001)
        for l in [self.pred_cls, self.pred_delta]:
            M.init.fill_(l.bias, 0)

    def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
        rcnn_rois, labels, bbox_targets = self.get_ground_truth(
            rcnn_rois, im_info, gt_boxes
        )

        fpn_fms = [fpn_fms[x] for x in self.in_features]
        pool_features = layers.roi_pool(
            fpn_fms, rcnn_rois, self.stride, self.pooling_size, self.pooling_method,
        )
        flatten_feature = F.flatten(pool_features, start_axis=1)
        roi_feature = F.relu(self.fc1(flatten_feature))
        roi_feature = F.relu(self.fc2(roi_feature))
        pred_logits = self.pred_cls(roi_feature)
        pred_offsets = self.pred_delta(roi_feature)

        if self.training:
            # loss for rcnn classification
            loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)
            # loss for rcnn regression
            pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
            num_samples = labels.shape[0]
            fg_mask = labels > 0
            loss_rcnn_bbox = layers.smooth_l1_loss(
                pred_offsets[fg_mask, labels[fg_mask] - 1],
                bbox_targets[fg_mask],
                self.cfg.rcnn_smooth_l1_beta,
            ).sum() / F.maximum(num_samples, mge.tensor(1))

            loss_dict = {
                "loss_rcnn_cls": loss_rcnn_cls,
                "loss_rcnn_bbox": loss_rcnn_bbox,
            }
            return loss_dict
        else:
            # slice 1 for removing background
            pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
            pred_offsets = pred_offsets.reshape(-1, 4)
            target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
            # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
            base_rois = F.broadcast_to(
                F.expand_dims(rcnn_rois[:, 1:5], axis=1), target_shape).reshape(-1, 4)
            pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
            return pred_bbox, pred_scores

    def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
        if not self.training:
            return rpn_rois, None, None

        return_rois = []
        return_labels = []
        return_bbox_targets = []

        # get per image proposals and gt_boxes
        for bid in range(gt_boxes.shape[0]):
            num_valid_boxes = im_info[bid, 4].astype("int32")
            gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
            batch_inds = F.full((gt_boxes_per_img.shape[0], 1), bid)
            gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
            batch_roi_mask = rpn_rois[:, 0] == bid
            # all_rois : [batch_id, x1, y1, x2, y2]
            all_rois = F.concat([rpn_rois[batch_roi_mask], gt_rois])

            overlaps = layers.get_iou(all_rois[:, 1:], gt_boxes_per_img)

            max_overlaps = overlaps.max(axis=1)
            gt_assignment = F.argmax(overlaps, axis=1).astype("int32")
            labels = gt_boxes_per_img[gt_assignment, 4]

            # ---------------- get the fg/bg labels for each roi ---------------#
            fg_mask = (max_overlaps >= self.cfg.fg_threshold) & (labels >= 0)
            bg_mask = (
                (max_overlaps >= self.cfg.bg_threshold_low)
                & (max_overlaps < self.cfg.bg_threshold_high)
            )

            num_fg_rois = int(self.cfg.num_rois * self.cfg.fg_ratio)
            fg_inds_mask = layers.sample_labels(fg_mask, num_fg_rois, True, False)
            num_bg_rois = int(self.cfg.num_rois - fg_inds_mask.sum())
            bg_inds_mask = layers.sample_labels(bg_mask, num_bg_rois, True, False)

            labels[bg_inds_mask] = 0

            keep_mask = fg_inds_mask | bg_inds_mask
            labels = labels[keep_mask].astype("int32")
            rois = all_rois[keep_mask]
            target_boxes = gt_boxes_per_img[gt_assignment[keep_mask], :4]
            bbox_targets = self.box_coder.encode(rois[:, 1:], target_boxes)
            bbox_targets = bbox_targets.reshape(-1, 4)

            return_rois.append(rois)
            return_labels.append(labels)
            return_bbox_targets.append(bbox_targets)

        return (
            F.concat(return_rois, axis=0).detach(),
            F.concat(return_labels, axis=0).detach(),
            F.concat(return_bbox_targets, axis=0).detach(),
        )
--------------------------------------------------------------------------------
/megengine_release/layers/det/rpn.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
# This repo is licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
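# Anchor bookkeeping for the RPN below (a sketch with assumed config values):
# with anchor_scales=[[8]] and anchor_ratios=[[0.5, 1, 2]], num_cell_anchors
# is 1 * 3 == 3, so rpn_cls_score emits 3 channels and rpn_bbox_offsets emits
# 3 * 4 = 12 channels per spatial location of each FPN level.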
import megengine.functional as F
import megengine.module as M

import layers


class RPN(M.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.box_coder = layers.BoxCoder(cfg.rpn_reg_mean, cfg.rpn_reg_std)

        # check anchor settings
        assert len(set(len(x) for x in cfg.anchor_scales)) == 1
        assert len(set(len(x) for x in cfg.anchor_ratios)) == 1
        self.num_cell_anchors = len(cfg.anchor_scales[0]) * len(cfg.anchor_ratios[0])

        rpn_channel = cfg.rpn_channel
        self.in_features = cfg.rpn_in_features

        self.anchor_generator = layers.AnchorBoxGenerator(
            anchor_scales=cfg.anchor_scales,
            anchor_ratios=cfg.anchor_ratios,
            strides=cfg.rpn_stride,
            offset=self.cfg.anchor_offset,
        )

        self.matcher = layers.Matcher(
            cfg.match_thresholds, cfg.match_labels, cfg.match_allow_low_quality
        )

        self.rpn_conv = M.Conv2d(256, rpn_channel, kernel_size=3, stride=1, padding=1)
        self.rpn_cls_score = M.Conv2d(
            rpn_channel, self.num_cell_anchors, kernel_size=1, stride=1
        )
        self.rpn_bbox_offsets = M.Conv2d(
            rpn_channel, self.num_cell_anchors * 4, kernel_size=1, stride=1
        )

        for l in [self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets]:
            M.init.normal_(l.weight, std=0.01)
            M.init.fill_(l.bias, 0)

    def forward(self, features, im_info, boxes=None):
        # prediction
        features = [features[x] for x in self.in_features]

        # get anchors
        anchors_list = self.anchor_generator(features)

        pred_cls_logit_list = []
        pred_bbox_offset_list = []
        for x in features:
            t = F.relu(self.rpn_conv(x))
            scores = self.rpn_cls_score(t)
            pred_cls_logit_list.append(
                scores.reshape(
                    scores.shape[0],
                    self.num_cell_anchors,
                    scores.shape[2],
                    scores.shape[3],
                )
            )
            bbox_offsets = self.rpn_bbox_offsets(t)
            pred_bbox_offset_list.append(
                bbox_offsets.reshape(
                    bbox_offsets.shape[0],
                    self.num_cell_anchors,
                    4,
                    bbox_offsets.shape[2],
                    bbox_offsets.shape[3],
                )
            )
        # get rois from the predictions
        rpn_rois = self.find_top_rpn_proposals(
            pred_cls_logit_list, pred_bbox_offset_list, anchors_list, im_info
        )

        if self.training:
            rpn_labels, rpn_offsets = self.get_ground_truth(
                anchors_list, boxes, im_info[:, 4].astype("int32")
            )
            pred_cls_logits, pred_bbox_offsets = self.merge_rpn_score_box(
                pred_cls_logit_list, pred_bbox_offset_list
            )

            fg_mask = rpn_labels > 0
            valid_mask = rpn_labels >= 0
            num_valid = valid_mask.sum()

            # rpn classification loss
            loss_rpn_cls = F.loss.binary_cross_entropy(
                pred_cls_logits[valid_mask], rpn_labels[valid_mask]
            )

            # rpn regression loss
            loss_rpn_bbox = layers.smooth_l1_loss(
                pred_bbox_offsets[fg_mask],
                rpn_offsets[fg_mask],
                self.cfg.rpn_smooth_l1_beta,
            ).sum() / F.maximum(num_valid, 1)

            loss_dict = {"loss_rpn_cls": loss_rpn_cls, "loss_rpn_bbox": loss_rpn_bbox}
            return rpn_rois, loss_dict
        else:
            return rpn_rois

    def find_top_rpn_proposals(
        self, rpn_cls_score_list, rpn_bbox_offset_list, anchors_list, im_info
    ):
        prev_nms_top_n = (
            self.cfg.train_prev_nms_top_n
            if self.training
            else self.cfg.test_prev_nms_top_n
        )
        post_nms_top_n = (
            self.cfg.train_post_nms_top_n
            if self.training
            else self.cfg.test_post_nms_top_n
        )

        return_rois = []

        for bid in range(im_info.shape[0]):
            batch_proposal_list = []
            batch_score_list = []
            batch_level_list = []
            for l, (rpn_cls_score, rpn_bbox_offset, anchors) in enumerate(
                zip(rpn_cls_score_list, rpn_bbox_offset_list, anchors_list)
            ):
                # get proposals and scores
                offsets = rpn_bbox_offset[bid].transpose(2, 3, 0, 1).reshape(-1, 4)
                proposals = self.box_coder.decode(anchors, offsets)

                scores = rpn_cls_score[bid].transpose(1, 2, 0).flatten()
                scores = scores.detach()
                # prev nms top n
                scores, order = F.topk(scores, descending=True, k=prev_nms_top_n)
                proposals = proposals[order]

                batch_proposal_list.append(proposals)
                batch_score_list.append(scores)
                batch_level_list.append(F.full_like(scores, l))

            # gather proposals, scores, level
            proposals = F.concat(batch_proposal_list, axis=0)
            scores = F.concat(batch_score_list, axis=0)
            levels = F.concat(batch_level_list, axis=0)

            proposals = layers.get_clipped_boxes(proposals, im_info[bid])
            # filter invalid proposals and apply total level nms
            keep_mask = layers.filter_boxes(proposals)
            proposals = proposals[keep_mask]
            scores = scores[keep_mask]
            levels = levels[keep_mask]
            nms_keep_inds = layers.batched_nms(
                proposals, scores, levels, self.cfg.rpn_nms_threshold, post_nms_top_n
            )

            # generate rois to rcnn head, rois shape (N, 5), info [batch_id, x1, y1, x2, y2]
            rois = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
            rois = rois[nms_keep_inds]
            batch_inds = F.full((rois.shape[0], 1), bid)
            batch_rois = F.concat([batch_inds, rois[:, :4]], axis=1)
            return_rois.append(batch_rois)

        return_rois = F.concat(return_rois, axis=0)
        return return_rois.detach()

    def merge_rpn_score_box(self, rpn_cls_score_list, rpn_bbox_offset_list):
        final_rpn_cls_score_list = []
        final_rpn_bbox_offset_list = []

        for bid in range(rpn_cls_score_list[0].shape[0]):
            batch_rpn_cls_score_list = []
            batch_rpn_bbox_offset_list = []

            for i in range(len(self.in_features)):
                rpn_cls_scores = rpn_cls_score_list[i][bid].transpose(1, 2, 0).flatten()
                rpn_bbox_offsets = (
                    rpn_bbox_offset_list[i][bid].transpose(2, 3, 0, 1).reshape(-1, 4)
                )

                batch_rpn_cls_score_list.append(rpn_cls_scores)
                batch_rpn_bbox_offset_list.append(rpn_bbox_offsets)

            batch_rpn_cls_scores = F.concat(batch_rpn_cls_score_list, axis=0)
            batch_rpn_bbox_offsets = F.concat(batch_rpn_bbox_offset_list, axis=0)

            final_rpn_cls_score_list.append(batch_rpn_cls_scores)
            final_rpn_bbox_offset_list.append(batch_rpn_bbox_offsets)

        final_rpn_cls_scores = F.concat(final_rpn_cls_score_list, axis=0)
        final_rpn_bbox_offsets = F.concat(final_rpn_bbox_offset_list, axis=0)
        return final_rpn_cls_scores, final_rpn_bbox_offsets

    def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
        anchors = F.concat(anchors_list, axis=0)
        labels_list = []
        offsets_list = []

        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            overlaps = layers.get_iou(gt_boxes[:, :4], anchors)
            matched_indices, labels = self.matcher(overlaps)

            offsets = self.box_coder.encode(anchors, gt_boxes[matched_indices, :4])
self.box_coder.encode(anchors, gt_boxes[matched_indices, :4]) 217 | 218 | # sample positive labels 219 | num_positive = int(self.cfg.num_sample_anchors * self.cfg.positive_anchor_ratio) 220 | labels = layers.sample_labels(labels, num_positive, 1, -1) 221 | # sample negative labels 222 | num_positive = (labels == 1).sum().astype("int32") 223 | num_negative = self.cfg.num_sample_anchors - num_positive 224 | labels = layers.sample_labels(labels, num_negative, 0, -1) 225 | 226 | labels_list.append(labels) 227 | offsets_list.append(offsets) 228 | 229 | return ( 230 | F.concat(labels_list, axis=0).detach(), 231 | F.concat(offsets_list, axis=0).detach(), 232 | ) 233 | -------------------------------------------------------------------------------- /megengine_release/layers/det/sampling.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import megengine.functional as F 10 | from megengine.random import uniform 11 | 12 | 13 | def sample_labels(labels, num_samples, label_value, ignore_label=-1): 14 | """Sample at most `num_samples` labels whose value equals `label_value`; the surplus are reset to `ignore_label`. 15 | 16 | Args: 17 | labels(Tensor): shape of label is (N,) 18 | num_samples(int): max number of labels to keep with value `label_value` 19 | label_value(int): the label value to sample 20 | 21 | Returns: 22 | label(Tensor): label after sampling 23 | """ 24 | assert labels.ndim == 1, "Only tensor of dim 1 is supported." 25 | mask = (labels == label_value) 26 | num_valid = mask.sum() 27 | if num_valid <= num_samples: 28 | return labels 29 | 30 | random_tensor = F.zeros_like(labels).astype("float32") 31 | random_tensor[mask] = uniform(size=num_valid) 32 | _, invalid_inds = F.topk(random_tensor, k=num_samples - num_valid) 33 | 34 | labels[invalid_inds] = ignore_label 35 | return labels 36 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/data_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
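# Usage sketch: the `data_mapper` dict below maps the `name` field of a dataset
# config to a megengine dataset class, e.g. `data_mapper["coco"](root, ann_file, ...)`,
# which is how train.py (build_dataset) and test.py (build_dataloader) resolve datasets.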
9 | from megengine.data.dataset import COCO, Objects365, PascalVOC 10 | 11 | data_mapper = dict( 12 | coco=COCO, 13 | objects365=Objects365, 14 | voc=PascalVOC, 15 | ) 16 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import argparse 10 | 11 | import cv2 12 | 13 | import megengine as mge 14 | 15 | from layers.tools.data_mapper import data_mapper 16 | from layers.tools.utils import DetEvaluator, import_from_file 17 | 18 | logger = mge.get_logger(__name__) 19 | logger.setLevel("INFO") 20 | 21 | 22 | def make_parser(): 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument( 25 | "-f", "--file", default="net.py", type=str, help="net description file" 26 | ) 27 | parser.add_argument( 28 | "-w", "--weight_file", default=None, type=str, help="weights file", 29 | ) 30 | parser.add_argument("-i", "--image", type=str) 31 | return parser 32 | 33 | 34 | def main(): 35 | parser = make_parser() 36 | args = parser.parse_args() 37 | 38 | current_network = import_from_file(args.file) 39 | cfg = current_network.Cfg() 40 | cfg.backbone_pretrained = False 41 | model = current_network.Net(cfg) 42 | model.eval() 43 | 44 | state_dict = mge.load(args.weight_file) 45 | if "state_dict" in state_dict: 46 | state_dict = state_dict["state_dict"] 47 | model.load_state_dict(state_dict) 48 | 49 | evaluator = DetEvaluator(model) 50 | 51 | ori_img = cv2.imread(args.image) 52 | image, im_info = DetEvaluator.process_inputs( 53 | ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size, 54 | ) 55 | pred_res = evaluator.predict( 56 | image=mge.tensor(image), 57 | im_info=mge.tensor(im_info) 58 | ) 59 | res_img = DetEvaluator.vis_det( 60 | ori_img, 61 | pred_res, 62 | is_show_label=True, 63 | classes=data_mapper[cfg.test_dataset["name"]].class_names, 64 | ) 65 | cv2.imwrite("results.jpg", res_img) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /megengine_release/layers/tools/nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
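# `py_cpu_nms` below is greedy NMS: repeatedly keep the highest-scoring box and
# suppress every remaining box whose IoU with it exceeds `thresh`.
# Minimal sketch with made-up [x1, y1, x2, y2, score] rows:
#   dets = np.array([[0., 0., 10., 10., 0.9], [1., 1., 9., 9., 0.8], [20., 20., 30., 30., 0.7]])
#   py_cpu_nms(dets, thresh=0.5)  # -> [0, 2] (box 1 has IoU 0.64 with box 0, so it is dropped)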
9 | import numpy as np 10 | 11 | 12 | def py_cpu_nms(dets, thresh): 13 | x1 = np.ascontiguousarray(dets[:, 0]) 14 | y1 = np.ascontiguousarray(dets[:, 1]) 15 | x2 = np.ascontiguousarray(dets[:, 2]) 16 | y2 = np.ascontiguousarray(dets[:, 3]) 17 | 18 | areas = (x2 - x1) * (y2 - y1) 19 | order = dets[:, 4].argsort()[::-1] 20 | keep = list() 21 | 22 | while order.size > 0: 23 | pick_idx = order[0] 24 | keep.append(pick_idx) 25 | order = order[1:] 26 | 27 | xx1 = np.maximum(x1[pick_idx], x1[order]) 28 | yy1 = np.maximum(y1[pick_idx], y1[order]) 29 | xx2 = np.minimum(x2[pick_idx], x2[order]) 30 | yy2 = np.minimum(y2[pick_idx], y2[order]) 31 | 32 | inter = np.maximum(xx2 - xx1, 0) * np.maximum(yy2 - yy1, 0) 33 | iou = inter / np.maximum(areas[pick_idx] + areas[order] - inter, 1e-5) 34 | 35 | order = order[iou <= thresh] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/ICD.py: -------------------------------------------------------------------------------- 1 | import megengine 2 | import megengine as mge 3 | from typing import Dict, List, Tuple 4 | import megengine.module as M 5 | import megengine.functional as F 6 | import numpy as np 7 | from .encoder import InstanceRegEncoder 8 | from .decoder import DecoderWrapper 9 | from .utility import PositionEmbeddingSine 10 | 11 | 12 | def mask_out_padding(fpn_features, images_sizes, images): 13 | # Modified from DETR: https://github.com/facebookresearch/detr/blob/main/LICENSE 14 | # NOTE: zeros for foreground 15 | image_sizes = [(images_sizes[i, 2], images_sizes[i, 3]) for i in range(images_sizes.shape[0])] 16 | device = images_sizes.device 17 | h_, w_ = images.shape[-2:] 18 | masks = {} 19 | #assert len(feature_shapes) == len(self.feature_strides) 20 | for k, feat in fpn_features.items(): 21 | # stride = 2 ** int(k[-1]) 22 | N, _, H, W = feat.shape 23 | masks_per_feature_level = F.ones( 24 | (N, H, W), dtype='bool', device=device) 25 | stride = (h_ / H + w_ / W) / 2 26 | for img_idx, (h, w) in enumerate(image_sizes): 27 | masks_per_feature_level[ 28 | img_idx, 29 | : int(np.ceil(float(h) / stride)), 30 | : int(np.ceil(float(w) / stride)), 31 | ] = 0 32 | masks[k] = F.expand_dims(masks_per_feature_level, 1) #masks_per_feature_level.unsqueeze(1) 33 | return masks 34 | 35 | 36 | class ICD(M.Module): 37 | def __init__(self, hidden_dim, cfg): 38 | super().__init__() 39 | self.pos_embedding = PositionEmbeddingSine( 40 | num_pos_feats=hidden_dim // 2, 41 | normalize=True) 42 | 43 | self.ins_encoder = InstanceRegEncoder(cfg) 44 | self.attention_module_aux = DecoderWrapper(cfg) 45 | self.attention_module_distill = DecoderWrapper(cfg) 46 | # NOTE(peizhen): 1e-05 is not large enough and empirically might cause sqrt(neg) nan 47 | self.distill_norm_ = M.LayerNorm( 48 | [hidden_dim // cfg.distiller.ATT_HEADS], eps=1e-04, affine=False) 49 | #self.distill_norm_ = LayerNorm([hidden_dim // cfg.distiller.ATT_HEADS]) 50 | self.feat_keys = cfg.distiller.FEAT_KEYS 51 | self.weight_value = cfg.distiller.WEIGHT_VALUE 52 | self.temp_value = cfg.distiller.TEMP_VALUE 53 | 54 | self.loss_keys = [] 55 | self.num_losses = 3 56 | 57 | def mimic_loss(self, svalue, tvalue, value_mask): 58 | return (F.loss.square_loss(svalue, tvalue, reduction='none').transpose(1, 2, 3, 0) 59 | * value_mask).mean(2).sum() / F.clip(value_mask.sum(), lower=1e-6) 60 | 61 | def forward(self, features_dict_tea, features_dict_stu, images, instances, image_info, distill_flag=0): 62 | ''' 63 | contain_box_mask: 1d float 
tensor, [1., 0., ...], denoting whether each image contains any objects 64 | nr_actual_boxes_per_img: list of int, the exact number of objects each image contains 65 | ''' 66 | nr_actual_boxes_per_img = [image_info[i, -1] for i in range(image_info.shape[0])] 67 | 68 | masks = mask_out_padding(features_dict_tea, image_info, images) 69 | 70 | pos_embs = {k: self.pos_embedding( 71 | features_dict_tea[k], masks[k]) for k in self.feat_keys} 72 | pos_emb = F.concat([F.transpose(F.flatten(pos_embs[k], 2), (2, 0, 1)) for k in self.feat_keys], 0).detach() # S, N, C 73 | masks = F.concat([F.squeeze(F.flatten(masks[k], 2), 1) 74 | for k in self.feat_keys], 1).detach() # N, S 75 | 76 | loss_aux_dict, aux_info_dict = self.forward_aux( 77 | instances, features_dict_tea, image_info, {'mask_out': masks, 'pos_emb': pos_emb}) 78 | loss_distill_dict = self.forward_distill( 79 | features_dict_stu, aux_info_dict, nr_actual_boxes_per_img, distill_flag, {'mask_out': masks, 'pos_emb': pos_emb}) 80 | loss_aux_dict.update(loss_distill_dict) 81 | self.loss_keys = list(loss_aux_dict.keys()) 82 | # print(self.loss_keys) 83 | return loss_aux_dict 84 | 85 | def forward_aux(self, instances, features_dict_tea, image_size, aux_input): 86 | # [S, N, C] 87 | feat = F.concat([F.flatten(features_dict_tea[k], start_axis=2).transpose(2, 0, 1) 88 | for k in self.feat_keys], 0).detach() 89 | 90 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K] 91 | # (0 for Fake Instance) in ins_mask 92 | 93 | # The four variables below, provided by the encoder forward, have already been detached before being passed here 94 | ins_feat, ins_mask, ins_mask_gt, pos_gt = self.ins_encoder( 95 | instances, pro_feats=features_dict_tea, image_size=image_size) 96 | decoded_feat, tmask, tvalue = self.attention_module_aux( 97 | ins_feat, 98 | feat, 99 | feat, 100 | query_mask=ins_mask, 101 | key_padding_mask=aux_input['mask_out'], 102 | pos_embedding=aux_input['pos_emb']) 103 | 104 | aux_info_dict = { 105 | 'encoded_ins': (ins_feat, ins_mask, ins_mask_gt), 106 | 'tmask': tmask, 107 | 'tvalue': tvalue, 108 | } 109 | 110 | loss_dict = dict() 111 | loss_dict = self.ins_encoder.loss( 112 | decoded_feat, ins_mask_gt, ins_mask, pos_gt) 113 | 114 | return loss_dict, aux_info_dict 115 | 116 | 117 | def forward_distill(self, features_dict_stu, aux_info_dict, nr_actual_boxes_per_img, distill_flag, aux_input): 118 | loss_dict = dict() 119 | 120 | assert set(self.feat_keys) == set(list(features_dict_stu.keys( 121 | ))), 'WARNING: Unequal keys for fpn and attention ! 
<%s> != <%s>' % (self.feat_keys, features_dict_stu.keys()) 122 | # [S, N, C] 123 | feat = F.concat([F.flatten(features_dict_stu[k], start_axis=2).transpose(2, 0, 1) 124 | for k in self.feat_keys], 0) 125 | 126 | if distill_flag == 0: 127 | feat = feat.detach() 128 | 129 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K] 130 | # (0 for Fake Instance) in ins_mask 131 | ins_feat, ins_mask, ins_mask_gt = aux_info_dict['encoded_ins'] 132 | max_ele = int(max(max(nr_actual_boxes_per_img), 1)) 133 | 134 | # Note that mask is not normalized by softmax 135 | # load state dict, therefore we share almost all parameters 136 | _, _, svalue = self.attention_module_distill( 137 | ins_feat[:max_ele, :, :], 138 | feat, 139 | feat, 140 | query_mask=ins_mask[:, :max_ele], 141 | key_padding_mask=aux_input['mask_out'], 142 | pos_embedding=aux_input['pos_emb'], 143 | proj_only=True) 144 | tvalue = aux_info_dict['tvalue'] 145 | tmask = aux_info_dict['tmask'] 146 | 147 | # [bsz, heads, ins, Seq] 148 | svalue = self.distill_norm_(svalue) 149 | # [Seq, bsz, heads, channel] 150 | tvalue = self.distill_norm_(tvalue) 151 | 152 | # cosine similarity between features; unreal instances are masked 153 | # feat holds compact features for each instance 154 | # value is weighted attention maps refactored as different heads 155 | # mask is q, k relation masks for distillation 156 | 157 | # [bsz, heads, 1, S] 158 | value_mask = (F.softmax(tmask / self.temp_value, axis=-1) 159 | * F.expand_dims(F.expand_dims(ins_mask_gt, axis=1), axis=-1) 160 | ).sum(2, keepdims=True).detach() 161 | # NOTE(peizhen): value_mask[j, ...] is ill-defined for any j-th image that contains no box; beforehand, we could use a pseudo box for images that have no box at all 162 | # the same should apply to ins_encoder's auxiliary task loss too (an image with no box should not contribute to the loss) 163 | 164 | # [bsz, heads, 1, num_seq] 165 | # value_mask = value_mask * contain_box_mask.reshape(-1, 1, 1, 1) 166 | loss_dict = {'distill': self.mimic_loss( 167 | svalue, tvalue.detach(), value_mask) * self.weight_value} 168 | return loss_dict 169 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/__init__.py: -------------------------------------------------------------------------------- 1 | from .ICD import ICD 2 | from .utility import get_instance_list 3 | 4 | __all__ = [key for key in globals().keys() if not key.startswith('_')] 5 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/decoder.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.module as M 3 | from megengine import functional as F 4 | import numpy as np 5 | from .transformer import MultiheadAttention 6 | #from .utility import has_nan_or_inf 7 | 8 | # mge.core.set_option('async_level', 0) 9 | 10 | class DecoderWrapper(M.Module): 11 | def __init__(self, cfg): 12 | super().__init__() 13 | channels = cfg.distiller.HIDDEN_DIM 14 | heads = cfg.distiller.ATT_HEADS 15 | 16 | # this is a local module derived from the official implementation; we modify the last modules 17 | self.matt = MultiheadAttention(channels, heads) 18 | 19 | self.pos_projector = M.Linear(in_features=channels, out_features=channels) 20 | self.use_pos = cfg.distiller.USE_POS_EMBEDDING 21 | self.pos_on_v = cfg.distiller.DECODER_POSEMB_ON_V 22 | 23 | def with_pos_embed(self, tensor, pos): 24 | ''' 25 | tensor: 
[S, N, C] 26 | pos: [S, N, C] or [S, 1, C] 27 | ''' 28 | if not self.use_pos or pos is None: 29 | return tensor 30 | 31 | pos = self.pos_projector(pos) 32 | return tensor + pos 33 | 34 | 35 | def forward(self, q, k, v, query_mask=None, key_padding_mask=None, pos_embedding=None, proj_only=False): 36 | # q, v: [sequence_len, batch_size, channels] 37 | k = self.with_pos_embed(k, pos_embedding) 38 | if self.pos_on_v: 39 | v = self.with_pos_embed(v, pos_embedding) 40 | att, mask, values = self.matt( 41 | q, k, v, key_padding_mask=key_padding_mask, proj_only=proj_only) 42 | return att, mask, values 43 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/layers.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.module as M 3 | from megengine import functional as F 4 | import numpy as np 5 | 6 | class MLP(M.Module): 7 | """ Very simple multi-layer perceptron (also called FFN)""" 8 | 9 | def __init__(self, input_dim, hidden_dim, output_dim, num_layers): 10 | super().__init__() 11 | self.num_layers = num_layers 12 | h = [hidden_dim] * (num_layers - 1) 13 | self.layers = [M.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])] 14 | 15 | def forward(self, x): 16 | for i, layer in enumerate(self.layers): 17 | x = F.nn.relu(layer(x)) if i < self.num_layers - 1 else layer(x) 18 | return x 19 | -------------------------------------------------------------------------------- /megengine_release/models/ICD/utility.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.module as M 3 | from megengine import functional as F 4 | import numpy as np 5 | import math 6 | # mge.core.set_option('async_level', 0) 7 | 8 | 9 | def safe_masked_fill(tensor: mge.Tensor, mask: mge.Tensor, val: float) -> mge.Tensor: 10 | ''' 11 | same behavior as torch.tensor.masked_fill_(mask, val) 12 | ''' 13 | assert mask.dtype == np.bool_ 14 | discard_mask = ~mask 15 | keep_mask = mask.astype('float32') 16 | # NOTE(peizhen): simply tensor * ~mask + value * mask could not handle the value=float('+inf'/'-inf') case, since inf*0 = nan 17 | new_tensor = tensor * ~mask + F.where(mask, F.ones_like(mask) * val, F.zeros_like(mask)) 18 | return new_tensor 19 | 20 | 21 | def has_nan_or_inf(inp): 22 | invalid_mask = F.logical_or(F.isnan(inp), F.isinf(inp)) 23 | return invalid_mask.sum().item() > 0 24 | 25 | 26 | class PositionEmbeddingSine(M.Module): 27 | """ 28 | This is a more standard version of the position embedding, very similar to the one 29 | used by the Attention is all you need paper, generalized to work on images. 
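Each spatial location receives sin/cos features of its cumulative (y, x) position over the unmasked region, at num_pos_feats frequencies per axis, so padded pixels never shift the encoding.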
30 | Modified from DETR: https://github.com/facebookresearch/detr/blob/main/LICENSE 31 | """ 32 | 33 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 34 | super().__init__() 35 | self.num_pos_feats = num_pos_feats 36 | self.temperature = temperature 37 | self.normalize = normalize 38 | if scale is not None and normalize is False: 39 | raise ValueError("normalize should be True if scale is passed") 40 | if scale is None: 41 | scale = 2 * math.pi 42 | self.scale = scale 43 | 44 | def forward(self, x, mask): 45 | assert mask is not None 46 | not_mask = F.squeeze(~mask, 1) # ~mask.squeeze(1) 47 | # import ipdb; ipdb.set_trace() 48 | y_embed = F.cumsum(not_mask.astype('int32'), 1) # .cumsum(1, dtype=torch.float32) 49 | x_embed = F.cumsum(not_mask.astype('int32'), 2) # .cumsum(2, dtype=torch.float32) 50 | if self.normalize: 51 | eps = 1e-6 52 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 53 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 54 | 55 | dim_t = F.arange(self.num_pos_feats, 56 | dtype="float32", device=x.device) 57 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 58 | 59 | pos_x = F.expand_dims(x_embed, -1) / dim_t 60 | pos_y = F.expand_dims(y_embed, -1) / dim_t 61 | pos_x = F.flatten(F.stack( 62 | (F.sin(pos_x[:, :, :, 0::2]), F.cos(pos_x[:, :, :, 1::2])), axis=4), start_axis=3) 63 | pos_y = F.flatten(F.stack( 64 | (F.sin(pos_y[:, :, :, 0::2]), F.cos(pos_y[:, :, :, 1::2])), axis=4), start_axis=3) 65 | pos = F.transpose(F.concat((pos_y, pos_x), axis=3), (0, 3, 1, 2)) 66 | return pos 67 | 68 | 69 | def get_valid_boxes(raw_boxes, terminate, ignore): 70 | ''' 71 | Input: 72 | raw_boxes: (B, MAXN, 4+1) 73 | terminate: label value that marks padded (invalid) box slots 74 | Return: 75 | boxes: list of (Nb, 4) 76 | labels: list of (Nb,) 77 | ''' 78 | # (B,) 79 | B = raw_boxes.shape[0] 80 | nr_valid_boxes = (1 - F.equal(raw_boxes[:, :, -1], terminate)).sum(axis=1).astype('int32') 81 | 82 | #print(f'nr_valid_boxes: {nr_valid_boxes}') 83 | 84 | # NOTE(peizhen): raw_boxes[i, :0, :4] will cause a bug since ':0' indexing is invalid in megengine 85 | #boxes = [raw_boxes[i, :nr_valid_boxes[i], :4] for i in range(B)] 86 | #labels = [raw_boxes[i, :nr_valid_boxes[i], 4] for i in range(B)] 87 | 88 | # B x (Nb, 4) and B x (Nb,) 89 | boxes = list() 90 | labels = list() 91 | for i in range(B): 92 | num_valid = nr_valid_boxes[i].item() 93 | if num_valid > 0: 94 | boxes.append(raw_boxes[i, :num_valid, :4]) 95 | labels.append(raw_boxes[i, :num_valid, 4]) 96 | else: 97 | boxes.append(F.zeros((0, 4), dtype=raw_boxes.dtype, device=raw_boxes.device)) 98 | labels.append(F.zeros((0,), dtype=raw_boxes.dtype, device=raw_boxes.device)) 99 | 100 | # TODO(peizhen): currently discards entries whose labels are -1. Need a better operation? 101 | # see backup/utility.py annotation part 102 | return boxes, labels 103 | 104 | 105 | def get_instance_list(image_size, gt_boxes_human, gt_boxes_car, terminate=-2, ignore=-1): 106 | ''' 107 | Input: 108 | gt_boxes_human: (B, MAXN, 4+1) 109 | gt_boxes_car: (B, MAXN, 4+1) 110 | ''' 111 | human_box_list, human_label_list = get_valid_boxes(gt_boxes_human, terminate, ignore) 112 | vehicle_box_list, vehicle_label_list = get_valid_boxes(gt_boxes_car, terminate, ignore) 113 | # (1) For `gt_boxes_human`, 1 denotes human. 
-2 denotes an invalid object (will be processed as 0) 114 | # (2) For `gt_boxes_car`, 1 & 2 denote different kinds of car, -2 denotes an invalid object (valid labels will still be 1 and 2) 115 | 116 | instances = list() 117 | contain_box_mask = list() 118 | nr_actual_boxes_per_img = list() 119 | for human_boxes, human_labels, vehicle_boxes, vehicle_labels in \ 120 | zip(human_box_list, human_label_list, vehicle_box_list, vehicle_label_list): 121 | # (k, 4) and (k,) 122 | gt_boxes = F.concat([human_boxes, vehicle_boxes], axis=0).astype("float32") 123 | # Map gt_boxes_human's labels from 1 to 0. Naturally, cars own labels 1 and 2 124 | gt_classes = F.concat([human_labels - 1, vehicle_labels], axis=0).astype("int32") 125 | 126 | contain_box_mask.append(gt_boxes.shape[0] > 0) 127 | assert gt_boxes.shape[0] == gt_classes.shape[0] 128 | 129 | # pad a box for images that contain no box at all, to work around a potential indexing bug (unlike in coco, an image in a business dataset might contain no box at all) 130 | nr_valid_objs = gt_boxes.shape[0] 131 | nr_actual_boxes_per_img.append(nr_valid_objs) 132 | if nr_valid_objs == 0: 133 | gt_boxes = F.zeros((1, 4), device=gt_boxes.device, dtype=gt_boxes.dtype) 134 | gt_classes = F.zeros((1,), device=gt_classes.device, dtype=gt_classes.dtype) 135 | 136 | instances.append({'image_size': image_size, 'gt_boxes': gt_boxes, 'gt_classes': gt_classes}) 137 | 138 | # (bsz,) 139 | contain_box_mask = mge.Tensor( 140 | contain_box_mask, device=instances[0]['gt_boxes'].device, dtype='float32').detach() 141 | 142 | return instances, contain_box_mask, nr_actual_boxes_per_img 143 | -------------------------------------------------------------------------------- /megengine_release/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | from .atss import * 10 | from .faster_rcnn import * 11 | from .fcos import * 12 | from .freeanchor import * 13 | from .retinanet import * 14 | 15 | _EXCLUDE = {} 16 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 17 | -------------------------------------------------------------------------------- /megengine_release/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /megengine_release/models/backbones/resnet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | -------------------------------------------------------------------------------- /megengine_release/models/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import numpy as np 10 | 11 | import megengine.functional as F 12 | import megengine.module as M 13 | 14 | import models.backbones.resnet.model as resnet 15 | import layers 16 | 17 | 18 | class FasterRCNN(M.Module): 19 | """ 20 | Implement Faster R-CNN (https://arxiv.org/abs/1506.01497). 21 | """ 22 | 23 | def __init__(self, cfg): 24 | super().__init__() 25 | self.cfg = cfg 26 | # ----------------------- build backbone ------------------------ # 27 | bottom_up = getattr(resnet, cfg.backbone)( 28 | norm=layers.get_norm(cfg.backbone_norm), pretrained=cfg.backbone_pretrained 29 | ) 30 | del bottom_up.fc 31 | 32 | # ----------------------- build FPN ----------------------------- # 33 | self.backbone = layers.FPN( 34 | bottom_up=bottom_up, 35 | in_features=cfg.fpn_in_features, 36 | out_channels=cfg.fpn_out_channels, 37 | norm=cfg.fpn_norm, 38 | top_block=layers.FPNP6(), 39 | strides=cfg.fpn_in_strides, 40 | channels=cfg.fpn_in_channels, 41 | ) 42 | 43 | # ----------------------- build RPN ----------------------------- # 44 | self.rpn = layers.RPN(cfg) 45 | 46 | # ----------------------- build RCNN head ----------------------- # 47 | self.rcnn = layers.RCNN(cfg) 48 | 49 | def preprocess_image(self, image): 50 | padded_image = layers.get_padded_tensor(image, 32, 0.0) 51 | normed_image = ( 52 | padded_image 53 | - np.array(self.cfg.img_mean, dtype="float32")[None, :, None, None] 54 | ) / np.array(self.cfg.img_std, dtype="float32")[None, :, None, None] 55 | return normed_image 56 | 57 | def forward(self, image, im_info, gt_boxes=None): 58 | image = self.preprocess_image(image) 59 | features = self.backbone(image) 60 | 61 | if self.training: 62 | return self._forward_train(features, im_info, gt_boxes) 63 | else: 64 | return self.inference(features, im_info) 65 | 66 | def _forward_train(self, features, im_info, gt_boxes): 67 | rpn_rois, rpn_losses = self.rpn(features, im_info, gt_boxes) 68 | rcnn_losses = self.rcnn(features, rpn_rois, im_info, gt_boxes) 69 | 70 | loss_rpn_cls = rpn_losses["loss_rpn_cls"] 71 | loss_rpn_bbox = rpn_losses["loss_rpn_bbox"] 72 | loss_rcnn_cls = rcnn_losses["loss_rcnn_cls"] 73 | loss_rcnn_bbox = rcnn_losses["loss_rcnn_bbox"] 74 | total_loss = loss_rpn_cls + loss_rpn_bbox + loss_rcnn_cls + loss_rcnn_bbox 75 | 76 | loss_dict = { 77 | "total_loss": total_loss, 78 | "rpn_cls": loss_rpn_cls, 79 | "rpn_bbox": loss_rpn_bbox, 80 | "rcnn_cls": loss_rcnn_cls, 81 | "rcnn_bbox": loss_rcnn_bbox, 82 | } 83 | self.cfg.losses_keys = list(loss_dict.keys()) 84 | return loss_dict 85 | 86 | def inference(self, features, im_info): 87 | rpn_rois = self.rpn(features, im_info) 88 | pred_boxes, pred_score = self.rcnn(features, rpn_rois) 89 | pred_boxes = 
pred_boxes.reshape(-1, 4) 90 | 91 | scale_w = im_info[0, 1] / im_info[0, 3] 92 | scale_h = im_info[0, 0] / im_info[0, 2] 93 | pred_boxes = pred_boxes / F.concat([scale_w, scale_h, scale_w, scale_h], axis=0) 94 | clipped_boxes = layers.get_clipped_boxes( 95 | pred_boxes, im_info[0, 2:4] 96 | ).reshape(-1, self.cfg.num_classes, 4) 97 | return pred_score, clipped_boxes 98 | 99 | 100 | class FasterRCNNConfig: 101 | # pylint: disable=too-many-statements 102 | def __init__(self): 103 | self.backbone = "resnet50" 104 | self.backbone_pretrained = True 105 | self.backbone_norm = "FrozenBN" 106 | self.backbone_freeze_at = 2 107 | self.fpn_norm = None 108 | self.fpn_in_features = ["res2", "res3", "res4", "res5"] 109 | self.fpn_in_strides = [4, 8, 16, 32] 110 | self.fpn_in_channels = [256, 512, 1024, 2048] 111 | self.fpn_out_channels = 256 112 | 113 | # ------------------------ data cfg -------------------------- # 114 | self.train_dataset = dict( 115 | name="coco", 116 | root="train2017", 117 | ann_file="annotations/instances_train2017.json", 118 | remove_images_without_annotations=True, 119 | ) 120 | self.test_dataset = dict( 121 | name="coco", 122 | root="val2017", 123 | ann_file="annotations/instances_val2017.json", 124 | remove_images_without_annotations=False, 125 | ) 126 | self.num_classes = 80 127 | self.img_mean = [103.530, 116.280, 123.675] # BGR 128 | self.img_std = [57.375, 57.120, 58.395] 129 | 130 | # ----------------------- rpn cfg ------------------------- # 131 | self.rpn_stride = [4, 8, 16, 32, 64] 132 | self.rpn_in_features = ["p2", "p3", "p4", "p5", "p6"] 133 | self.rpn_channel = 256 134 | self.rpn_reg_mean = [0.0, 0.0, 0.0, 0.0] 135 | self.rpn_reg_std = [1.0, 1.0, 1.0, 1.0] 136 | 137 | self.anchor_scales = [[x] for x in [32, 64, 128, 256, 512]] 138 | self.anchor_ratios = [[0.5, 1, 2]] 139 | self.anchor_offset = 0.5 140 | 141 | self.match_thresholds = [0.3, 0.7] 142 | self.match_labels = [0, -1, 1] 143 | self.match_allow_low_quality = True 144 | self.rpn_nms_threshold = 0.7 145 | self.num_sample_anchors = 256 146 | self.positive_anchor_ratio = 0.5 147 | 148 | # ----------------------- rcnn cfg ------------------------- # 149 | self.rcnn_stride = [4, 8, 16, 32] 150 | self.rcnn_in_features = ["p2", "p3", "p4", "p5"] 151 | self.rcnn_reg_mean = [0.0, 0.0, 0.0, 0.0] 152 | self.rcnn_reg_std = [0.1, 0.1, 0.2, 0.2] 153 | 154 | self.pooling_method = "roi_align" 155 | self.pooling_size = (7, 7) 156 | 157 | self.num_rois = 512 158 | self.fg_ratio = 0.5 159 | self.fg_threshold = 0.5 160 | self.bg_threshold_high = 0.5 161 | self.bg_threshold_low = 0.0 162 | self.class_aware_box = True 163 | 164 | # ------------------------ loss cfg -------------------------- # 165 | self.rpn_smooth_l1_beta = 0 # use L1 loss 166 | self.rcnn_smooth_l1_beta = 0 # use L1 loss 167 | self.num_losses = 5 168 | 169 | # ------------------------ training cfg ---------------------- # 170 | self.train_image_short_size = (640, 672, 704, 736, 768, 800) 171 | self.train_image_max_size = 1333 172 | self.train_prev_nms_top_n = 2000 173 | self.train_post_nms_top_n = 1000 174 | 175 | self.basic_lr = 0.02 / 16 # The basic learning rate for single-image 176 | self.momentum = 0.9 177 | self.weight_decay = 1e-4 178 | self.log_interval = 20 179 | self.nr_images_epoch = 80000 180 | self.max_epoch = 54 181 | self.warm_iters = 500 182 | self.lr_decay_rate = 0.1 183 | self.lr_decay_stages = [42, 50] 184 | 185 | # ------------------------ testing cfg ----------------------- # 186 | self.test_image_short_size = 800 187 | 
self.test_image_max_size = 1333 188 | self.test_prev_nms_top_n = 1000 189 | self.test_post_nms_top_n = 1000 190 | self.test_max_boxes_per_image = 100 191 | self.test_vis_threshold = 0.3 192 | self.test_cls_threshold = 0.05 193 | self.test_nms = 0.5 194 | -------------------------------------------------------------------------------- /megengine_release/requirements.txt: -------------------------------------------------------------------------------- 1 | megengine 2 | numpy==1.19.5 3 | opencv-python==4.5.3.56 4 | tqdm==4.62.3 5 | tabulate==0.8.9 6 | -------------------------------------------------------------------------------- /megengine_release/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | import argparse 10 | import json 11 | import os 12 | from tqdm import tqdm 13 | 14 | import megengine as mge 15 | import megengine.distributed as dist 16 | from megengine.data import DataLoader 17 | 18 | from layers.tools.data_mapper import data_mapper 19 | from layers.tools.utils import DetEvaluator, InferenceSampler, import_from_file 20 | 21 | logger = mge.get_logger(__name__) 22 | logger.setLevel("INFO") 23 | mge.device.set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4.0) 24 | 25 | 26 | def make_parser(): 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument( 29 | "-f", "--file", default="net.py", type=str, help="net description file" 30 | ) 31 | parser.add_argument( 32 | "-w", "--weight_file", default=None, type=str, help="weights file", 33 | ) 34 | parser.add_argument( 35 | "-n", "--devices", default=1, type=int, help="total number of gpus for testing", 36 | ) 37 | parser.add_argument( 38 | "-d", "--dataset_dir", default="/data/datasets", type=str, 39 | ) 40 | parser.add_argument("-se", "--start_epoch", default=-1, type=int) 41 | parser.add_argument("-ee", "--end_epoch", default=-1, type=int) 42 | return parser 43 | 44 | 45 | def main(): 46 | # pylint: disable=import-outside-toplevel,too-many-branches,too-many-statements 47 | from pycocotools.coco import COCO 48 | from pycocotools.cocoeval import COCOeval 49 | 50 | parser = make_parser() 51 | args = parser.parse_args() 52 | 53 | current_network = import_from_file(args.file) 54 | cfg = current_network.Cfg() 55 | 56 | if args.weight_file: 57 | args.start_epoch = args.end_epoch = -1 58 | else: 59 | if args.start_epoch == -1: 60 | args.start_epoch = cfg.max_epoch - 1 61 | if args.end_epoch == -1: 62 | args.end_epoch = args.start_epoch 63 | assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch 64 | 65 | for epoch_num in range(args.start_epoch, args.end_epoch + 1): 66 | if args.weight_file: 67 | weight_file = args.weight_file 68 | else: 69 | weight_file = "log-of-{}/epoch_{}.pkl".format( 70 | os.path.basename(args.file).split(".")[0], epoch_num 71 | ) 72 | 73 | if args.devices > 1: 74 | dist_worker = dist.launcher(n_gpus=args.devices)(worker) 75 | result_list = dist_worker(current_network, weight_file, args.dataset_dir) 76 | result_list = sum(result_list, []) 77 | else: 78 | result_list = worker(current_network, weight_file, args.dataset_dir) 79 | 80 | all_results = 
DetEvaluator.format(result_list, cfg) 81 | if args.weight_file: 82 | json_path = "{}_{}.json".format( 83 | os.path.basename(args.file).split(".")[0], 84 | os.path.basename(args.weight_file).split(".")[0], 85 | ) 86 | else: 87 | json_path = "log-of-{}/epoch_{}.json".format( 88 | os.path.basename(args.file).split(".")[0], epoch_num 89 | ) 90 | all_results = json.dumps(all_results) 91 | 92 | with open(json_path, "w") as fo: 93 | fo.write(all_results) 94 | logger.info("Saved results to %s, starting evaluation!", json_path) 95 | 96 | eval_gt = COCO( 97 | os.path.join( 98 | args.dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"] 99 | ) 100 | ) 101 | eval_dt = eval_gt.loadRes(json_path) 102 | cocoEval = COCOeval(eval_gt, eval_dt, iouType="bbox") 103 | cocoEval.evaluate() 104 | cocoEval.accumulate() 105 | cocoEval.summarize() 106 | metrics = [ 107 | "AP", 108 | "AP@0.5", 109 | "AP@0.75", 110 | "APs", 111 | "APm", 112 | "APl", 113 | "AR@1", 114 | "AR@10", 115 | "AR@100", 116 | "ARs", 117 | "ARm", 118 | "ARl", 119 | ] 120 | logger.info("mmAP".center(32, "-")) 121 | for i, m in enumerate(metrics): 122 | logger.info("|\t%s\t|\t%.03f\t|", m, cocoEval.stats[i]) 123 | logger.info("-" * 32) 124 | 125 | 126 | def worker(current_network, weight_file, dataset_dir): 127 | cfg = current_network.Cfg() 128 | cfg.backbone_pretrained = False 129 | 130 | model = current_network.Net(cfg) 131 | model.eval() 132 | 133 | state_dict = mge.load(weight_file) 134 | if "state_dict" in state_dict: 135 | state_dict = state_dict["state_dict"] 136 | model.load_state_dict(state_dict) 137 | 138 | evaluator = DetEvaluator(model) 139 | 140 | test_loader = build_dataloader(dataset_dir, model.cfg) 141 | if dist.get_rank() == 0: 142 | test_loader = tqdm(test_loader) 143 | 144 | result_list = [] 145 | for data in test_loader: 146 | image, im_info = DetEvaluator.process_inputs( 147 | data[0][0], 148 | model.cfg.test_image_short_size, 149 | model.cfg.test_image_max_size, 150 | ) 151 | pred_res = evaluator.predict( 152 | image=mge.tensor(image), 153 | im_info=mge.tensor(im_info) 154 | ) 155 | result = { 156 | "pred_boxes": pred_res, 157 | "image_id": int(data[1][2][0].split(".")[0].split("_")[-1]), 158 | } 159 | result_list.append(result) 160 | return result_list 161 | 162 | 163 | def build_dataloader(dataset_dir, cfg): 164 | val_dataset = data_mapper[cfg.test_dataset["name"]]( 165 | os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["root"]), 166 | os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]), 167 | order=["image", "info"], 168 | ) 169 | val_sampler = InferenceSampler(val_dataset, 1) 170 | val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2) 171 | return val_dataloader 172 | 173 | 174 | if __name__ == "__main__": 175 | main() 176 | -------------------------------------------------------------------------------- /megengine_release/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This repo is licensed under the Apache License, Version 2.0 (the "License") 3 | # 4 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 5 | # 6 | # Unless required by applicable law or agreed to in writing, 7 | # software distributed under the License is distributed on an 8 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
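# Launch sketch (hypothetical paths; the flags are defined in make_parser below):
#   python3 train.py -f configs/faster_rcnn_res50_coco_3x_800size.py -n 8 -b 2 \
#       -d /data/datasets -w /path/to/backbone_weights.pkl
# Note: `-w` only initializes the backbone (bottom_up) weights; see worker().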
9 | import argparse 10 | import bisect 11 | import copy 12 | import os 13 | import time 14 | 15 | import megengine as mge 16 | import megengine.distributed as dist 17 | from megengine.autodiff import GradManager 18 | from megengine.data import DataLoader, Infinite, RandomSampler 19 | from megengine.data import transform as T 20 | from megengine.optimizer import SGD 21 | 22 | from layers.tools.data_mapper import data_mapper 23 | from layers.tools.utils import ( 24 | AverageMeter, 25 | DetectionPadCollator, 26 | GroupedRandomSampler, 27 | get_config_info, 28 | import_from_file 29 | ) 30 | 31 | logger = mge.get_logger(__name__) 32 | logger.setLevel("INFO") 33 | mge.device.set_prealloc_config(1024, 1024, 256 * 1024 * 1024, 4.0) 34 | 35 | 36 | def make_parser(): 37 | parser = argparse.ArgumentParser() 38 | parser.add_argument( 39 | "-f", "--file", default="net.py", type=str, help="net description file" 40 | ) 41 | parser.add_argument( 42 | "-w", "--weight_file", default=None, type=str, help="weights file", 43 | ) 44 | parser.add_argument( 45 | "-n", "--devices", default=1, type=int, help="total number of gpus for training", 46 | ) 47 | parser.add_argument( 48 | "-b", "--batch_size", default=2, type=int, help="batch size for training", 49 | ) 50 | parser.add_argument( 51 | "-d", "--dataset_dir", default="/data/datasets", type=str, 52 | ) 53 | 54 | return parser 55 | 56 | 57 | def main(): 58 | parser = make_parser() 59 | args = parser.parse_args() 60 | 61 | # ------------------------ begin training -------------------------- # 62 | logger.info("Device Count = %d", args.devices) 63 | 64 | log_dir = "log-of-{}".format(os.path.basename(args.file).split(".")[0]) 65 | if not os.path.isdir(log_dir): 66 | os.makedirs(log_dir) 67 | 68 | if args.devices > 1: 69 | trainer = dist.launcher(worker, n_gpus=args.devices) 70 | trainer(args) 71 | else: 72 | worker(args) 73 | 74 | 75 | def worker(args): 76 | current_network = import_from_file(args.file) 77 | 78 | model = current_network.Net(current_network.Cfg()) 79 | model.train() 80 | 81 | if dist.get_rank() == 0: 82 | logger.info(get_config_info(model.cfg)) 83 | logger.info(repr(model)) 84 | 85 | params_with_grad = [] 86 | for name, param in model.named_parameters(): 87 | if "bottom_up.conv1" in name and model.cfg.backbone_freeze_at >= 1: 88 | continue 89 | if "bottom_up.layer1" in name and model.cfg.backbone_freeze_at >= 2: 90 | continue 91 | params_with_grad.append(param) 92 | 93 | opt = SGD( 94 | params_with_grad, 95 | lr=model.cfg.basic_lr * args.batch_size * dist.get_world_size(), 96 | momentum=model.cfg.momentum, 97 | weight_decay=model.cfg.weight_decay, 98 | ) 99 | 100 | # print('BASE LR:', model.cfg.basic_lr * args.batch_size * dist.get_world_size()) 101 | 102 | gm = GradManager() 103 | if dist.get_world_size() > 1: 104 | gm.attach( 105 | params_with_grad, 106 | callbacks=[dist.make_allreduce_cb("mean", dist.WORLD)] 107 | ) 108 | else: 109 | gm.attach(params_with_grad) 110 | 111 | if args.weight_file is not None: 112 | weights = mge.load(args.weight_file) 113 | model.backbone.bottom_up.load_state_dict(weights, strict=False) 114 | if dist.get_world_size() > 1: 115 | dist.bcast_list_(model.parameters()) # sync parameters 116 | dist.bcast_list_(model.buffers()) # sync buffers 117 | 118 | if dist.get_rank() == 0: 119 | logger.info("Prepare dataset") 120 | train_loader = iter(build_dataloader(args.batch_size, args.dataset_dir, model.cfg)) 121 | 122 | for epoch in range(model.cfg.max_epoch): 123 | train_one_epoch(model, train_loader, opt, gm, epoch, args) 124 
| if dist.get_rank() == 0: 125 | save_path = "log-of-{}/epoch_{}.pkl".format( 126 | os.path.basename(args.file).split(".")[0], epoch 127 | ) 128 | mge.save( 129 | {"epoch": epoch, "state_dict": model.state_dict()}, save_path, 130 | ) 131 | logger.info("dump weights to %s", save_path) 132 | 133 | 134 | def train_one_epoch(model, data_queue, opt, gm, epoch, args): 135 | def train_func(image, im_info, gt_boxes): 136 | with gm: 137 | loss_dict = model(image=image, im_info=im_info, gt_boxes=gt_boxes) 138 | gm.backward(loss_dict["total_loss"]) 139 | loss_list = list(loss_dict.values()) 140 | opt.step().clear_grad() 141 | return loss_list 142 | 143 | meter = AverageMeter(record_len=model.cfg.num_losses) 144 | time_meter = AverageMeter(record_len=2) 145 | log_interval = model.cfg.log_interval 146 | tot_step = model.cfg.nr_images_epoch // (args.batch_size * dist.get_world_size()) 147 | for step in range(tot_step): 148 | adjust_learning_rate(opt, epoch, step, model.cfg, args) 149 | 150 | data_tik = time.time() 151 | mini_batch = next(data_queue) 152 | data_tok = time.time() 153 | 154 | tik = time.time() 155 | loss_list = train_func( 156 | image=mge.tensor(mini_batch["data"]), 157 | im_info=mge.tensor(mini_batch["im_info"]), 158 | gt_boxes=mge.tensor(mini_batch["gt_boxes"]) 159 | ) 160 | tok = time.time() 161 | 162 | time_meter.update([tok - tik, data_tok - data_tik]) 163 | 164 | if dist.get_rank() == 0: 165 | info_str = "e%d, %d/%d, lr:%f, " 166 | loss_str = ", ".join( 167 | ["{}:%f".format(loss) for loss in model.cfg.losses_keys] 168 | ) 169 | time_str = ", train_time:%.3fs, data_time:%.3fs" 170 | log_info_str = info_str + loss_str + time_str 171 | meter.update([loss.numpy() for loss in loss_list]) 172 | if step % log_interval == 0: 173 | logger.info( 174 | log_info_str, 175 | epoch, 176 | step, 177 | tot_step, 178 | opt.param_groups[0]["lr"], 179 | *meter.average(), 180 | *time_meter.average() 181 | ) 182 | meter.reset() 183 | time_meter.reset() 184 | 185 | 186 | def adjust_learning_rate(optimizer, epoch, step, cfg, args): 187 | base_lr = ( 188 | cfg.basic_lr * dist.get_world_size() * args.batch_size * ( 189 | cfg.lr_decay_rate 190 | ** bisect.bisect_right(cfg.lr_decay_stages, epoch) 191 | ) 192 | ) 193 | # print('UPDATE LR:', base_lr) 194 | # Warm up 195 | lr_factor = 1.0 196 | if epoch == 0 and step < cfg.warm_iters: 197 | lr_factor = (step + 1.0) / cfg.warm_iters 198 | for param_group in optimizer.param_groups: 199 | param_group["lr"] = base_lr * lr_factor 200 | 201 | 202 | def build_dataset(dataset_dir, cfg): 203 | data_cfg = copy.deepcopy(cfg.train_dataset) 204 | data_name = data_cfg.pop("name") 205 | 206 | data_cfg["root"] = os.path.join(dataset_dir, data_name, data_cfg["root"]) 207 | 208 | if "ann_file" in data_cfg: 209 | data_cfg["ann_file"] = os.path.join(dataset_dir, data_name, data_cfg["ann_file"]) 210 | 211 | data_cfg["order"] = ["image", "boxes", "boxes_category", "info"] 212 | 213 | return data_mapper[data_name](**data_cfg) 214 | 215 | 216 | # pylint: disable=dangerous-default-value 217 | def build_sampler(train_dataset, batch_size, aspect_grouping=[1]): 218 | def _compute_aspect_ratios(dataset): 219 | aspect_ratios = [] 220 | for i in range(len(dataset)): 221 | info = dataset.get_img_info(i) 222 | aspect_ratios.append(info["height"] / info["width"]) 223 | return aspect_ratios 224 | 225 | def _quantize(x, bins): 226 | return list(map(lambda y: bisect.bisect_right(sorted(bins), y), x)) 227 | 228 | if len(aspect_grouping) == 0: 229 | return Infinite(RandomSampler(train_dataset, 
batch_size, drop_last=True)) 230 | 231 | aspect_ratios = _compute_aspect_ratios(train_dataset) 232 | group_ids = _quantize(aspect_ratios, aspect_grouping) 233 | return Infinite(GroupedRandomSampler(train_dataset, batch_size, group_ids)) 234 | 235 | 236 | def build_dataloader(batch_size, dataset_dir, cfg): 237 | train_dataset = build_dataset(dataset_dir, cfg) 238 | train_sampler = build_sampler(train_dataset, batch_size) 239 | train_dataloader = DataLoader( 240 | train_dataset, 241 | sampler=train_sampler, 242 | transform=T.Compose( 243 | transforms=[ 244 | T.ShortestEdgeResize( 245 | cfg.train_image_short_size, 246 | cfg.train_image_max_size, 247 | sample_style="choice", 248 | ), 249 | T.RandomHorizontalFlip(), 250 | T.ToMode(), 251 | ], 252 | order=["image", "boxes", "boxes_category"], 253 | ), 254 | collator=DetectionPadCollator(), 255 | num_workers=2, 256 | ) 257 | return train_dataloader 258 | 259 | 260 | if __name__ == "__main__": 261 | main() 262 | -------------------------------------------------------------------------------- /pytorch_release/README.md: -------------------------------------------------------------------------------- 1 | # Instance-Conditional Knowledge Distillation for Object Detection 2 | This is an official implementation of the paper "Instance-Conditional Knowledge Distillation for Object Detection" in [Pytorch](https://pytorch.org); it supports various detectors from Detectron2 and AdelaiDet. 3 | 4 | 5 | # Requirements 6 | The project depends on the following libraries. You may need to install Detectron2 and AdelaiDet manually; please refer to their GitHub pages. 7 | - Python3 (3.8 recommended) 8 | - pytorch == 1.9.0 9 | - torchvision == 0.10.0 10 | - opencv-python == 4.5.4.58 11 | - [Detectron2](https://github.com/facebookresearch/detectron2) == 0.5.0 12 | - [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) == 7bf9d87 13 | 14 | (To avoid conflicts, we recommend using exactly the versions listed above.) 15 | 16 | Reference command for installation: 17 | ``` 18 | # Switch to this directory (and maybe create a virtual environment) 19 | pip install pip --upgrade 20 | pip install -r requirements.txt 21 | pip install https://github.com/facebookresearch/detectron2/archive/refs/tags/v0.5.tar.gz 22 | pip install 'git+https://github.com/aim-uofa/AdelaiDet.git@7bf9d87' 23 | ``` 24 | 25 | You will also need to prepare datasets according to [detectron2](https://github.com/facebookresearch/detectron2/tree/main/datasets), put your data under the following structure, and set the environment variable by `export DETECTRON2_DATASETS=/path/to/datasets`. 26 | ``` 27 | $DETECTRON2_DATASETS/ 28 | coco/ 29 | annotations/ 30 | instances_{train,val}2017.json 31 | {train,val}2017/ 32 | # image files 33 | ``` 34 | 35 | # Usage 36 | ## Train baseline models 37 | We use [train_baseline.py](./train_baseline.py) to train baseline models; it is very similar to [tools/train_net.py](https://github.com/facebookresearch/detectron2/blob/main/tools/train_net.py). 38 | 39 | You can use any config file for detectron2 or AdelaiDet to specify a training setting. 40 | ``` 41 | usage: train_baseline.py [-h] [--config-file FILE] [--resume] [--eval-only] 42 | [--num-gpus NUM_GPUS] [--num-machines NUM_MACHINES] 43 | [--machine-rank MACHINE_RANK] [--dist-url DIST_URL] 44 | ... 45 | 46 | positional arguments: 47 | opts Modify config options at the end of the command. For 48 | Yacs configs, use space-separated "PATH.KEY VALUE" 49 | pairs. For python-based LazyConfig, use 50 | "path.key=value". 
51 | 52 | optional arguments: 53 | -h, --help show this help message and exit 54 | --config-file FILE path to config file 55 | --resume Whether to attempt to resume from the checkpoint 56 | directory. See documentation of 57 | `DefaultTrainer.resume_or_load()` for what it means. 58 | --eval-only perform evaluation only 59 | --num-gpus NUM_GPUS number of gpus *per machine* 60 | --num-machines NUM_MACHINES 61 | total number of machines 62 | --machine-rank MACHINE_RANK 63 | the rank of this machine (unique per machine) 64 | --dist-url DIST_URL initialization URL for pytorch distributed backend. 65 | See https://pytorch.org/docs/stable/distributed.html 66 | for details. 67 | ``` 68 | ### Examples: 69 | 70 | Train a RetinaNet baseline detector on a single machine: 71 | 72 | ``` 73 | train_baseline.py --num-gpus 8 --config-file configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml 74 | ``` 75 | 76 | Change some config options: 77 | 78 | ``` 79 | train_baseline.py --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 80 | ``` 81 | 82 | Run on multiple machines: 83 | ``` 84 | (machine0)$ train_baseline.py --machine-rank 0 --num-machines 2 --dist-url [--other-flags] 85 | (machine1)$ train_baseline.py --machine-rank 1 --num-machines 2 --dist-url [--other-flags] 86 | ``` 87 | 88 | ## Train and distill models 89 | We leave everything the same as above, except the entry point ([train_distill.py](./train_distill.py)) and the config. 90 | 91 | ### Examples: 92 | 93 | Train RetinaNet with distillation: 94 | 95 | ``` 96 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_retinanet 97 | ``` 98 | 99 | Train Faster R-CNN with distillation: 100 | 101 | ``` 102 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/RCNN_R_50_R101_icd_FPN_1x.yaml OUTPUT_DIR output/icd_frcnn 103 | ``` 104 | 105 | Train CondInst with distillation: 106 | 107 | ``` 108 | python3 train_distill.py --num-gpus 8 --resume --config-file configs/Distillation-ICD/CondInst_R50_R101_icd.yaml OUTPUT_DIR output/icd_condinst 109 | ``` 110 | 111 | ### Write distillation configs: 112 | To show how to write a config for distillation, let's look at two examples: 113 | 114 | **If the teacher model is officially released by detectron2:** 115 | 116 | You can load the checkpoint through the detectron2 model_zoo API: set `MODEL_LOAD_OFFICIAL=True` and use the corresponding config file. You may also set `WEIGHT_VALUE` to the desired number. 117 | 118 | ``` 119 | MODEL: 120 | DISTILLER: 121 | MODEL_LOAD_OFFICIAL: True 122 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/retinanet_R_101_FPN_3x.yaml' 123 | 124 | INS_ATT_MIMIC: 125 | WEIGHT_VALUE: 8.0 126 | ``` 127 | 128 | Note: it also supports configs from the detectron2 new baselines, like [LSJ (large scale jitter) models](https://github.com/facebookresearch/detectron2/blob/main/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py), which could be helpful in practice. 129 | 130 | 131 | **If you want to use a standalone teacher trained by yourself:** 132 | 133 | If you train a teacher yourself, you may need to define a standalone config for the teacher. Set `MODEL_LOAD_OFFICIAL=False` and use a standalone config file. 
134 | 135 | ``` 136 | MODEL: 137 | DISTILLER: 138 | MODEL_LOAD_OFFICIAL: False 139 | MODEL_DISTILLER_CONFIG: 'Teachers/SOLOv2_R101_3x_ms.yaml' 140 | 141 | INS_ATT_MIMIC: 142 | WEIGHT_VALUE: 8.0 143 | ``` 144 | 145 | For the teacher's config, simply set the pretrained weights to a checkpoint file: 146 | ``` 147 | _BASE_: "../Base-SOLOv2.yaml" 148 | MODEL: 149 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9w7b3sjaXvqYQEQ" 150 | # This is the official release from AdelaiDet. 151 | RESNETS: 152 | DEPTH: 101 153 | ``` 154 | 155 | You can find more options in [utils/build.py](utils/build.py). 156 | 157 | # Results 158 | For object detection in MS-COCO: 159 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | 160 | | --- | :---: | :---: | 161 | | Faster R-CNN | 37.9 | 40.9 (+3.0) | 162 | | RetinaNet | 37.4 | 40.7 (+3.3) | 163 | | FCOS | 39.4 | 42.9 (+3.5) | 164 | 165 | For instance segmentation in MS-COCO: 166 | | Model | Baseline (BoxAP) | + Ours (BoxAP) | Baseline (MaskAP) | + Ours (MaskAP) | 167 | | --- | :---: | :---: | :---: | :---: | 168 | | Mask R-CNN | 38.6 | 41.2 (+2.6) | 35.2 | 37.4 (+2.2) | 169 | | SOLOv2 | - | - | 34.6 | 38.5 (+3.9) | 170 | | CondInst | 39.7 | 43.7 (+4.0) | 35.7 | 39.1 (+3.4) | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-CondInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CondInst" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | FCOS: 13 | THRESH_WITH_CTR: True 14 | USE_SCALE: True 15 | CONDINST: 16 | MAX_PROPOSALS: 500 17 | DATASETS: 18 | TRAIN: ("coco_2017_train",) 19 | TEST: ("coco_2017_val",) 20 | SOLVER: 21 | IMS_PER_BATCH: 16 22 | BASE_LR: 0.01 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | INPUT: 26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -------------------------------------------------------------------------------- /pytorch_release/configs/Base-FCOS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "FCOS" 11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -------------------------------------------------------------------------------- /pytorch_release/configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 19 | -------------------------------------------------------------------------------- 
/pytorch_release/configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each input feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all input feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | CHECKPOINT_PERIOD: 10000 41 | TEST: 42 | EVAL_PERIOD: 10000 43 | INPUT: 44 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 45 | VERSION: 2 46 | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | CHECKPOINT_PERIOD: 10000 24 | INPUT: 25 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 26 | VERSION: 2 27 | TEST: 28 | EVAL_PERIOD: 10000 29 | -------------------------------------------------------------------------------- /pytorch_release/configs/Base-SOLOv2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SOLOv2" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | TEST: ("coco_2017_val",) 13 | SOLVER: 14 | IMS_PER_BATCH: 16 15 | BASE_LR: 0.01 16 | WARMUP_FACTOR: 0.01 17 | WARMUP_ITERS: 1000 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | MASK_FORMAT: "bitmask" 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/FCOS_R_101_DCN_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: FCOSBase 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 5 | RESNETS: 6 | DEPTH: 101 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] 8 | DEFORM_MODULATED: True 9 | SOLVER: 10 | STEPS: (120000, 160000) 11 | MAX_ITER: 180000 12 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/FCOS_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: FCOS 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (60000, 80000) 9 | MAX_ITER: 90000 10 | 
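A side note on Base-RetinaNet.yaml above: its `ANCHOR_GENERATOR.SIZES` entry is a YAML `eval` tag rather than a literal list. A quick sketch of the concrete anchor sizes it expands to (three scales per octave, one octave per FPN level):

```
# Expansion of the !!python/object/apply:eval expression in Base-RetinaNet.yaml.
sizes = [[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]]
for row in sizes:
    print([round(s, 1) for s in row])
# [32, 40.3, 50.8]
# [64, 80.6, 101.6]
# [128, 161.3, 203.2]
# [256, 322.5, 406.4]
# [512, 645.1, 812.7]
```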
-------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/FCOS_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: FCOS 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (120000, 160000) 9 | MAX_ITER: 180000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/POTO_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: POTO 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | FCOS: 8 | NMS_THRESH_TEST: 1.0 9 | NMS_TYPE: 'null' 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_152_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-152.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 152 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x_bs8.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | LABEL_ENC: 8 | BYPASS_DISTILL: 80000 9 | 10 | SOLVER: 11 | IMS_PER_BATCH: 8 12 | BASE_LR: 0.01 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (120000, 160000) 9 | MAX_ITER: 180000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_152_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-152.pkl" 4 | RESNETS: 5 | DEPTH: 152 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_1x_bs8.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | 7 | LABEL_ENC: 8 | BYPASS_DISTILL: 80000 9 | 10 | SOLVER: 11 | IMS_PER_BATCH: 8 12 | BASE_LR: 0.005 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/retinanet_X101_32x8d_FPN_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.train import train 2 | from ..common.optim import SGD as optimizer 3 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 4 | from ..common.data.coco import dataloader 5 | from ..common.models.mask_rcnn_c4 import model 6 | 7 | model.backbone.freeze_at = 2 8 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | model.backbone.bottom_up.freeze_at = 2 8 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | RPN: 8 | BBOX_REG_LOSS_TYPE: "giou" 9 | BBOX_REG_LOSS_WEIGHT: 2.0 10 | ROI_BOX_HEAD: 11 | BBOX_REG_LOSS_TYPE: "giou" 12 | BBOX_REG_LOSS_WEIGHT: 10.0 13 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | 
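The `mask_rcnn_R_50_FPN_1x_giou.yaml` variant above swaps the default smooth-L1 box regression for a generalized-IoU loss via `BBOX_REG_LOSS_TYPE: "giou"`. A minimal reference sketch of that loss for a single pair of `[x1, y1, x2, y2]` boxes (detectron2 ships its own batched implementation; this standalone version is for illustration only and assumes non-degenerate boxes):

```
def giou_loss(a, b):
    # Generalized IoU loss, 1 - GIoU, for two boxes in [x1, y1, x2, y2] form.
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    iou = inter / union
    # Area of the smallest axis-aligned box enclosing both inputs.
    cw = max(ax2, bx2) - min(ax1, bx1)
    ch = max(ay2, by2) - min(ay1, by1)
    giou = iou - (cw * ch - union) / (cw * ch)
    return 1.0 - giou  # ranges over [0, 2]
```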
-------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | 12 | # Replace default ResNet with RegNetX-4GF from the DDS paper. Config source: 13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=23, 19 | w_a=38.65, 20 | w_0=96, 21 | w_m=2.43, 22 | group_width=40, 23 | freeze_at=2, 24 | norm="FrozenBN", 25 | out_features=["s1", "s2", "s3", "s4"], 26 | ) 27 | model.pixel_std = [57.375, 57.120, 58.395] 28 | 29 | optimizer.weight_decay = 5e-5 30 | train.init_checkpoint = ( 31 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth" 32 | ) 33 | # RegNets benefit from enabling cudnn benchmark mode 34 | train.cudnn_benchmark = True 35 | -------------------------------------------------------------------------------- /pytorch_release/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco import dataloader 4 | from ..common.models.mask_rcnn_fpn import model 5 | from ..common.train import train 6 | 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | 12 | # Replace default ResNet with RegNetY-4GF from the DDS paper. 
Config source: 13 | # https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=22, 19 | w_a=31.41, 20 | w_0=96, 21 | w_m=2.24, 22 | group_width=64, 23 | se_ratio=0.25, 24 | freeze_at=2, 25 | norm="FrozenBN", 26 | out_features=["s1", "s2", "s3", "s4"], 27 | ) 28 | model.pixel_std = [57.375, 57.120, 58.395] 29 | 30 | optimizer.weight_decay = 5e-5 31 | train.init_checkpoint = ( 32 | "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth" 33 | ) 34 | # RegNets benefit from enabling cudnn benchmark mode 35 | train.cudnn_benchmark = True 36 | -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/CondInst_R50_R101_icd.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: False 8 | MODEL_DISTILLER_CONFIG: 'Teachers/CondIns_R101_3x_ms.yaml' 9 | 10 | INS_ATT_MIMIC: 11 | WEIGHT_VALUE: 8.0 12 | 13 | SOLVER: 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/FCOS_R50_R101_icd.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: False 8 | MODEL_DISTILLER_CONFIG: 'Teachers/FCOS_R101_2x_ms.yaml' 9 | # NOTE: FCOS only releases a 2x model; we use a 3x model trained by ourselves for the results reported in the paper.
10 | 11 | INS_ATT_MIMIC: 12 | WEIGHT_VALUE: 8.0 13 | 14 | SOLVER: 15 | STEPS: (60000, 80000) 16 | MAX_ITER: 90000 17 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/MaskRCNN_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DISTILLER: 8 | MODEL_LOAD_OFFICIAL: True 9 | MODEL_DISTILLER_CONFIG: 'COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml' 10 | 11 | INS: 12 | INPUT_FEATS: ['p2', 'p3', 'p4', 'p5', 'p6'] 13 | MAX_LABELS: 100 14 | 15 | INS_ATT_MIMIC: 16 | WEIGHT_VALUE: 3.0 17 | 18 | SOLVER: 19 | STEPS: (60000, 80000) 20 | MAX_ITER: 90000 21 | CLIP_GRADIENTS: {"ENABLED": True} 22 | -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/RCNN_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: True 8 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml' 9 | 10 | INS: 11 | INPUT_FEATS: ['p2', 'p3', 'p4', 'p5', 'p6'] 12 | MAX_LABELS: 100 13 | 14 | INS_ATT_MIMIC: 15 | WEIGHT_VALUE: 3.0 16 | 17 | SOLVER: 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | CLIP_GRADIENTS: {"ENABLED": True} 21 | -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/SOLOv2_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: False 8 | MODEL_DISTILLER_CONFIG: 'Teachers/SOLOv2_R101_3x_ms.yaml' 9 | 10 | INS_ATT_MIMIC: 11 | WEIGHT_VALUE: 8.0 12 | 13 | SOLVER: 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Distillation-ICD/retinanet_R_50_R101_icd_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DISTILLER: 7 | MODEL_LOAD_OFFICIAL: True 8 | MODEL_DISTILLER_CONFIG: 'COCO-Detection/retinanet_R_101_FPN_3x.yaml' 9 | 10 | INS_ATT_MIMIC: 11 | WEIGHT_VALUE: 8.0 12 | 13 | SOLVER: 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | CLIP_GRADIENTS: {"ENABLED": True} -------------------------------------------------------------------------------- /pytorch_release/configs/Teachers/CondIns_R101_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/M8nNxSR5iNP4qyO/download" 4 | RESNETS: 5 | DEPTH: 101 6 | 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/Teachers/FCOS_R101_2x_ms.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
"../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/M3UOT6JcyHy2QW1/download" 4 | RESNETS: 5 | DEPTH: 101 6 | 7 | -------------------------------------------------------------------------------- /pytorch_release/configs/Teachers/SOLOv2_R101_3x_ms.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9w7b3sjaXvqYQEQ" 4 | RESNETS: 5 | DEPTH: 101 6 | 7 | -------------------------------------------------------------------------------- /pytorch_release/models/distiller.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | import torch.nn.functional as F 6 | from .utils import * 7 | 8 | 9 | from detectron2.utils.registry import Registry 10 | 11 | DISTILLER_REGISTRY = Registry("DISTILLER") # noqa F401 isort:skip 12 | DISTILLER_REGISTRY.__doc__ = """ 13 | Registry for meta-architectures, i.e. the whole model. 14 | 15 | The registered object will be called with `obj(cfg)` 16 | and expected to return a `nn.Module` object. 17 | """ 18 | 19 | 20 | def build_distiller(cfg, name, student, teacher): 21 | """ 22 | Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. 23 | Note that it does not load any weights from ``cfg``. 24 | """ 25 | model = DISTILLER_REGISTRY.get(name)(cfg, student, teacher) 26 | model.to(torch.device(cfg.MODEL.DEVICE)) 27 | return model 28 | 29 | 30 | @DISTILLER_REGISTRY.register() 31 | class InstanceConditionalDistillation(nn.Module): 32 | """ 33 | Distillation with multi-head attention. Mimic attention and features. 34 | """ 35 | 36 | def __init__(self, cfg, student, teacher) -> None: 37 | super().__init__() 38 | self.cfg = cfg 39 | self.student = [student] 40 | 41 | self.cfg = cfg 42 | hidden_dim = cfg.MODEL.DISTILLER.INS.HIDDEN_DIM 43 | 44 | self.pos_embedding = PositionEmbeddingSine( 45 | hidden_dim // 2, normalize=True) 46 | 47 | self.teacher_ptr = [teacher] 48 | self.attention_module = build_decoder_module( 49 | cfg) 50 | 51 | self.feat_keys = cfg.MODEL.DISTILLER.INS.INPUT_FEATS 52 | 53 | self.weight_value = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.WEIGHT_VALUE 54 | self.temp_value = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_VALUE 55 | if self.temp_value < 0: 56 | self.temp_value = nn.Parameter(torch.ones([1]).mean()) 57 | 58 | self.distill_norm_type = cfg.MODEL.DISTILLER.INS.DISTILL_NORM 59 | 60 | self.distill_negative = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.DISTILL_NEGATIVE 61 | self.use_pos_embds = cfg.MODEL.DISTILLER.INS.USE_POS_EMBEDDING 62 | 63 | self.predictor = MLP(hidden_dim, hidden_dim, 1, 3) 64 | 65 | if self.distill_norm_type == 'ln': 66 | self.distill_norm_ = nn.LayerNorm( 67 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False) 68 | self.distill_norm_tea = nn.LayerNorm( 69 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False) 70 | elif self.distill_norm_type == 'tln': 71 | self.distill_norm_ = nn.Sequential() 72 | self.distill_norm_tea = nn.LayerNorm( 73 | [hidden_dim // cfg.MODEL.DISTILLER.INS.ATT_HEADS], elementwise_affine=False) 74 | else: 75 | self.distill_norm_ = nn.Sequential() 76 | self.distill_norm_tea = nn.Sequential() 77 | 78 | self.loss_form = cfg.MODEL.DISTILLER.INS_ATT_MIMIC.LOSS_FORM 79 | 80 | def concate_multiscale_reps(self, feat, pos_emb, mask): 81 | # permute and concate features form multiscale to a 
tensor in the transformer layout 82 | keys = self.feat_keys 83 | 84 | feat = torch.cat([feat[k].flatten(2).permute(2, 0, 1) 85 | for k in keys], 0)  # S, N, C 86 | pos_emb = torch.cat([pos_emb[k].flatten(2).permute( 87 | 2, 0, 1) for k in keys], 0)  # S, N, C 88 | mask = torch.cat([mask[k].flatten(2).squeeze(1) 89 | for k in keys], 1)  # N, S 90 | return feat, pos_emb, mask 91 | 92 | def bce_identification_loss(self, feat_list, ins_mask, ins_mask_gt): 93 | # identification loss: predicts whether a given instance is real or fake 94 | positive_mask = (~ins_mask).float() 95 | 96 | loss_dict = {} 97 | for i, dfeat in enumerate(feat_list): 98 | f_pre = self.predictor(dfeat) 99 | 100 | loss = (F.binary_cross_entropy_with_logits(f_pre.squeeze(-1).T, ins_mask_gt, reduction='none') * 101 | positive_mask).sum() / positive_mask.sum() 102 | 103 | loss_dict['stu_bce.%s.loss' % i] = loss 104 | 105 | return loss_dict 106 | 107 | def mimic_loss(self, svalue, tvalue, value_mask): 108 | # value: num_seq, bsz, heads, channel 109 | # mask: [bsz, heads, 1, Seq] 110 | #value_mask = value_mask ** self.power_factor 111 | if self.loss_form in ['mse', 'MSE']: 112 | return ((F.mse_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 113 | * value_mask).sum(-1) / value_mask.sum(-1).clamp(min=1e-6)).mean() 114 | elif self.loss_form in ['l1', 'L1']: 115 | return (F.l1_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 116 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6) 117 | elif self.loss_form in ['smoothL1']: 118 | return (F.smooth_l1_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 119 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6) 120 | elif self.loss_form in ['L2', 'l2']: 121 | return ((F.mse_loss(svalue, tvalue, reduction='none').permute(1, 2, 3, 0) 122 | * value_mask).mean(2).sum() / value_mask.sum().clamp(min=1e-6)) ** 0.5 123 | 124 | def forward(self, features_dict, features_dict_tea): 125 | if isinstance(self.temp_value, nn.Parameter): 126 | self.temp_value.data = self.temp_value.data.clamp(min=0.1, max=8) 127 | else: 128 | if self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_DECAY: 129 | decay_to = self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_DECAY_TO 130 | ratio = features_dict['iteration'] / self.cfg.SOLVER.MAX_ITER 131 | self.temp_value = ratio * decay_to + \ 132 | (1 - ratio) * self.cfg.MODEL.DISTILLER.INS_ATT_MIMIC.TEMP_VALUE 133 | 134 | images = features_dict['images'] 135 | batched_inputs = features_dict['batched_inputs'] 136 | fpn_outputs = features_dict['fpn_feat'] 137 | 138 | # assert set(self.feat_keys) == set(list(fpn_outputs.keys( 139 | # ))), 'WARNING: Unequal keys for fpn and attention !
<%s> != <%s>' % (self.feat_keys, fpn_outputs.keys()) 140 | 141 | if features_dict['distill_flag'] == 0: 142 | fpn_outputs = {k: v.detach() for k, v in fpn_outputs.items()} 143 | 144 | # mask_out: zero for foreground, one for bg: BoolTensor(N, 1, H, W) 145 | mask_out = mask_out_padding(fpn_outputs, images) 146 | 147 | # fpn_outputs = self.scale_adapter(fpn_outputs) 148 | pos_embs = {k: self.pos_embedding( 149 | fpn_outputs[k], mask_out[k]) for k in self.feat_keys} 150 | # feat, pos: [S, N, C]; mask: [N, S] 151 | feat, pos_embs, mask_padding = self.concate_multiscale_reps( 152 | fpn_outputs, pos_embs, mask_out) 153 | 154 | # instance encoding: [K, N, C], ins_mask: bool[N, K], instance_gt: (0-1)[N, K] 155 | # NOTE: (0 for Fake Instance) in ins_mask 156 | ins_feat, ins_mask, ins_mask_gt = features_dict_tea['aux_feat']['encoded_ins'] 157 | ins_feat = ins_feat.detach() 158 | 159 | if self.distill_negative: 160 | ins_mask_gt = (~ins_mask).detach().float() 161 | max_ele = None  # slice to the last element 162 | else: 163 | # calculate an element mask to reduce unnecessary computation 164 | max_ele = ins_mask_gt.long().sum(-1).max().item() 165 | 166 | # Note that mask is not normalized by softmax 167 | 168 | decoded_feat_list, att_mask_list, value_list = self.attention_module( 169 | ins_feat[:max_ele, :, :], feat, feat, query_mask=ins_mask[:, :max_ele], key_padding_mask=mask_padding, pos_embedding=pos_embs, proj_only=True) 170 | 171 | decoded_value_tea = features_dict_tea['aux_feat']['decoded_value'] 172 | decoded_mask_tea = features_dict_tea['aux_feat']['decoded_mask'] 173 | 174 | loss_value = torch.tensor([0.0], device=ins_mask_gt.device).mean() 175 | for i, (tmask, svalue, tvalue) in enumerate(zip(decoded_mask_tea, value_list, decoded_value_tea)): 176 | tmask = tmask.detach()  # bsz, heads, num_ins, num_seq 177 | 178 | # num_seq, bsz, heads, channel 179 | tvalue = self.distill_norm_tea(tvalue) 180 | tvalue = tvalue.detach() 181 | 182 | if self.weight_value > 0: 183 | with torch.no_grad(): 184 | value_mask = ((tmask / self.temp_value).softmax(-1) * 185 | ins_mask_gt.unsqueeze(1).unsqueeze(-1)).sum(2, keepdim=True) 186 | # [bsz, heads, ins, Seq] 187 | 188 | svalue = self.distill_norm_(svalue) 189 | loss_value += self.mimic_loss(svalue, 190 | tvalue, value_mask) * self.weight_value 191 | 192 | loss_dict = { 193 | 'matt.value': loss_value / len(decoded_feat_list), 194 | } 195 | 196 | if isinstance(self.temp_value, nn.Parameter): 197 | loss_dict['temp.value'] = self.temp_value.detach() 198 | 199 | return loss_dict 200 | -------------------------------------------------------------------------------- /pytorch_release/models/models.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | import torch 3 | from torch import nn 4 | 5 | from .utils import * 6 | from .distiller import build_distiller 7 | from .teacher import build_teacher 8 | 9 | from detectron2.utils.events import EventWriter, get_event_storage 10 | 11 | 12 | class Distillator(nn.Module): 13 | def __init__(self, cfg, student) -> None: 14 | super().__init__() 15 | self.cfg = cfg 16 | self.student_buffer = [student]  # as a pointer, not a registered submodule 17 | 18 | self.teacher = build_teacher(cfg, student) 19 | 20 | distillers = [] 21 | for dis_name in cfg.MODEL.DISTILLER.TYPES: 22 | distillers.append(build_distiller( 23 | cfg, dis_name, student, self.teacher)) 24 | 25 | self.distillers = nn.ModuleList(distillers) 26 | 27 | self.register_buffer(
28 | "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) 29 | self.register_buffer( 30 | "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) 31 | 32 | def forward(self, raw_output, forward_only=False, teacher_only=False): 33 | ''' 34 | Input: 35 | batched_inputs, images, r_features, features, gts 36 | Output: 37 | losses_tea : loss dict 38 | r_features_tea : features from backbone 39 | features_tea : features from FPN 40 | ''' 41 | if teacher_only: 42 | loss_dict, _ = self.teacher(raw_output, None, None, None) 43 | return loss_dict 44 | 45 | r_feats = raw_output['backbone_feat'] 46 | fpn_feats = raw_output['fpn_feat'] 47 | batched_inputs = raw_output['batched_inputs'] 48 | images = raw_output['images'] 49 | iteration = raw_output['iteration'] 50 | 51 | if iteration < self.cfg.MODEL.DISTILLER.BYPASS_DISTILL or iteration > self.cfg.MODEL.DISTILLER.BYPASS_DISTILL_AFTER: 52 | distill_flag = self.cfg.MODEL.DISTILLER.DISTILL_OFF 53 | else: 54 | distill_flag = self.cfg.MODEL.DISTILLER.DISTILL_ON 55 | 56 | raw_output['distill_flag'] = distill_flag 57 | 58 | storage = get_event_storage() 59 | storage.put_scalar('distill_flag', distill_flag, False) 60 | 61 | if forward_only: 62 | with torch.no_grad(): 63 | loss_dict, feat_dict_tea = self.teacher( 64 | batched_inputs, images, r_feats, fpn_feats) 65 | else: 66 | loss_dict, feat_dict_tea = self.teacher( 67 | batched_inputs, images, r_feats, fpn_feats) 68 | 69 | for i, distiller in enumerate(self.distillers): 70 | loss_d = distiller(raw_output, feat_dict_tea) 71 | loss_d = {'distill.%s.%s' % (i, k): v for k, v in loss_d.items()} 72 | loss_dict.update(loss_d) 73 | 74 | return loss_dict 75 | -------------------------------------------------------------------------------- /pytorch_release/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.9.0 2 | torchvision==0.10.0 3 | opencv-python==4.5.4.58 --------------------------------------------------------------------------------