├── .gitignore ├── README.md ├── assets ├── demo.gif └── pipeline.png ├── configs ├── Base-dance.yaml ├── Base-dsnake.yaml ├── Dance_R_101_3x.yaml ├── Dance_R_50_3x.yaml └── Dsnake_R_50_1x.yaml ├── core ├── __init__.py ├── config │ ├── __init__.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── builtin.py │ └── datasets │ │ ├── __init__.py │ │ └── register_coco_edge.py ├── evaluation │ ├── __init__.py │ ├── coco_evaluation.py │ ├── cocoeval.py │ ├── edge_map_evaluation.py │ └── evaluator.py ├── layers │ ├── __init__.py │ ├── deform_conv.py │ ├── extreme_utils │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── src │ │ │ ├── cuda_common.h │ │ │ ├── nms.cu │ │ │ ├── nms.h │ │ │ └── utils.cu │ │ ├── utils.cpp │ │ └── utils.h │ ├── losses.py │ └── ml_nms.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── dla.py │ │ ├── fpn.py │ │ ├── mobilenet.py │ │ └── vovnet.py │ ├── dsnake_baseline │ │ ├── __init__.py │ │ ├── af_two_stage.py │ │ ├── dsnake_head.py │ │ └── postprocessing.py │ ├── edge_snake │ │ ├── __init__.py │ │ ├── dance.py │ │ ├── draft.py │ │ ├── edge_det.py │ │ └── snake_head.py │ ├── fcos │ │ ├── __init__.py │ │ ├── fcos.py │ │ └── fcos_outputs.py │ ├── fcose │ │ ├── __init__.py │ │ ├── deeplab_resnet.py │ │ ├── dextr.py │ │ ├── dextr_eval.py │ │ ├── dextr_helper.py │ │ ├── extreme_detector.py │ │ ├── fcose.py │ │ ├── fcose_outputs.py │ │ └── utils.py │ ├── one_stage_detector.py │ ├── poolers.py │ └── postprocessing.py ├── structures │ ├── __init__.py │ ├── points_set.py │ └── pointset.py └── utils │ ├── __init__.py │ ├── comm.py │ ├── timer.py │ └── visualizer.py ├── datasets ├── __init__.py ├── prepare_edge_map.py └── prepare_edge_map_cityscapes.py ├── output └── .gitignore ├── requirements.txt ├── setup.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Customized 2 | sync*.sh 3 | .vscode 4 | .idea 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | # pytype static type analyzer 140 | .pytype/ 141 | 142 | # Cython debug symbols 143 | cython_debug/ 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | If you are interested in ML model serving, feel free to have a look at another project of mine, a [model serving framework](https://github.com/mosecorg/mosec)! 2 | 3 | --- 4 | # dance 5 | 6 | A Deep Attentive Contour Model for Efficient Instance Segmentation ([PDF](https://openaccess.thecvf.com/content/WACV2021/html/Liu_DANCE_A_Deep_Attentive_Contour_Model_for_Efficient_Instance_Segmentation_WACV_2021_paper.html)) 7 | 8 | | ![](./assets/pipeline.png) | ![](assets/demo.gif) | 9 | | :------------------------: | :------------------: | 10 | | *DANCE's Pipeline* | *Illustration* | 11 | 12 | ### *note* 13 | The code in the `master` branch is mainly for the experiments on COCO; for the experiments on SBD / Cityscapes, please check out the `snake` branch, which is developed on top of [the codebase of previous art](https://github.com/zju3dv/snake). 14 | 15 | ## Get started 16 | 1. 
Prepare the environment (the scripts are just examples) 17 | - gcc & g++ ≥ 5 18 | - Python 3.6.8 (developed & tested on this version) 19 | - `conda create --name dance python==3.6.8` 20 | - `conda deactivate && conda activate dance` 21 | - PyTorch 1.4 with CUDA 10.1 22 | - `conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.1 -c pytorch` 23 | 2. Clone this project and install framework / package dependencies 24 | - clone dance and install dependencies: `git clone https://github.com/lkevinzc/dance && cd dance && pip install -r requirements.txt && cd ..` 25 | - clone Detectron2 and install v0.1: `git clone https://github.com/facebookresearch/detectron2.git && cd detectron2 && git checkout 1a7daee064eeca2d7fddce4ba74b74183ba1d4a0 && python -m pip install -e . && cd ..` 26 | - install cpp utils: `cd dance/core/layers/extreme_utils && export CUDA_HOME="/usr/local/cuda-10.1" && python setup.py build_ext --inplace` 27 | - install pycocotools: `pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` 28 | - fix the `fvcore` version: `pip install fvcore==0.1.1.dev200512` 29 | 3. Prepare the dataset 30 | - Download from the [COCO official website](https://cocodataset.org/#download) 31 | - put it at `dance/datasets/coco` 32 | 4. Download pre-trained models (metrics on COCO test-dev) 33 | 34 | | model name | AP | AP50 | AP75 | weights | 35 | | :-----------: | :---: | :---: | :---: | :----------------------------------------------------------------------------------------: | 36 | | dance_r50_3x | 36.8 | 58.5 | 39.0 | [link](https://drive.google.com/file/d/1oh0ZkBgnYu6t4dlPNlfxEnhWruA87DIt/view?usp=sharing) | 37 | | dance_r101_3x | 38.1 | 60.2 | 40.5 | [link](https://drive.google.com/file/d/1H5eyu06qBpyw-We7CYEs4IxpdZvouJBo/view?usp=sharing) | 38 | 39 | *note*: put them under `output/` 40 | 41 | ## Evaluation 42 | ```bash 43 | python train_net.py --config-file configs/Dance_R_50_3x.yaml --eval-only MODEL.WEIGHTS ./output/r50_3x_model_final.pth 44 | 45 | python train_net.py --config-file configs/Dance_R_101_3x.yaml --eval-only MODEL.WEIGHTS ./output/r101_3x_model_final.pth 46 | ``` 47 | 48 | ## Discussion 49 | Any discussion or suggestion is welcome! 
Feel free to contact the author via `liuzichen@u.nus.edu` :) 50 | 51 | ## Citation 52 | If you find this project helpful for your research, please consider citing using BibTeX below: 53 | ```tex 54 | @InProceedings{liu2021dance, 55 | author = {Liu, Zichen and Liew, Jun Hao and Chen, Xiangyu and Feng, Jiashi}, 56 | title = {DANCE: A Deep Attentive Contour Model for Efficient Instance Segmentation}, 57 | booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)}, 58 | month = {January}, 59 | year = {2021}, 60 | pages = {345-354} 61 | } 62 | ``` 63 | 64 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/assets/demo.gif -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/assets/pipeline.png -------------------------------------------------------------------------------- /configs/Base-dance.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Dance" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOSE" 12 | DATASETS: 13 | TRAIN: ("coco_2017_train_edge",) 14 | TEST: ("coco_2017_val_edge",) 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # 2 GPUs or 4 GPUs 17 | BASE_LR: 0.005 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 20 | CHECKPOINT_PERIOD: 20000 21 | INPUT: 22 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /configs/Base-dsnake.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "FcosSnake" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # 2 GPUs 17 | BASE_LR: 0.005 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 20 | CHECKPOINT_PERIOD: 20000 21 | INPUT: 22 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /configs/Dance_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DANCE.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | EDGE_HEAD: 7 | NAME: "EdgeSnakeFPNHead" 8 | CONVS_DIM: 256 9 | STRONG_FEAT: True 10 | IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] 11 | SNAKE_HEAD: 12 | NUM_SAMPLING: 196 13 | DETACH: True 14 | INITIAL: 'box' 15 | NEW_MATCHING: True 16 | ATTENTION: True 17 | INDIVIDUAL_SCALE: True 18 | SOLVER: 19 | IMS_PER_BATCH: 6 # 2 GPUs 20 | BASE_LR: 0.00375 
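# Added note (assumption, not in the original config): 0.00375 = 0.005 * (6 / 8), i.e. the base config's LR appears to be scaled linearly with the reduced IMS_PER_BATCH (linear scaling rule).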
21 | STEPS: (660000, 700000) 22 | MAX_ITER: 720000 23 | CHECKPOINT_PERIOD: 5000 24 | OUTPUT_DIR: "output/coco/dance_r101_3x/" 25 | 26 | #DATASETS: 27 | # TEST: ("coco_2017_test-dev",) 28 | -------------------------------------------------------------------------------- /configs/Dance_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DANCE.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | EDGE_HEAD: 7 | NAME: "EdgeSnakeFPNHead" 8 | CONVS_DIM: 256 9 | STRONG_FEAT: True 10 | IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] 11 | SNAKE_HEAD: 12 | NUM_SAMPLING: 196 13 | DETACH: True 14 | INITIAL: 'box' 15 | NEW_MATCHING: True 16 | ATTENTION: True 17 | INDIVIDUAL_SCALE: True 18 | SOLVER: 19 | STEPS: (480000, 520000) 20 | MAX_ITER: 540000 21 | CHECKPOINT_PERIOD: 60000 22 | OUTPUT_DIR: "output/coco/dance_r50_3x/" 23 | 24 | #DATASETS: 25 | # TEST: ("coco_2017_test-dev",) 26 | -------------------------------------------------------------------------------- /configs/Dsnake_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-dsnake.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | EDGE_HEAD: 7 | CONVS_DIM: 256 8 | OUTPUT_DIR: "output/coco/dsnake/R_50_1x/" 9 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | from core import modeling -------------------------------------------------------------------------------- /core/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | 3 | __all__ = [ 4 | "get_cfg", 5 | ] 6 | -------------------------------------------------------------------------------- /core/config/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode 2 | 3 | 4 | def get_cfg() -> CfgNode: 5 | """ 6 | Get a copy of the default config. 7 | 8 | Returns: 9 | a detectron2 CfgNode instance. 10 | """ 11 | from .defaults import _C 12 | 13 | return _C.clone() 14 | -------------------------------------------------------------------------------- /core/config/defaults.py: -------------------------------------------------------------------------------- 1 | from detectron2.config.defaults import _C 2 | from detectron2.config import CfgNode as CN 3 | 4 | 5 | # ---------------------------------------------------------------------------- # 6 | # Additional Configs 7 | # ---------------------------------------------------------------------------- # 8 | _C.MODEL.MOBILENET = False 9 | _C.MODEL.USE_VOVNET = False 10 | 11 | # ---------------------------------------------------------------------------- # 12 | # MY CONFIG (ZC) 13 | # ---------------------------------------------------------------------------- # 14 | _C.MODEL.DANCE = CN() 15 | 16 | # Channeling the input for mask_pred used for model evaluation 17 | # Use NO to avoid error during evaluation when turn MASK_ON but no mask_pred output. 
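# (Added clarification, presumed semantics: 'BOX' uses the detected box as the mask, 'OCT_BIT' / 'OCT_RLE' rasterize the contour octagon as a bitmask or COCO RLE, 'MASK' outputs a dense mask, and 'NO' skips mask output entirely.)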
18 | _C.MODEL.DANCE.MASK_IN = "OCT_RLE" # {'BOX', 'OCT_BIT', 'OCT_RLE', 'MASK', 'NO'} 19 | _C.MODEL.DANCE.SEMANTIC_FILTER = False 20 | _C.MODEL.DANCE.SEMANTIC_FILTER_TH = 0.1 21 | _C.MODEL.DANCE.ROI_SIZE = 28 22 | 23 | 24 | _C.MODEL.DANCE.RE_COMP_BOX = False 25 | 26 | # ---------------------------------------------------------------------------- # 27 | # Deformable Convolution Head (ZC) 28 | # ---------------------------------------------------------------------------- # 29 | _C.MODEL.DEFORM_HEAD = CN() 30 | _C.MODEL.DEFORM_HEAD.ON = False 31 | _C.MODEL.DEFORM_HEAD.NUM_CONVS = 256 32 | _C.MODEL.DEFORM_HEAD.NORM = "GN" 33 | _C.MODEL.DEFORM_HEAD.USE_MODULATED = False 34 | 35 | # ---------------------------------------------------------------------------- # 36 | # Snake Head (ZC) 37 | # ---------------------------------------------------------------------------- # 38 | _C.MODEL.SNAKE_HEAD = CN() 39 | 40 | _C.MODEL.SNAKE_HEAD.DETACH = False 41 | 42 | 43 | _C.MODEL.SNAKE_HEAD.ORIGINAL = False 44 | 45 | _C.MODEL.SNAKE_HEAD.STRUCTURE = "sequential" # {"sequential", "parallel"}; 46 | 47 | # circular conv net / graph conv net 48 | _C.MODEL.SNAKE_HEAD.CONV_TYPE = "ccn" # {"ccn", "gcn"}; 49 | 50 | _C.MODEL.SNAKE_HEAD.FEAT_DIM = 128 51 | 52 | _C.MODEL.SNAKE_HEAD.NUM_ITER = (0, 0, 1) # correspond to the convs 53 | _C.MODEL.SNAKE_HEAD.NUM_CONVS = 2 54 | _C.MODEL.SNAKE_HEAD.STRONGER = False 55 | 56 | _C.MODEL.SNAKE_HEAD.MULTI_OFFSET = 1 57 | 58 | _C.MODEL.SNAKE_HEAD.SKIP = False 59 | _C.MODEL.SNAKE_HEAD.NUM_LAYER = (8, 8, 8) 60 | _C.MODEL.SNAKE_HEAD.CIR_DILATIONS = ( 61 | (1, 1, 1, 2, 2, 4, 4), 62 | (1, 1, 1, 2, 2, 4, 4), 63 | (1, 1, 1, 2, 2, 4, 4), 64 | ) # by default the first one is 1. 65 | 66 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_ON = False 67 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_NUM_LAYER = 5 68 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_CIR_DILATIONS = ( 69 | 1, 70 | 2, 71 | 2, 72 | 4, 73 | ) # by default the first one is 1. 
74 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_FEAT_DIM = 128 75 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_MIN_AREA = 5 * 5 76 | # _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_PERTURB = True 77 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_LOSS_WEIGHT = 1.0 78 | 79 | _C.MODEL.SNAKE_HEAD.PRE_OFFSET = False # first snake also predicts a global offset 80 | 81 | _C.MODEL.SNAKE_HEAD.USE_ASPP = False 82 | _C.MODEL.SNAKE_HEAD.ASPP_DIM = 64 83 | _C.MODEL.SNAKE_HEAD.ASPP_DILATIONS = (1, 6, 12, 18) 84 | 85 | _C.MODEL.SNAKE_HEAD.USE_PSP = False 86 | _C.MODEL.SNAKE_HEAD.PSP_SIZE = (1, 2, 3, 6) 87 | 88 | _C.MODEL.SNAKE_HEAD.LAST_UP_SAMPLE = False 89 | _C.MODEL.SNAKE_HEAD.UP_SAMPLE_RATE = 2 90 | _C.MODEL.SNAKE_HEAD.LAST_CHAMFER = False 91 | _C.MODEL.SNAKE_HEAD.LAST_CHAMFER_WEIGHT = 5.0 / 3 92 | _C.MODEL.SNAKE_HEAD.LAST_NEIGHBOR = False 93 | 94 | _C.MODEL.SNAKE_HEAD.TRACK_PATH = False 95 | 96 | _C.MODEL.SNAKE_HEAD.NEW_MATCHING = False 97 | 98 | _C.MODEL.SNAKE_HEAD.INITIAL = "octagon" # {"octagon", "box"}; 99 | _C.MODEL.SNAKE_HEAD.DE_LOC_TYPE = "derange" # {"derange", "demean"} 100 | _C.MODEL.SNAKE_HEAD.LOCAL_SPATIAL = False 101 | 102 | _C.MODEL.SNAKE_HEAD.INDIVIDUAL_SCALE = False 103 | 104 | _C.MODEL.SNAKE_HEAD.LOSS_TYPE = "smoothl1" # {"smoothl1", "chamfer"} 105 | _C.MODEL.SNAKE_HEAD.LOSS_ADAPTIVE = False 106 | _C.MODEL.SNAKE_HEAD.LOSS_SEPARATE_REFINE = False 107 | _C.MODEL.SNAKE_HEAD.LOSS_WEIGH = False 108 | _C.MODEL.SNAKE_HEAD.LOSS_DISTRIBUTION = (1.0 / 3, 1.0 / 3, 2.0 / 3) 109 | _C.MODEL.SNAKE_HEAD.LOSS_L1_BETA = 0.11 110 | _C.MODEL.SNAKE_HEAD.EDGE_IN = False 111 | _C.MODEL.SNAKE_HEAD.PRED_EDGE = False 112 | _C.MODEL.SNAKE_HEAD.EDGE_IN_SEPARATE = (False, False) 113 | _C.MODEL.SNAKE_HEAD.EDGE_POSITION = "before" # {"before", "after"} 114 | _C.MODEL.SNAKE_HEAD.DILATIONS = (1, 1) 115 | _C.MODEL.SNAKE_HEAD.COORD_CONV = (False, False) 116 | _C.MODEL.SNAKE_HEAD.EDGE_IN_TH = -1.0 # used for inference 117 | 118 | _C.MODEL.SNAKE_HEAD.FILTER_WIDTH = 4 119 | 120 | _C.MODEL.SNAKE_HEAD.USE_DEFORMABLE = (False, False) 121 | 122 | _C.MODEL.SNAKE_HEAD.NUM_SAMPLING = 128 123 | _C.MODEL.SNAKE_HEAD.MARK_INDEX = False 124 | _C.MODEL.SNAKE_HEAD.REORDER_METHOD = "dsnake" # {'dsnake', 'curvegcn'} 125 | _C.MODEL.SNAKE_HEAD.JITTERING = 0.0 126 | _C.MODEL.SNAKE_HEAD.POINT_WEIGH = False 127 | 128 | _C.MODEL.SNAKE_HEAD.ATTENTION = False 129 | _C.MODEL.SNAKE_HEAD.SELECTIVE_REFINE = False 130 | _C.MODEL.SNAKE_HEAD.DOUBLE_SELECTIVE_REFINE = False 131 | 132 | 133 | # utils 134 | _C.MODEL.SNAKE_HEAD.VIS_PATH = False 135 | 136 | 137 | # ---------------------------------------------------------------------------- # 138 | # Edge Prediction Head (ZC) 139 | # ---------------------------------------------------------------------------- # 140 | _C.MODEL.EDGE_HEAD = CN() 141 | _C.MODEL.EDGE_HEAD.NAME = "EdgeFPNHead" 142 | 143 | _C.MODEL.EDGE_HEAD.TRAIN = True 144 | 145 | _C.MODEL.EDGE_HEAD.IN_FEATURES = ["p2"] 146 | _C.MODEL.EDGE_HEAD.STRONG_FEAT = False 147 | # Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for 148 | # the corresponding pixel. 149 | _C.MODEL.EDGE_HEAD.IGNORE_VALUE = 255 150 | # Number of classes in the edge prediction head 151 | _C.MODEL.EDGE_HEAD.NUM_CLASSES = 1 # (only foreground or not) 152 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 153 | _C.MODEL.EDGE_HEAD.CONVS_DIM = 128 154 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 155 | _C.MODEL.EDGE_HEAD.COMMON_STRIDE = 4 156 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 
157 | _C.MODEL.EDGE_HEAD.NORM = "GN" 158 | _C.MODEL.EDGE_HEAD.BCE_WEIGHT = ( 159 | 0 # 1:1 BCE harms the training, very small BCE not helpful 160 | ) 161 | 162 | _C.MODEL.EDGE_HEAD.LOSS_WEIGHT = 1 163 | 164 | 165 | # ---------------------------------------------------------------------------- # 166 | # Investigation Configs (ZC) 167 | # ---------------------------------------------------------------------------- # 168 | _C.TEST.GT_IN = CN() 169 | _C.TEST.GT_IN.ON = False 170 | _C.TEST.GT_IN.WHAT = ["edge", "instance"] # {"edge", "instance"} 171 | 172 | 173 | # ---------------------------------------------------------------------------- # 174 | # VoV Backbone 175 | # ---------------------------------------------------------------------------- # 176 | _C.MODEL.VOVNET = CN() 177 | 178 | _C.MODEL.VOVNET.CONV_BODY = "V-39-eSE" 179 | _C.MODEL.VOVNET.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] 180 | 181 | # Options: FrozenBN, GN, "SyncBN", "BN" 182 | _C.MODEL.VOVNET.NORM = "FrozenBN" 183 | 184 | _C.MODEL.VOVNET.OUT_CHANNELS = 256 185 | 186 | _C.MODEL.VOVNET.BACKBONE_OUT_CHANNELS = 256 187 | 188 | 189 | # ---------------------------------------------------------------------------- # 190 | # DLA backbone 191 | # ---------------------------------------------------------------------------- # 192 | 193 | _C.MODEL.DLA = CN() 194 | _C.MODEL.DLA.CONV_BODY = "DLA34" 195 | _C.MODEL.DLA.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] 196 | 197 | # Options: FrozenBN, GN, "SyncBN", "BN" 198 | _C.MODEL.DLA.NORM = "FrozenBN" 199 | 200 | # ---------------------------------------------------------------------------- # 201 | # FCOS Head 202 | # ---------------------------------------------------------------------------- # 203 | _C.MODEL.FCOS = CN() 204 | 205 | # This is the number of foreground classes. 
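# (Added note: 80 thing classes for COCO; FCOS-style heads predict per-class sigmoid scores, so no extra background class is added.)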
206 | _C.MODEL.FCOS.NUM_CLASSES = 80 207 | _C.MODEL.FCOS.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] 208 | _C.MODEL.FCOS.FPN_STRIDES = [8, 16, 32, 64, 128] 209 | _C.MODEL.FCOS.PRIOR_PROB = 0.01 210 | _C.MODEL.FCOS.INFERENCE_TH_TRAIN = 0.05 211 | _C.MODEL.FCOS.INFERENCE_TH_TEST = 0.05 212 | _C.MODEL.FCOS.NMS_TH = 0.6 213 | _C.MODEL.FCOS.PRE_NMS_TOPK_TRAIN = 1000 214 | _C.MODEL.FCOS.PRE_NMS_TOPK_TEST = 1000 215 | _C.MODEL.FCOS.POST_NMS_TOPK_TRAIN = 100 216 | _C.MODEL.FCOS.POST_NMS_TOPK_TEST = 100 217 | _C.MODEL.FCOS.TOP_LEVELS = 2 218 | _C.MODEL.FCOS.NORM = "GN" # Support GN or none 219 | _C.MODEL.FCOS.USE_SCALE = True 220 | 221 | # Multiply centerness before threshold 222 | # This will affect the final performance by about 0.05 AP but save some time 223 | _C.MODEL.FCOS.THRESH_WITH_CTR = False 224 | 225 | # Focal loss parameters 226 | _C.MODEL.FCOS.LOSS_ALPHA = 0.25 227 | _C.MODEL.FCOS.LOSS_GAMMA = 2.0 228 | _C.MODEL.FCOS.SIZES_OF_INTEREST = [64, 128, 256, 512] 229 | _C.MODEL.FCOS.USE_RELU = True 230 | _C.MODEL.FCOS.USE_DEFORMABLE = False 231 | 232 | # the number of convolutions used in the cls and bbox tower 233 | _C.MODEL.FCOS.NUM_CLS_CONVS = 4 234 | _C.MODEL.FCOS.NUM_BOX_CONVS = 4 235 | _C.MODEL.FCOS.NUM_SHARE_CONVS = 0 236 | _C.MODEL.FCOS.CENTER_SAMPLE = True 237 | _C.MODEL.FCOS.POS_RADIUS = 1.5 238 | _C.MODEL.FCOS.LOC_LOSS_TYPE = "giou" 239 | _C.MODEL.FCOS.EXT_LOSS_TYPE = "smoothl1" 240 | 241 | 242 | # ---------------------------------------------------------------------------- # 243 | # Misc options 244 | # ---------------------------------------------------------------------------- # 245 | _C.SEED = 77 246 | -------------------------------------------------------------------------------- /core/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import builtin # ensure the builtin datasets are registered 2 | 3 | -------------------------------------------------------------------------------- /core/data/builtin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 4 | from detectron2.data.datasets.register_coco import register_coco_instances 5 | from detectron2.data.datasets.coco import load_coco_json 6 | from detectron2.data import DatasetCatalog, MetadataCatalog 7 | from .datasets import register_coco_edge_map, register_cityscapes_edge_map 8 | 9 | ''' 10 | Register COCO dataset with edge map annotations 11 | ''' 12 | 13 | SPLITS_COCO_W_EDGE = { 14 | "coco_2017_train_edge": ( 15 | # original directory/annotations coco detection 16 | "coco/train2017", 17 | "coco/annotations/instances_train2017.json", 18 | # directory for edge map created by datasets/prepare_edge_map.py 19 | # takes ~ 12 mins on a machine with 64 Xeon(R) Gold 6130 CPUs 20 | "coco/edge_train2017" 21 | ), 22 | "coco_2017_val_edge": ( 23 | "coco/val2017", 24 | "coco/annotations/instances_val2017.json", 25 | "coco/edge_val2017" 26 | ), 27 | } 28 | 29 | 30 | def register_all_coco_edge(root="datasets"): 31 | for name, (image_root, json_file, edge_root) in SPLITS_COCO_W_EDGE.items(): 32 | # Assume pre-defined datasets live in `./datasets`. 
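# Added note: register_coco_edge_map (see core/data/datasets/register_coco_edge.py) registers each split twice:
# the joint detection + edge-map dataset used here, plus a "*_edgeonly" semantic-segmentation split.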
33 | register_coco_edge_map( 34 | name, 35 | _get_builtin_metadata("coco"), 36 | os.path.join(root, image_root), 37 | os.path.join(root, edge_root), 38 | os.path.join(root, json_file) if "://" not in json_file else json_file 39 | ) 40 | 41 | 42 | register_all_coco_edge() 43 | 44 | ''' 45 | Register CITYSCAPES dataset with edge map annotations 46 | ''' 47 | 48 | SPLITS_CITY_W_EDGE = { 49 | "cityscapes_train_edge": ( 50 | # original directory/annotations coco detection 51 | "cityscape-coco/coco_img/train", 52 | "cityscape-coco/coco_ann/instance_train.json", 53 | "cityscape-coco/edge_train" 54 | ), 55 | "cityscapes_val_edge": ( 56 | "cityscape-coco/coco_img/val", 57 | "cityscape-coco/coco_ann/instance_val.json", 58 | "cityscape-coco/edge_val" 59 | ), 60 | } 61 | 62 | 63 | def register_all_cityscapes_edge(root="datasets"): 64 | for name, (image_root, json_file, edge_root) in SPLITS_CITY_W_EDGE.items(): 65 | # Assume pre-defined datasets live in `./datasets`. 66 | register_cityscapes_edge_map( 67 | name, 68 | {}, 69 | os.path.join(root, image_root), 70 | os.path.join(root, edge_root), 71 | os.path.join(root, json_file) if "://" not in json_file else json_file 72 | ) 73 | 74 | 75 | register_all_cityscapes_edge() 76 | 77 | 78 | def register_cityscapes(root="datasets"): 79 | # Assume pre-defined datasets live in `./datasets`. 80 | DatasetCatalog.register('cityscapes_coco_fine_instance_seg_train', 81 | lambda: load_coco_json( 82 | os.path.join(root, 'cityscape-coco/coco_ann/instance_train.json'), 83 | os.path.join(root, 'cityscape-coco/coco_img/train'), 84 | 'cityscapes_coco_fine_instance_seg_train')) 85 | 86 | DatasetCatalog.register('cityscapes_coco_fine_instance_seg_val', 87 | lambda: load_coco_json( 88 | os.path.join(root, 'cityscape-coco/coco_ann/instance_val.json'), 89 | os.path.join(root, 'cityscape-coco/coco_img/val'), 90 | 'cityscapes_coco_fine_instance_seg_val')) 91 | MetadataCatalog.get('cityscapes_coco_fine_instance_seg_train').set( 92 | evaluator_type="coco", 93 | ) 94 | MetadataCatalog.get('cityscapes_coco_fine_instance_seg_val').set( 95 | evaluator_type="coco", 96 | ) 97 | 98 | 99 | register_cityscapes() 100 | 101 | ''' 102 | Register SBD dataset 103 | ''' 104 | 105 | _PREDEFINED_SPLITS_SBD = { 106 | "sbd_train": ("sbd/images", "sbd/annotations/sbd_train_instance.json"), 107 | "sbd_val": ("sbd/images", "sbd/annotations/sbd_val_instance.json"), 108 | } 109 | 110 | SBD_CATEGORIES = [ 111 | {"color": [220, 20, 60], 'id': 1, 'name': 'aeroplane'}, 112 | {"color": [119, 11, 32], 'id': 2, 'name': 'bicycle'}, 113 | {"color": [0, 0, 142], 'id': 3, 'name': 'bird'}, 114 | {"color": [0, 0, 230], 'id': 4, 'name': 'boat'}, 115 | {"color": [106, 0, 228], 'id': 5, 'name': 'bottle'}, 116 | {"color": [0, 60, 100], 'id': 6, 'name': 'bus'}, 117 | {"color": [0, 80, 100], 'id': 7, 'name': 'car'}, 118 | {"color": [0, 0, 70], 'id': 8, 'name': 'cat'}, 119 | {"color": [0, 0, 192], 'id': 9, 'name': 'chair'}, 120 | {"color": [250, 170, 30], 'id': 10, 'name': 'cow'}, 121 | {"color": [100, 170, 30], 'id': 11, 'name': 'diningtable'}, 122 | {"color": [220, 220, 0], 'id': 12, 'name': 'dog'}, 123 | {"color": [175, 116, 175], 'id': 13, 'name': 'horse'}, 124 | {"color": [0, 82, 0], 'id': 14, 'name': 'motorbike'}, 125 | {"color": [0, 82, 100], 'id': 15, 'name': 'person'}, 126 | {"color": [82, 82, 100], 'id': 16, 'name': 'pottedplant'}, 127 | {"color": [182, 8, 100], 'id': 17, 'name': 'sheep'}, 128 | {"color": [182, 8, 0], 'id': 18, 'name': 'sofa'}, 129 | {"color": [182, 18, 0], 'id': 19, 'name': 'train'}, 130 | 
{"color": [12, 18, 192], 'id': 20, 'name': 'tvmonitor'} 131 | ] 132 | 133 | thing_ids = [k["id"] for k in SBD_CATEGORIES] 134 | thing_colors = [k["color"] for k in SBD_CATEGORIES] 135 | assert len(thing_ids) == 20, len(thing_ids) 136 | # Mapping from the incontiguous COCO category id to an id in [0, 19] 137 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} 138 | thing_classes = [k["name"] for k in SBD_CATEGORIES] 139 | metadata = { 140 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, 141 | "thing_classes": thing_classes, 142 | "thing_colors": thing_colors, 143 | } 144 | 145 | 146 | def register_all_coco(root="datasets"): 147 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_SBD.items(): 148 | # Assume pre-defined datasets live in `./datasets`. 149 | register_coco_instances( 150 | key, 151 | metadata, 152 | os.path.join(root, json_file) if "://" not in json_file else json_file, 153 | os.path.join(root, image_root), 154 | ) 155 | 156 | 157 | register_all_coco() 158 | -------------------------------------------------------------------------------- /core/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .register_coco_edge import register_coco_edge_map, register_cityscapes_edge_map 2 | -------------------------------------------------------------------------------- /core/data/datasets/register_coco_edge.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from detectron2.data import DatasetCatalog, MetadataCatalog 4 | from detectron2.data.datasets import load_coco_json, load_sem_seg 5 | 6 | 7 | def register_coco_edge_map( 8 | name, metadata, image_root, edge_root, instances_json 9 | ): 10 | ds_name = name 11 | DatasetCatalog.register( 12 | ds_name, 13 | lambda: merge_to_panoptic( 14 | load_coco_json(instances_json, image_root, ds_name), 15 | load_sem_seg(edge_root, image_root), 16 | ), 17 | ) 18 | MetadataCatalog.get(ds_name).set( 19 | image_root=image_root, 20 | edge_root=edge_root, 21 | json_file=instances_json, 22 | evaluator_type="coco+edge_map", 23 | **metadata 24 | ) 25 | 26 | semantic_name = name + "_edgeonly" 27 | DatasetCatalog.register(semantic_name, lambda: load_sem_seg(edge_root, image_root)) 28 | MetadataCatalog.get(semantic_name).set( 29 | sem_seg_root=edge_root, image_root=image_root, evaluator_type="sem_seg", **metadata 30 | ) 31 | 32 | 33 | def register_cityscapes_edge_map( 34 | name, metadata, image_root, edge_root, instances_json 35 | ): 36 | ds_name = name 37 | DatasetCatalog.register( 38 | ds_name, 39 | lambda: merge_to_panoptic( 40 | load_coco_json(instances_json, image_root, ds_name), 41 | load_sem_seg(edge_root, image_root, image_ext='png'), 42 | ), 43 | ) 44 | MetadataCatalog.get(ds_name).set( 45 | image_root=image_root, 46 | edge_root=edge_root, 47 | json_file=instances_json, 48 | evaluator_type="coco+edge_map", 49 | **metadata 50 | ) 51 | 52 | semantic_name = name + "_edgeonly" 53 | DatasetCatalog.register(semantic_name, lambda: load_sem_seg(edge_root, image_root)) 54 | MetadataCatalog.get(semantic_name).set( 55 | sem_seg_root=edge_root, image_root=image_root, evaluator_type="sem_seg", **metadata 56 | ) 57 | 58 | 59 | def merge_to_panoptic(detection_dicts, sem_seg_dicts): 60 | """ 61 | Create dataset dicts for panoptic segmentation, by 62 | merging two dicts using "file_name" field to match their entries. 
63 | 64 | Args: 65 | detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. 66 | sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. 67 | 68 | Returns: 69 | list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in 70 | both detection_dicts and sem_seg_dicts that correspond to the same image. 71 | The function assumes that the same key in different dicts has the same value. 72 | """ 73 | results = [] 74 | sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} 75 | assert len(sem_seg_file_to_entry) > 0 76 | 77 | for det_dict in detection_dicts: 78 | dic = copy.copy(det_dict) 79 | dic.update(sem_seg_file_to_entry[dic["file_name"]]) 80 | results.append(dic) 81 | return results 82 | -------------------------------------------------------------------------------- /core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_evaluation import COCOEvaluator 2 | from .edge_map_evaluation import EdgeMapEvaluator 3 | from .cocoeval import COCOeval -------------------------------------------------------------------------------- /core/evaluation/edge_map_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import itertools 3 | import json 4 | import logging 5 | import numpy as np 6 | import os 7 | from collections import OrderedDict 8 | import PIL.Image as Image 9 | import pycocotools.mask as mask_util 10 | import torch 11 | from fvcore.common.file_io import PathManager 12 | 13 | from detectron2.data import DatasetCatalog, MetadataCatalog 14 | from detectron2.utils.comm import all_gather, is_main_process, synchronize 15 | 16 | from detectron2.evaluation.evaluator import DatasetEvaluator 17 | 18 | 19 | class EdgeMapEvaluator(DatasetEvaluator): 20 | """ 21 | Evaluate semantic segmentation 22 | """ 23 | 24 | def __init__( 25 | self, dataset_name, distributed, num_classes, ignore_label=255, output_dir=None 26 | ): 27 | """ 28 | Args: 29 | dataset_name (str): name of the dataset to be evaluated. 30 | distributed (True): if True, will collect results from all ranks for evaluation. 31 | Otherwise, will evaluate the results in the current process. 32 | num_classes (int): number of classes 33 | ignore_label (int): value in semantic segmentation ground truth. Predictions for the 34 | corresponding pixels should be ignored. 35 | output_dir (str): an output directory to dump results. 
36 | """ 37 | self._dataset_name = dataset_name 38 | self._distributed = distributed 39 | self._output_dir = output_dir 40 | self._num_classes = num_classes 41 | self._ignore_label = ignore_label 42 | self._N = num_classes + 1 43 | 44 | self._cpu_device = torch.device("cpu") 45 | self._logger = logging.getLogger(__name__) 46 | 47 | self.input_file_to_gt_file = { 48 | dataset_record["file_name"]: dataset_record["sem_seg_file_name"] 49 | for dataset_record in DatasetCatalog.get(dataset_name) 50 | } 51 | 52 | meta = MetadataCatalog.get(dataset_name) 53 | # Dict that maps contiguous training ids to COCO category ids 54 | try: 55 | c2d = meta.stuff_dataset_id_to_contiguous_id 56 | self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} 57 | except AttributeError: 58 | self._contiguous_id_to_dataset_id = None 59 | 60 | def reset(self): 61 | self._conf_matrix = np.zeros((self._N, self._N), dtype=np.int64) 62 | self._predictions = [] 63 | 64 | def process(self, inputs, outputs): 65 | """ 66 | Args: 67 | inputs: the inputs to a model. 68 | It is a list of dicts. Each dict corresponds to an image and 69 | contains keys like "height", "width", "file_name". 70 | outputs: the outputs of a model. It is either list of semantic segmentation predictions 71 | (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic 72 | segmentation prediction in the same format. 73 | """ 74 | for input, output in zip(inputs, outputs): 75 | # TODO: Just use 0.5 as threshold; should we change? 76 | output = (output["edge_map"] > 0.5).to(self._cpu_device) 77 | pred = np.array(output, dtype=np.int) 78 | with PathManager.open( 79 | self.input_file_to_gt_file[input["file_name"]], "rb" 80 | ) as f: 81 | gt = np.array(Image.open(f), dtype=np.int) 82 | 83 | gt[gt == self._ignore_label] = 0 # NOTE: 0 - background in edge map. 
84 | 85 | self._conf_matrix += np.bincount( 86 | self._N * pred.reshape(-1) + gt.reshape(-1), minlength=self._N ** 2 87 | ).reshape(self._N, self._N) 88 | 89 | self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) 90 | 91 | def evaluate(self): 92 | """ 93 | Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): 94 | 95 | * Mean intersection-over-union averaged across classes (mIoU) 96 | * Frequency Weighted IoU (fwIoU) 97 | * Mean pixel accuracy averaged across classes (mACC) 98 | * Pixel Accuracy (pACC) 99 | """ 100 | if self._distributed: 101 | synchronize() 102 | conf_matrix_list = all_gather(self._conf_matrix) 103 | self._predictions = all_gather(self._predictions) 104 | self._predictions = list(itertools.chain(*self._predictions)) 105 | if not is_main_process(): 106 | return 107 | 108 | self._conf_matrix = np.zeros_like(self._conf_matrix) 109 | for conf_matrix in conf_matrix_list: 110 | self._conf_matrix += conf_matrix 111 | 112 | if self._output_dir: 113 | PathManager.mkdirs(self._output_dir) 114 | file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") 115 | with PathManager.open(file_path, "w") as f: 116 | f.write(json.dumps(self._predictions)) 117 | 118 | acc = np.zeros(self._num_classes, dtype=np.float) 119 | iou = np.zeros(self._num_classes, dtype=np.float) 120 | tp = self._conf_matrix.diagonal()[:-1].astype(np.float) 121 | pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) 122 | class_weights = pos_gt / np.sum(pos_gt) 123 | pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) 124 | acc_valid = pos_gt > 0 125 | acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] 126 | iou_valid = (pos_gt + pos_pred) > 0 127 | union = pos_gt + pos_pred - tp 128 | iou[acc_valid] = tp[acc_valid] / union[acc_valid] 129 | macc = np.sum(acc) / np.sum(acc_valid) 130 | miou = np.sum(iou) / np.sum(iou_valid) 131 | fiou = np.sum(iou * class_weights) 132 | pacc = np.sum(tp) / np.sum(pos_gt) 133 | 134 | res = {} 135 | res["mIoU"] = 100 * miou 136 | res["fwIoU"] = 100 * fiou 137 | res["mACC"] = 100 * macc 138 | res["pACC"] = 100 * pacc 139 | 140 | if self._output_dir: 141 | file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") 142 | with PathManager.open(file_path, "wb") as f: 143 | torch.save(res, f) 144 | results = OrderedDict({"edge_map": res}) 145 | self._logger.info(results) 146 | return results 147 | 148 | def encode_json_sem_seg(self, sem_seg, input_file_name): 149 | """ 150 | Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. 
151 | See http://cocodataset.org/#format-results 152 | """ 153 | json_list = [] 154 | for label in np.unique(sem_seg): 155 | if self._contiguous_id_to_dataset_id is not None: 156 | assert ( 157 | label in self._contiguous_id_to_dataset_id 158 | ), "Label {} is not in the metadata info for {}".format( 159 | label, self._dataset_name 160 | ) 161 | dataset_id = self._contiguous_id_to_dataset_id[label] 162 | else: 163 | dataset_id = int(label) 164 | mask = (sem_seg == label).astype(np.uint8) 165 | mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] 166 | mask_rle["counts"] = mask_rle["counts"].decode("utf-8") 167 | json_list.append( 168 | { 169 | "file_name": input_file_name, 170 | "category_id": dataset_id, 171 | "segmentation": mask_rle, 172 | } 173 | ) 174 | return json_list 175 | -------------------------------------------------------------------------------- /core/evaluation/evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import datetime 3 | import logging 4 | import time 5 | from collections import OrderedDict 6 | from contextlib import contextmanager 7 | import torch 8 | 9 | from detectron2.utils.comm import is_main_process 10 | from detectron2.utils.logger import log_every_n_seconds 11 | 12 | 13 | class DatasetEvaluator: 14 | """ 15 | Base class for a dataset evaluator. 16 | 17 | The function :func:`inference_on_dataset` runs the model over 18 | all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. 19 | 20 | This class will accumulate information of the inputs/outputs (by :meth:`process`), 21 | and produce evaluation results in the end (by :meth:`evaluate`). 22 | """ 23 | 24 | def reset(self): 25 | """ 26 | Preparation for a new round of evaluation. 27 | Should be called before starting a round of evaluation. 28 | """ 29 | pass 30 | 31 | def process(self, input, output): 32 | """ 33 | Process an input/output pair. 34 | 35 | Args: 36 | input: the input that's used to call the model. 37 | output: the return value of `model(input)` 38 | """ 39 | pass 40 | 41 | def evaluate(self): 42 | """ 43 | Evaluate/summarize the performance, after processing all input/output pairs. 44 | 45 | Returns: 46 | dict: 47 | A new evaluator class can return a dict of arbitrary format 48 | as long as the user can process the results. 49 | In our train_net.py, we expect the following format: 50 | 51 | * key: the name of the task (e.g., bbox) 52 | * value: a dict of {metric name: score}, e.g.: {"AP50": 80} 53 | """ 54 | pass 55 | 56 | 57 | class DatasetEvaluators(DatasetEvaluator): 58 | def __init__(self, evaluators): 59 | assert len(evaluators) 60 | super().__init__() 61 | self._evaluators = evaluators 62 | 63 | def reset(self): 64 | for evaluator in self._evaluators: 65 | evaluator.reset() 66 | 67 | def process(self, input, output): 68 | for evaluator in self._evaluators: 69 | evaluator.process(input, output) 70 | 71 | def evaluate(self): 72 | results = OrderedDict() 73 | for evaluator in self._evaluators: 74 | result = evaluator.evaluate() 75 | if is_main_process() and result is not None: 76 | for k, v in result.items(): 77 | assert ( 78 | k not in results 79 | ), "Different evaluators produce results with the same key {}".format(k) 80 | results[k] = v 81 | return results 82 | 83 | 84 | def inference_on_dataset(model, data_loader, evaluator): 85 | """ 86 | Run model on the data_loader and evaluate the metrics with evaluator. 
87 | Also benchmark the inference speed of `model.forward` accurately. 88 | The model will be used in eval mode. 89 | 90 | Args: 91 | model (nn.Module): a module which accepts an object from 92 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. 93 | 94 | If you wish to evaluate a model in `training` mode instead, you can 95 | wrap the given model and override its behavior of `.eval()` and `.train()`. 96 | data_loader: an iterable object with a length. 97 | The elements it generates will be the inputs to the model. 98 | evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want 99 | to benchmark, but don't want to do any evaluation. 100 | 101 | Returns: 102 | The return value of `evaluator.evaluate()` 103 | """ 104 | num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1 105 | logger = logging.getLogger(__name__) 106 | logger.info("Start inference on {} images".format(len(data_loader))) 107 | 108 | total = len(data_loader) # inference data loader must have a fixed length 109 | if evaluator is None: 110 | # create a no-op evaluator 111 | evaluator = DatasetEvaluators([]) 112 | evaluator.reset() 113 | 114 | num_warmup = min(5, total - 1) 115 | start_time = time.perf_counter() 116 | total_compute_time = 0 117 | with inference_context(model), torch.no_grad(): 118 | for idx, inputs in enumerate(data_loader): 119 | if idx == num_warmup: 120 | start_time = time.perf_counter() 121 | total_compute_time = 0 122 | 123 | start_compute_time = time.perf_counter() 124 | outputs = model(inputs) 125 | if torch.cuda.is_available(): 126 | torch.cuda.synchronize() 127 | total_compute_time += time.perf_counter() - start_compute_time 128 | evaluator.process(inputs, outputs) 129 | 130 | iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) 131 | seconds_per_img = total_compute_time / iters_after_start 132 | if idx >= num_warmup * 2 or seconds_per_img > 5: 133 | total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start 134 | eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) 135 | log_every_n_seconds( 136 | logging.INFO, 137 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 138 | idx + 1, total, seconds_per_img, str(eta) 139 | ), 140 | n=5, 141 | ) 142 | 143 | # Measure the time only for this worker (before the synchronization barrier) 144 | total_time = time.perf_counter() - start_time 145 | total_time_str = str(datetime.timedelta(seconds=total_time)) 146 | # NOTE this format is parsed by grep 147 | logger.info( 148 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( 149 | total_time_str, total_time / (total - num_warmup), num_devices 150 | ) 151 | ) 152 | total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) 153 | logger.info( 154 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( 155 | total_compute_time_str, total_compute_time / (total - num_warmup), num_devices 156 | ) 157 | ) 158 | 159 | results = evaluator.evaluate() 160 | # An evaluator may return None when not in main process. 161 | # Replace it by an empty dict instead to make it easier for downstream code to handle 162 | if results is None: 163 | results = {} 164 | return results 165 | 166 | 167 | @contextmanager 168 | def inference_context(model): 169 | """ 170 | A context where the model is temporarily changed to eval mode, 171 | and restored to previous mode afterwards. 
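
    Example (a minimal usage sketch; this mirrors how `inference_on_dataset` above wraps the model):

        with inference_context(model), torch.no_grad():
            outputs = model(inputs)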
172 | 173 | Args: 174 | model: a torch Module 175 | """ 176 | training_mode = model.training 177 | model.eval() 178 | yield 179 | model.train(training_mode) 180 | -------------------------------------------------------------------------------- /core/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import EXTLoss, DiceLoss, SmoothL1Loss, IOULoss 2 | from .deform_conv import DFConv2d 3 | from .ml_nms import ml_nms 4 | from .extreme_utils import _ext as extreme_utils 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /core/layers/deform_conv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Codes from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/deform_conv.py) 3 | """ 4 | import torch 5 | from torch import nn 6 | 7 | from detectron2.layers import Conv2d 8 | 9 | 10 | class _NewEmptyTensorOp(torch.autograd.Function): 11 | @staticmethod 12 | def forward(ctx, x, new_shape): 13 | ctx.shape = x.shape 14 | return x.new_empty(new_shape) 15 | 16 | @staticmethod 17 | def backward(ctx, grad): 18 | shape = ctx.shape 19 | return _NewEmptyTensorOp.apply(grad, shape), None 20 | 21 | 22 | class DFConv2d(nn.Module): 23 | """ 24 | Deformable convolutional layer with configurable 25 | deformable groups, dilations and groups. 26 | 27 | Code is from: 28 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py 29 | 30 | 31 | """ 32 | 33 | def __init__( 34 | self, 35 | in_channels, 36 | out_channels, 37 | with_modulated_dcn=True, 38 | kernel_size=3, 39 | stride=1, 40 | groups=1, 41 | dilation=1, 42 | deformable_groups=1, 43 | bias=False, 44 | padding=None 45 | ): 46 | super(DFConv2d, self).__init__() 47 | if isinstance(kernel_size, (list, tuple)): 48 | assert isinstance(stride, (list, tuple)) 49 | assert isinstance(dilation, (list, tuple)) 50 | assert len(kernel_size) == 2 51 | assert len(stride) == 2 52 | assert len(dilation) == 2 53 | padding = ( 54 | dilation[0] * (kernel_size[0] - 1) // 2, 55 | dilation[1] * (kernel_size[1] - 1) // 2 56 | ) 57 | offset_base_channels = kernel_size[0] * kernel_size[1] 58 | else: 59 | padding = dilation * (kernel_size - 1) // 2 60 | offset_base_channels = kernel_size * kernel_size 61 | if with_modulated_dcn: 62 | from detectron2.layers.deform_conv import ModulatedDeformConv 63 | offset_channels = offset_base_channels * 3 # default: 27 64 | conv_block = ModulatedDeformConv 65 | else: 66 | from detectron2.layers.deform_conv import DeformConv 67 | offset_channels = offset_base_channels * 2 # default: 18 68 | conv_block = DeformConv 69 | self.offset = Conv2d( 70 | in_channels, 71 | deformable_groups * offset_channels, 72 | kernel_size=kernel_size, 73 | stride=stride, 74 | padding=padding, 75 | groups=1, 76 | dilation=dilation 77 | ) 78 | for l in [self.offset, ]: 79 | nn.init.kaiming_uniform_(l.weight, a=1) 80 | torch.nn.init.constant_(l.bias, 0.) 
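# Added note: the deformable convolution constructed below consumes the offsets (and, for modulated DCN,
# the sigmoid attention masks) that self.offset predicts in forward().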
81 | self.conv = conv_block( 82 | in_channels, 83 | out_channels, 84 | kernel_size=kernel_size, 85 | stride=stride, 86 | padding=padding, 87 | dilation=dilation, 88 | groups=groups, 89 | deformable_groups=deformable_groups, 90 | bias=bias 91 | ) 92 | self.with_modulated_dcn = with_modulated_dcn 93 | self.kernel_size = kernel_size 94 | self.stride = stride 95 | self.padding = padding 96 | self.dilation = dilation 97 | self.offset_split = offset_base_channels * deformable_groups * 2 98 | 99 | def forward(self, x, return_offset=False): 100 | if x.numel() > 0: 101 | if not self.with_modulated_dcn: 102 | offset_mask = self.offset(x) 103 | x = self.conv(x, offset_mask) 104 | else: 105 | offset_mask = self.offset(x) 106 | offset = offset_mask[:, :self.offset_split, :, :] 107 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid() 108 | x = self.conv(x, offset, mask) 109 | if return_offset: 110 | return x, offset_mask 111 | return x 112 | # get output shape 113 | output_shape = [ 114 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 115 | for i, p, di, k, d in zip( 116 | x.shape[-2:], 117 | self.padding, 118 | self.dilation, 119 | self.kernel_size, 120 | self.stride 121 | ) 122 | ] 123 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape 124 | return _NewEmptyTensorOp.apply(x, output_shape) 125 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/core/layers/extreme_utils/__init__.py -------------------------------------------------------------------------------- /core/layers/extreme_utils/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 3 | import os 4 | import glob 5 | 6 | 7 | def get_extensions(): 8 | this_dir = os.path.dirname(os.path.abspath(__file__)) 9 | main_file = glob.glob(os.path.join(this_dir, '*.cpp')) 10 | source_cuda = glob.glob(os.path.join(this_dir, 'src', '*.cu')) 11 | sources = main_file + source_cuda 12 | include_dirs = [this_dir] 13 | ext_modules = [ 14 | CUDAExtension( 15 | name='_ext', 16 | sources=sources, 17 | include_dirs=include_dirs 18 | ) 19 | ] 20 | return ext_modules 21 | 22 | 23 | setup( 24 | ext_modules=get_extensions(), 25 | cmdclass={'build_ext': BuildExtension} 26 | ) 27 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/src/cuda_common.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef CUDA_COMMON_H_ 9 | #define CUDA_COMMON_H_ 10 | 11 | #define DIST(x1,y1,z1,x2,y2,z2) (((x1)-(x2))*((x1)-(x2))+((y1)-(y2))*((y1)-(y2))+((z1)-(z2))*((z1)-(z2))) 12 | #define DIST2D(x1,y1,x2,y2) (((x1)-(x2))*((x1)-(x2))+((y1)-(y2))*((y1)-(y2))) 13 | 14 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 15 | 16 | void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 17 | { 18 | if (code != cudaSuccess) 19 | { 20 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 21 | if (abort) exit(code); 22 | } 23 | } 24 | 25 | int infTwoExp(int val) 26 | { 27 | int inf=1; 28 | while(val>inf) inf<<=1; 29 | return inf; 30 | } 31 | 32 | void 
getGPULayout( 33 | int dim0,int dim1,int dim2, 34 | int* bdim0,int* bdim1,int* bdim2, 35 | int* tdim0,int* tdim1,int* tdim2 36 | ) 37 | { 38 | (*tdim2)=64; 39 | if(dim2<(*tdim2)) (*tdim2)=infTwoExp(dim2); 40 | (*bdim2)=dim2/(*tdim2); 41 | if(dim2%(*tdim2)>0) (*bdim2)++; 42 | 43 | (*tdim1)=1024/(*tdim2); 44 | if(dim1<(*tdim1)) (*tdim1)=infTwoExp(dim1); 45 | (*bdim1)=dim1/(*tdim1); 46 | if(dim1%(*tdim1)>0) (*bdim1)++; 47 | 48 | (*tdim0)=1024/((*tdim1)*(*tdim2)); 49 | if(dim0<(*tdim0)) (*tdim0)=infTwoExp(dim0); 50 | (*bdim0)=dim0/(*tdim0); 51 | if(dim0%(*tdim0)>0) (*bdim0)++; 52 | } 53 | #endif 54 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/src/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* 
descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)}).sort(0, false)); 128 | } 129 | 130 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/src/nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 5 | 6 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("collect_extreme_point", &collect_extreme_point, "collect_extreme_point"); 6 | m.def("calculate_edge_num", &calculate_edge_num, "calculate_edge_num"); 7 | m.def("calculate_wnp", &calculate_wnp, "calculate_wnp"); 8 | m.def("roll_array", &roll_array, "roll_array"); 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | } 11 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "src/nms.h" 4 | 5 | 6 | at::Tensor collect_extreme_point( 7 | const at::Tensor& ext_hm, 8 | const at::Tensor& bbox, 9 | const at::Tensor& radius, 10 | const at::Tensor& vote, 11 | const at::Tensor& ct 12 | ); 13 | 14 | 15 | void calculate_edge_num( 16 | at::Tensor& edge_num, 17 | const at::Tensor& edge_num_sum, 18 | const at::Tensor& edge_idx_sort, 19 | const int p_num 20 | ); 21 | 22 | 23 | std::tuple calculate_wnp( 24 | const 
at::Tensor& edge_num, 25 | const at::Tensor& edge_start_idx, 26 | const int p_num 27 | ); 28 | 29 | 30 | at::Tensor roll_array( 31 | const at::Tensor& array, 32 | const at::Tensor& step 33 | ); 34 | 35 | 36 | at::Tensor nms(const at::Tensor& dets, 37 | const at::Tensor& scores, 38 | const float threshold) { 39 | if (dets.numel() == 0) 40 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 41 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 42 | return nms_cuda(b, threshold); 43 | } 44 | 45 | -------------------------------------------------------------------------------- /core/layers/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | import functools 5 | 6 | import torch.nn.functional as F 7 | 8 | 9 | def reduce_loss(loss, reduction): 10 | reduction_enum = F._Reduction.get_enum(reduction) 11 | # none: 0, elementwise_mean:1, sum: 2 12 | if reduction_enum == 0: 13 | return loss 14 | elif reduction_enum == 1: 15 | return loss.mean() 16 | elif reduction_enum == 2: 17 | return loss.sum() 18 | 19 | 20 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 21 | # if weight is specified, apply element-wise weight 22 | if weight is not None: 23 | loss = loss * weight 24 | 25 | # if avg_factor is not specified, just reduce the loss 26 | if avg_factor is None: 27 | loss = reduce_loss(loss, reduction) 28 | else: 29 | # if reduction is mean, then average the loss by avg_factor 30 | if reduction == 'mean': 31 | loss = loss.sum() / avg_factor 32 | # if reduction is 'none', then do nothing, otherwise raise an error 33 | elif reduction != 'none': 34 | raise ValueError('avg_factor can not be used with reduction="sum"') 35 | return loss 36 | 37 | 38 | def weighted_loss(loss_func): 39 | @functools.wraps(loss_func) 40 | def wrapper(pred, 41 | target, 42 | weight=None, 43 | reduction='mean', 44 | avg_factor=None, 45 | **kwargs): 46 | # get element-wise loss 47 | loss = loss_func(pred, target, **kwargs) 48 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 49 | return loss 50 | 51 | return wrapper 52 | 53 | 54 | @weighted_loss 55 | def smooth_l1_loss(pred, target, beta=1.0): 56 | assert beta > 0 57 | assert pred.size() == target.size() and target.numel() > 0 58 | diff = torch.abs(pred - target) 59 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 60 | diff - 0.5 * beta) 61 | return loss 62 | 63 | 64 | class SmoothL1Loss(nn.Module): 65 | 66 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 67 | super(SmoothL1Loss, self).__init__() 68 | self.beta = beta 69 | self.reduction = reduction 70 | self.loss_weight = loss_weight 71 | 72 | def forward(self, 73 | pred, 74 | target, 75 | weight=None, 76 | avg_factor=None, 77 | reduction_override=None, 78 | **kwargs): 79 | assert reduction_override in (None, 'none', 'mean', 'sum') 80 | reduction = ( 81 | reduction_override if reduction_override else self.reduction) 82 | loss = self.loss_weight * smooth_l1_loss( 83 | pred, 84 | target, 85 | weight, 86 | beta=self.beta, 87 | reduction=reduction, 88 | avg_factor=avg_factor, 89 | **kwargs) 90 | return loss 91 | 92 | 93 | class EXTLoss(nn.Module): 94 | def __init__(self, ext_loss_type='smoothl1'): 95 | super(EXTLoss, self).__init__() 96 | self.ext_loss_type = ext_loss_type 97 | if ext_loss_type == 'smoothl1': 98 | self.loss_func = nn.SmoothL1Loss(reduction='none') 99 | 100 | def forward(self, pred, target, weight=None): 101 | losses = 
self.loss_func(pred, target).sum(dim=1) 102 | if weight is not None: 103 | return (losses * weight).sum() 104 | else: 105 | return losses.sum() 106 | 107 | 108 | class DiceLoss(nn.Module): 109 | def __init__(self, 110 | bce_weight=0, 111 | ignore_value=255): 112 | super(DiceLoss, self).__init__() 113 | self.ignore_value = ignore_value 114 | if bce_weight != 0: 115 | self.bce_crit = nn.BCELoss() 116 | else: 117 | self.bce_crit = None 118 | self.bce_weight = bce_weight 119 | 120 | def forward(self, pred, target): 121 | if len(target.size()) == 3: 122 | target = target.unsqueeze(1) 123 | assert pred.size() == target.size() 124 | 125 | target = target.float() 126 | 127 | if self.ignore_value: 128 | mask = torch.ne(target, self.ignore_value).float() 129 | pred *= mask 130 | target *= mask 131 | 132 | p2 = pred * pred 133 | g2 = target * target 134 | pg = pred * target 135 | 136 | p2 = torch.sum(p2, (3, 2, 1)) 137 | g2 = torch.sum(g2, (3, 2, 1)) 138 | pg = torch.sum(pg, (3, 2, 1)) 139 | 140 | dice_coef = (2 * pg) / (p2 + g2 + 0.0001) 141 | 142 | dice_loss = (1.0 - dice_coef).sum() 143 | dice_loss /= target.size(0) 144 | 145 | if self.bce_crit is not None: 146 | bce_loss = self.bce_crit(pred, target) 147 | dice_loss += self.bce_weight * bce_loss 148 | 149 | return dice_loss 150 | 151 | 152 | class IOULoss(nn.Module): 153 | """ 154 | Codes from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/iou_loss.py) 155 | """ 156 | def __init__(self, loc_loss_type='iou'): 157 | super(IOULoss, self).__init__() 158 | self.loc_loss_type = loc_loss_type 159 | 160 | def forward(self, pred, target, weight=None): 161 | pred_left = pred[:, 0] 162 | pred_top = pred[:, 1] 163 | pred_right = pred[:, 2] 164 | pred_bottom = pred[:, 3] 165 | 166 | target_left = target[:, 0] 167 | target_top = target[:, 1] 168 | target_right = target[:, 2] 169 | target_bottom = target[:, 3] 170 | 171 | target_aera = (target_left + target_right) * \ 172 | (target_top + target_bottom) 173 | pred_aera = (pred_left + pred_right) * \ 174 | (pred_top + pred_bottom) 175 | 176 | w_intersect = torch.min(pred_left, target_left) + \ 177 | torch.min(pred_right, target_right) 178 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 179 | torch.min(pred_top, target_top) 180 | 181 | g_w_intersect = torch.max(pred_left, target_left) + \ 182 | torch.max(pred_right, target_right) 183 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 184 | torch.max(pred_top, target_top) 185 | ac_uion = g_w_intersect * g_h_intersect 186 | 187 | area_intersect = w_intersect * h_intersect 188 | area_union = target_aera + pred_aera - area_intersect 189 | 190 | ious = (area_intersect + 1.0) / (area_union + 1.0) 191 | gious = ious - (ac_uion - area_union) / ac_uion 192 | if self.loc_loss_type == 'iou': 193 | losses = -torch.log(ious) 194 | elif self.loc_loss_type == 'linear_iou': 195 | losses = 1 - ious 196 | elif self.loc_loss_type == 'giou': 197 | losses = 1 - gious 198 | else: 199 | raise NotImplementedError 200 | 201 | if weight is not None: 202 | return (losses * weight).sum() 203 | else: 204 | return losses.sum() 205 | -------------------------------------------------------------------------------- /core/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Codes from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/ml_nms.py) 3 | """ 4 | 5 | from detectron2.layers import batched_nms 6 | 7 | 8 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 9 | 
score_field="scores", label_field="labels"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | Arguments: 14 | boxlist(BoxList) 15 | nms_thresh (float) 16 | max_proposals (int): if > 0, then only the top max_proposals are kept 17 | after non-maximum suppression 18 | score_field (str) 19 | """ 20 | if nms_thresh <= 0: 21 | return boxlist 22 | boxes = boxlist.pred_boxes.tensor 23 | scores = boxlist.scores 24 | labels = boxlist.pred_classes 25 | keep = batched_nms(boxes, scores, labels, nms_thresh) 26 | if max_proposals > 0: 27 | keep = keep[: max_proposals] 28 | boxlist = boxlist[keep] 29 | return boxlist 30 | -------------------------------------------------------------------------------- /core/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | from .fcose import FCOSE, ExtremeDetector 3 | from .dsnake_baseline import FcosSnake 4 | from .backbone import build_fcos_resnet_fpn_backbone 5 | from .one_stage_detector import OneStageDetector 6 | from .edge_snake import Dance -------------------------------------------------------------------------------- /core/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import build_fcos_resnet_fpn_backbone 2 | from .vovnet import build_vovnet_backbone 3 | from .dla import build_fcos_dla_fpn_backbone 4 | -------------------------------------------------------------------------------- /core/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | import fvcore.nn.weight_init as weight_init 2 | import torch.nn.functional as F 3 | from detectron2.layers import ShapeSpec 4 | from detectron2.modeling.backbone import FPN, build_resnet_backbone 5 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 6 | from torch import nn 7 | 8 | from .mobilenet import build_mnv2_backbone 9 | from .vovnet import build_vovnet_backbone 10 | 11 | 12 | class LastLevelP6P7(nn.Module): 13 | """ 14 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from 15 | C5 or P5 feature. 16 | """ 17 | 18 | def __init__(self, in_channels, out_channels, in_features="res5"): 19 | super().__init__() 20 | self.num_levels = 2 21 | self.in_feature = in_features 22 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 23 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 24 | for module in [self.p6, self.p7]: 25 | weight_init.c2_xavier_fill(module) 26 | 27 | def forward(self, x): 28 | p6 = self.p6(x) 29 | p7 = self.p7(F.relu(p6)) 30 | return [p6, p7] 31 | 32 | 33 | class LastLevelP6(nn.Module): 34 | """ 35 | This module is used in FCOS to generate extra layers 36 | """ 37 | 38 | def __init__(self, in_channels, out_channels, in_features="res5"): 39 | super().__init__() 40 | self.num_levels = 1 41 | self.in_feature = in_features 42 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 43 | for module in [self.p6]: 44 | weight_init.c2_xavier_fill(module) 45 | 46 | def forward(self, x): 47 | p6 = self.p6(x) 48 | return [p6] 49 | 50 | 51 | @BACKBONE_REGISTRY.register() 52 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): 53 | """ 54 | Args: 55 | cfg: a detectron2 CfgNode 56 | 57 | Returns: 58 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
59 | """ 60 | if cfg.MODEL.MOBILENET: 61 | bottom_up = build_mnv2_backbone(cfg, input_shape) 62 | elif cfg.MODEL.USE_VOVNET: 63 | bottom_up = build_vovnet_backbone(cfg, input_shape) 64 | else: 65 | bottom_up = build_resnet_backbone(cfg, input_shape) 66 | in_features = cfg.MODEL.FPN.IN_FEATURES 67 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 68 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 69 | in_channels_top = out_channels 70 | if top_levels == 2: 71 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 72 | if top_levels == 1: 73 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 74 | elif top_levels == 0: 75 | top_block = None 76 | backbone = FPN( 77 | bottom_up=bottom_up, 78 | in_features=in_features, 79 | out_channels=out_channels, 80 | norm=cfg.MODEL.FPN.NORM, 81 | top_block=top_block, 82 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 83 | ) 84 | return backbone 85 | -------------------------------------------------------------------------------- /core/modeling/backbone/mobilenet.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/tonylins/pytorch-mobilenet-v2/ 2 | # Published by Ji Lin, tonylins 3 | # licensed under the Apache License, Version 2.0, January 2004 4 | 5 | from torch import nn 6 | from torch.nn import BatchNorm2d 7 | 8 | # from detectron2.layers.batch_norm import NaiveSyncBatchNorm as BatchNorm2d 9 | from detectron2.layers import Conv2d 10 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 11 | from detectron2.modeling.backbone import Backbone 12 | 13 | 14 | def conv_bn(inp, oup, stride): 15 | return nn.Sequential( 16 | Conv2d(inp, oup, 3, stride, 1, bias=False), 17 | BatchNorm2d(oup), 18 | nn.ReLU6(inplace=True), 19 | ) 20 | 21 | 22 | def conv_1x1_bn(inp, oup): 23 | return nn.Sequential( 24 | Conv2d(inp, oup, 1, 1, 0, bias=False), BatchNorm2d(oup), nn.ReLU6(inplace=True) 25 | ) 26 | 27 | 28 | class InvertedResidual(nn.Module): 29 | def __init__(self, inp, oup, stride, expand_ratio): 30 | super(InvertedResidual, self).__init__() 31 | self.stride = stride 32 | assert stride in [1, 2] 33 | 34 | hidden_dim = int(round(inp * expand_ratio)) 35 | self.use_res_connect = self.stride == 1 and inp == oup 36 | 37 | if expand_ratio == 1: 38 | self.conv = nn.Sequential( 39 | # dw 40 | Conv2d( 41 | hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False 42 | ), 43 | BatchNorm2d(hidden_dim), 44 | nn.ReLU6(inplace=True), 45 | # pw-linear 46 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 47 | BatchNorm2d(oup), 48 | ) 49 | else: 50 | self.conv = nn.Sequential( 51 | # pw 52 | Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 53 | BatchNorm2d(hidden_dim), 54 | nn.ReLU6(inplace=True), 55 | # dw 56 | Conv2d( 57 | hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False 58 | ), 59 | BatchNorm2d(hidden_dim), 60 | nn.ReLU6(inplace=True), 61 | # pw-linear 62 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 63 | BatchNorm2d(oup), 64 | ) 65 | 66 | def forward(self, x): 67 | if self.use_res_connect: 68 | return x + self.conv(x) 69 | else: 70 | return self.conv(x) 71 | 72 | 73 | class MobileNetV2(Backbone): 74 | """ 75 | Should freeze bn 76 | """ 77 | 78 | def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.0): 79 | super(MobileNetV2, self).__init__() 80 | block = InvertedResidual 81 | input_channel = 32 82 | interverted_residual_setting = [ 83 | # t, c, n, s 84 | [1, 16, 1, 1], 85 | [6, 24, 2, 2], 86 | [6, 32, 3, 2], 87 | [6, 64, 4, 2], 88 | [6, 96, 3, 1], 89 | [6, 160, 3, 
2], 90 | [6, 320, 1, 1], 91 | ] 92 | 93 | # building first layer 94 | assert input_size % 32 == 0 95 | input_channel = int(input_channel * width_mult) 96 | self.return_features_indices = [3, 6, 13, 17] 97 | self.return_features_num_channels = [] 98 | self.features = nn.ModuleList([conv_bn(3, input_channel, 2)]) 99 | # building inverted residual blocks 100 | for t, c, n, s in interverted_residual_setting: 101 | output_channel = int(c * width_mult) 102 | for i in range(n): 103 | if i == 0: 104 | self.features.append( 105 | block(input_channel, output_channel, s, expand_ratio=t) 106 | ) 107 | else: 108 | self.features.append( 109 | block(input_channel, output_channel, 1, expand_ratio=t) 110 | ) 111 | input_channel = output_channel 112 | if len(self.features) - 1 in self.return_features_indices: 113 | self.return_features_num_channels.append(output_channel) 114 | 115 | self._initialize_weights() 116 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) 117 | 118 | def _freeze_backbone(self, freeze_at): 119 | for layer_index in range(freeze_at): 120 | for p in self.features[layer_index].parameters(): 121 | p.requires_grad = False 122 | 123 | def forward(self, x): 124 | res = [] 125 | for i, m in enumerate(self.features): 126 | x = m(x) 127 | if i in self.return_features_indices: 128 | res.append(x) 129 | return {"res{}".format(i + 2): r for i, r in enumerate(res)} 130 | 131 | def _initialize_weights(self): 132 | for m in self.modules(): 133 | if isinstance(m, Conv2d): 134 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 135 | m.weight.data.normal_(0, (2.0 / n) ** 0.5) 136 | if m.bias is not None: 137 | m.bias.data.zero_() 138 | elif isinstance(m, BatchNorm2d): 139 | m.weight.data.fill_(1) 140 | m.bias.data.zero_() 141 | elif isinstance(m, nn.Linear): 142 | n = m.weight.size(1) 143 | m.weight.data.normal_(0, 0.01) 144 | m.bias.data.zero_() 145 | 146 | 147 | @BACKBONE_REGISTRY.register() 148 | def build_mnv2_backbone(cfg, input_shape): 149 | """ 150 | Create a ResNet instance from config. 151 | 152 | Returns: 153 | ResNet: a :class:`ResNet` instance. 154 | """ 155 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 156 | 157 | out_feature_channels = {"res2": 24, "res3": 32, "res4": 96, "res5": 320} 158 | out_feature_strides = {"res2": 4, "res3": 8, "res4": 16, "res5": 32} 159 | model = MobileNetV2(cfg) 160 | model._out_features = out_features 161 | model._out_feature_channels = out_feature_channels 162 | model._out_feature_strides = out_feature_strides 163 | return model 164 | -------------------------------------------------------------------------------- /core/modeling/backbone/vovnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. 
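# Descriptive note (added): VoVNet backbone built from OSA (One-Shot Aggregation) blocks.
# Each block below runs a short chain of 3x3 convs and concatenates every intermediate
# feature map once at the end, then applies a 1x1 "concat" projection and an eSE
# channel-attention module; the _STAGE_SPECS dicts only differ in depth
# (layers per block and blocks per stage).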
2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from detectron2.layers import FrozenBatchNorm2d, ShapeSpec, get_norm 8 | from detectron2.modeling.backbone import Backbone 9 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 10 | from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool 11 | 12 | __all__ = ["VoVNet", "build_vovnet_backbone", "build_vovnet_fpn_backbone"] 13 | 14 | _NORM = False 15 | 16 | VoVNet19_eSE = { 17 | "stage_conv_ch": [128, 160, 192, 224], 18 | "stage_out_ch": [256, 512, 768, 1024], 19 | "layer_per_block": 3, 20 | "block_per_stage": [1, 1, 1, 1], 21 | "eSE": True, 22 | } 23 | 24 | VoVNet39_eSE = { 25 | "stage_conv_ch": [128, 160, 192, 224], 26 | "stage_out_ch": [256, 512, 768, 1024], 27 | "layer_per_block": 5, 28 | "block_per_stage": [1, 1, 2, 2], 29 | "eSE": True, 30 | } 31 | 32 | VoVNet57_eSE = { 33 | "stage_conv_ch": [128, 160, 192, 224], 34 | "stage_out_ch": [256, 512, 768, 1024], 35 | "layer_per_block": 5, 36 | "block_per_stage": [1, 1, 4, 3], 37 | "eSE": True, 38 | } 39 | 40 | VoVNet99_eSE = { 41 | "stage_conv_ch": [128, 160, 192, 224], 42 | "stage_out_ch": [256, 512, 768, 1024], 43 | "layer_per_block": 5, 44 | "block_per_stage": [1, 3, 9, 3], 45 | "eSE": True, 46 | } 47 | 48 | _STAGE_SPECS = { 49 | "V-19-eSE": VoVNet19_eSE, 50 | "V-39-eSE": VoVNet39_eSE, 51 | "V-57-eSE": VoVNet57_eSE, 52 | "V-99-eSE": VoVNet99_eSE, 53 | } 54 | 55 | 56 | def conv3x3( 57 | in_channels, 58 | out_channels, 59 | module_name, 60 | postfix, 61 | stride=1, 62 | groups=1, 63 | kernel_size=3, 64 | padding=1, 65 | ): 66 | """3x3 convolution with padding""" 67 | return [ 68 | ( 69 | f"{module_name}_{postfix}/conv", 70 | nn.Conv2d( 71 | in_channels, 72 | out_channels, 73 | kernel_size=kernel_size, 74 | stride=stride, 75 | padding=padding, 76 | groups=groups, 77 | bias=False, 78 | ), 79 | ), 80 | (f"{module_name}_{postfix}/norm", get_norm(_NORM, out_channels)), 81 | (f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)), 82 | ] 83 | 84 | 85 | def conv1x1( 86 | in_channels, 87 | out_channels, 88 | module_name, 89 | postfix, 90 | stride=1, 91 | groups=1, 92 | kernel_size=1, 93 | padding=0, 94 | ): 95 | """1x1 convolution with padding""" 96 | return [ 97 | ( 98 | f"{module_name}_{postfix}/conv", 99 | nn.Conv2d( 100 | in_channels, 101 | out_channels, 102 | kernel_size=kernel_size, 103 | stride=stride, 104 | padding=padding, 105 | groups=groups, 106 | bias=False, 107 | ), 108 | ), 109 | (f"{module_name}_{postfix}/norm", get_norm(_NORM, out_channels)), 110 | (f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)), 111 | ] 112 | 113 | 114 | class Hsigmoid(nn.Module): 115 | def __init__(self, inplace=True): 116 | super(Hsigmoid, self).__init__() 117 | self.inplace = inplace 118 | 119 | def forward(self, x): 120 | return F.relu6(x + 3.0, inplace=self.inplace) / 6.0 121 | 122 | 123 | class eSEModule(nn.Module): 124 | def __init__(self, channel, reduction=4): 125 | super(eSEModule, self).__init__() 126 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 127 | self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0) 128 | self.hsigmoid = Hsigmoid() 129 | 130 | def forward(self, x): 131 | input = x 132 | x = self.avg_pool(x) 133 | x = self.fc(x) 134 | x = self.hsigmoid(x) 135 | return input * x 136 | 137 | 138 | class _OSA_module(nn.Module): 139 | def __init__( 140 | self, 141 | in_ch, 142 | stage_ch, 143 | concat_ch, 144 | layer_per_block, 145 | module_name, 146 | SE=False, 147 | identity=False, 148 
| ): 149 | 150 | super(_OSA_module, self).__init__() 151 | 152 | self.identity = identity 153 | self.layers = nn.ModuleList() 154 | in_channel = in_ch 155 | for i in range(layer_per_block): 156 | self.layers.append( 157 | nn.Sequential( 158 | OrderedDict(conv3x3(in_channel, stage_ch, module_name, i)) 159 | ) 160 | ) 161 | in_channel = stage_ch 162 | 163 | # feature aggregation 164 | in_channel = in_ch + layer_per_block * stage_ch 165 | self.concat = nn.Sequential( 166 | OrderedDict(conv1x1(in_channel, concat_ch, module_name, "concat")) 167 | ) 168 | 169 | self.ese = eSEModule(concat_ch) 170 | 171 | def forward(self, x): 172 | 173 | identity_feat = x 174 | 175 | output = [] 176 | output.append(x) 177 | for layer in self.layers: 178 | x = layer(x) 179 | output.append(x) 180 | 181 | x = torch.cat(output, dim=1) 182 | xt = self.concat(x) 183 | 184 | xt = self.ese(xt) 185 | 186 | if self.identity: 187 | xt = xt + identity_feat 188 | 189 | return xt 190 | 191 | 192 | class _OSA_stage(nn.Sequential): 193 | def __init__( 194 | self, 195 | in_ch, 196 | stage_ch, 197 | concat_ch, 198 | block_per_stage, 199 | layer_per_block, 200 | stage_num, 201 | SE=False, 202 | ): 203 | super(_OSA_stage, self).__init__() 204 | 205 | if not stage_num == 2: 206 | self.add_module( 207 | "Pooling", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) 208 | ) 209 | 210 | if block_per_stage != 1: 211 | SE = False 212 | module_name = f"OSA{stage_num}_1" 213 | self.add_module( 214 | module_name, 215 | _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE), 216 | ) 217 | for i in range(block_per_stage - 1): 218 | if i != block_per_stage - 2: # last block 219 | SE = False 220 | module_name = f"OSA{stage_num}_{i + 2}" 221 | self.add_module( 222 | module_name, 223 | _OSA_module( 224 | concat_ch, 225 | stage_ch, 226 | concat_ch, 227 | layer_per_block, 228 | module_name, 229 | SE, 230 | identity=True, 231 | ), 232 | ) 233 | 234 | 235 | class VoVNet(Backbone): 236 | def __init__(self, cfg, input_ch, out_features=None): 237 | """ 238 | Args: 239 | input_ch(int) : the number of input channel 240 | out_features (list[str]): name of the layers whose outputs should 241 | be returned in forward. Can be anything in "stem", "stage2" ... 242 | """ 243 | super(VoVNet, self).__init__() 244 | 245 | global _NORM 246 | _NORM = cfg.MODEL.VOVNET.NORM 247 | 248 | stage_specs = _STAGE_SPECS[cfg.MODEL.VOVNET.CONV_BODY] 249 | 250 | config_stage_ch = stage_specs["stage_conv_ch"] 251 | config_concat_ch = stage_specs["stage_out_ch"] 252 | block_per_stage = stage_specs["block_per_stage"] 253 | layer_per_block = stage_specs["layer_per_block"] 254 | SE = stage_specs["eSE"] 255 | 256 | self._out_features = out_features 257 | 258 | # Stem module 259 | stem = conv3x3(input_ch, 64, "stem", "1", 2) 260 | stem += conv3x3(64, 64, "stem", "2", 1) 261 | stem += conv3x3(64, 128, "stem", "3", 2) 262 | self.add_module("stem", nn.Sequential((OrderedDict(stem)))) 263 | current_stirde = 4 264 | self._out_feature_strides = {"stem": current_stirde, "stage2": current_stirde} 265 | self._out_feature_channels = {"stem": 128} 266 | 267 | stem_out_ch = [128] 268 | in_ch_list = stem_out_ch + config_concat_ch[:-1] 269 | # OSA stages 270 | self.stage_names = [] 271 | for i in range(4): # num_stages 272 | name = "stage%d" % (i + 2) # stage 2 ... 
stage 5 273 | self.stage_names.append(name) 274 | self.add_module( 275 | name, 276 | _OSA_stage( 277 | in_ch_list[i], 278 | config_stage_ch[i], 279 | config_concat_ch[i], 280 | block_per_stage[i], 281 | layer_per_block, 282 | i + 2, 283 | SE, 284 | ), 285 | ) 286 | 287 | self._out_feature_channels[name] = config_concat_ch[i] 288 | if not i == 0: 289 | self._out_feature_strides[name] = current_stirde = int( 290 | current_stirde * 2 291 | ) 292 | 293 | # initialize weights 294 | self._initialize_weights() 295 | # Optionally freeze (requires_grad=False) parts of the backbone 296 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) 297 | 298 | def _initialize_weights(self): 299 | for m in self.modules(): 300 | if isinstance(m, nn.Conv2d): 301 | nn.init.kaiming_normal_(m.weight) 302 | 303 | def _freeze_backbone(self, freeze_at): 304 | if freeze_at < 0: 305 | return 306 | 307 | for stage_index in range(freeze_at): 308 | if stage_index == 0: 309 | m = self.stem # stage 0 is the stem 310 | else: 311 | m = getattr(self, "stage" + str(stage_index + 1)) 312 | for p in m.parameters(): 313 | p.requires_grad = False 314 | FrozenBatchNorm2d.convert_frozen_batchnorm(self) 315 | 316 | def forward(self, x): 317 | outputs = {} 318 | x = self.stem(x) 319 | if "stem" in self._out_features: 320 | outputs["stem"] = x 321 | for name in self.stage_names: 322 | x = getattr(self, name)(x) 323 | if name in self._out_features: 324 | outputs[name] = x 325 | 326 | return outputs 327 | 328 | def output_shape(self): 329 | return { 330 | name: ShapeSpec( 331 | channels=self._out_feature_channels[name], 332 | stride=self._out_feature_strides[name], 333 | ) 334 | for name in self._out_features 335 | } 336 | 337 | 338 | @BACKBONE_REGISTRY.register() 339 | def build_vovnet_backbone(cfg, input_shape): 340 | """ 341 | Create a VoVNet instance from config. 342 | 343 | Returns: 344 | VoVNet: a :class:`VoVNet` instance. 345 | """ 346 | out_features = cfg.MODEL.VOVNET.OUT_FEATURES 347 | return VoVNet(cfg, input_shape.channels, out_features=out_features) 348 | 349 | 350 | @BACKBONE_REGISTRY.register() 351 | def build_vovnet_fpn_backbone(cfg, input_shape: ShapeSpec): 352 | """ 353 | Args: 354 | cfg: a detectron2 CfgNode 355 | 356 | Returns: 357 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
358 | """ 359 | bottom_up = build_vovnet_backbone(cfg, input_shape) 360 | in_features = cfg.MODEL.FPN.IN_FEATURES 361 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 362 | backbone = FPN( 363 | bottom_up=bottom_up, 364 | in_features=in_features, 365 | out_channels=out_channels, 366 | norm=cfg.MODEL.FPN.NORM, 367 | top_block=LastLevelMaxPool(), 368 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 369 | ) 370 | return backbone 371 | -------------------------------------------------------------------------------- /core/modeling/dsnake_baseline/__init__.py: -------------------------------------------------------------------------------- 1 | from .af_two_stage import FcosSnake -------------------------------------------------------------------------------- /core/modeling/dsnake_baseline/af_two_stage.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.structures import ImageList 6 | from detectron2.utils.logger import log_first_n 7 | 8 | from detectron2.modeling.backbone import build_backbone 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 11 | 12 | from .postprocessing import ( 13 | detector_postprocess 14 | ) 15 | 16 | from detectron2.structures import Instances, Boxes 17 | from core.structures import ExtremePoints 18 | 19 | from .dsnake_head import SnakeFPNHead 20 | 21 | 22 | @META_ARCH_REGISTRY.register() 23 | class FcosSnake(nn.Module): 24 | def __init__(self, cfg): 25 | super().__init__() 26 | self.device = torch.device(cfg.MODEL.DEVICE) 27 | 28 | self.backbone = build_backbone(cfg) 29 | self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) 30 | self.refinement_head = SnakeFPNHead(cfg, self.backbone.output_shape()) 31 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 32 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 33 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 34 | self.to(self.device) 35 | 36 | self.gt_input = cfg.TEST.GT_IN.WHAT if cfg.TEST.GT_IN.ON else (None,) 37 | 38 | def forward(self, batched_inputs): 39 | images = [x["image"].to(self.device) for x in batched_inputs] 40 | images = [self.normalizer(x) for x in images] 41 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 42 | 43 | features = self.backbone(images.tensor) 44 | 45 | if "instances" in batched_inputs[0] : 46 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 47 | elif "targets" in batched_inputs[0]: 48 | log_first_n( 49 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 50 | ) 51 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 52 | else: 53 | gt_instances = None 54 | 55 | proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) 56 | 57 | if not self.training: 58 | if 'instance' in self.gt_input: 59 | assert gt_instances is not None 60 | 61 | for im_i in range(len(gt_instances)): 62 | gt_instances_per_im = gt_instances[im_i] 63 | bboxes = gt_instances_per_im.gt_boxes.tensor 64 | instances_per_im = Instances(proposals[im_i]._image_size) 65 | instances_per_im.pred_boxes = Boxes(bboxes) 66 | instances_per_im.pred_classes = gt_instances_per_im.gt_classes 67 | instances_per_im.scores = torch.ones_like(gt_instances_per_im.gt_classes).to(bboxes.device) 68 | 69 | if 
gt_instances_per_im.has("gt_masks"): 70 | gt_masks = gt_instances_per_im.gt_masks 71 | ext_pts_off = self.refinement_head.refine_head.get_simple_extreme_points( 72 | gt_masks.polygons).to(bboxes.device) 73 | ex_t = torch.stack([ext_pts_off[:, None, 0], bboxes[:, None, 1]], dim=2) 74 | ex_l = torch.stack([bboxes[:, None, 0], ext_pts_off[:, None, 1]], dim=2) 75 | ex_b = torch.stack([ext_pts_off[:, None, 2], bboxes[:, None, 3]], dim=2) 76 | ex_r = torch.stack([bboxes[:, None, 2], ext_pts_off[:, None, 3]], dim=2) 77 | instances_per_im.ext_points = ExtremePoints( 78 | torch.cat([ex_t, ex_l, ex_b, ex_r], dim=1)) 79 | else: 80 | quad = self.refinement_head.refine_head.get_quadrangle(bboxes).view(-1, 4, 2) 81 | instances_per_im.ext_points = ExtremePoints(quad) 82 | 83 | proposals[im_i] = instances_per_im 84 | 85 | head_losses, proposals = self.refinement_head(features, proposals, gt_instances) 86 | 87 | # In training, the proposals are not useful at all in RPN models; but not here 88 | # This makes RPN-only models about 5% slower. 89 | if self.training: 90 | proposal_losses.update(head_losses) 91 | return proposal_losses 92 | 93 | processed_results = [] 94 | for results_per_image, input_per_image, image_size in zip( 95 | proposals, batched_inputs, images.image_sizes 96 | ): 97 | height = input_per_image.get("height", image_size[0]) 98 | width = input_per_image.get("width", image_size[1]) 99 | instance_r = detector_postprocess(results_per_image, 100 | height, 101 | width) 102 | processed_results.append( 103 | {"instances": instance_r} 104 | ) 105 | 106 | return processed_results 107 | -------------------------------------------------------------------------------- /core/modeling/dsnake_baseline/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | import numpy as np 4 | from torch.nn import functional as F 5 | import functools 6 | import multiprocessing as mp 7 | from detectron2.layers import ROIAlign 8 | from detectron2.structures import Instances, polygons_to_bitmask 9 | import pycocotools.mask as mask_util 10 | from core.structures import PolygonPoints 11 | 12 | 13 | def get_polygon_rles(polygons, image_shape): 14 | # input: N x (p*2) 15 | polygons = polygons.cpu().numpy() 16 | h, w = image_shape 17 | rles = [ 18 | mask_util.merge(mask_util.frPyObjects([p.tolist()], h, w)) 19 | for p in polygons 20 | ] 21 | return rles 22 | 23 | 24 | def detector_postprocess(results, 25 | output_height, 26 | output_width): 27 | """ 28 | Resize the output instances. 29 | The input images are often resized when entering an object detector. 30 | As a result, we often need the outputs of the detector in a different 31 | resolution from its inputs. 32 | 33 | This function will resize the raw outputs of an R-CNN detector 34 | to produce outputs according to the desired output resolution. 35 | 36 | Args: 37 | results (Instances): the raw outputs from the detector. 38 | `results.image_size` contains the input image resolution the detector sees. 39 | This object might be modified in-place. 40 | output_height, output_width: the desired output resolution. 
41 | 42 | Returns: 43 | Instances: the resized output from the model, based on the output resolution 44 | """ 45 | # the results.image_size here is the one the model saw, typically (800, xxxx) 46 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 47 | results = Instances((output_height, output_width), **results.get_fields()) 48 | 49 | if results.has("pred_boxes"): 50 | output_boxes = results.pred_boxes 51 | elif results.has("proposal_boxes"): 52 | output_boxes = results.proposal_boxes 53 | 54 | output_boxes.scale(scale_x, scale_y) 55 | # now the results.image_size is the one of raw input image 56 | output_boxes.clip(results.image_size) 57 | 58 | results = results[output_boxes.nonempty()] 59 | 60 | if results.has("pred_polys"): 61 | if results.has("pred_path"): 62 | snake_path = results.pred_path 63 | for i in range(snake_path.size(1)): # number of evolution 64 | current_poly = PolygonPoints(snake_path[:, i, :, :]) 65 | current_poly.scale(scale_x, scale_y) 66 | current_poly.clip(results.image_size) 67 | snake_path[:, i, :, :] = current_poly.tensor 68 | 69 | results.pred_polys.scale(scale_x, scale_y) 70 | results.pred_polys.clip(results.image_size) 71 | results.pred_masks = get_polygon_rles(results.pred_polys.flatten(), 72 | (output_height, output_width)) 73 | return results 74 | 75 | else: 76 | raise ValueError('No pred_polys in instance prediction!') 77 | -------------------------------------------------------------------------------- /core/modeling/edge_snake/__init__.py: -------------------------------------------------------------------------------- 1 | from .dance import Dance 2 | from .edge_det import build_edge_det_head 3 | -------------------------------------------------------------------------------- /core/modeling/edge_snake/dance.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.structures import ImageList 6 | from detectron2.utils.logger import log_first_n 7 | 8 | from detectron2.modeling.backbone import build_backbone 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 11 | 12 | from .edge_det import build_edge_det_head 13 | from core.modeling.postprocessing import detector_postprocess, edge_map_postprocess 14 | 15 | from core.utils import timer 16 | 17 | 18 | @META_ARCH_REGISTRY.register() 19 | class Dance(nn.Module): 20 | def __init__(self, cfg): 21 | super().__init__() 22 | self.device = torch.device(cfg.MODEL.DEVICE) 23 | 24 | self.backbone = build_backbone(cfg) 25 | self.proposal_generator = build_proposal_generator( 26 | cfg, self.backbone.output_shape() 27 | ) 28 | 29 | self.refinement_head = build_edge_det_head(cfg, self.backbone.output_shape()) 30 | 31 | self.mask_result_src = cfg.MODEL.DANCE.MASK_IN 32 | 33 | self.semantic_filter = cfg.MODEL.DANCE.SEMANTIC_FILTER 34 | self.semantic_filter_th = cfg.MODEL.DANCE.SEMANTIC_FILTER_TH 35 | 36 | self.need_concave_hull = ( 37 | True if cfg.MODEL.SNAKE_HEAD.LOSS_TYPE == "chamfer" else False 38 | ) 39 | 40 | self.roi_size = cfg.MODEL.DANCE.ROI_SIZE 41 | 42 | self.re_compute_box = cfg.MODEL.DANCE.RE_COMP_BOX 43 | 44 | self.visualize_path = cfg.MODEL.SNAKE_HEAD.VIS_PATH 45 | 46 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 47 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 48 | self.normalizer = lambda 
x: (x - pixel_mean) / pixel_std 49 | self.to(self.device) 50 | 51 | def single_test(self, batched_inputs): 52 | assert len(batched_inputs) == 1 53 | with timer.env("preprocess"): 54 | images = batched_inputs[0]["image"].to(self.device) 55 | images = self.normalizer(images) 56 | images = ImageList.from_tensors([images], self.backbone.size_divisibility) 57 | 58 | with timer.env("backbone"): 59 | features = self.backbone(images.tensor) 60 | 61 | gt_instances = None 62 | gt_sem_seg = None 63 | 64 | with timer.env("fcose"): 65 | proposals, proposal_losses = self.proposal_generator( 66 | images, features, gt_instances 67 | ) 68 | 69 | if self.mask_result_src != "BOX": 70 | edge_map, head_losses, proposals = self.refinement_head( 71 | features, proposals, (gt_sem_seg, [gt_instances, images.image_sizes]) 72 | ) 73 | 74 | with timer.env("postprocess"): 75 | height = batched_inputs[0].get("height", images.image_sizes[0][0]) 76 | width = batched_inputs[0].get("width", images.image_sizes[0][1]) 77 | instance_r = detector_postprocess( 78 | self.semantic_filter, 79 | self.semantic_filter_th, 80 | self.mask_result_src, 81 | proposals[0], 82 | height, 83 | width, 84 | self.roi_size, 85 | self.need_concave_hull, 86 | self.re_compute_box, 87 | ) 88 | processed_results = [{"instances": instance_r}] 89 | return processed_results 90 | 91 | def forward(self, batched_inputs): 92 | """ 93 | Args: 94 | Same as in :class:`GeneralizedRCNN.forward` 95 | 96 | Returns: 97 | list[dict]: 98 | Each dict is the output for one input image. 99 | The dict contains one key "proposals" whose value is a 100 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 101 | """ 102 | if not self.training and not self.visualize_path: 103 | return self.single_test(batched_inputs) 104 | 105 | with timer.env("preprocess"): 106 | images = [x["image"].to(self.device) for x in batched_inputs] 107 | images = [self.normalizer(x) for x in images] 108 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 109 | 110 | with timer.env("backbone"): 111 | features = self.backbone(images.tensor) 112 | 113 | if "instances" in batched_inputs[0]: 114 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 115 | elif "targets" in batched_inputs[0]: 116 | log_first_n( 117 | logging.WARN, 118 | "'targets' in the model inputs is now renamed to 'instances'!", 119 | n=10, 120 | ) 121 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 122 | else: 123 | gt_instances = None 124 | 125 | if "sem_seg" in batched_inputs[0]: 126 | gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] 127 | gt_sem_seg = ImageList.from_tensors( 128 | gt_sem_seg, 129 | self.backbone.size_divisibility, 130 | self.refinement_head.ignore_value, 131 | ).tensor 132 | else: 133 | gt_sem_seg = None 134 | 135 | with timer.env("fcose"): 136 | proposals, proposal_losses = self.proposal_generator( 137 | images, features, gt_instances 138 | ) 139 | edge_map, head_losses, proposals = self.refinement_head( 140 | features, proposals, (gt_sem_seg, [gt_instances, images.image_sizes]) 141 | ) 142 | 143 | # In training, the proposals are not useful at all in RPN models; but not here 144 | # This makes RPN-only models about 5% slower. 
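        # During training only the merged loss dict (proposal + refinement-head losses) is
        # returned; this inference branch is reached only when contour-path visualization is
        # enabled, otherwise single_test() above already handled the forward pass.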
145 | if self.training: 146 | timer.reset() 147 | proposal_losses.update(head_losses) 148 | return proposal_losses 149 | 150 | processed_results = [] 151 | 152 | with timer.env("postprocess"): 153 | for per_edge_map, results_per_image, input_per_image, image_size in zip( 154 | edge_map, proposals, batched_inputs, images.image_sizes 155 | ): 156 | height = input_per_image.get("height", image_size[0]) 157 | width = input_per_image.get("width", image_size[1]) 158 | # TODO (OPT): NO need for interpolate then back for real speed test 159 | with timer.env("extra"): 160 | edge_map_r = edge_map_postprocess( 161 | per_edge_map, image_size, height, width 162 | ) 163 | instance_r = detector_postprocess( 164 | self.semantic_filter, 165 | self.semantic_filter_th, 166 | self.mask_result_src, 167 | results_per_image, 168 | height, 169 | width, 170 | self.roi_size, 171 | self.need_concave_hull, 172 | self.re_compute_box, 173 | ) 174 | processed_results.append( 175 | {"instances": instance_r, "edge_map": edge_map_r}, 176 | ) 177 | return processed_results 178 | -------------------------------------------------------------------------------- /core/modeling/edge_snake/draft.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from skimage import measure 6 | from shapely.geometry import Polygon 7 | import pycocotools.mask as mask_util 8 | 9 | from detectron2.layers import Conv2d, DeformConv, cat, ModulatedDeformConv 10 | from core.structures import ExtremePoints, PolygonPoints 11 | 12 | from core.layers import DFConv2d, SmoothL1Loss, ChamferLoss, extreme_utils 13 | 14 | from core.modeling.fcose.utils import get_extreme_points 15 | 16 | from .deform_head import DeformNet 17 | 18 | from detectron2.utils import timer 19 | 20 | def sample_octagons(self, pred_instances): 21 | poly_sample_locations = [] 22 | image_index = [] 23 | for im_i in range(len(pred_instances)): 24 | instance_per_im = pred_instances[im_i] 25 | ext_points = instance_per_im.ext_points 26 | octagons_per_im = ext_points.get_octagons().cpu().numpy().reshape(-1, 8, 2) 27 | for oct in octagons_per_im: 28 | # sampling from octagon 29 | oct_sampled_pts = self.uniform_sample(oct, self.num_sampling) 30 | 31 | oct_sampled_pts = oct_sampled_pts[::-1] if Polygon( 32 | oct_sampled_pts).exterior.is_ccw else oct_sampled_pts 33 | assert not Polygon(oct_sampled_pts).exterior.is_ccw, '1) contour must be clock-wise!' 34 | 35 | poly_sample_locations.append(torch.tensor(oct_sampled_pts, device=ext_points.device)) 36 | image_index.append(im_i) 37 | 38 | if not poly_sample_locations: 39 | return poly_sample_locations, image_index 40 | 41 | poly_sample_locations = torch.stack(poly_sample_locations, dim=0) 42 | image_index = torch.tensor(image_index) 43 | return poly_sample_locations, image_index 44 | 45 | 46 | def compute_loss_for_maskious(self, classes, targets, location_preds, scores): 47 | if isinstance(location_preds, list): 48 | # e.g. 
4*sum{k}, 128, 2 49 | classes = classes.repeat(len(location_preds)) 50 | targets = targets.repeat(len(location_preds), 1, 1) 51 | location_preds = torch.cat(location_preds, dim=0) 52 | elif len(location_preds) % len(classes) == 0: 53 | ratio = int(len(location_preds) / len(classes)) 54 | classes = classes.repeat(ratio) 55 | targets = targets.repeat(ratio, 1, 1) 56 | else: 57 | raise ValueError('Number of pairs not match!') 58 | 59 | targets_np = targets.cpu().numpy().reshape(targets.size(0), -1) 60 | location_preds_np = location_preds.cpu().numpy().reshape(location_preds.size(0), -1) 61 | ious_w_valid = [] 62 | for (t, l) in zip(targets_np, location_preds_np): 63 | ious_w_valid.append(_compute_iou_coco(t, l, self.ms_min_area)) 64 | ious_w_valid = torch.tensor(ious_w_valid, device=targets.device) 65 | select = ious_w_valid[:, 0].bool() 66 | ious = ious_w_valid[:, 1] 67 | 68 | maskiou_t = ious[select] 69 | classes = classes[select] 70 | scores = scores[select] 71 | 72 | if len(scores) == 0: 73 | return maskiou_t.sum() * 0 74 | 75 | maskiou_p = torch.gather(scores, dim=1, index=classes[:, None]).view(-1) 76 | return F.smooth_l1_loss(maskiou_p, maskiou_t, reduction='mean') 77 | 78 | 79 | def single_segment_matching(num_sampling, dense_targets, sampled_pts, edge_idx): 80 | ext_idx = edge_idx[::3] # try ext first, if work then consider finer segments 81 | aug_ext_idx = torch.cat([ext_idx, torch.tensor([num_sampling], device=ext_idx.device)], dim=0) 82 | ch_pts = sampled_pts[ext_idx] # characteristic points 83 | diff = (ch_pts[:, None, :] - dense_targets[None, :, :]).pow(2).sum(2) 84 | min_idx = torch.argmin(diff, dim=1) 85 | # TODO: hard-code 3x. 86 | aug_min_idx = torch.cat([min_idx, torch.tensor([num_sampling * 3], device=min_idx.device)], dim=0) 87 | 88 | # estimate curvature 89 | shift_d_l = torch.cat([dense_targets[1:], dense_targets[:1]], dim=0) 90 | shift_d_r = torch.cat([dense_targets[-1:], dense_targets[:-1]], dim=0) 91 | cur = ((shift_d_l + shift_d_r) / 2 - dense_targets).pow(2).sum(1) 92 | 93 | cur[::3] += 1e-9 # regular pulses. 94 | 95 | segments = [] 96 | for i in range(4): 97 | mask = torch.zeros_like(cur) 98 | mask[aug_min_idx[i]:aug_min_idx[i + 1]] = 1 99 | interest_idx = torch.argsort(mask * cur, descending=True)[:aug_ext_idx[i + 1] - aug_ext_idx[i]] 100 | segments.append(torch.sort(interest_idx)[0]) 101 | segments = torch.cat(segments) 102 | return dense_targets[segments] 103 | 104 | def single_uniform_segment_matching(self, dense_targets, sampled_pts, edge_idx): 105 | ext_idx = edge_idx[::3] # try ext first, if work then consider finer segments 106 | aug_ext_idx = torch.cat([ext_idx, torch.tensor([self.num_sampling - 1], device=ext_idx.device)], dim=0) 107 | ch_pts = sampled_pts[ext_idx] # characteristic points 108 | diff = (ch_pts[:, None, :] - dense_targets[None, :, :]).pow(2).sum(2) 109 | min_idx = torch.argmin(diff, dim=1) 110 | # TODO: hard-code 3x. 
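    # dense_targets is assumed to be sampled at 3 * num_sampling points along the ground-truth
    # contour, hence the closing sentinel index num_sampling * 3 - 1 appended below.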
111 | aug_min_idx = torch.cat([min_idx, torch.tensor([self.num_sampling * 3 - 1], device=min_idx.device)], dim=0) 112 | 113 | before_i = 0 114 | after_i = 1 115 | 116 | segments = [] 117 | for i in range(4): 118 | original_len = aug_min_idx[after_i] - aug_min_idx[before_i] 119 | assert original_len >= 0 120 | if original_len == 0: 121 | after_i += 1 122 | continue 123 | 124 | desired_num_seg = aug_ext_idx[after_i] - aug_ext_idx[before_i] 125 | assert desired_num_seg >= 0 126 | if desired_num_seg == 0: 127 | before_i += 1 128 | after_i += 1 129 | continue 130 | 131 | re_sampled_pts = self.uniform_sample_1d( 132 | dense_targets[aug_min_idx[before_i]: aug_min_idx[after_i]], 133 | desired_num_seg) 134 | 135 | segments.append(re_sampled_pts) 136 | 137 | segments = np.concatenate(segments, axis=0) 138 | assert len(segments) == self.num_sampling 139 | return segments 140 | 141 | 142 | def segment_matching(dense_targets, sampled_pts, edge_idx): 143 | ext_idx = edge_idx[:, ::3] # try ext first, if work then consider finer segments 144 | seq_idx = torch.arange(ext_idx.size(0)).repeat_interleave(ext_idx.size(1)).to(ext_idx.device) 145 | ch_pts = sampled_pts[seq_idx, ext_idx.view(-1)].reshape(ext_idx.size(0), ext_idx.size(1), 2) # characteristic points 146 | diffs = (ch_pts[:, :, None, :] - dense_targets[:, None, :, :]).pow(2).sum(3) 147 | min_idx = torch.argmin(diffs, dim=2) 148 | 149 | 150 | def uniform_sample_1d(pts, new_n): 151 | n = pts.shape[0] 152 | if n == new_n: 153 | return pts 154 | # len: n - 1 155 | segment_len = np.sqrt(np.sum((pts[1:] - pts[:-1]) ** 2, axis=1)) 156 | 157 | # down-sample or up-sample 158 | # n 159 | start_node = np.cumsum(np.concatenate([np.array([0]), segment_len])) 160 | total_len = np.sum(segment_len) 161 | 162 | new_per_len = total_len / new_n 163 | 164 | mark_1d = ((np.arange(new_n-1) + 1) * new_per_len).reshape(-1, 1) 165 | locate = (start_node.reshape(1, -1) - mark_1d) 166 | iss, jss = np.where(locate > 0) 167 | cut_idx = np.cumsum(np.unique(iss, return_counts=True)[1]) 168 | cut_idx = np.concatenate([np.array([0]), cut_idx[:-1]]) 169 | 170 | after_idx = jss[cut_idx] 171 | before_idx = after_idx - 1 172 | 173 | after_idx[after_idx < 0] = 0 174 | 175 | before = locate[np.arange(new_n-1), before_idx] 176 | after = locate[np.arange(new_n-1), after_idx] 177 | 178 | w = (- before / (after - before)).reshape(-1, 1) 179 | 180 | sampled_pts = (1 - w) * pts[before_idx] + w * pts[after_idx] 181 | 182 | return np.concatenate([pts[:1], sampled_pts, pts[-1:]], axis=0) 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | -------------------------------------------------------------------------------- /core/modeling/fcos/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | -------------------------------------------------------------------------------- /core/modeling/fcos/fcos.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Dict 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from detectron2.layers import ShapeSpec 8 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 9 | 10 | from core.layers import DFConv2d, IOULoss 11 | from .fcos_outputs import FCOSOutputs 12 | 13 | 14 | __all__ = ["FCOS"] 15 | 16 | INF = 100000000 17 | 18 | 19 | class Scale(nn.Module): 20 | def __init__(self, init_value=1.0): 21 | super(Scale, self).__init__() 22 | 
self.scale = nn.Parameter(torch.FloatTensor([init_value])) 23 | 24 | def forward(self, input): 25 | return input * self.scale 26 | 27 | 28 | @PROPOSAL_GENERATOR_REGISTRY.register() 29 | class FCOS(nn.Module): 30 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 31 | super().__init__() 32 | # fmt: off 33 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 34 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 35 | self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA 36 | self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA 37 | self.center_sample = cfg.MODEL.FCOS.CENTER_SAMPLE 38 | self.strides = cfg.MODEL.FCOS.FPN_STRIDES 39 | self.radius = cfg.MODEL.FCOS.POS_RADIUS 40 | self.pre_nms_thresh_train = cfg.MODEL.FCOS.INFERENCE_TH_TRAIN 41 | self.pre_nms_thresh_test = cfg.MODEL.FCOS.INFERENCE_TH_TEST 42 | self.pre_nms_topk_train = cfg.MODEL.FCOS.PRE_NMS_TOPK_TRAIN 43 | self.pre_nms_topk_test = cfg.MODEL.FCOS.PRE_NMS_TOPK_TEST 44 | self.nms_thresh = cfg.MODEL.FCOS.NMS_TH 45 | self.post_nms_topk_train = cfg.MODEL.FCOS.POST_NMS_TOPK_TRAIN 46 | self.post_nms_topk_test = cfg.MODEL.FCOS.POST_NMS_TOPK_TEST 47 | self.thresh_with_ctr = cfg.MODEL.FCOS.THRESH_WITH_CTR 48 | # fmt: on 49 | self.iou_loss = IOULoss(cfg.MODEL.FCOS.LOC_LOSS_TYPE) 50 | # generate sizes of interest 51 | soi = [] 52 | prev_size = -1 53 | for s in cfg.MODEL.FCOS.SIZES_OF_INTEREST: 54 | soi.append([prev_size, s]) 55 | prev_size = s 56 | soi.append([prev_size, INF]) 57 | self.sizes_of_interest = soi 58 | self.fcos_head = FCOSHead(cfg, [input_shape[f] for f in self.in_features]) 59 | 60 | def forward(self, images, features, gt_instances): 61 | """ 62 | Arguments: 63 | images (list[Tensor] or ImageList): images to be processed 64 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 65 | 66 | Returns: 67 | result (list[BoxList] or dict[Tensor]): the output from the model. 68 | During training, it returns a dict[Tensor] which contains the losses. 69 | During testing, it returns list[BoxList] contains additional fields 70 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 
71 | 72 | """ 73 | features = [features[f] for f in self.in_features] 74 | locations = self.compute_locations(features) 75 | logits_pred, reg_pred, ctrness_pred, bbox_towers = self.fcos_head(features) 76 | 77 | if self.training: 78 | pre_nms_thresh = self.pre_nms_thresh_train 79 | pre_nms_topk = self.pre_nms_topk_train 80 | post_nms_topk = self.post_nms_topk_train 81 | else: 82 | pre_nms_thresh = self.pre_nms_thresh_test 83 | pre_nms_topk = self.pre_nms_topk_test 84 | post_nms_topk = self.post_nms_topk_test 85 | 86 | outputs = FCOSOutputs( 87 | images, 88 | locations, 89 | logits_pred, 90 | reg_pred, 91 | ctrness_pred, 92 | self.focal_loss_alpha, 93 | self.focal_loss_gamma, 94 | self.iou_loss, 95 | self.center_sample, 96 | self.sizes_of_interest, 97 | self.strides, 98 | self.radius, 99 | self.fcos_head.num_classes, 100 | pre_nms_thresh, 101 | pre_nms_topk, 102 | self.nms_thresh, 103 | post_nms_topk, 104 | self.thresh_with_ctr, 105 | gt_instances 106 | ) 107 | 108 | if self.training: 109 | losses, _ = outputs.losses() 110 | return None, losses 111 | else: 112 | proposals = outputs.predict_proposals() 113 | return proposals, {} 114 | 115 | def compute_locations(self, features): 116 | locations = [] 117 | for level, feature in enumerate(features): 118 | h, w = feature.size()[-2:] 119 | locations_per_level = self.compute_locations_per_level( 120 | h, w, self.fpn_strides[level], 121 | feature.device 122 | ) 123 | locations.append(locations_per_level) 124 | return locations 125 | 126 | def compute_locations_per_level(self, h, w, stride, device): 127 | shifts_x = torch.arange( 128 | 0, w * stride, step=stride, 129 | dtype=torch.float32, device=device 130 | ) 131 | shifts_y = torch.arange( 132 | 0, h * stride, step=stride, 133 | dtype=torch.float32, device=device 134 | ) 135 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 136 | shift_x = shift_x.reshape(-1) 137 | shift_y = shift_y.reshape(-1) 138 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 139 | return locations 140 | 141 | 142 | class FCOSHead(nn.Module): 143 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 144 | """ 145 | Arguments: 146 | in_channels (int): number of channels of the input feature 147 | """ 148 | super().__init__() 149 | # TODO: Implement the sigmoid version first. 150 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 151 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 152 | head_configs = {"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, 153 | False), 154 | "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, 155 | cfg.MODEL.FCOS.USE_DEFORMABLE), 156 | "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, 157 | cfg.MODEL.FCOS.USE_DEFORMABLE)} 158 | norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM 159 | 160 | in_channels = [s.channels for s in input_shape] 161 | assert len(set(in_channels)) == 1, "Each level must have the same channel!" 
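        # The same towers and predictors built below are shared across all FPN levels,
        # which is why every level must provide the same number of channels.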
162 | in_channels = in_channels[0] 163 | 164 | for head in head_configs: 165 | tower = [] 166 | num_convs, use_deformable = head_configs[head] 167 | if use_deformable: 168 | conv_func = DFConv2d 169 | else: 170 | conv_func = nn.Conv2d 171 | for i in range(num_convs): 172 | tower.append(conv_func( 173 | in_channels, in_channels, 174 | kernel_size=3, stride=1, 175 | padding=1, bias=True 176 | )) 177 | if norm == "GN": 178 | tower.append(nn.GroupNorm(32, in_channels)) 179 | tower.append(nn.ReLU()) 180 | self.add_module('{}_tower'.format(head), 181 | nn.Sequential(*tower)) 182 | 183 | self.cls_logits = nn.Conv2d( 184 | in_channels, self.num_classes, 185 | kernel_size=3, stride=1, 186 | padding=1 187 | ) 188 | self.bbox_pred = nn.Conv2d( 189 | in_channels, 4, kernel_size=3, 190 | stride=1, padding=1 191 | ) 192 | self.ctrness = nn.Conv2d( 193 | in_channels, 1, kernel_size=3, 194 | stride=1, padding=1 195 | ) 196 | 197 | if cfg.MODEL.FCOS.USE_SCALE: 198 | self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in self.fpn_strides]) 199 | else: 200 | self.scales = None 201 | 202 | for modules in [ 203 | self.cls_tower, self.bbox_tower, 204 | self.share_tower, self.cls_logits, 205 | self.bbox_pred, self.ctrness 206 | ]: 207 | for l in modules.modules(): 208 | if isinstance(l, nn.Conv2d): 209 | torch.nn.init.normal_(l.weight, std=0.01) 210 | torch.nn.init.constant_(l.bias, 0) 211 | 212 | # initialize the bias for focal loss 213 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 214 | bias_value = -math.log((1 - prior_prob) / prior_prob) 215 | torch.nn.init.constant_(self.cls_logits.bias, bias_value) 216 | 217 | def forward(self, x): 218 | logits = [] 219 | bbox_reg = [] 220 | ctrness = [] 221 | bbox_towers = [] 222 | for l, feature in enumerate(x): 223 | feature = self.share_tower(feature) 224 | cls_tower = self.cls_tower(feature) 225 | bbox_tower = self.bbox_tower(feature) 226 | 227 | logits.append(self.cls_logits(cls_tower)) 228 | ctrness.append(self.ctrness(bbox_tower)) 229 | reg = self.bbox_pred(bbox_tower) 230 | if self.scales is not None: 231 | reg = self.scales[l](reg) 232 | # Note that we use relu, as in the improved FCOS, instead of exp. 
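                # relu keeps the predicted l/t/r/b distances non-negative; the optional
                # per-level Scale module applied above lets each FPN level learn its own
                # output magnitude instead of relying on an exp transform.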
233 | bbox_reg.append(F.relu(reg)) 234 | 235 | return logits, bbox_reg, ctrness, bbox_towers 236 | -------------------------------------------------------------------------------- /core/modeling/fcose/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcose import FCOSE 2 | from .fcose_outputs import FCOSEOutputs 3 | from .extreme_detector import ExtremeDetector -------------------------------------------------------------------------------- /core/modeling/fcose/dextr.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from torch.nn.functional import upsample 3 | from deeplab_resnet import resnet101 4 | from dextr_helper import * 5 | import os 6 | import torch 7 | import numpy as np 8 | 9 | 10 | class Dextr(object): 11 | def __init__(self, model_path='', 12 | gpu_id=0, flip_test=True): 13 | if model_path == '': 14 | model_path = os.path.join( 15 | 'cache', 'dextr_pascal-sbd.pth') 16 | self.pad = 50 17 | self.thres = 0.8 18 | self.device = torch.device( 19 | "cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu") 20 | self.flip_test = flip_test 21 | 22 | # Create the network and load the weights 23 | self.net = resnet101(1, nInputChannels=4, classifier='psp') 24 | print("Initializing weights from: {}".format(model_path)) 25 | state_dict_checkpoint = torch.load( 26 | model_path, map_location=lambda storage, loc: storage) 27 | # Remove the prefix .module from the model when it is trained using DataParallel 28 | if 'module.' in list(state_dict_checkpoint.keys())[0]: 29 | new_state_dict = OrderedDict() 30 | for k, v in state_dict_checkpoint.items(): 31 | name = k[7:] # remove `module.` from multi-gpu training 32 | new_state_dict[name] = v 33 | else: 34 | new_state_dict = state_dict_checkpoint 35 | self.net.load_state_dict(new_state_dict) 36 | self.net.eval() 37 | self.net.to(self.device) 38 | 39 | def segment(self, image, extreme_points_ori): 40 | # Crop image to the bounding box from the extreme points and resize 41 | bbox = get_bbox(image, points=extreme_points_ori, pad=self.pad, zero_pad=True) 42 | crop_image = crop_from_bbox(image, bbox, zero_pad=True) 43 | resize_image = fixed_resize(crop_image, (512, 512)).astype(np.float32) 44 | 45 | # Generate extreme point heat map normalized to image values 46 | extreme_points = extreme_points_ori - [np.min(extreme_points_ori[:, 0]), np.min(extreme_points_ori[:, 1])] + [ 47 | self.pad, 48 | self.pad] 49 | extreme_points = (512 * extreme_points * [1 / crop_image.shape[1], 1 / crop_image.shape[0]]).astype(np.int) 50 | extreme_heatmap = make_gt(resize_image, extreme_points, sigma=10) 51 | extreme_heatmap = cstm_normalize(extreme_heatmap, 255) 52 | 53 | # Concatenate inputs and convert to tensor 54 | input_dextr = np.concatenate((resize_image, extreme_heatmap[:, :, np.newaxis]), axis=2) 55 | inputs = input_dextr.transpose((2, 0, 1))[np.newaxis, ...] 
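        # inputs is a 1 x 4 x 512 x 512 batch: the resized RGB crop plus the extreme-point
        # heatmap as a fourth channel. With flip_test, a horizontally flipped copy is stacked
        # on and the two predictions are averaged after the forward pass.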
56 | # import pdb; pdb.set_trace() 57 | if self.flip_test: 58 | inputs = np.concatenate([inputs, inputs[:, :, :, ::-1]], axis=0) 59 | inputs = torch.from_numpy(inputs) 60 | # Run a forward pass 61 | inputs = inputs.to(self.device) 62 | outputs = self.net.forward(inputs) 63 | outputs = upsample(outputs, size=(512, 512), mode='bilinear', align_corners=True) 64 | outputs = outputs.to(torch.device('cpu')) 65 | outputs = outputs.data.numpy() 66 | if self.flip_test: 67 | outputs = (outputs[:1] + outputs[1:, :, :, ::-1]) / 2 68 | 69 | pred = np.transpose(outputs[0, ...], (1, 2, 0)) 70 | pred = 1 / (1 + np.exp(-pred)) 71 | pred = np.squeeze(pred) 72 | result = crop2fullmask(pred, bbox, im_size=image.shape[:2], zero_pad=True, relax=self.pad) > self.thres 73 | return result 74 | -------------------------------------------------------------------------------- /core/modeling/fcose/dextr_eval.py: -------------------------------------------------------------------------------- 1 | from dextr import Dextr 2 | import pycocotools.coco as cocoapi 3 | from pycocotools.cocoeval import COCOeval 4 | from pycocotools import mask as COCOmask 5 | import numpy as np 6 | import sys 7 | import cv2 8 | import json 9 | from progress.bar import Bar 10 | 11 | DEBUG = False 12 | ANN_PATH = '/ldap_home/zichen.liu/data/coco/annotations/instances_val2017.json' 13 | IMG_DIR = '/ldap_home/zichen.liu/data/coco/val2017/' 14 | 15 | if __name__ == '__main__': 16 | dextr = Dextr() 17 | coco = cocoapi.COCO(ANN_PATH) 18 | pred_path = sys.argv[1] 19 | out_path = pred_path[:-5] + '_segm.json' 20 | anns = json.load(open(pred_path, 'r')) 21 | results = [] 22 | score_thresh = 0.2 23 | num_boxes = 0 24 | for i, ann in enumerate(anns): 25 | if ann['score'] >= score_thresh: 26 | num_boxes += 1 27 | 28 | bar = Bar('Pred + Dextr', max=num_boxes) 29 | for i, ann in enumerate(anns): 30 | if ann['score'] < score_thresh: 31 | continue 32 | ex = np.array(ann['extreme_points'], dtype=np.int32).reshape(4, 2) 33 | img_id = ann['image_id'] 34 | img_info = coco.loadImgs(ids=[img_id])[0] 35 | img_path = IMG_DIR + img_info['file_name'] 36 | img = cv2.imread(img_path) 37 | mask = dextr.segment(img[:, :, ::-1], ex) 38 | mask = np.asfortranarray(mask.astype(np.uint8)) 39 | if DEBUG: 40 | if ann['score'] < 0.1: 41 | continue 42 | print(ann['score']) 43 | img = (0.4 * img + 0.6 * mask.reshape( 44 | mask.shape[0], mask.shape[1], 1) * 255).astype(np.uint8) 45 | cv2.imshow('img', img) 46 | cv2.waitKey() 47 | encode = COCOmask.encode(mask) 48 | if 'counts' in encode: 49 | encode['counts'] = encode['counts'].decode("utf8") 50 | pred = {'image_id': ann['image_id'], 51 | 'category_id': ann['category_id'], 52 | 'score': ann['score'], 53 | 'segmentation': encode, 54 | 'extreme_points': ann['extreme_points']} 55 | results.append(pred) 56 | Bar.suffix = '[{0}/{1}]| Total: {total:} | ETA: {eta:} |'.format( 57 | i, num_boxes, total=bar.elapsed_td, eta=bar.eta_td) 58 | bar.next() 59 | bar.finish() 60 | json.dump(results, open(out_path, 'w')) 61 | 62 | dets = coco.loadRes(out_path) 63 | coco_eval = COCOeval(coco, dets, "segm") 64 | coco_eval.evaluate() 65 | coco_eval.accumulate() 66 | coco_eval.summarize() 67 | -------------------------------------------------------------------------------- /core/modeling/fcose/dextr_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch, cv2 4 | import random 5 | import numpy as np 6 | 7 | 8 | def tens2image(im): 9 | if im.size()[0] == 1: 10 | tmp = 
np.squeeze(im.numpy(), axis=0) 11 | else: 12 | tmp = im.numpy() 13 | if tmp.ndim == 2: 14 | return tmp 15 | else: 16 | return tmp.transpose((1, 2, 0)) 17 | 18 | 19 | def crop2fullmask(crop_mask, bbox, im=None, im_size=None, zero_pad=False, relax=0, mask_relax=True, 20 | interpolation=cv2.INTER_CUBIC, scikit=False): 21 | if scikit: 22 | from skimage.transform import resize as sk_resize 23 | assert (not (im is None and im_size is None)), 'You have to provide an image or the image size' 24 | if im is None: 25 | im_si = im_size 26 | else: 27 | im_si = im.shape 28 | # Borers of image 29 | bounds = (0, 0, im_si[1] - 1, im_si[0] - 1) 30 | 31 | # Valid bounding box locations as (x_min, y_min, x_max, y_max) 32 | bbox_valid = (max(bbox[0], bounds[0]), 33 | max(bbox[1], bounds[1]), 34 | min(bbox[2], bounds[2]), 35 | min(bbox[3], bounds[3])) 36 | 37 | # Bounding box of initial mask 38 | bbox_init = (bbox[0] + relax, 39 | bbox[1] + relax, 40 | bbox[2] - relax, 41 | bbox[3] - relax) 42 | 43 | if zero_pad: 44 | # Offsets for x and y 45 | offsets = (-bbox[0], -bbox[1]) 46 | else: 47 | assert ((bbox == bbox_valid).all()) 48 | offsets = (-bbox_valid[0], -bbox_valid[1]) 49 | 50 | # Simple per element addition in the tuple 51 | inds = tuple(map(sum, zip(bbox_valid, offsets + offsets))) 52 | 53 | if scikit: 54 | crop_mask = sk_resize(crop_mask, (bbox[3] - bbox[1] + 1, bbox[2] - bbox[0] + 1), order=0, 55 | mode='constant').astype(crop_mask.dtype) 56 | else: 57 | crop_mask = cv2.resize(crop_mask, (bbox[2] - bbox[0] + 1, bbox[3] - bbox[1] + 1), interpolation=interpolation) 58 | result_ = np.zeros(im_si) 59 | result_[bbox_valid[1]:bbox_valid[3] + 1, bbox_valid[0]:bbox_valid[2] + 1] = \ 60 | crop_mask[inds[1]:inds[3] + 1, inds[0]:inds[2] + 1] 61 | 62 | result = np.zeros(im_si) 63 | if mask_relax: 64 | result[bbox_init[1]:bbox_init[3] + 1, bbox_init[0]:bbox_init[2] + 1] = \ 65 | result_[bbox_init[1]:bbox_init[3] + 1, bbox_init[0]:bbox_init[2] + 1] 66 | else: 67 | result = result_ 68 | 69 | return result 70 | 71 | 72 | def overlay_mask(im, ma, colors=None, alpha=0.5): 73 | assert np.max(im) <= 1.0 74 | if colors is None: 75 | colors = np.load(os.path.join(os.path.dirname(__file__), 'pascal_map.npy')) / 255. 76 | else: 77 | colors = np.append([[0., 0., 0.]], colors, axis=0); 78 | 79 | if ma.ndim == 3: 80 | assert len(colors) >= ma.shape[0], 'Not enough colors' 81 | ma = ma.astype(np.bool) 82 | im = im.astype(np.float32) 83 | 84 | if ma.ndim == 2: 85 | fg = im * alpha + np.ones(im.shape) * (1 - alpha) * colors[1, :3] # np.array([0,0,255])/255.0 86 | else: 87 | fg = [] 88 | for n in range(ma.ndim): 89 | fg.append(im * alpha + np.ones(im.shape) * (1 - alpha) * colors[1 + n, :3]) 90 | # Whiten background 91 | bg = im.copy() 92 | if ma.ndim == 2: 93 | bg[ma == 0] = im[ma == 0] 94 | bg[ma == 1] = fg[ma == 1] 95 | total_ma = ma 96 | else: 97 | total_ma = np.zeros([ma.shape[1], ma.shape[2]]) 98 | for n in range(ma.shape[0]): 99 | tmp_ma = ma[n, :, :] 100 | total_ma = np.logical_or(tmp_ma, total_ma) 101 | tmp_fg = fg[n] 102 | bg[tmp_ma == 1] = tmp_fg[tmp_ma == 1] 103 | bg[total_ma == 0] = im[total_ma == 0] 104 | 105 | # [-2:] is s trick to be compatible both with opencv 2 and 3 106 | contours = cv2.findContours(total_ma.copy().astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:] 107 | cv2.drawContours(bg, contours[0], -1, (0.0, 0.0, 0.0), 1) 108 | 109 | return bg 110 | 111 | 112 | def overlay_masks(im, masks, alpha=0.5): 113 | colors = np.load(os.path.join(os.path.dirname(__file__), 'pascal_map.npy')) / 255. 
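    # Blend every mask onto `im` with its own colour from the palette loaded
    # above and outline it in black; pixels outside all masks keep the original
    # image values.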
114 | 115 | if isinstance(masks, np.ndarray): 116 | masks = [masks] 117 | 118 | assert len(colors) >= len(masks), 'Not enough colors' 119 | 120 | ov = im.copy() 121 | im = im.astype(np.float32) 122 | total_ma = np.zeros([im.shape[0], im.shape[1]]) 123 | i = 1 124 | for ma in masks: 125 | ma = ma.astype(np.bool) 126 | fg = im * alpha + np.ones(im.shape) * (1 - alpha) * colors[i, :3] # np.array([0,0,255])/255.0 127 | i = i + 1 128 | ov[ma == 1] = fg[ma == 1] 129 | total_ma += ma 130 | 131 | # [-2:] is s trick to be compatible both with opencv 2 and 3 132 | contours = cv2.findContours(ma.copy().astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:] 133 | cv2.drawContours(ov, contours[0], -1, (0.0, 0.0, 0.0), 1) 134 | ov[total_ma == 0] = im[total_ma == 0] 135 | 136 | return ov 137 | 138 | 139 | def extreme_points(mask, pert): 140 | def find_point(id_x, id_y, ids): 141 | sel_id = ids[0][random.randint(0, len(ids[0]) - 1)] 142 | return [id_x[sel_id], id_y[sel_id]] 143 | 144 | # List of coordinates of the mask 145 | inds_y, inds_x = np.where(mask > 0.5) 146 | 147 | # Find extreme points 148 | return np.array([find_point(inds_x, inds_y, np.where(inds_x <= np.min(inds_x) + pert)), # left 149 | find_point(inds_x, inds_y, np.where(inds_x >= np.max(inds_x) - pert)), # right 150 | find_point(inds_x, inds_y, np.where(inds_y <= np.min(inds_y) + pert)), # top 151 | find_point(inds_x, inds_y, np.where(inds_y >= np.max(inds_y) - pert)) # bottom 152 | ]) 153 | 154 | 155 | def get_bbox(mask, points=None, pad=0, zero_pad=False): 156 | if points is not None: 157 | inds = np.flip(points.transpose(), axis=0) 158 | else: 159 | inds = np.where(mask > 0) 160 | 161 | if inds[0].shape[0] == 0: 162 | return None 163 | 164 | if zero_pad: 165 | x_min_bound = -np.inf 166 | y_min_bound = -np.inf 167 | x_max_bound = np.inf 168 | y_max_bound = np.inf 169 | else: 170 | x_min_bound = 0 171 | y_min_bound = 0 172 | x_max_bound = mask.shape[1] - 1 173 | y_max_bound = mask.shape[0] - 1 174 | 175 | x_min = max(inds[1].min() - pad, x_min_bound) 176 | y_min = max(inds[0].min() - pad, y_min_bound) 177 | x_max = min(inds[1].max() + pad, x_max_bound) 178 | y_max = min(inds[0].max() + pad, y_max_bound) 179 | 180 | return x_min, y_min, x_max, y_max 181 | 182 | 183 | def crop_from_bbox(img, bbox, zero_pad=False): 184 | # Borders of image 185 | bounds = (0, 0, img.shape[1] - 1, img.shape[0] - 1) 186 | 187 | # Valid bounding box locations as (x_min, y_min, x_max, y_max) 188 | bbox_valid = (max(bbox[0], bounds[0]), 189 | max(bbox[1], bounds[1]), 190 | min(bbox[2], bounds[2]), 191 | min(bbox[3], bounds[3])) 192 | 193 | if zero_pad: 194 | # Initialize crop size (first 2 dimensions) 195 | crop = np.zeros((bbox[3] - bbox[1] + 1, bbox[2] - bbox[0] + 1), dtype=img.dtype) 196 | 197 | # Offsets for x and y 198 | offsets = (-bbox[0], -bbox[1]) 199 | 200 | else: 201 | assert (bbox == bbox_valid) 202 | crop = np.zeros((bbox_valid[3] - bbox_valid[1] + 1, bbox_valid[2] - bbox_valid[0] + 1), dtype=img.dtype) 203 | offsets = (-bbox_valid[0], -bbox_valid[1]) 204 | 205 | # Simple per element addition in the tuple 206 | inds = tuple(map(sum, zip(bbox_valid, offsets + offsets))) 207 | 208 | img = np.squeeze(img) 209 | if img.ndim == 2: 210 | crop[inds[1]:inds[3] + 1, inds[0]:inds[2] + 1] = \ 211 | img[bbox_valid[1]:bbox_valid[3] + 1, bbox_valid[0]:bbox_valid[2] + 1] 212 | else: 213 | crop = np.tile(crop[:, :, np.newaxis], [1, 1, 3]) # Add 3 RGB Channels 214 | crop[inds[1]:inds[3] + 1, inds[0]:inds[2] + 1, :] = \ 215 | img[bbox_valid[1]:bbox_valid[3] + 
1, bbox_valid[0]:bbox_valid[2] + 1, :] 216 | 217 | return crop 218 | 219 | 220 | def fixed_resize(sample, resolution, flagval=None): 221 | if flagval is None: 222 | if ((sample == 0) | (sample == 1)).all(): 223 | flagval = cv2.INTER_NEAREST 224 | else: 225 | flagval = cv2.INTER_CUBIC 226 | 227 | if isinstance(resolution, int): 228 | tmp = [resolution, resolution] 229 | tmp[np.argmax(sample.shape[:2])] = int( 230 | round(float(resolution) / np.min(sample.shape[:2]) * np.max(sample.shape[:2]))) 231 | resolution = tuple(tmp) 232 | 233 | if sample.ndim == 2 or (sample.ndim == 3 and sample.shape[2] == 3): 234 | sample = cv2.resize(sample, resolution[::-1], interpolation=flagval) 235 | else: 236 | tmp = sample 237 | sample = np.zeros(np.append(resolution, tmp.shape[2]), dtype=np.float32) 238 | for ii in range(sample.shape[2]): 239 | sample[:, :, ii] = cv2.resize(tmp[:, :, ii], resolution[::-1], interpolation=flagval) 240 | return sample 241 | 242 | 243 | def crop_from_mask(img, mask, relax=0, zero_pad=False): 244 | if mask.shape[:2] != img.shape[:2]: 245 | mask = cv2.resize(mask, dsize=tuple(reversed(img.shape[:2])), interpolation=cv2.INTER_NEAREST) 246 | 247 | assert (mask.shape[:2] == img.shape[:2]) 248 | 249 | bbox = get_bbox(mask, pad=relax, zero_pad=zero_pad) 250 | 251 | if bbox is None: 252 | return None 253 | 254 | crop = crop_from_bbox(img, bbox, zero_pad) 255 | 256 | return crop 257 | 258 | 259 | def make_gaussian(size, sigma=10, center=None, d_type=np.float64): 260 | """ Make a square gaussian kernel. 261 | size: is the dimensions of the output gaussian 262 | sigma: is full-width-half-maximum, which 263 | can be thought of as an effective radius. 264 | """ 265 | 266 | x = np.arange(0, size[1], 1, float) 267 | y = np.arange(0, size[0], 1, float) 268 | y = y[:, np.newaxis] 269 | 270 | if center is None: 271 | x0 = y0 = size[0] // 2 272 | else: 273 | x0 = center[0] 274 | y0 = center[1] 275 | 276 | return np.exp(-4 * np.log(2) * ((x - x0) ** 2 + (y - y0) ** 2) / sigma ** 2).astype(d_type) 277 | 278 | 279 | def make_gt(img, labels, sigma=10, one_mask_per_point=False): 280 | """ Make the ground-truth for landmark. 281 | img: the original color image 282 | labels: label with the Gaussian center(s) [[x0, y0],[x1, y1],...] 283 | sigma: sigma of the Gaussian. 284 | one_mask_per_point: masks for each point in different channels? 
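    For example, make_gt(img, [[30, 40], [80, 90]], sigma=10) returns a single
    (h, w) map that is the pixel-wise maximum of one Gaussian per point, cast to
    img's dtype; with one_mask_per_point=True each point instead gets its own
    channel.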
285 | """ 286 | h, w = img.shape[:2] 287 | if labels is None: 288 | gt = make_gaussian((h, w), center=(h // 2, w // 2), sigma=sigma) 289 | else: 290 | labels = np.array(labels) 291 | if labels.ndim == 1: 292 | labels = labels[np.newaxis] 293 | if one_mask_per_point: 294 | gt = np.zeros(shape=(h, w, labels.shape[0])) 295 | for ii in range(labels.shape[0]): 296 | gt[:, :, ii] = make_gaussian((h, w), center=labels[ii, :], sigma=sigma) 297 | else: 298 | gt = np.zeros(shape=(h, w), dtype=np.float64) 299 | for ii in range(labels.shape[0]): 300 | gt = np.maximum(gt, make_gaussian((h, w), center=labels[ii, :], sigma=sigma)) 301 | 302 | gt = gt.astype(dtype=img.dtype) 303 | 304 | return gt 305 | 306 | 307 | def cstm_normalize(im, max_value): 308 | """ 309 | Normalize image to range 0 - max_value 310 | """ 311 | imn = max_value * (im - im.min()) / max((im.max() - im.min()), 1e-8) 312 | return imn 313 | 314 | 315 | def generate_param_report(logfile, param): 316 | log_file = open(logfile, 'w') 317 | for key, val in param.items(): 318 | log_file.write(key + ':' + str(val) + '\n') 319 | log_file.close() -------------------------------------------------------------------------------- /core/modeling/fcose/extreme_detector.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.structures import ImageList 6 | from detectron2.utils.logger import log_first_n 7 | 8 | from detectron2.modeling.backbone import build_backbone 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 11 | 12 | from core.modeling.postprocessing import detector_postprocess 13 | 14 | 15 | @META_ARCH_REGISTRY.register() 16 | class ExtremeDetector(nn.Module): 17 | def __init__(self, cfg): 18 | super().__init__() 19 | self.device = torch.device(cfg.MODEL.DEVICE) 20 | 21 | self.backbone = build_backbone(cfg) 22 | self.proposal_generator = build_proposal_generator( 23 | cfg, self.backbone.output_shape() 24 | ) 25 | 26 | self.mask_result_src = cfg.MODEL.DANCE.MASK_IN 27 | 28 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 29 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 30 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 31 | self.to(self.device) 32 | 33 | def forward(self, batched_inputs): 34 | """ 35 | Args: 36 | Same as in :class:`GeneralizedRCNN.forward` 37 | 38 | Returns: 39 | list[dict]: 40 | Each dict is the output for one input image. 41 | The dict contains one key "proposals" whose value is a 42 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 
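                (In this detector the post-processed instances are returned under
                the key "instances" rather than "proposals"; see the loop at the
                end of this method.)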
43 | """ 44 | images = [x["image"].to(self.device) for x in batched_inputs] 45 | images = [self.normalizer(x) for x in images] 46 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 47 | features = self.backbone(images.tensor) 48 | 49 | if "instances" in batched_inputs[0]: 50 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 51 | elif "targets" in batched_inputs[0]: 52 | log_first_n( 53 | logging.WARN, 54 | "'targets' in the model inputs is now renamed to 'instances'!", 55 | n=10, 56 | ) 57 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 58 | else: 59 | gt_instances = None 60 | proposals, proposal_losses = self.proposal_generator( 61 | images, features, gt_instances 62 | ) 63 | # In training, the proposals are not useful at all in RPN models; but not here 64 | # This makes RPN-only models about 5% slower. 65 | if self.training: 66 | return proposal_losses 67 | 68 | processed_results = [] 69 | for results_per_image, input_per_image, image_size in zip( 70 | proposals, batched_inputs, images.image_sizes 71 | ): 72 | height = input_per_image.get("height", image_size[0]) 73 | width = input_per_image.get("width", image_size[1]) 74 | r = detector_postprocess( 75 | self.mask_result_src, results_per_image, height, width 76 | ) 77 | processed_results.append({"instances": r}) 78 | 79 | return processed_results 80 | -------------------------------------------------------------------------------- /core/modeling/fcose/fcose.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Dict 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from detectron2.layers import ShapeSpec 8 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 9 | 10 | from core.layers import DFConv2d, IOULoss, EXTLoss 11 | from .fcose_outputs import FCOSEOutputs 12 | 13 | 14 | __all__ = ["FCOS"] 15 | 16 | INF = 100000000 17 | 18 | 19 | class Scale(nn.Module): 20 | def __init__(self, init_value=1.0): 21 | super(Scale, self).__init__() 22 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 23 | 24 | def forward(self, input): 25 | return input * self.scale 26 | 27 | 28 | @PROPOSAL_GENERATOR_REGISTRY.register() 29 | class FCOSE(nn.Module): 30 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 31 | super().__init__() 32 | # fmt: off 33 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 34 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 35 | self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA 36 | self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA 37 | self.center_sample = cfg.MODEL.FCOS.CENTER_SAMPLE 38 | self.strides = cfg.MODEL.FCOS.FPN_STRIDES 39 | self.radius = cfg.MODEL.FCOS.POS_RADIUS 40 | self.pre_nms_thresh_train = cfg.MODEL.FCOS.INFERENCE_TH_TRAIN 41 | self.pre_nms_thresh_test = cfg.MODEL.FCOS.INFERENCE_TH_TEST 42 | self.pre_nms_topk_train = cfg.MODEL.FCOS.PRE_NMS_TOPK_TRAIN 43 | self.pre_nms_topk_test = cfg.MODEL.FCOS.PRE_NMS_TOPK_TEST 44 | self.nms_thresh = cfg.MODEL.FCOS.NMS_TH 45 | self.post_nms_topk_train = cfg.MODEL.FCOS.POST_NMS_TOPK_TRAIN 46 | self.post_nms_topk_test = cfg.MODEL.FCOS.POST_NMS_TOPK_TEST 47 | self.thresh_with_ctr = cfg.MODEL.FCOS.THRESH_WITH_CTR 48 | # fmt: on 49 | self.iou_loss = IOULoss(cfg.MODEL.FCOS.LOC_LOSS_TYPE) 50 | self.ext_loss = EXTLoss(cfg.MODEL.FCOS.EXT_LOSS_TYPE) 51 | # generate sizes of interest 52 | soi = [] 53 | prev_size = -1 54 | for s in 
cfg.MODEL.FCOS.SIZES_OF_INTEREST: 55 | soi.append([prev_size, s]) 56 | prev_size = s 57 | soi.append([prev_size, INF]) 58 | self.sizes_of_interest = soi 59 | self.fcose_head = FCOSEHead(cfg, [input_shape[f] for f in self.in_features]) 60 | 61 | def forward(self, images, features, gt_instances): 62 | """ 63 | Arguments: 64 | images (list[Tensor] or ImageList): images to be processed 65 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 66 | 67 | Returns: 68 | result (list[BoxList] or dict[Tensor]): the output from the model. 69 | During training, it returns a dict[Tensor] which contains the losses. 70 | During testing, it returns list[BoxList] contains additional fields 71 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 72 | 73 | """ 74 | features = [features[f] for f in self.in_features] 75 | locations = self.compute_locations(features) 76 | logits_pred, reg_pred, ex_pred, ctrness_pred, bbox_towers = self.fcose_head(features) 77 | 78 | if self.training: 79 | pre_nms_thresh = self.pre_nms_thresh_train 80 | pre_nms_topk = self.pre_nms_topk_train 81 | post_nms_topk = self.post_nms_topk_train 82 | else: 83 | pre_nms_thresh = self.pre_nms_thresh_test 84 | pre_nms_topk = self.pre_nms_topk_test 85 | post_nms_topk = self.post_nms_topk_test 86 | 87 | outputs = FCOSEOutputs( 88 | images, 89 | locations, 90 | logits_pred, 91 | reg_pred, 92 | ex_pred, 93 | ctrness_pred, 94 | self.focal_loss_alpha, 95 | self.focal_loss_gamma, 96 | self.iou_loss, 97 | self.ext_loss, 98 | self.center_sample, 99 | self.sizes_of_interest, 100 | self.strides, 101 | self.radius, 102 | self.fcose_head.num_classes, 103 | pre_nms_thresh, 104 | pre_nms_topk, 105 | self.nms_thresh, 106 | post_nms_topk, 107 | self.thresh_with_ctr, 108 | gt_instances 109 | ) 110 | 111 | if self.training: 112 | losses, _ = outputs.losses() 113 | return None, losses 114 | else: 115 | proposals = outputs.predict_proposals() 116 | return proposals, {} 117 | 118 | def compute_locations(self, features): 119 | locations = [] 120 | for level, feature in enumerate(features): 121 | h, w = feature.size()[-2:] 122 | locations_per_level = self.compute_locations_per_level( 123 | h, w, self.fpn_strides[level], 124 | feature.device 125 | ) 126 | locations.append(locations_per_level) 127 | return locations 128 | 129 | def compute_locations_per_level(self, h, w, stride, device): 130 | shifts_x = torch.arange( 131 | 0, w * stride, step=stride, 132 | dtype=torch.float32, device=device 133 | ) 134 | shifts_y = torch.arange( 135 | 0, h * stride, step=stride, 136 | dtype=torch.float32, device=device 137 | ) 138 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 139 | shift_x = shift_x.reshape(-1) 140 | shift_y = shift_y.reshape(-1) 141 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 142 | return locations 143 | 144 | 145 | class FCOSEHead(nn.Module): 146 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 147 | """ 148 | Arguments: 149 | in_channels (int): number of channels of the input feature 150 | """ 151 | super().__init__() 152 | # TODO: Implement the sigmoid version first. 
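        # Head layout: three 3x3-conv towers (shared, cls, bbox) keep the channel
        # count unchanged, then four 3x3 predictors produce class logits
        # (num_classes channels), box regression (4), extreme-point offsets (4)
        # and centerness (1). E.g. a 256-channel FPN map stays (N, 256, H, W)
        # through each tower before these final predictors.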
153 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 154 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 155 | head_configs = {"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, 156 | False), 157 | "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, 158 | cfg.MODEL.FCOS.USE_DEFORMABLE), 159 | "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, 160 | cfg.MODEL.FCOS.USE_DEFORMABLE)} 161 | norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM 162 | 163 | in_channels = [s.channels for s in input_shape] 164 | assert len(set(in_channels)) == 1, "Each level must have the same channel!" 165 | in_channels = in_channels[0] 166 | 167 | for head in head_configs: 168 | tower = [] 169 | num_convs, use_deformable = head_configs[head] 170 | if use_deformable: 171 | conv_func = DFConv2d 172 | else: 173 | conv_func = nn.Conv2d 174 | for i in range(num_convs): 175 | tower.append(conv_func( 176 | in_channels, in_channels, 177 | kernel_size=3, stride=1, 178 | padding=1, bias=True 179 | )) 180 | if norm == "GN": 181 | tower.append(nn.GroupNorm(32, in_channels)) 182 | tower.append(nn.ReLU()) 183 | self.add_module('{}_tower'.format(head), 184 | nn.Sequential(*tower)) 185 | 186 | self.cls_logits = nn.Conv2d( 187 | in_channels, self.num_classes, 188 | kernel_size=3, stride=1, 189 | padding=1 190 | ) 191 | self.bbox_pred = nn.Conv2d( 192 | in_channels, 4, kernel_size=3, 193 | stride=1, padding=1 194 | ) 195 | self.extrm_pred = nn.Conv2d( 196 | in_channels, 4, kernel_size=3, 197 | stride=1, padding=1 198 | ) 199 | self.ctrness = nn.Conv2d( 200 | in_channels, 1, kernel_size=3, 201 | stride=1, padding=1 202 | ) 203 | 204 | if cfg.MODEL.FCOS.USE_SCALE: 205 | self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in self.fpn_strides]) 206 | else: 207 | self.scales = None 208 | 209 | for modules in [ 210 | self.cls_tower, self.bbox_tower, 211 | self.share_tower, self.cls_logits, 212 | self.bbox_pred, self.extrm_pred, 213 | self.ctrness 214 | ]: 215 | for l in modules.modules(): 216 | if isinstance(l, nn.Conv2d): 217 | torch.nn.init.normal_(l.weight, std=0.01) 218 | torch.nn.init.constant_(l.bias, 0) 219 | 220 | # initialize the bias for focal loss 221 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 222 | bias_value = -math.log((1 - prior_prob) / prior_prob) 223 | torch.nn.init.constant_(self.cls_logits.bias, bias_value) 224 | 225 | self.output_ex = False if cfg.MODEL.SNAKE_HEAD.INITIAL == 'box' else True 226 | 227 | def forward(self, x): 228 | logits = [] 229 | bbox_reg = [] 230 | ex_reg = [] 231 | ctrness = [] 232 | bbox_towers = [] 233 | for l, feature in enumerate(x): 234 | feature = self.share_tower(feature) 235 | cls_tower = self.cls_tower(feature) 236 | bbox_tower = self.bbox_tower(feature) 237 | 238 | logits.append(self.cls_logits(cls_tower)) 239 | ctrness.append(self.ctrness(bbox_tower)) 240 | if self.training or self.output_ex: 241 | ex_reg.append(torch.tanh(self.extrm_pred(bbox_tower))) 242 | else: 243 | ex_reg.append(None) 244 | reg = self.bbox_pred(bbox_tower) 245 | if self.scales is not None: 246 | reg = self.scales[l](reg) 247 | # Note that we use relu, as in the improved FCOS, instead of exp. 
248 | bbox_reg.append(F.relu(reg)) 249 | 250 | return logits, bbox_reg, ex_reg, ctrness, bbox_towers 251 | -------------------------------------------------------------------------------- /core/modeling/fcose/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from core.structures.points_set import ExtremePoints 4 | from detectron2.structures.boxes import Boxes 5 | import pycocotools.mask as mask_util 6 | 7 | 8 | # unused 9 | def get_octagon(ex): 10 | ex = np.array(ex).reshape(4, 2) 11 | w, h = ex[3][0] - ex[1][0], ex[2][1] - ex[0][1] 12 | t, l, b, r = ex[0][1], ex[1][0], ex[2][1], ex[3][0] 13 | x = 8. 14 | octagon = [[min(ex[0][0] + w / x, r), ex[0][1], \ 15 | max(ex[0][0] - w / x, l), ex[0][1], \ 16 | ex[1][0], max(ex[1][1] - h / x, t), \ 17 | ex[1][0], min(ex[1][1] + h / x, b), \ 18 | max(ex[2][0] - w / x, l), ex[2][1], \ 19 | min(ex[2][0] + w / x, r), ex[2][1], \ 20 | ex[3][0], min(ex[3][1] + h / x, b), \ 21 | ex[3][0], max(ex[3][1] - h / x, t) 22 | ]] 23 | return octagon 24 | 25 | 26 | def extreme_point_to_octagon_mask(extreme_points, h, w): 27 | octagon = get_octagon(extreme_points) 28 | rles = mask_util.frPyObjects(octagon, h, w) 29 | rle = mask_util.merge(rles) 30 | mask = mask_util.decode(rle) 31 | return mask 32 | 33 | 34 | def get_extreme_points(pts): 35 | num_pt = pts.shape[0] 36 | l, t = min(pts[:, 0]), min(pts[:, 1]) 37 | r, b = max(pts[:, 0]), max(pts[:, 1]) 38 | # 3 degrees 39 | thresh = 0.02 40 | w = r - l + 1 41 | h = b - t + 1 42 | 43 | t_idx = np.argmin(pts[:, 1]) 44 | t_idxs = [t_idx] 45 | tmp = (t_idx + 1) % num_pt 46 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 47 | t_idxs.append(tmp) 48 | tmp = (tmp + 1) % num_pt 49 | tmp = (t_idx - 1) % num_pt 50 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 51 | t_idxs.append(tmp) 52 | tmp = (tmp - 1) % num_pt 53 | tt = (max(pts[t_idxs, 0]) + min(pts[t_idxs, 0])) / 2 54 | 55 | b_idx = np.argmax(pts[:, 1]) 56 | b_idxs = [b_idx] 57 | tmp = (b_idx + 1) % num_pt 58 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 59 | b_idxs.append(tmp) 60 | tmp = (tmp + 1) % num_pt 61 | tmp = (b_idx - 1) % num_pt 62 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 63 | b_idxs.append(tmp) 64 | tmp = (tmp - 1) % num_pt 65 | bb = (max(pts[b_idxs, 0]) + min(pts[b_idxs, 0])) / 2 66 | 67 | l_idx = np.argmin(pts[:, 0]) 68 | l_idxs = [l_idx] 69 | tmp = (l_idx + 1) % num_pt 70 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 71 | l_idxs.append(tmp) 72 | tmp = (tmp + 1) % num_pt 73 | tmp = (l_idx - 1) % num_pt 74 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 75 | l_idxs.append(tmp) 76 | tmp = (tmp - 1) % num_pt 77 | ll = (max(pts[l_idxs, 1]) + min(pts[l_idxs, 1])) / 2 78 | 79 | r_idx = np.argmax(pts[:, 0]) 80 | r_idxs = [r_idx] 81 | tmp = (r_idx + 1) % num_pt 82 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 83 | r_idxs.append(tmp) 84 | tmp = (tmp + 1) % num_pt 85 | tmp = (r_idx - 1) % num_pt 86 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 87 | r_idxs.append(tmp) 88 | tmp = (tmp - 1) % num_pt 89 | rr = (max(pts[r_idxs, 1]) + min(pts[r_idxs, 1])) / 2 90 | 91 | return np.array([tt, ll, bb, rr]) 92 | 93 | 94 | def get_aux_extreme_points(pts): 95 | num_pt = pts.shape[0] 96 | 97 | aux_ext_pts = [] 98 | 99 | l, t = min(pts[:, 0]), min(pts[:, 1]) 100 | r, b = max(pts[:, 0]), max(pts[:, 1]) 101 | # 3 degrees 102 | thresh 
= 0.02 103 | band_thresh = 0.02 104 | w = r - l + 1 105 | h = b - t + 1 106 | 107 | t_band = np.where((pts[:, 1] - t) <= band_thresh * h)[0].tolist() 108 | while t_band: 109 | t_idx = t_band[np.argmin(pts[t_band, 1])] 110 | t_idxs = [t_idx] 111 | tmp = (t_idx + 1) % num_pt 112 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 113 | t_idxs.append(tmp) 114 | tmp = (tmp + 1) % num_pt 115 | tmp = (t_idx - 1) % num_pt 116 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 117 | t_idxs.append(tmp) 118 | tmp = (tmp - 1) % num_pt 119 | tt = (max(pts[t_idxs, 0]) + min(pts[t_idxs, 0])) / 2 120 | aux_ext_pts.append(np.array([tt, t])) 121 | t_band = [item for item in t_band if item not in t_idxs] 122 | 123 | b_band = np.where((b - pts[:, 1]) <= band_thresh * h)[0].tolist() 124 | while b_band: 125 | b_idx = b_band[np.argmax(pts[b_band, 1])] 126 | b_idxs = [b_idx] 127 | tmp = (b_idx + 1) % num_pt 128 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 129 | b_idxs.append(tmp) 130 | tmp = (tmp + 1) % num_pt 131 | tmp = (b_idx - 1) % num_pt 132 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 133 | b_idxs.append(tmp) 134 | tmp = (tmp - 1) % num_pt 135 | bb = (max(pts[b_idxs, 0]) + min(pts[b_idxs, 0])) / 2 136 | aux_ext_pts.append(np.array([bb, b])) 137 | b_band = [item for item in b_band if item not in b_idxs] 138 | 139 | l_band = np.where((pts[:, 0] - l) <= band_thresh * w)[0].tolist() 140 | while l_band: 141 | l_idx = l_band[np.argmin(pts[l_band, 0])] 142 | l_idxs = [l_idx] 143 | tmp = (l_idx + 1) % num_pt 144 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 145 | l_idxs.append(tmp) 146 | tmp = (tmp + 1) % num_pt 147 | tmp = (l_idx - 1) % num_pt 148 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 149 | l_idxs.append(tmp) 150 | tmp = (tmp - 1) % num_pt 151 | ll = (max(pts[l_idxs, 1]) + min(pts[l_idxs, 1])) / 2 152 | aux_ext_pts.append(np.array([l, ll])) 153 | l_band = [item for item in l_band if item not in l_idxs] 154 | 155 | r_band = np.where((r - pts[:, 0]) <= band_thresh * w)[0].tolist() 156 | while r_band: 157 | r_idx = r_band[np.argmax(pts[r_band, 0])] 158 | r_idxs = [r_idx] 159 | tmp = (r_idx + 1) % num_pt 160 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 161 | r_idxs.append(tmp) 162 | tmp = (tmp + 1) % num_pt 163 | tmp = (r_idx - 1) % num_pt 164 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 165 | r_idxs.append(tmp) 166 | tmp = (tmp - 1) % num_pt 167 | rr = (max(pts[r_idxs, 1]) + min(pts[r_idxs, 1])) / 2 168 | aux_ext_pts.append(np.array([r, rr])) 169 | r_band = [item for item in r_band if item not in r_idxs] 170 | 171 | # assert len(aux_ext_pts) >= 4 172 | pt0 = aux_ext_pts[0] 173 | 174 | # collecting 175 | aux_ext_pts = np.stack(aux_ext_pts, axis=0) 176 | 177 | # ordering 178 | shift_idx = np.argmin(np.power(pts - pt0, 2).sum(axis=1)) 179 | re_ordered_pts = np.roll(pts, -shift_idx, axis=0) 180 | 181 | # indexing 182 | ext_idxs = np.argmin(np.sum( 183 | (aux_ext_pts[:, np.newaxis, :] - re_ordered_pts[np.newaxis, ...]) ** 2, axis=2), 184 | axis=1) 185 | ext_idxs[0] = 0 186 | 187 | ext_idxs = np.sort(np.unique(ext_idxs)) 188 | 189 | return re_ordered_pts, ext_idxs 190 | 191 | def vis_training_targets(cfg, fcose_outputs, image_list, idx=0): 192 | import matplotlib.pyplot as plt 193 | import matplotlib.patches as patches 194 | import numpy as np 195 | 196 | colors = np.array([[1, 1, 198], 197 | [51, 1, 148], 198 | [101, 1, 98], 199 | [151, 1, 48], 
200 | [201, 1, 8]]) / 255. 201 | 202 | num_loc_list = [len(loc) for loc in fcose_outputs.locations] 203 | fcose_outputs.num_loc_list = num_loc_list 204 | 205 | # compute locations to size ranges 206 | loc_to_size_range = [] 207 | for l, loc_per_level in enumerate(fcose_outputs.locations): 208 | loc_to_size_range_per_level = loc_per_level.new_tensor(fcose_outputs.sizes_of_interest[l]) 209 | loc_to_size_range.append( 210 | loc_to_size_range_per_level[None].expand(num_loc_list[l], -1) 211 | ) 212 | 213 | # (Sigma_{levels_points}, 2) 214 | loc_to_size_range = torch.cat(loc_to_size_range, dim=0) 215 | locations = torch.cat(fcose_outputs.locations, dim=0) 216 | 217 | training_targets = fcose_outputs.compute_targets_for_locations( 218 | locations, fcose_outputs.gt_instances, loc_to_size_range 219 | ) 220 | 221 | training_target = {k: v[idx] for k, v in training_targets.items()} 222 | 223 | fig, ax = plt.subplots(1, figsize=(20, 10)) 224 | fig.tight_layout() 225 | 226 | labels = training_target['labels'] 227 | reg_targets = training_target['reg_targets'] 228 | ext_targets = training_target['ext_targets'] 229 | 230 | idxOfloc_of_interest = torch.where(labels != 20)[0] 231 | 232 | global locxys, reg_targets_oi, ext_targets_oi, detections 233 | 234 | locxys = locations[idxOfloc_of_interest] 235 | 236 | reg_targets_oi = reg_targets[idxOfloc_of_interest] 237 | ext_targets_oi = ext_targets[idxOfloc_of_interest] 238 | 239 | detections = torch.stack([ 240 | locxys[:, 0] - reg_targets_oi[:, 0], 241 | locxys[:, 1] - reg_targets_oi[:, 1], 242 | locxys[:, 0] + reg_targets_oi[:, 2], 243 | locxys[:, 1] + reg_targets_oi[:, 3], 244 | ], dim=1) 245 | 246 | global tmp, ext_points 247 | 248 | ext_points = ExtremePoints.from_boxes(Boxes(detections), 249 | ext_targets_oi, 250 | locxys).tensor.cpu().numpy() 251 | 252 | tmp = ext_points 253 | 254 | im = image_list.tensor[idx] 255 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(im.device).view(-1, 1, 1) 256 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(im.device).view(-1, 1, 1) 257 | im_norm = ((im * pixel_std) + pixel_mean).cpu().numpy().transpose(1, 2, 0).astype(np.uint8) 258 | 259 | ax.imshow(im_norm) 260 | locxys_np = locxys.cpu().numpy() 261 | reg_targets_oi_np = reg_targets_oi.cpu().numpy() 262 | ext_targets_oi_np = ext_targets_oi.cpu().numpy() 263 | detections_np = detections.cpu().numpy() 264 | 265 | for i in range(len(locxys_np)): 266 | ax.scatter(locxys_np[i, 0], locxys_np[i, 1], color=colors[i % len(colors)].tolist(), marker='*') 267 | x1, y1, x2, y2 = detections_np[i, :] 268 | 269 | rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor=colors[i % len(colors)].tolist(), 270 | facecolor='none', fill=False) 271 | ax.add_patch(rect) 272 | 273 | ax.scatter(ext_points[i][:, 0], ext_points[i][:, 1], color=colors[i % len(colors)].tolist(), marker='+') 274 | 275 | plt.show() 276 | -------------------------------------------------------------------------------- /core/modeling/one_stage_detector.py: -------------------------------------------------------------------------------- 1 | 2 | # from detectron2.modeling import ProposalNetwork 3 | import torch 4 | import torch.nn as nn 5 | 6 | from detectron2.modeling.backbone import build_backbone 7 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 8 | from detectron2.modeling.postprocessing import detector_postprocess 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.structures import ImageList 11 | from 
detectron2.utils.logger import log_first_n 12 | 13 | import logging 14 | from core.utils import timer 15 | 16 | 17 | class ProposalNetwork(nn.Module): 18 | def __init__(self, cfg): 19 | super().__init__() 20 | self.device = torch.device(cfg.MODEL.DEVICE) 21 | 22 | self.backbone = build_backbone(cfg) 23 | self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) 24 | 25 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 26 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 27 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 28 | self.to(self.device) 29 | 30 | def forward(self, batched_inputs): 31 | """ 32 | Args: 33 | Same as in :class:`GeneralizedRCNN.forward` 34 | 35 | Returns: 36 | list[dict]: 37 | Each dict is the output for one input image. 38 | The dict contains one key "proposals" whose value is a 39 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 40 | """ 41 | with timer.env("pre_process"): 42 | images = [x["image"].to(self.device) for x in batched_inputs] 43 | images = [self.normalizer(x) for x in images] 44 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 45 | 46 | with timer.env('backbone'): 47 | features = self.backbone(images.tensor) 48 | 49 | if "instances" in batched_inputs[0]: 50 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 51 | elif "targets" in batched_inputs[0]: 52 | log_first_n( 53 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 54 | ) 55 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 56 | else: 57 | gt_instances = None 58 | 59 | with timer.env('fcos'): 60 | proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) 61 | # In training, the proposals are not useful at all but we generate them anyway. 62 | # This makes RPN-only models about 5% slower. 
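        # In training only the loss dict is returned; at inference the proposals
        # are rescaled back to the original image size by detector_postprocess in
        # the post_process block below.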
63 | if self.training: 64 | return proposal_losses 65 | 66 | processed_results = [] 67 | with timer.env('post_process'): 68 | for results_per_image, input_per_image, image_size in zip( 69 | proposals, batched_inputs, images.image_sizes 70 | ): 71 | height = input_per_image.get("height", image_size[0]) 72 | width = input_per_image.get("width", image_size[1]) 73 | r = detector_postprocess(results_per_image, height, width) 74 | processed_results.append({"proposals": r}) 75 | return processed_results 76 | 77 | 78 | @META_ARCH_REGISTRY.register() 79 | class OneStageDetector(ProposalNetwork): 80 | def forward(self, batched_inputs): 81 | if self.training: 82 | return super().forward(batched_inputs) 83 | processed_results = super().forward(batched_inputs) 84 | processed_results = [{"instances": r["proposals"]} for r in processed_results] 85 | return processed_results 86 | -------------------------------------------------------------------------------- /core/modeling/poolers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from detectron2.layers import cat 4 | 5 | from detectron2.modeling.poolers import ( 6 | ROIPooler, convert_boxes_to_pooler_format, assign_boxes_to_levels 7 | ) 8 | 9 | 10 | def _box_max_size(boxes): 11 | box = boxes.tensor 12 | max_size = torch.max(box[:, 2] - box[:, 0], box[:, 3] - box[:, 1]) 13 | return max_size 14 | 15 | 16 | def assign_boxes_to_levels_by_length( 17 | box_lists, min_level, max_level, canonical_box_size, canonical_level): 18 | """ 19 | Map each box in `box_lists` to a feature map level index and return the assignment 20 | vector. 21 | 22 | Args: 23 | box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, 24 | where N is the number of images in the batch. 25 | min_level (int): Smallest feature map level index. The input is considered index 0, 26 | the output of stage 1 is index 1, and so. 27 | max_level (int): Largest feature map level index. 28 | canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). 29 | canonical_level (int): The feature map level index on which a canonically-sized box 30 | should be placed. 31 | 32 | Returns: 33 | A tensor of length M, where M is the total number of boxes aggregated over all 34 | N batch images. The memory layout corresponds to the concatenation of boxes 35 | from all images. Each element is the feature map index, as an offset from 36 | `self.min_level`, for the corresponding box (so value i means the box is at 37 | `self.min_level + i`). 38 | """ 39 | eps = sys.float_info.epsilon 40 | box_sizes = cat([_box_max_size(boxes) for boxes in box_lists]) 41 | # Eqn.(1) in FPN paper 42 | level_assignments = torch.floor( 43 | canonical_level + torch.log2(box_sizes / canonical_box_size + eps) 44 | ) 45 | level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) 46 | return level_assignments.to(torch.int64) - min_level 47 | 48 | 49 | class TopPooler(ROIPooler): 50 | """ 51 | ROIPooler with option to assign level by max length. Used by top modules. 
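    With assign_crit="length", boxes are assigned to FPN levels using their
    longest side max(w, h) instead of sqrt(area), so an elongated box lands on
    the same or a coarser level than the default area-based criterion would pick.

    Example (illustrative argument values)::

        pooler = TopPooler(output_size=14,
                           scales=(1/8, 1/16, 1/32, 1/64, 1/128),
                           sampling_ratio=2, pooler_type="ROIAlign",
                           assign_crit="length")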
52 | """ 53 | def __init__(self, 54 | output_size, 55 | scales, 56 | sampling_ratio, 57 | pooler_type, 58 | canonical_box_size=224, 59 | canonical_level=4, 60 | assign_crit="area",): 61 | super().__init__(output_size, scales, sampling_ratio, pooler_type, 62 | canonical_box_size=canonical_box_size, 63 | canonical_level=canonical_level) 64 | self.assign_crit = assign_crit 65 | 66 | def forward(self, x, box_lists): 67 | """ 68 | Args: 69 | x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those 70 | used to construct this module. 71 | box_lists (list[Boxes] | list[RotatedBoxes]): 72 | A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. 73 | The box coordinates are defined on the original image and 74 | will be scaled by the `scales` argument of :class:`ROIPooler`. 75 | 76 | Returns: 77 | Tensor: 78 | A tensor of shape (M, C, output_size, output_size) where M is the total number of 79 | boxes aggregated over all N batch images and C is the number of channels in `x`. 80 | """ 81 | num_level_assignments = len(self.level_poolers) 82 | 83 | assert isinstance(x, list) and isinstance( 84 | box_lists, list 85 | ), "Arguments to pooler must be lists" 86 | assert ( 87 | len(x) == num_level_assignments 88 | ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( 89 | num_level_assignments, len(x) 90 | ) 91 | 92 | assert len(box_lists) == x[0].size( 93 | 0 94 | ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( 95 | x[0].size(0), len(box_lists) 96 | ) 97 | 98 | pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) 99 | 100 | if num_level_assignments == 1: 101 | return self.level_poolers[0](x[0], pooler_fmt_boxes) 102 | 103 | if self.assign_crit == "length": 104 | assign_method = assign_boxes_to_levels_by_length 105 | else: 106 | assign_method = assign_boxes_to_levels 107 | 108 | level_assignments = assign_method( 109 | box_lists, self.min_level, self.max_level, 110 | self.canonical_box_size, self.canonical_level) 111 | 112 | num_boxes = len(pooler_fmt_boxes) 113 | num_channels = x[0].shape[1] 114 | output_size = self.output_size[0] 115 | 116 | dtype, device = x[0].dtype, x[0].device 117 | output = torch.zeros( 118 | (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device 119 | ) 120 | 121 | for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): 122 | inds = torch.nonzero(level_assignments == level).squeeze(1) 123 | pooler_fmt_boxes_level = pooler_fmt_boxes[inds] 124 | output[inds] = pooler(x_level, pooler_fmt_boxes_level) 125 | 126 | return output 127 | -------------------------------------------------------------------------------- /core/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointset import PolygonPoints, ExtremePoints -------------------------------------------------------------------------------- /core/structures/pointset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Tuple 2 | import torch 3 | from detectron2.structures import Boxes 4 | 5 | from detectron2.layers import cat 6 | 7 | 8 | class ExtremePoints: 9 | def __init__(self, tensor: torch.Tensor): 10 | """ 11 | :param tensor (Tensor[float]): a Nx4x2 tensor. 
Last dim is (x, y); second last follows [tt, ll, bb, rr]: 12 | """ 13 | device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") 14 | tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) 15 | if tensor.numel() == 0: 16 | tensor = torch.zeros(0, 4, 2, dtype=torch.float32, device=device) 17 | assert tensor.dim() == 3 and tensor.size(-1) == 2, tensor.size() 18 | 19 | self.tensor = tensor 20 | self.spanned_nodes = [] 21 | self.box = None 22 | 23 | def clone(self) -> "ExtremePoints": 24 | """ 25 | Clone the Boxes. 26 | 27 | Returns: 28 | Boxes 29 | """ 30 | return ExtremePoints(self.tensor.clone()) 31 | 32 | def to(self, device: str) -> "ExtremePoints": 33 | return ExtremePoints(self.tensor.to(device)) 34 | 35 | def get_boxes(self) -> Boxes: 36 | bboxes = torch.stack([ 37 | self.tensor[:, 1, 0], 38 | self.tensor[:, 0, 1], 39 | self.tensor[:, 3, 0], 40 | self.tensor[:, 2, 1], 41 | ], dim=1) 42 | return Boxes(bboxes) 43 | 44 | def compute_on_ext_centered_masks(self, N, edge_map, radius, mode, image_shape): 45 | self.spread(N, radius, mode, image_shape) 46 | m = torch.zeros((N,) + image_shape, device=self.device) 47 | num_nodes = [] 48 | for i, node in enumerate(self.spanned_nodes): 49 | node = node.long() 50 | m[i, node[:, 1], node[:, 0]] = 1 # TODO: trigger CUDA assert, ...? 51 | num_nodes.append(node.size(0)) 52 | 53 | edge_map_m = edge_map.unsqueeze(0) * m 54 | instance_score = edge_map_m.sum(dim=1).sum(dim=1) 55 | num_nodes = torch.tensor(num_nodes, device=self.device) 56 | return instance_score / num_nodes 57 | 58 | def compute_by_grid_sample(self, N, edge_map, radius, mode, image_shape): 59 | self.spread(N, radius, mode, image_shape) 60 | mean_scores = [] 61 | for i, node in enumerate(self.spanned_nodes): 62 | sampled_nodes = torch.nn.functional.grid_sample(edge_map.unsqueeze(0).unsqueeze(0), 63 | node.unsqueeze(0).unsqueeze(0)) 64 | mean_scores.append(sampled_nodes.mean()) 65 | return torch.stack(mean_scores) 66 | 67 | def spread(self, N, radius, mode, image_shape): 68 | """ 69 | Spreads the extreme points for robustness. 
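        Each of the four extreme points is expanded into a small axis-aligned
        window whose half-width / half-height are `radius` fractions of the box
        width / height, and the spanned pixel coordinates are clamped to the
        image bounds.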
70 | :param N: (int) number of instances 71 | :param radius: (int) circle radius 72 | :param mode: (str) 'linear' or 'gaussian', # TODO now only support linear 73 | """ 74 | if len(self.spanned_nodes) == N: 75 | # avoid re-compute 76 | return 77 | assert mode == 'linear', 'unsupported mode' 78 | h, w = image_shape 79 | box = self.get_boxes().tensor 80 | whs = torch.stack([(box[:, 2] - box[:, 0]), (box[:, 3] - box[:, 1])], dim=1) 81 | num_pix_r = (whs * radius).floor() 82 | ext_pts = self.tensor 83 | for i in range(N): 84 | per_num_pix_r = num_pix_r[i] 85 | per_ext_pts = ext_pts[i] 86 | square_area = int((per_num_pix_r[0] * 2 + 1) * (per_num_pix_r[1] * 2 + 1)) 87 | per_spanned_pts = per_ext_pts.repeat_interleave(int(square_area), dim=0) 88 | span_xs = torch.arange(-int(per_num_pix_r[0]), int(per_num_pix_r[0]) + 1, 89 | step=1, dtype=torch.float32, device=self.device) 90 | span_ys = torch.arange(-int(per_num_pix_r[1]), int(per_num_pix_r[1]) + 1, 91 | step=1, dtype=torch.float32, device=self.device) 92 | span_y, span_x = torch.meshgrid(span_ys, span_xs) 93 | span_xy = torch.stack([span_x.reshape(-1), span_y.reshape(-1)], dim=1) 94 | # (4 * square_area, 2) 95 | per_spanned_nodes = (per_spanned_pts + span_xy.repeat(4, 1)).floor() 96 | per_spanned_nodes[:, 0].clamp_(min=0, max=w - 1) 97 | per_spanned_nodes[:, 1].clamp_(min=0, max=h - 1) 98 | self.spanned_nodes.append(per_spanned_nodes) 99 | 100 | def align(self, pooler_resolution): 101 | box = self.get_boxes().tensor 102 | w = box[:, 2] - box[:, 0] + 1 103 | h = box[:, 3] - box[:, 1] + 1 104 | de_location = self.tensor - box[:, None, :2] 105 | de_location[:, :, 0] /= w[:, None] / pooler_resolution # x 106 | de_location[:, :, 1] /= h[:, None] / pooler_resolution # y 107 | return de_location.int() 108 | 109 | @staticmethod 110 | def from_boxes(boxes: Boxes, offsets: torch.Tensor, locations: torch.Tensor) -> "ExtremePoints": 111 | """ 112 | Generate the ExtremePoints from a box and offset along each edge, with locations bing origins; 113 | the outputs will correspond to the input boxes 114 | :param boxes (Boxes): from Nx4 tensor matrix. 115 | :param offsets (torch.Tensor): float matrix of Nx4. 
116 | :param locations (torch.Tensor): float matrix of Nx2, indicating corresponding locations 117 | :return: ExtremePoints 118 | """ 119 | x1 = boxes.tensor[:, 0] # ll_x 120 | y1 = boxes.tensor[:, 1] # tt_y 121 | x2 = boxes.tensor[:, 2] # rr_x 122 | y2 = boxes.tensor[:, 3] # bb_y 123 | w = x2 - x1 124 | h = y2 - y1 125 | tt_x = (locations[:, 0] + w * offsets[:, 0]) 126 | ll_y = (locations[:, 1] + h * offsets[:, 1]) 127 | bb_x = (locations[:, 0] + w * offsets[:, 2]) 128 | rr_y = (locations[:, 1] + h * offsets[:, 3]) 129 | 130 | return ExtremePoints(torch.stack([tt_x, y1, x1, ll_y, bb_x, y2, x2, rr_y], dim=1).view(-1, 4, 2)) 131 | 132 | def fit_to_box(self): 133 | box = self.get_boxes().tensor 134 | n = box.size(0) 135 | lower_bound = box.view(-1, 2, 2)[:, :1, :] 136 | upper_bound = box.view(-1, 2, 2)[:, 1:, :] 137 | beyond_lower = self.tensor < lower_bound 138 | beyond_upper = self.tensor > upper_bound 139 | if beyond_lower.any(): 140 | self.tensor[beyond_lower] = lower_bound.expand(n, 4, 2)[beyond_lower] 141 | if beyond_upper.any(): 142 | self.tensor[beyond_upper] = upper_bound.expand(n, 4, 2)[beyond_upper] 143 | 144 | def scale(self, scale_x: float, scale_y: float) -> None: 145 | self.tensor[:, :, 0] *= scale_x 146 | self.tensor[:, :, 1] *= scale_y 147 | 148 | def get_octagons(self, frac=8.): 149 | # counter clock wise 150 | ext_pts = self.tensor # N x 4 x 2 151 | N = len(ext_pts) 152 | if N == 0: 153 | return ext_pts.new_empty(0, 16) 154 | w, h = ext_pts[:, 3, 0] - ext_pts[:, 1, 0], ext_pts[:, 2, 1] - ext_pts[:, 0, 1] 155 | t, l, b, r = ext_pts[:, 0, 1], ext_pts[:, 1, 0], ext_pts[:, 2, 1], ext_pts[:, 3, 0] 156 | x1, y1 = torch.min(ext_pts[:, 0, 0] + w / frac, r), ext_pts[:, 0, 1] 157 | x2, y2 = torch.max(ext_pts[:, 0, 0] - w / frac, l), ext_pts[:, 0, 1] 158 | x3, y3 = ext_pts[:, 1, 0], torch.max(ext_pts[:, 1, 1] - h / frac, t) 159 | x4, y4 = ext_pts[:, 1, 0], torch.min(ext_pts[:, 1, 1] + h / frac, b) 160 | x5, y5 = torch.max(ext_pts[:, 2, 0] - w / frac, l), ext_pts[:, 2, 1] 161 | x6, y6 = torch.min(ext_pts[:, 2, 0] + w / frac, r), ext_pts[:, 2, 1] 162 | x7, y7 = ext_pts[:, 3, 0], torch.min(ext_pts[:, 3, 1] + h / frac, b) 163 | x8, y8 = ext_pts[:, 3, 0], torch.max(ext_pts[:, 3, 1] - h / frac, t) 164 | octagons = torch.stack([x1, y1, x2, y2, x3, y3, x4, y4, 165 | x5, y5, x6, y6, x7, y7, x8, y8], dim=1) 166 | return octagons 167 | 168 | def area(self) -> torch.Tensor: 169 | return self.get_boxes().area() 170 | 171 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "ExtremePoints": 172 | """ 173 | Returns: 174 | ExtremePoints: Create a new :class:`ExtremePoints` by indexing. 175 | 176 | The following usage are allowed: 177 | 178 | 1. `new_exts = exts[3]`: return a `ExtremePoints` which contains only one box. 179 | 2. `new_exts = exts[2:10]`: return a slice of extreme points. 180 | 3. `new_exts = exts[vector]`, where vector is a torch.BoolTensor 181 | with `length = len(exts)`. Nonzero elements in the vector will be selected. 182 | 183 | Note that the returned ExtremePoints might share storage with this ExtremePoints, 184 | subject to Pytorch's indexing semantics. 
185 | """ 186 | if isinstance(item, int): 187 | return ExtremePoints(self.tensor[item].view(1, -1)) 188 | b = self.tensor[item] 189 | assert b.dim() == 3, "Indexing on ExtremePoints with {} failed to return a matrix!".format(item) 190 | return ExtremePoints(b) 191 | 192 | def __len__(self) -> int: 193 | return self.tensor.shape[0] 194 | 195 | def __repr__(self) -> str: 196 | return "ExtPts(" + str(self.tensor) + ")" 197 | 198 | @staticmethod 199 | def cat(pts_list: List["ExtremePoints"]) -> "ExtremePoints": 200 | """ 201 | Concatenates a list of ExtremePoints into a single ExtremePoints 202 | 203 | Arguments: 204 | pts_list (list[ExtremePoints]) 205 | 206 | Returns: 207 | pts: the concatenated ExtremePoints 208 | """ 209 | assert isinstance(pts_list, (list, tuple)) 210 | assert len(pts_list) > 0 211 | assert all(isinstance(pts, ExtremePoints) for pts in pts_list) 212 | 213 | cat_pts = type(pts_list[0])(cat([p.tensor for p in pts_list], dim=0)) 214 | return cat_pts 215 | 216 | @property 217 | def device(self) -> torch.device: 218 | return self.tensor.device 219 | 220 | 221 | class PolygonPoints: 222 | BoxSizeType = Union[List[int], Tuple[int, int]] 223 | 224 | def __init__(self, tensor: torch.Tensor): 225 | """ 226 | :param tensor (Tensor[float]): a Nxkx2 tensor. Last dim is (x, y); 227 | """ 228 | device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") 229 | tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) 230 | if tensor.numel() == 0: 231 | tensor = torch.zeros(0, 128, 2, dtype=torch.float32, device=device) 232 | assert tensor.dim() == 3 and tensor.size(-1) == 2, tensor.size() 233 | 234 | self.tensor = tensor 235 | 236 | def clone(self) -> "PolygonPoints": 237 | 238 | return PolygonPoints(self.tensor.clone()) 239 | 240 | def to(self, device: str) -> "PolygonPoints": 241 | return PolygonPoints(self.tensor.to(device)) 242 | 243 | def scale(self, scale_x: float, scale_y: float) -> None: 244 | self.tensor[:, :, 0] *= scale_x 245 | self.tensor[:, :, 1] *= scale_y 246 | 247 | def clip(self, box_size: BoxSizeType) -> None: 248 | assert torch.isfinite(self.tensor).all(), "Polygon tensor contains infinite or NaN!" 249 | h, w = box_size 250 | self.tensor[:, :, 0].clamp_(min=0, max=w) 251 | self.tensor[:, :, 1].clamp_(min=0, max=h) 252 | 253 | def flatten(self): 254 | n = self.tensor.size(0) 255 | if n == 0: 256 | return self.tensor 257 | return self.tensor.reshape(n, -1) 258 | 259 | def get_box(self): 260 | return torch.cat([self.tensor.min(1)[0], self.tensor.max(1)[0]], dim=1) 261 | 262 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "PolygonPoints": 263 | """ 264 | Returns: 265 | ExtremePoints: Create a new :class:`ExtremePoints` by indexing. 266 | 267 | The following usage are allowed: 268 | 269 | 1. `new_exts = exts[3]`: return a `ExtremePoints` which contains only one box. 270 | 2. `new_exts = exts[2:10]`: return a slice of extreme points. 271 | 3. `new_exts = exts[vector]`, where vector is a torch.BoolTensor 272 | with `length = len(exts)`. Nonzero elements in the vector will be selected. 273 | 274 | Note that the returned ExtremePoints might share storage with this ExtremePoints, 275 | subject to Pytorch's indexing semantics. 
276 | """ 277 | if isinstance(item, int): 278 | return PolygonPoints(self.tensor[item].view(1, -1)) 279 | b = self.tensor[item] 280 | assert b.dim() == 3, "Indexing on PolygonPoints with {} failed to return a matrix!".format(item) 281 | return PolygonPoints(b) 282 | 283 | def __len__(self) -> int: 284 | return self.tensor.shape[0] 285 | 286 | def __repr__(self) -> str: 287 | return "PolyPts(" + str(self.tensor) + ")" 288 | 289 | @staticmethod 290 | def cat(pts_list: List["PolygonPoints"]) -> "PolygonPoints": 291 | """ 292 | Concatenates a list of ExtremePoints into a single ExtremePoints 293 | 294 | Arguments: 295 | pts_list (list[PolygonPoints]) 296 | 297 | Returns: 298 | pts: the concatenated PolygonPoints 299 | """ 300 | assert isinstance(pts_list, (list, tuple)) 301 | assert len(pts_list) > 0 302 | assert all(isinstance(pts, PolygonPoints) for pts in pts_list) 303 | 304 | cat_pts = type(pts_list[0])(cat([p.tensor for p in pts_list], dim=0)) 305 | return cat_pts 306 | 307 | @property 308 | def device(self) -> torch.device: 309 | return self.tensor.device 310 | -------------------------------------------------------------------------------- /core/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/core/utils/__init__.py -------------------------------------------------------------------------------- /core/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is borrowed from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py) 3 | """ 4 | import torch.distributed as dist 5 | from detectron2.utils.comm import get_world_size 6 | 7 | 8 | def reduce_sum(tensor): 9 | world_size = get_world_size() 10 | if world_size < 2: 11 | return tensor 12 | tensor = tensor.clone() 13 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 14 | return tensor 15 | -------------------------------------------------------------------------------- /core/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | 4 | _total_times = defaultdict(lambda: 0) 5 | _start_times = defaultdict(lambda: -1) 6 | _disabled_names = set() 7 | _timer_stack = [] 8 | _running_timer = None 9 | _disable_all = False 10 | 11 | 12 | def disable_all(): 13 | global _disable_all 14 | _disable_all = True 15 | 16 | 17 | def enable_all(): 18 | global _disable_all 19 | _disable_all = False 20 | 21 | 22 | def disable(fn_name): 23 | """ Disables the given function name fom being considered for the average or outputted in print_stats. """ 24 | _disabled_names.add(fn_name) 25 | 26 | 27 | def enable(fn_name): 28 | """ Enables function names disabled by disable. """ 29 | _disabled_names.remove(fn_name) 30 | 31 | 32 | def reset(): 33 | """ Resets the current timer. Call this at the start of an iteration. """ 34 | global _running_timer 35 | _total_times.clear() 36 | _start_times.clear() 37 | _timer_stack.clear() 38 | _running_timer = None 39 | 40 | 41 | def start(fn_name, use_stack=True): 42 | """ 43 | Start timing the specific function. 44 | Note: If use_stack is True, only one timer can be active at a time. 45 | Once you stop this timer, the previous one will start again. 
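    With the stack enabled, each named section therefore accumulates only the
    time during which it is the innermost (active) timer, i.e. exclusive rather
    than inclusive time.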
46 | """ 47 | global _running_timer, _disable_all 48 | 49 | if _disable_all: 50 | return 51 | 52 | if use_stack: 53 | if _running_timer is not None: 54 | stop(_running_timer, use_stack=False) 55 | _timer_stack.append(_running_timer) 56 | start(fn_name, use_stack=False) 57 | _running_timer = fn_name 58 | else: 59 | _start_times[fn_name] = time.perf_counter() 60 | 61 | 62 | def stop(fn_name=None, use_stack=True): 63 | """ 64 | If use_stack is True, this will stop the currently running timer and restore 65 | the previous timer on the stack if that exists. Note if use_stack is True, 66 | fn_name will be ignored. 67 | 68 | If use_stack is False, this will just stop timing the timer fn_name. 69 | """ 70 | global _running_timer, _disable_all 71 | 72 | if _disable_all: 73 | return 74 | 75 | if use_stack: 76 | if _running_timer is not None: 77 | stop(_running_timer, use_stack=False) 78 | if len(_timer_stack) > 0: 79 | _running_timer = _timer_stack.pop() 80 | start(_running_timer, use_stack=False) 81 | else: 82 | _running_timer = None 83 | else: 84 | print('Warning: timer stopped with no timer running!') 85 | else: 86 | if _start_times[fn_name] > -1: 87 | _total_times[fn_name] += time.perf_counter() - _start_times[fn_name] 88 | else: 89 | print('Warning: timer for %s stopped before starting!' % fn_name) 90 | 91 | 92 | def print_stats(divider=5000): 93 | """ Prints the current timing information into a table. """ 94 | print() 95 | 96 | all_fn_names = [k for k in _total_times.keys() if k not in _disabled_names] 97 | 98 | max_name_width = max([len(k) for k in all_fn_names] + [4]) 99 | if max_name_width % 2 == 1: max_name_width += 1 100 | format_str = ' {:>%d} | {:>10.4f} ' % max_name_width 101 | 102 | header = (' {:^%d} | {:^10} ' % max_name_width).format('Name', 'Time (ms)') 103 | print(header) 104 | 105 | sep_idx = header.find('|') 106 | sep_text = ('-' * sep_idx) + '+' + '-' * (len(header) - sep_idx - 1) 107 | print(sep_text) 108 | 109 | for name in all_fn_names: 110 | print(format_str.format(name, _total_times[name] * 1000 / divider)) # val2017, 5k ims. 111 | 112 | print(sep_text) 113 | print(format_str.format('Total', total_time() * 1000 / divider)) # val2017, 5k ims. 114 | print() 115 | 116 | 117 | def total_time(): 118 | """ Returns the total amount accumulated across all functions in seconds. """ 119 | return sum([elapsed_time for name, elapsed_time in _total_times.items() if name not in _disabled_names]) 120 | 121 | 122 | class env(): 123 | """ 124 | A class that lets you go: 125 | with timer.env(fn_name): 126 | # (...) 127 | That automatically manages a timer start and stop for you. 
128 | """ 129 | 130 | def __init__(self, fn_name, use_stack=True): 131 | self.fn_name = fn_name 132 | self.use_stack = use_stack 133 | 134 | def __enter__(self): 135 | start(self.fn_name, use_stack=self.use_stack) 136 | 137 | def __exit__(self, e, ev, t): 138 | stop(self.fn_name, use_stack=self.use_stack) 139 | 140 | -------------------------------------------------------------------------------- /core/utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from detectron2.utils.visualizer import ( 3 | Visualizer, ColorMode, GenericMask, 4 | _create_text_labels, _SMALL_OBJECT_AREA_THRESH 5 | ) 6 | import pycocotools.mask as mask_util 7 | from detectron2.utils.colormap import random_color 8 | 9 | from core.structures.pointset import ExtremePoints 10 | 11 | 12 | def get_polygon_rles(polygons, image_shape): 13 | # input: N x (p*2) 14 | polygons = polygons.cpu().numpy() 15 | h, w = image_shape 16 | rles = [ 17 | mask_util.merge(mask_util.frPyObjects([p.tolist()], h, w)) 18 | for p in polygons 19 | ] 20 | return rles 21 | 22 | 23 | class ExVisualizer(Visualizer): 24 | def __init__(self, img_rgb, metadata, scale=1.0, instance_mode=ColorMode.IMAGE): 25 | super().__init__(img_rgb, metadata, scale=scale, instance_mode=instance_mode) 26 | 27 | def draw_instance_predictions(self, predictions): 28 | """ 29 | :param predictions: 30 | :return: Besides the functions of its mother class method, this method deals with extreme points. 31 | """ 32 | ext_points = predictions.ext_points if predictions.has("ext_points") else None 33 | pred_polys = predictions.pred_polys if predictions.has("pred_polys") else None 34 | if False: 35 | return super().draw_instance_predictions(predictions) 36 | else: 37 | boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None 38 | scores = predictions.scores if predictions.has("scores") else None 39 | classes = predictions.pred_classes if predictions.has("pred_classes") else None 40 | labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) 41 | keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None 42 | 43 | if predictions.has("pred_masks"): 44 | masks = np.asarray(predictions.pred_masks) 45 | masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] 46 | else: 47 | if predictions.has("pred_polys"): 48 | output_height = predictions.image_size[0] 49 | output_width = predictions.image_size[1] 50 | pred_masks = get_polygon_rles(predictions.pred_polys.flatten(), 51 | (output_height, output_width)) 52 | 53 | masks = np.asarray(pred_masks) 54 | masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] 55 | else: 56 | masks = None 57 | 58 | path = predictions.pred_path.numpy() if predictions.has("pred_path") else None 59 | 60 | if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): 61 | colors = [ 62 | self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes 63 | ] 64 | alpha = 0.8 65 | else: 66 | colors = None 67 | alpha = 0.5 68 | 69 | if self._instance_mode == ColorMode.IMAGE_BW: 70 | assert predictions.has("pred_masks"), "ColorMode.IMAGE_BW requires segmentations" 71 | self.output.img = self._create_grayscale_image( 72 | (predictions.pred_masks.any(dim=0) > 0).numpy() 73 | ) 74 | alpha = 0.3 75 | 76 | self.overlay_instances( 77 | masks=masks, 78 | boxes=boxes, 79 | labels=labels, 80 | ext_points=ext_points, 81 | path=path, 82 | 
keypoints=keypoints, 83 | assigned_colors=colors, 84 | alpha=alpha, 85 | ) 86 | return self.output 87 | 88 | def draw_extreme_pts(self, pts_coord, circle_color, radius=2): 89 | for pt in pts_coord: 90 | x, y = pt 91 | self.draw_circle([x, y], color=circle_color, radius=radius) 92 | return self.output 93 | 94 | def draw_snake_path(self, path, color, alpha=0.7): 95 | # path (4, num_points, 2) 96 | for i, poly in enumerate(path): 97 | if i > 0: 98 | prev_poly = path[i - 1] 99 | offsets = poly - prev_poly 100 | for j in range(len(offsets)): 101 | self.output.ax.arrow(prev_poly[j, 0], 102 | prev_poly[j, 1], 103 | offsets[j, 0], 104 | offsets[j, 1], 105 | linestyle='-', 106 | linewidth=1, 107 | alpha=alpha) 108 | self.output.ax.plot(poly[0:, 0], 109 | poly[0:, 1], 110 | color=color, 111 | marker='1', 112 | alpha=alpha) 113 | return self.output 114 | 115 | def _convert_ext_points(self, ext_points): 116 | if isinstance(ext_points, ExtremePoints): 117 | return ext_points.tensor.numpy() 118 | else: 119 | return np.asarray(ext_points) 120 | 121 | def overlay_instances( 122 | self, 123 | *, 124 | boxes=None, 125 | labels=None, 126 | masks=None, 127 | ext_points=None, 128 | path=None, 129 | keypoints=None, 130 | assigned_colors=None, 131 | alpha=0.5 132 | ): 133 | num_instances = None 134 | if boxes is not None: 135 | boxes = self._convert_boxes(boxes) 136 | num_instances = len(boxes) 137 | if masks is not None: 138 | masks = self._convert_masks(masks) 139 | if num_instances: 140 | assert len(masks) == num_instances 141 | else: 142 | num_instances = len(masks) 143 | if keypoints is not None: 144 | if num_instances: 145 | assert len(keypoints) == num_instances 146 | else: 147 | num_instances = len(keypoints) 148 | keypoints = self._convert_keypoints(keypoints) 149 | if ext_points is not None: 150 | ext_points = self._convert_ext_points(ext_points) 151 | if num_instances: 152 | assert len(ext_points) == num_instances 153 | else: 154 | num_instances = len(ext_points) 155 | if labels is not None: 156 | assert len(labels) == num_instances 157 | if assigned_colors is None: 158 | assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] 159 | if num_instances == 0: 160 | return self.output 161 | if boxes is not None and boxes.shape[1] == 5: 162 | return self.overlay_rotated_instances( 163 | boxes=boxes, labels=labels, assigned_colors=assigned_colors 164 | ) 165 | 166 | # Display in largest to smallest order to reduce occlusion. 167 | areas = None 168 | if boxes is not None: 169 | areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) 170 | elif masks is not None: 171 | areas = np.asarray([x.area() for x in masks]) 172 | 173 | if areas is not None: 174 | sorted_idxs = np.argsort(-areas).tolist() 175 | # Re-order overlapped instances in descending order. 
176 | boxes = boxes[sorted_idxs] if boxes is not None else None 177 | labels = [labels[k] for k in sorted_idxs] if labels is not None else None 178 | masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None 179 | assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] 180 | keypoints = keypoints[sorted_idxs] if keypoints is not None else None 181 | 182 | for i in range(num_instances): 183 | color = assigned_colors[i] 184 | if boxes is not None: 185 | self.draw_box(boxes[i], edge_color=color) 186 | 187 | if masks is not None: 188 | for segment in masks[i].polygons: 189 | self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) 190 | 191 | if ext_points is not None: 192 | self.draw_extreme_pts(ext_points[i], circle_color=color, radius=3) 193 | 194 | if path is not None: 195 | self.draw_snake_path(path[i], color=color) 196 | 197 | if labels is not None: 198 | # first get a box 199 | # boxes = None 200 | if boxes is not None: 201 | x0, y0, x1, y1 = boxes[i] 202 | text_pos = (x0, y0) # if drawing boxes, put text on the box corner. 203 | horiz_align = "left" 204 | elif masks is not None: 205 | x0, y0, x1, y1 = masks[i].bbox() 206 | 207 | # draw text in the center (defined by median) when box is not drawn 208 | # median is less sensitive to outliers. 209 | text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] 210 | horiz_align = "center" 211 | else: 212 | continue # drawing the box confidence for keypoints isn't very useful. 213 | # for small objects, draw text at the side to avoid occlusion 214 | 215 | text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] 216 | horiz_align = "center" 217 | 218 | instance_area = (y1 - y0) * (x1 - x0) 219 | if ( 220 | instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale 221 | or y1 - y0 < 40 * self.output.scale 222 | ): 223 | if y1 >= self.output.height - 5: 224 | text_pos = (x1, y0) 225 | else: 226 | text_pos = (x0, y1) 227 | 228 | height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) 229 | lighter_color = self._change_color_brightness(color, brightness_factor=0.7) 230 | font_size = ( 231 | np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) 232 | * 0.5 233 | * self._default_font_size 234 | ) 235 | self.draw_text( 236 | labels[i], 237 | text_pos, 238 | color=lighter_color, 239 | horizontal_alignment=horiz_align, 240 | font_size=font_size, 241 | ) 242 | 243 | # draw keypoints 244 | if keypoints is not None: 245 | for keypoints_per_instance in keypoints: 246 | self.draw_and_connect_keypoints(keypoints_per_instance) 247 | 248 | return self.output 249 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/prepare_edge_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
3 | 
4 | # Adapted for edge map generation from panoptic segmentation data of COCO
5 | 
6 | import time
7 | import functools
8 | import json
9 | import multiprocessing as mp
10 | import numpy as np
11 | import os
12 | from PIL import Image
13 | import cv2
14 | 
15 | from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
16 | 
17 | from panopticapi.utils import rgb2id
18 | 
19 | EDGE_THICKNESS = 1
20 | 
21 | 
22 | def save_edge_map(instance_map, output_edge):
23 |     canvas = np.zeros_like(instance_map)
24 |     for i in range(np.max(instance_map)):
25 |         instance_idx = i + 1
26 |         contours, hierarchy = cv2.findContours(
27 |             (instance_map == instance_idx).astype(np.uint8),
28 |             cv2.RETR_EXTERNAL,
29 |             cv2.CHAIN_APPROX_NONE)
30 |         cv2.drawContours(canvas, contours, -1, 1, EDGE_THICKNESS)
31 |     cv2.imwrite(output_edge, canvas)
32 | 
33 | 
34 | def _process_panoptic_to_instance(input_panoptic, output_edge, segments, stuff_ids):
35 |     # assuming there are no more than 255 instances in one image;
36 |     # if violated, consider using RGB instead of gray-scale
37 |     panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32)
38 |     panoptic = rgb2id(panoptic)  # map to stuff/thing object ids.
39 |     instance_map = np.zeros_like(panoptic, dtype=np.uint8)
40 |     instance_count = 1
41 |     for seg in segments:
42 |         cat_id = seg["category_id"]
43 |         if cat_id in stuff_ids:
44 |             continue
45 |         else:
46 |             assert instance_count <= 255, 'Too many instances (>255)'
47 |             instance_map[panoptic == seg["id"]] = instance_count
48 |             instance_count += 1
49 |     save_edge_map(instance_map, output_edge)
50 | 
51 | 
52 | def separate_coco_edge_map_from_panoptic(panoptic_json, panoptic_root, edge_root, categories):
53 |     os.makedirs(edge_root, exist_ok=True)
54 | 
55 |     stuff_ids = [k["id"] for k in categories if k["isthing"] == 0]
56 | 
57 |     with open(panoptic_json) as f:
58 |         obj = json.load(f)
59 | 
60 |     pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4))
61 | 
62 |     def iter_annotations():
63 |         for anno in obj["annotations"]:
64 |             file_name = anno["file_name"]
65 |             segments = anno["segments_info"]
66 |             input = os.path.join(panoptic_root, file_name)
67 |             output = os.path.join(edge_root, file_name)
68 |             yield input, output, segments
69 | 
70 |     print("Start writing to {} ...".format(edge_root))
71 |     start = time.time()
72 |     pool.starmap(
73 |         functools.partial(_process_panoptic_to_instance, stuff_ids=stuff_ids),
74 |         iter_annotations(),
75 |         chunksize=100,
76 |     )
77 |     print("Finished. time: {:.2f}s".format(time.time() - start))
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     dataset_dir = os.path.join(os.path.dirname(__file__), "mycoco")
82 |     for s in ["val2017", "train2017"]:
83 |         separate_coco_edge_map_from_panoptic(
84 |             os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)),
85 |             os.path.join(dataset_dir, "panoptic_{}".format(s)),
86 |             os.path.join(dataset_dir, "edge_{}".format(s)),
87 |             COCO_CATEGORIES,
88 |         )
89 | 
--------------------------------------------------------------------------------
/datasets/prepare_edge_map_cityscapes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 3 | 4 | # Adapted for edge map generation from instance segmentation data of Cityscapes (in COCO format) 5 | 6 | import time 7 | import functools 8 | import multiprocessing as mp 9 | import numpy as np 10 | import os 11 | import cv2 12 | 13 | from pycocotools.coco import COCO 14 | 15 | 16 | EDGE_THICKNESS = 1 17 | 18 | 19 | def save_edge_map(edge_fn, mask, im_size): 20 | canvas = np.zeros(im_size) 21 | all_segs = list(map(lambda x: x['segmentation'], mask)) 22 | counters = [] 23 | for segs in all_segs: 24 | counters += [np.expand_dims(np.array(seg, dtype=np.int32).reshape(-1,2), 0) for seg in segs] 25 | cv2.drawContours(canvas, counters, -1, 1, EDGE_THICKNESS) 26 | cv2.imwrite(edge_fn, canvas) 27 | 28 | 29 | def _process_json_to_mask(file_name, height, width, ann, edge_root): 30 | edge_fn = os.path.join(edge_root, os.path.basename(file_name)) 31 | save_edge_map(edge_fn, ann, [height, width]) 32 | 33 | 34 | def generate_coco_edge_map_from_json(instance_json, edge_root): 35 | os.makedirs(edge_root, exist_ok=True) 36 | 37 | coco_api = COCO(instance_json) 38 | img_ids = sorted(coco_api.imgs.keys()) 39 | 40 | 41 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 42 | 43 | 44 | def iter_annotations(): 45 | count = 0 46 | for img_id in img_ids: 47 | img_info = coco_api.loadImgs(img_id)[0] 48 | file_name = img_info['file_name'] 49 | width = img_info['width'] 50 | height = img_info['height'] 51 | 52 | ann = coco_api.imgToAnns[img_id] 53 | count += 1 54 | yield file_name, height, width, ann 55 | print(count) 56 | 57 | print("Start writing to {} ...".format(edge_root)) 58 | start = time.time() 59 | pool.starmap( 60 | functools.partial(_process_json_to_mask, edge_root=edge_root), 61 | iter_annotations(), 62 | chunksize=100, 63 | ) 64 | print("Finished. time: {:.2f}s".format(time.time() - start)) 65 | 66 | 67 | if __name__ == "__main__": 68 | dataset_dir = os.path.join(os.path.dirname(__file__), "dance") 69 | for s in ["val", "train"]: 70 | generate_coco_edge_map_from_json( 71 | os.path.join(dataset_dir, "coco_ann/instance_{}.json".format(s)), 72 | os.path.join(dataset_dir, "edge_{}".format(s)), 73 | ) 74 | -------------------------------------------------------------------------------- /output/.gitignore: -------------------------------------------------------------------------------- 1 | *.pth -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Shapely==1.7.1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "adet", "layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv( 32 | "FORCE_CUDA", "0" 33 | ) == "1": 34 | extension = CUDAExtension 35 | sources += source_cuda 36 | define_macros += [("WITH_CUDA", None)] 37 | extra_compile_args["nvcc"] = [ 38 | "-DCUDA_HAS_FP16=1", 39 | "-D__CUDA_NO_HALF_OPERATORS__", 40 | "-D__CUDA_NO_HALF_CONVERSIONS__", 41 | "-D__CUDA_NO_HALF2_OPERATORS__", 42 | ] 43 | 44 | # It's better if pytorch can do this by default .. 45 | CC = os.environ.get("CC", None) 46 | if CC is not None: 47 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | 51 | include_dirs = [extensions_dir] 52 | 53 | ext_modules = [ 54 | extension( 55 | "core._C", 56 | sources, 57 | include_dirs=include_dirs, 58 | define_macros=define_macros, 59 | extra_compile_args=extra_compile_args, 60 | ) 61 | ] 62 | 63 | return ext_modules 64 | 65 | 66 | setup( 67 | name="Dance", 68 | version="1.0.0", 69 | author="liuzichen@u.nus.edu", 70 | url="https://github.com/lkevinzc/dance", 71 | description="A Deep Attentive Contour Model for Efficient Instance Segmentation", 72 | packages=find_packages(exclude=("configs", "tests", "detectron2")), 73 | python_requires=">=3.6", 74 | install_requires=[ 75 | "termcolor>=1.1", 76 | "Pillow>=6.0", 77 | "yacs>=0.1.6", 78 | "tabulate", 79 | "cloudpickle", 80 | "matplotlib", 81 | "tqdm>4.29.0", 82 | "tensorboard", 83 | "python-Levenshtein", 84 | "Polygon3", 85 | "shapely", 86 | ], 87 | extras_require={"all": ["psutil"]}, 88 | ext_modules=get_extensions(), 89 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 90 | ) 91 | -------------------------------------------------------------------------------- /train_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import detectron2.utils.comm as comm 4 | from detectron2.checkpoint import DetectionCheckpointer 5 | from detectron2.engine import default_argument_parser, default_setup, launch 6 | from detectron2.evaluation import ( 7 | DatasetEvaluators, 8 | verify_results 9 | ) 10 | from detectron2.data import MetadataCatalog 11 | from detectron2.engine import DefaultTrainer 12 | 13 | # for datasets registration 14 | import core.data # noqa 15 | 16 | from core.config import get_cfg 17 | from core.evaluation import ( 18 | COCOEvaluator, # to prevent redundant conversion 19 | ) 20 | 21 | 22 | class Trainer(DefaultTrainer): 23 | @classmethod 24 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 25 | if output_folder is None: 26 | output_folder = 
os.path.join(cfg.OUTPUT_DIR, "inference") 27 | evaluator_list = [] 28 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 29 | 30 | if "coco" in evaluator_type: 31 | evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) 32 | 33 | if len(evaluator_list) == 0: 34 | raise NotImplementedError( 35 | "no Evaluator for the dataset {} with the type {}".format( 36 | dataset_name, evaluator_type 37 | ) 38 | ) 39 | elif len(evaluator_list) == 1: 40 | return evaluator_list[0] 41 | 42 | return DatasetEvaluators(evaluator_list) 43 | 44 | 45 | def setup(args): 46 | cfg = get_cfg() 47 | cfg.merge_from_file(args.config_file) 48 | cfg.merge_from_list(args.opts) 49 | cfg.freeze() 50 | default_setup(cfg, args) 51 | return cfg 52 | 53 | 54 | def main(args): 55 | cfg = setup(args) 56 | 57 | if args.eval_only: 58 | model = Trainer.build_model(cfg) 59 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 60 | cfg.MODEL.WEIGHTS, resume=args.resume 61 | ) 62 | res = Trainer.test(cfg, model) 63 | if comm.is_main_process(): 64 | verify_results(cfg, res) 65 | return res 66 | 67 | trainer = Trainer(cfg) 68 | trainer.resume_or_load(resume=args.resume) 69 | 70 | return trainer.train() 71 | 72 | 73 | if __name__ == "__main__": 74 | args = default_argument_parser().parse_args() 75 | print("Command Line Args:", args) 76 | launch( 77 | main, 78 | args.num_gpus, 79 | num_machines=args.num_machines, 80 | machine_rank=args.machine_rank, 81 | dist_url=args.dist_url, 82 | args=(args,), 83 | ) 84 | --------------------------------------------------------------------------------
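A minimal usage sketch for `PolygonPoints` from `core/structures/pointset.py` above. It assumes the `core` package is importable (e.g. after installing the repo with `setup.py`); the contour values are arbitrary illustration data, not model outputs.

```python
import torch

from core.structures.pointset import PolygonPoints

# Two instances, each represented by a fixed-length contour of 128 (x, y) points.
contours = torch.rand(2, 128, 2) * 100.0

polys = PolygonPoints(contours)
assert len(polys) == 2

polys.scale(0.5, 0.5)      # rescale x and y coordinates in place
polys.clip((50, 50))       # clamp coordinates to an image of height 50, width 50

boxes = polys.get_box()    # (N, 4) tensor of [x_min, y_min, x_max, y_max]
flat = polys.flatten()     # (N, 128 * 2), the flattened layout used by the RLE helpers

first = polys[0:1]         # slicing returns a new PolygonPoints
merged = PolygonPoints.cat([polys, first])
assert len(merged) == 3
```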
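`core/utils/timer.py` keeps module-level accumulators, so it is used as a singleton: call `reset()` at the start of a measurement window, wrap regions of interest with `timer.env(...)`, and report with `print_stats()`. A small sketch under those assumptions; `preprocess` and `forward` are made-up stand-ins, and `divider` is set to the iteration count instead of the 5000-image COCO default used in the module.

```python
import time

from core.utils import timer


def preprocess():
    time.sleep(0.01)  # stand-in for real work


def forward():
    time.sleep(0.02)  # stand-in for real work


timer.reset()  # clear accumulated times before measuring
for _ in range(10):
    with timer.env('preprocess'):  # with use_stack=True (default), only one timer runs at a time
        preprocess()
    with timer.env('forward'):
        forward()

# print_stats divides the accumulated milliseconds by `divider`,
# so passing the iteration count yields an average time per iteration.
timer.print_stats(divider=10)
```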
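The `get_polygon_rles` helper in `core/utils/visualizer.py` turns flattened contours into COCO RLE masks via pycocotools. The snippet below sketches a round trip on a single hand-made square contour; the coordinates and image size are made up for illustration.

```python
import pycocotools.mask as mask_util
import torch

from core.utils.visualizer import get_polygon_rles

# One instance: a square contour with 4 points, flattened to shape (N, p * 2).
square = torch.tensor([[10.0, 10.0, 50.0, 10.0, 50.0, 50.0, 10.0, 50.0]])

rles = get_polygon_rles(square, (64, 64))  # image size given as (height, width)
area = mask_util.area(rles[0])             # rasterized area, roughly 40 * 40 pixels
mask = mask_util.decode(rles[0])           # back to a binary H x W numpy mask
print(area, mask.shape)
```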