├── .gitignore ├── README.md ├── assets ├── demo.gif └── pipeline.png ├── configs ├── Base-dance.yaml ├── Base-dsnake.yaml ├── Dance_R_101_3x.yaml ├── Dance_R_50_3x.yaml └── Dsnake_R_50_1x.yaml ├── core ├── __init__.py ├── config │ ├── __init__.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── builtin.py │ └── datasets │ │ ├── __init__.py │ │ └── register_coco_edge.py ├── evaluation │ ├── __init__.py │ ├── coco_evaluation.py │ ├── cocoeval.py │ ├── edge_map_evaluation.py │ └── evaluator.py ├── layers │ ├── __init__.py │ ├── deform_conv.py │ ├── extreme_utils │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── src │ │ │ ├── cuda_common.h │ │ │ ├── nms.cu │ │ │ ├── nms.h │ │ │ └── utils.cu │ │ ├── utils.cpp │ │ └── utils.h │ ├── losses.py │ └── ml_nms.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── dla.py │ │ ├── fpn.py │ │ ├── mobilenet.py │ │ └── vovnet.py │ ├── dsnake_baseline │ │ ├── __init__.py │ │ ├── af_two_stage.py │ │ ├── dsnake_head.py │ │ └── postprocessing.py │ ├── edge_snake │ │ ├── __init__.py │ │ ├── dance.py │ │ ├── draft.py │ │ ├── edge_det.py │ │ └── snake_head.py │ ├── fcos │ │ ├── __init__.py │ │ ├── fcos.py │ │ └── fcos_outputs.py │ ├── fcose │ │ ├── __init__.py │ │ ├── deeplab_resnet.py │ │ ├── dextr.py │ │ ├── dextr_eval.py │ │ ├── dextr_helper.py │ │ ├── extreme_detector.py │ │ ├── fcose.py │ │ ├── fcose_outputs.py │ │ └── utils.py │ ├── one_stage_detector.py │ ├── poolers.py │ └── postprocessing.py ├── structures │ ├── __init__.py │ ├── points_set.py │ └── pointset.py └── utils │ ├── __init__.py │ ├── comm.py │ ├── timer.py │ └── visualizer.py ├── datasets ├── __init__.py ├── prepare_edge_map.py └── prepare_edge_map_cityscapes.py ├── output └── .gitignore ├── requirements.txt ├── setup.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Customized 2 | sync*.sh 3 | .vscode 4 | .idea 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | 139 | # pytype static type analyzer 140 | .pytype/ 141 | 142 | # Cython debug symbols 143 | cython_debug/ 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | If you are interested in ML model serving, feel free to have a look at another project of mine, a [model serving framework](https://github.com/mosecorg/mosec)! 2 | 3 | --- 4 | # dance 5 | 6 | A Deep Attentive Contour Model for Efficient Instance Segmentation ([PDF](https://openaccess.thecvf.com/content/WACV2021/html/Liu_DANCE_A_Deep_Attentive_Contour_Model_for_Efficient_Instance_Segmentation_WACV_2021_paper.html)) 7 | 8 | | ![](./assets/pipeline.png) | ![](assets/demo.gif) | 9 | | :------------------------: | :------------------: | 10 | | *DANCE's Pipeline* | *Illustration* | 11 | 12 | ### *note* 13 | The code in the `master` branch is mainly for the experiments on COCO; for the experiments on SBD / Cityscapes, please check out the `snake` branch, which is developed on top of [the codebase of previous art](https://github.com/zju3dv/snake). 14 | 15 | ## Get started 16 | 1. 
Prepare the environment (the scripts are just examples) 17 | - gcc & g++ ≥ 5 18 | - Python 3.6.8 (developed & tested on this version) 19 | - `conda create --name dance python==3.6.8` 20 | - `conda deactivate && conda activate dance` 21 | - PyTorch 1.4 with CUDA 10.1 22 | - `conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.1 -c pytorch` 23 | 2. Clone this project and install framework / package dependencies 24 | - clone dance and install dependencies: `git clone https://github.com/lkevinzc/dance && cd dance && pip install -r requirements.txt && cd ..` 25 | - clone Detectron2 and install v0.1: `git clone https://github.com/facebookresearch/detectron2.git && cd detectron2 && git checkout 1a7daee064eeca2d7fddce4ba74b74183ba1d4a0 && python -m pip install -e . && cd ..` 26 | - install cpp utils: `cd dance/core/layers/extreme_utils && export CUDA_HOME="/usr/local/cuda-10.1" && python setup.py build_ext --inplace` 27 | - install pycocotools: `pip install cython; pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` 28 | - fix the `fvcore` version: `pip install fvcore==0.1.1.dev200512` 29 | 3. Prepare the dataset 30 | - Download from the [COCO official website](https://cocodataset.org/#download) 31 | - put it at `dance/datasets/coco` 32 | 4. Download pre-trained models (metrics on COCO test-dev) 33 | 34 | | model name | AP | AP50 | AP75 | weights | 35 | | :-----------: | :---: | :---: | :---: | :----------------------------------------------------------------------------------------: | 36 | | dance_r50_3x | 36.8 | 58.5 | 39.0 | [link](https://drive.google.com/file/d/1oh0ZkBgnYu6t4dlPNlfxEnhWruA87DIt/view?usp=sharing) | 37 | | dance_r101_3x | 38.1 | 60.2 | 40.5 | [link](https://drive.google.com/file/d/1H5eyu06qBpyw-We7CYEs4IxpdZvouJBo/view?usp=sharing) | 38 | 39 | *note*: put them under `output/` 40 | 41 | ## Evaluation 42 | ```bash 43 | python train_net.py --config-file configs/Dance_R_50_3x.yaml --eval-only MODEL.WEIGHTS ./output/r50_3x_model_final.pth 44 | 45 | python train_net.py --config-file configs/Dance_R_101_3x.yaml --eval-only MODEL.WEIGHTS ./output/r101_3x_model_final.pth 46 | ``` 47 | 48 | ## Discussion 49 | Any discussion or suggestion is welcome! 
Feel free to contact the author via `liuzichen@u.nus.edu` :) 50 | 51 | ## Citation 52 | If you find this project helpful for your research, please consider citing using BibTeX below: 53 | ```tex 54 | @InProceedings{liu2021dance, 55 | author = {Liu, Zichen and Liew, Jun Hao and Chen, Xiangyu and Feng, Jiashi}, 56 | title = {DANCE: A Deep Attentive Contour Model for Efficient Instance Segmentation}, 57 | booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)}, 58 | month = {January}, 59 | year = {2021}, 60 | pages = {345-354} 61 | } 62 | ``` 63 | 64 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/assets/demo.gif -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/assets/pipeline.png -------------------------------------------------------------------------------- /configs/Base-dance.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Dance" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOSE" 12 | DATASETS: 13 | TRAIN: ("coco_2017_train_edge",) 14 | TEST: ("coco_2017_val_edge",) 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # 2 GPUs or 4 GPUs 17 | BASE_LR: 0.005 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 20 | CHECKPOINT_PERIOD: 20000 21 | INPUT: 22 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /configs/Base-dsnake.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "FcosSnake" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # 2 GPUs 17 | BASE_LR: 0.005 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 20 | CHECKPOINT_PERIOD: 20000 21 | INPUT: 22 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /configs/Dance_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DANCE.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | EDGE_HEAD: 7 | NAME: "EdgeSnakeFPNHead" 8 | CONVS_DIM: 256 9 | STRONG_FEAT: True 10 | IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] 11 | SNAKE_HEAD: 12 | NUM_SAMPLING: 196 13 | DETACH: True 14 | INITIAL: 'box' 15 | NEW_MATCHING: True 16 | ATTENTION: True 17 | INDIVIDUAL_SCALE: True 18 | SOLVER: 19 | IMS_PER_BATCH: 6 # 2 GPUs 20 | BASE_LR: 0.00375 
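# Added note (assumption, not in the original config): 0.00375 = 0.005 * (6 / 8), i.e. the base config's LR appears to be scaled linearly with the reduced IMS_PER_BATCH (linear scaling rule).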
21 | STEPS: (660000, 700000) 22 | MAX_ITER: 720000 23 | CHECKPOINT_PERIOD: 5000 24 | OUTPUT_DIR: "output/coco/dance_r101_3x/" 25 | 26 | #DATASETS: 27 | # TEST: ("coco_2017_test-dev",) 28 | -------------------------------------------------------------------------------- /configs/Dance_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-DANCE.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | EDGE_HEAD: 7 | NAME: "EdgeSnakeFPNHead" 8 | CONVS_DIM: 256 9 | STRONG_FEAT: True 10 | IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] 11 | SNAKE_HEAD: 12 | NUM_SAMPLING: 196 13 | DETACH: True 14 | INITIAL: 'box' 15 | NEW_MATCHING: True 16 | ATTENTION: True 17 | INDIVIDUAL_SCALE: True 18 | SOLVER: 19 | STEPS: (480000, 520000) 20 | MAX_ITER: 540000 21 | CHECKPOINT_PERIOD: 60000 22 | OUTPUT_DIR: "output/coco/dance_r50_3x/" 23 | 24 | #DATASETS: 25 | # TEST: ("coco_2017_test-dev",) 26 | -------------------------------------------------------------------------------- /configs/Dsnake_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-dsnake.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | EDGE_HEAD: 7 | CONVS_DIM: 256 8 | OUTPUT_DIR: "output/coco/dsnake/R_50_1x/" 9 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | from core import modeling -------------------------------------------------------------------------------- /core/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | 3 | __all__ = [ 4 | "get_cfg", 5 | ] 6 | -------------------------------------------------------------------------------- /core/config/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode 2 | 3 | 4 | def get_cfg() -> CfgNode: 5 | """ 6 | Get a copy of the default config. 7 | 8 | Returns: 9 | a detectron2 CfgNode instance. 10 | """ 11 | from .defaults import _C 12 | 13 | return _C.clone() 14 | -------------------------------------------------------------------------------- /core/config/defaults.py: -------------------------------------------------------------------------------- 1 | from detectron2.config.defaults import _C 2 | from detectron2.config import CfgNode as CN 3 | 4 | 5 | # ---------------------------------------------------------------------------- # 6 | # Additional Configs 7 | # ---------------------------------------------------------------------------- # 8 | _C.MODEL.MOBILENET = False 9 | _C.MODEL.USE_VOVNET = False 10 | 11 | # ---------------------------------------------------------------------------- # 12 | # MY CONFIG (ZC) 13 | # ---------------------------------------------------------------------------- # 14 | _C.MODEL.DANCE = CN() 15 | 16 | # Channeling the input for mask_pred used for model evaluation 17 | # Use NO to avoid error during evaluation when turn MASK_ON but no mask_pred output. 
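# (Added clarification, presumed semantics: 'BOX' uses the detected box as the mask, 'OCT_BIT' / 'OCT_RLE' rasterize the contour octagon as a bitmask or COCO RLE, 'MASK' outputs a dense mask, and 'NO' skips mask output entirely.)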
18 | _C.MODEL.DANCE.MASK_IN = "OCT_RLE" # {'BOX', 'OCT_BIT', 'OCT_RLE', 'MASK', 'NO'} 19 | _C.MODEL.DANCE.SEMANTIC_FILTER = False 20 | _C.MODEL.DANCE.SEMANTIC_FILTER_TH = 0.1 21 | _C.MODEL.DANCE.ROI_SIZE = 28 22 | 23 | 24 | _C.MODEL.DANCE.RE_COMP_BOX = False 25 | 26 | # ---------------------------------------------------------------------------- # 27 | # Deformable Convolution Head (ZC) 28 | # ---------------------------------------------------------------------------- # 29 | _C.MODEL.DEFORM_HEAD = CN() 30 | _C.MODEL.DEFORM_HEAD.ON = False 31 | _C.MODEL.DEFORM_HEAD.NUM_CONVS = 256 32 | _C.MODEL.DEFORM_HEAD.NORM = "GN" 33 | _C.MODEL.DEFORM_HEAD.USE_MODULATED = False 34 | 35 | # ---------------------------------------------------------------------------- # 36 | # Snake Head (ZC) 37 | # ---------------------------------------------------------------------------- # 38 | _C.MODEL.SNAKE_HEAD = CN() 39 | 40 | _C.MODEL.SNAKE_HEAD.DETACH = False 41 | 42 | 43 | _C.MODEL.SNAKE_HEAD.ORIGINAL = False 44 | 45 | _C.MODEL.SNAKE_HEAD.STRUCTURE = "sequential" # {"sequential", "parallel"}; 46 | 47 | # circular conv net / graph conv net 48 | _C.MODEL.SNAKE_HEAD.CONV_TYPE = "ccn" # {"ccn", "gcn"}; 49 | 50 | _C.MODEL.SNAKE_HEAD.FEAT_DIM = 128 51 | 52 | _C.MODEL.SNAKE_HEAD.NUM_ITER = (0, 0, 1) # correspond to the convs 53 | _C.MODEL.SNAKE_HEAD.NUM_CONVS = 2 54 | _C.MODEL.SNAKE_HEAD.STRONGER = False 55 | 56 | _C.MODEL.SNAKE_HEAD.MULTI_OFFSET = 1 57 | 58 | _C.MODEL.SNAKE_HEAD.SKIP = False 59 | _C.MODEL.SNAKE_HEAD.NUM_LAYER = (8, 8, 8) 60 | _C.MODEL.SNAKE_HEAD.CIR_DILATIONS = ( 61 | (1, 1, 1, 2, 2, 4, 4), 62 | (1, 1, 1, 2, 2, 4, 4), 63 | (1, 1, 1, 2, 2, 4, 4), 64 | ) # by default the first one is 1. 65 | 66 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_ON = False 67 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_NUM_LAYER = 5 68 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_CIR_DILATIONS = ( 69 | 1, 70 | 2, 71 | 2, 72 | 4, 73 | ) # by default the first one is 1. 
74 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_FEAT_DIM = 128 75 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_MIN_AREA = 5 * 5 76 | # _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_PERTURB = True 77 | _C.MODEL.SNAKE_HEAD.MSCORE_SNAKE_LOSS_WEIGHT = 1.0 78 | 79 | _C.MODEL.SNAKE_HEAD.PRE_OFFSET = False # first snake also predicts a global offset 80 | 81 | _C.MODEL.SNAKE_HEAD.USE_ASPP = False 82 | _C.MODEL.SNAKE_HEAD.ASPP_DIM = 64 83 | _C.MODEL.SNAKE_HEAD.ASPP_DILATIONS = (1, 6, 12, 18) 84 | 85 | _C.MODEL.SNAKE_HEAD.USE_PSP = False 86 | _C.MODEL.SNAKE_HEAD.PSP_SIZE = (1, 2, 3, 6) 87 | 88 | _C.MODEL.SNAKE_HEAD.LAST_UP_SAMPLE = False 89 | _C.MODEL.SNAKE_HEAD.UP_SAMPLE_RATE = 2 90 | _C.MODEL.SNAKE_HEAD.LAST_CHAMFER = False 91 | _C.MODEL.SNAKE_HEAD.LAST_CHAMFER_WEIGHT = 5.0 / 3 92 | _C.MODEL.SNAKE_HEAD.LAST_NEIGHBOR = False 93 | 94 | _C.MODEL.SNAKE_HEAD.TRACK_PATH = False 95 | 96 | _C.MODEL.SNAKE_HEAD.NEW_MATCHING = False 97 | 98 | _C.MODEL.SNAKE_HEAD.INITIAL = "octagon" # {"octagon", "box"}; 99 | _C.MODEL.SNAKE_HEAD.DE_LOC_TYPE = "derange" # {"derange", "demean"} 100 | _C.MODEL.SNAKE_HEAD.LOCAL_SPATIAL = False 101 | 102 | _C.MODEL.SNAKE_HEAD.INDIVIDUAL_SCALE = False 103 | 104 | _C.MODEL.SNAKE_HEAD.LOSS_TYPE = "smoothl1" # {"smoothl1", "chamfer"} 105 | _C.MODEL.SNAKE_HEAD.LOSS_ADAPTIVE = False 106 | _C.MODEL.SNAKE_HEAD.LOSS_SEPARATE_REFINE = False 107 | _C.MODEL.SNAKE_HEAD.LOSS_WEIGH = False 108 | _C.MODEL.SNAKE_HEAD.LOSS_DISTRIBUTION = (1.0 / 3, 1.0 / 3, 2.0 / 3) 109 | _C.MODEL.SNAKE_HEAD.LOSS_L1_BETA = 0.11 110 | _C.MODEL.SNAKE_HEAD.EDGE_IN = False 111 | _C.MODEL.SNAKE_HEAD.PRED_EDGE = False 112 | _C.MODEL.SNAKE_HEAD.EDGE_IN_SEPARATE = (False, False) 113 | _C.MODEL.SNAKE_HEAD.EDGE_POSITION = "before" # {"before", "after"} 114 | _C.MODEL.SNAKE_HEAD.DILATIONS = (1, 1) 115 | _C.MODEL.SNAKE_HEAD.COORD_CONV = (False, False) 116 | _C.MODEL.SNAKE_HEAD.EDGE_IN_TH = -1.0 # used for inference 117 | 118 | _C.MODEL.SNAKE_HEAD.FILTER_WIDTH = 4 119 | 120 | _C.MODEL.SNAKE_HEAD.USE_DEFORMABLE = (False, False) 121 | 122 | _C.MODEL.SNAKE_HEAD.NUM_SAMPLING = 128 123 | _C.MODEL.SNAKE_HEAD.MARK_INDEX = False 124 | _C.MODEL.SNAKE_HEAD.REORDER_METHOD = "dsnake" # {'dsnake', 'curvegcn'} 125 | _C.MODEL.SNAKE_HEAD.JITTERING = 0.0 126 | _C.MODEL.SNAKE_HEAD.POINT_WEIGH = False 127 | 128 | _C.MODEL.SNAKE_HEAD.ATTENTION = False 129 | _C.MODEL.SNAKE_HEAD.SELECTIVE_REFINE = False 130 | _C.MODEL.SNAKE_HEAD.DOUBLE_SELECTIVE_REFINE = False 131 | 132 | 133 | # utils 134 | _C.MODEL.SNAKE_HEAD.VIS_PATH = False 135 | 136 | 137 | # ---------------------------------------------------------------------------- # 138 | # Edge Prediction Head (ZC) 139 | # ---------------------------------------------------------------------------- # 140 | _C.MODEL.EDGE_HEAD = CN() 141 | _C.MODEL.EDGE_HEAD.NAME = "EdgeFPNHead" 142 | 143 | _C.MODEL.EDGE_HEAD.TRAIN = True 144 | 145 | _C.MODEL.EDGE_HEAD.IN_FEATURES = ["p2"] 146 | _C.MODEL.EDGE_HEAD.STRONG_FEAT = False 147 | # Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for 148 | # the corresponding pixel. 149 | _C.MODEL.EDGE_HEAD.IGNORE_VALUE = 255 150 | # Number of classes in the edge prediction head 151 | _C.MODEL.EDGE_HEAD.NUM_CLASSES = 1 # (only foreground or not) 152 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 153 | _C.MODEL.EDGE_HEAD.CONVS_DIM = 128 154 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 155 | _C.MODEL.EDGE_HEAD.COMMON_STRIDE = 4 156 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 
157 | _C.MODEL.EDGE_HEAD.NORM = "GN" 158 | _C.MODEL.EDGE_HEAD.BCE_WEIGHT = ( 159 | 0 # 1:1 BCE harms the training, very small BCE not helpful 160 | ) 161 | 162 | _C.MODEL.EDGE_HEAD.LOSS_WEIGHT = 1 163 | 164 | 165 | # ---------------------------------------------------------------------------- # 166 | # Investigation Configs (ZC) 167 | # ---------------------------------------------------------------------------- # 168 | _C.TEST.GT_IN = CN() 169 | _C.TEST.GT_IN.ON = False 170 | _C.TEST.GT_IN.WHAT = ["edge", "instance"] # {"edge", "instance"} 171 | 172 | 173 | # ---------------------------------------------------------------------------- # 174 | # VoV Backbone 175 | # ---------------------------------------------------------------------------- # 176 | _C.MODEL.VOVNET = CN() 177 | 178 | _C.MODEL.VOVNET.CONV_BODY = "V-39-eSE" 179 | _C.MODEL.VOVNET.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] 180 | 181 | # Options: FrozenBN, GN, "SyncBN", "BN" 182 | _C.MODEL.VOVNET.NORM = "FrozenBN" 183 | 184 | _C.MODEL.VOVNET.OUT_CHANNELS = 256 185 | 186 | _C.MODEL.VOVNET.BACKBONE_OUT_CHANNELS = 256 187 | 188 | 189 | # ---------------------------------------------------------------------------- # 190 | # DLA backbone 191 | # ---------------------------------------------------------------------------- # 192 | 193 | _C.MODEL.DLA = CN() 194 | _C.MODEL.DLA.CONV_BODY = "DLA34" 195 | _C.MODEL.DLA.OUT_FEATURES = ["stage2", "stage3", "stage4", "stage5"] 196 | 197 | # Options: FrozenBN, GN, "SyncBN", "BN" 198 | _C.MODEL.DLA.NORM = "FrozenBN" 199 | 200 | # ---------------------------------------------------------------------------- # 201 | # FCOS Head 202 | # ---------------------------------------------------------------------------- # 203 | _C.MODEL.FCOS = CN() 204 | 205 | # This is the number of foreground classes. 
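# (Added note: 80 thing classes for COCO; FCOS-style heads predict per-class sigmoid scores, so no extra background class is added.)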
206 | _C.MODEL.FCOS.NUM_CLASSES = 80 207 | _C.MODEL.FCOS.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] 208 | _C.MODEL.FCOS.FPN_STRIDES = [8, 16, 32, 64, 128] 209 | _C.MODEL.FCOS.PRIOR_PROB = 0.01 210 | _C.MODEL.FCOS.INFERENCE_TH_TRAIN = 0.05 211 | _C.MODEL.FCOS.INFERENCE_TH_TEST = 0.05 212 | _C.MODEL.FCOS.NMS_TH = 0.6 213 | _C.MODEL.FCOS.PRE_NMS_TOPK_TRAIN = 1000 214 | _C.MODEL.FCOS.PRE_NMS_TOPK_TEST = 1000 215 | _C.MODEL.FCOS.POST_NMS_TOPK_TRAIN = 100 216 | _C.MODEL.FCOS.POST_NMS_TOPK_TEST = 100 217 | _C.MODEL.FCOS.TOP_LEVELS = 2 218 | _C.MODEL.FCOS.NORM = "GN" # Support GN or none 219 | _C.MODEL.FCOS.USE_SCALE = True 220 | 221 | # Multiply centerness before threshold 222 | # This will affect the final performance by about 0.05 AP but save some time 223 | _C.MODEL.FCOS.THRESH_WITH_CTR = False 224 | 225 | # Focal loss parameters 226 | _C.MODEL.FCOS.LOSS_ALPHA = 0.25 227 | _C.MODEL.FCOS.LOSS_GAMMA = 2.0 228 | _C.MODEL.FCOS.SIZES_OF_INTEREST = [64, 128, 256, 512] 229 | _C.MODEL.FCOS.USE_RELU = True 230 | _C.MODEL.FCOS.USE_DEFORMABLE = False 231 | 232 | # the number of convolutions used in the cls and bbox tower 233 | _C.MODEL.FCOS.NUM_CLS_CONVS = 4 234 | _C.MODEL.FCOS.NUM_BOX_CONVS = 4 235 | _C.MODEL.FCOS.NUM_SHARE_CONVS = 0 236 | _C.MODEL.FCOS.CENTER_SAMPLE = True 237 | _C.MODEL.FCOS.POS_RADIUS = 1.5 238 | _C.MODEL.FCOS.LOC_LOSS_TYPE = "giou" 239 | _C.MODEL.FCOS.EXT_LOSS_TYPE = "smoothl1" 240 | 241 | 242 | # ---------------------------------------------------------------------------- # 243 | # Misc options 244 | # ---------------------------------------------------------------------------- # 245 | _C.SEED = 77 246 | -------------------------------------------------------------------------------- /core/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import builtin # ensure the builtin datasets are registered 2 | 3 | -------------------------------------------------------------------------------- /core/data/builtin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 4 | from detectron2.data.datasets.register_coco import register_coco_instances 5 | from detectron2.data.datasets.coco import load_coco_json 6 | from detectron2.data import DatasetCatalog, MetadataCatalog 7 | from .datasets import register_coco_edge_map, register_cityscapes_edge_map 8 | 9 | ''' 10 | Register COCO dataset with edge map annotations 11 | ''' 12 | 13 | SPLITS_COCO_W_EDGE = { 14 | "coco_2017_train_edge": ( 15 | # original directory/annotations coco detection 16 | "coco/train2017", 17 | "coco/annotations/instances_train2017.json", 18 | # directory for edge map created by datasets/prepare_edge_map.py 19 | # takes ~ 12 mins on a machine with 64 Xeon(R) Gold 6130 CPUs 20 | "coco/edge_train2017" 21 | ), 22 | "coco_2017_val_edge": ( 23 | "coco/val2017", 24 | "coco/annotations/instances_val2017.json", 25 | "coco/edge_val2017" 26 | ), 27 | } 28 | 29 | 30 | def register_all_coco_edge(root="datasets"): 31 | for name, (image_root, json_file, edge_root) in SPLITS_COCO_W_EDGE.items(): 32 | # Assume pre-defined datasets live in `./datasets`. 
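# Added note: register_coco_edge_map (see core/data/datasets/register_coco_edge.py) registers each split twice:
# the joint detection + edge-map dataset used here, plus a "*_edgeonly" semantic-segmentation split.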
33 | register_coco_edge_map( 34 | name, 35 | _get_builtin_metadata("coco"), 36 | os.path.join(root, image_root), 37 | os.path.join(root, edge_root), 38 | os.path.join(root, json_file) if "://" not in json_file else json_file 39 | ) 40 | 41 | 42 | register_all_coco_edge() 43 | 44 | ''' 45 | Register CITYSCAPES dataset with edge map annotations 46 | ''' 47 | 48 | SPLITS_CITY_W_EDGE = { 49 | "cityscapes_train_edge": ( 50 | # original directory/annotations coco detection 51 | "cityscape-coco/coco_img/train", 52 | "cityscape-coco/coco_ann/instance_train.json", 53 | "cityscape-coco/edge_train" 54 | ), 55 | "cityscapes_val_edge": ( 56 | "cityscape-coco/coco_img/val", 57 | "cityscape-coco/coco_ann/instance_val.json", 58 | "cityscape-coco/edge_val" 59 | ), 60 | } 61 | 62 | 63 | def register_all_cityscapes_edge(root="datasets"): 64 | for name, (image_root, json_file, edge_root) in SPLITS_CITY_W_EDGE.items(): 65 | # Assume pre-defined datasets live in `./datasets`. 66 | register_cityscapes_edge_map( 67 | name, 68 | {}, 69 | os.path.join(root, image_root), 70 | os.path.join(root, edge_root), 71 | os.path.join(root, json_file) if "://" not in json_file else json_file 72 | ) 73 | 74 | 75 | register_all_cityscapes_edge() 76 | 77 | 78 | def register_cityscapes(root="datasets"): 79 | # Assume pre-defined datasets live in `./datasets`. 80 | DatasetCatalog.register('cityscapes_coco_fine_instance_seg_train', 81 | lambda: load_coco_json( 82 | os.path.join(root, 'cityscape-coco/coco_ann/instance_train.json'), 83 | os.path.join(root, 'cityscape-coco/coco_img/train'), 84 | 'cityscapes_coco_fine_instance_seg_train')) 85 | 86 | DatasetCatalog.register('cityscapes_coco_fine_instance_seg_val', 87 | lambda: load_coco_json( 88 | os.path.join(root, 'cityscape-coco/coco_ann/instance_val.json'), 89 | os.path.join(root, 'cityscape-coco/coco_img/val'), 90 | 'cityscapes_coco_fine_instance_seg_val')) 91 | MetadataCatalog.get('cityscapes_coco_fine_instance_seg_train').set( 92 | evaluator_type="coco", 93 | ) 94 | MetadataCatalog.get('cityscapes_coco_fine_instance_seg_val').set( 95 | evaluator_type="coco", 96 | ) 97 | 98 | 99 | register_cityscapes() 100 | 101 | ''' 102 | Register SBD dataset 103 | ''' 104 | 105 | _PREDEFINED_SPLITS_SBD = { 106 | "sbd_train": ("sbd/images", "sbd/annotations/sbd_train_instance.json"), 107 | "sbd_val": ("sbd/images", "sbd/annotations/sbd_val_instance.json"), 108 | } 109 | 110 | SBD_CATEGORIES = [ 111 | {"color": [220, 20, 60], 'id': 1, 'name': 'aeroplane'}, 112 | {"color": [119, 11, 32], 'id': 2, 'name': 'bicycle'}, 113 | {"color": [0, 0, 142], 'id': 3, 'name': 'bird'}, 114 | {"color": [0, 0, 230], 'id': 4, 'name': 'boat'}, 115 | {"color": [106, 0, 228], 'id': 5, 'name': 'bottle'}, 116 | {"color": [0, 60, 100], 'id': 6, 'name': 'bus'}, 117 | {"color": [0, 80, 100], 'id': 7, 'name': 'car'}, 118 | {"color": [0, 0, 70], 'id': 8, 'name': 'cat'}, 119 | {"color": [0, 0, 192], 'id': 9, 'name': 'chair'}, 120 | {"color": [250, 170, 30], 'id': 10, 'name': 'cow'}, 121 | {"color": [100, 170, 30], 'id': 11, 'name': 'diningtable'}, 122 | {"color": [220, 220, 0], 'id': 12, 'name': 'dog'}, 123 | {"color": [175, 116, 175], 'id': 13, 'name': 'horse'}, 124 | {"color": [0, 82, 0], 'id': 14, 'name': 'motorbike'}, 125 | {"color": [0, 82, 100], 'id': 15, 'name': 'person'}, 126 | {"color": [82, 82, 100], 'id': 16, 'name': 'pottedplant'}, 127 | {"color": [182, 8, 100], 'id': 17, 'name': 'sheep'}, 128 | {"color": [182, 8, 0], 'id': 18, 'name': 'sofa'}, 129 | {"color": [182, 18, 0], 'id': 19, 'name': 'train'}, 130 | 
{"color": [12, 18, 192], 'id': 20, 'name': 'tvmonitor'} 131 | ] 132 | 133 | thing_ids = [k["id"] for k in SBD_CATEGORIES] 134 | thing_colors = [k["color"] for k in SBD_CATEGORIES] 135 | assert len(thing_ids) == 20, len(thing_ids) 136 | # Mapping from the incontiguous COCO category id to an id in [0, 19] 137 | thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} 138 | thing_classes = [k["name"] for k in SBD_CATEGORIES] 139 | metadata = { 140 | "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, 141 | "thing_classes": thing_classes, 142 | "thing_colors": thing_colors, 143 | } 144 | 145 | 146 | def register_all_coco(root="datasets"): 147 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_SBD.items(): 148 | # Assume pre-defined datasets live in `./datasets`. 149 | register_coco_instances( 150 | key, 151 | metadata, 152 | os.path.join(root, json_file) if "://" not in json_file else json_file, 153 | os.path.join(root, image_root), 154 | ) 155 | 156 | 157 | register_all_coco() 158 | -------------------------------------------------------------------------------- /core/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .register_coco_edge import register_coco_edge_map, register_cityscapes_edge_map 2 | -------------------------------------------------------------------------------- /core/data/datasets/register_coco_edge.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from detectron2.data import DatasetCatalog, MetadataCatalog 4 | from detectron2.data.datasets import load_coco_json, load_sem_seg 5 | 6 | 7 | def register_coco_edge_map( 8 | name, metadata, image_root, edge_root, instances_json 9 | ): 10 | ds_name = name 11 | DatasetCatalog.register( 12 | ds_name, 13 | lambda: merge_to_panoptic( 14 | load_coco_json(instances_json, image_root, ds_name), 15 | load_sem_seg(edge_root, image_root), 16 | ), 17 | ) 18 | MetadataCatalog.get(ds_name).set( 19 | image_root=image_root, 20 | edge_root=edge_root, 21 | json_file=instances_json, 22 | evaluator_type="coco+edge_map", 23 | **metadata 24 | ) 25 | 26 | semantic_name = name + "_edgeonly" 27 | DatasetCatalog.register(semantic_name, lambda: load_sem_seg(edge_root, image_root)) 28 | MetadataCatalog.get(semantic_name).set( 29 | sem_seg_root=edge_root, image_root=image_root, evaluator_type="sem_seg", **metadata 30 | ) 31 | 32 | 33 | def register_cityscapes_edge_map( 34 | name, metadata, image_root, edge_root, instances_json 35 | ): 36 | ds_name = name 37 | DatasetCatalog.register( 38 | ds_name, 39 | lambda: merge_to_panoptic( 40 | load_coco_json(instances_json, image_root, ds_name), 41 | load_sem_seg(edge_root, image_root, image_ext='png'), 42 | ), 43 | ) 44 | MetadataCatalog.get(ds_name).set( 45 | image_root=image_root, 46 | edge_root=edge_root, 47 | json_file=instances_json, 48 | evaluator_type="coco+edge_map", 49 | **metadata 50 | ) 51 | 52 | semantic_name = name + "_edgeonly" 53 | DatasetCatalog.register(semantic_name, lambda: load_sem_seg(edge_root, image_root)) 54 | MetadataCatalog.get(semantic_name).set( 55 | sem_seg_root=edge_root, image_root=image_root, evaluator_type="sem_seg", **metadata 56 | ) 57 | 58 | 59 | def merge_to_panoptic(detection_dicts, sem_seg_dicts): 60 | """ 61 | Create dataset dicts for panoptic segmentation, by 62 | merging two dicts using "file_name" field to match their entries. 
63 | 64 | Args: 65 | detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. 66 | sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. 67 | 68 | Returns: 69 | list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in 70 | both detection_dicts and sem_seg_dicts that correspond to the same image. 71 | The function assumes that the same key in different dicts has the same value. 72 | """ 73 | results = [] 74 | sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} 75 | assert len(sem_seg_file_to_entry) > 0 76 | 77 | for det_dict in detection_dicts: 78 | dic = copy.copy(det_dict) 79 | dic.update(sem_seg_file_to_entry[dic["file_name"]]) 80 | results.append(dic) 81 | return results 82 | -------------------------------------------------------------------------------- /core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_evaluation import COCOEvaluator 2 | from .edge_map_evaluation import EdgeMapEvaluator 3 | from .cocoeval import COCOeval -------------------------------------------------------------------------------- /core/evaluation/edge_map_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import itertools 3 | import json 4 | import logging 5 | import numpy as np 6 | import os 7 | from collections import OrderedDict 8 | import PIL.Image as Image 9 | import pycocotools.mask as mask_util 10 | import torch 11 | from fvcore.common.file_io import PathManager 12 | 13 | from detectron2.data import DatasetCatalog, MetadataCatalog 14 | from detectron2.utils.comm import all_gather, is_main_process, synchronize 15 | 16 | from detectron2.evaluation.evaluator import DatasetEvaluator 17 | 18 | 19 | class EdgeMapEvaluator(DatasetEvaluator): 20 | """ 21 | Evaluate semantic segmentation 22 | """ 23 | 24 | def __init__( 25 | self, dataset_name, distributed, num_classes, ignore_label=255, output_dir=None 26 | ): 27 | """ 28 | Args: 29 | dataset_name (str): name of the dataset to be evaluated. 30 | distributed (True): if True, will collect results from all ranks for evaluation. 31 | Otherwise, will evaluate the results in the current process. 32 | num_classes (int): number of classes 33 | ignore_label (int): value in semantic segmentation ground truth. Predictions for the 34 | corresponding pixels should be ignored. 35 | output_dir (str): an output directory to dump results. 
36 | """ 37 | self._dataset_name = dataset_name 38 | self._distributed = distributed 39 | self._output_dir = output_dir 40 | self._num_classes = num_classes 41 | self._ignore_label = ignore_label 42 | self._N = num_classes + 1 43 | 44 | self._cpu_device = torch.device("cpu") 45 | self._logger = logging.getLogger(__name__) 46 | 47 | self.input_file_to_gt_file = { 48 | dataset_record["file_name"]: dataset_record["sem_seg_file_name"] 49 | for dataset_record in DatasetCatalog.get(dataset_name) 50 | } 51 | 52 | meta = MetadataCatalog.get(dataset_name) 53 | # Dict that maps contiguous training ids to COCO category ids 54 | try: 55 | c2d = meta.stuff_dataset_id_to_contiguous_id 56 | self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} 57 | except AttributeError: 58 | self._contiguous_id_to_dataset_id = None 59 | 60 | def reset(self): 61 | self._conf_matrix = np.zeros((self._N, self._N), dtype=np.int64) 62 | self._predictions = [] 63 | 64 | def process(self, inputs, outputs): 65 | """ 66 | Args: 67 | inputs: the inputs to a model. 68 | It is a list of dicts. Each dict corresponds to an image and 69 | contains keys like "height", "width", "file_name". 70 | outputs: the outputs of a model. It is either list of semantic segmentation predictions 71 | (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic 72 | segmentation prediction in the same format. 73 | """ 74 | for input, output in zip(inputs, outputs): 75 | # TODO: Just use 0.5 as threshold; should we change? 76 | output = (output["edge_map"] > 0.5).to(self._cpu_device) 77 | pred = np.array(output, dtype=np.int) 78 | with PathManager.open( 79 | self.input_file_to_gt_file[input["file_name"]], "rb" 80 | ) as f: 81 | gt = np.array(Image.open(f), dtype=np.int) 82 | 83 | gt[gt == self._ignore_label] = 0 # NOTE: 0 - background in edge map. 
84 | 85 | self._conf_matrix += np.bincount( 86 | self._N * pred.reshape(-1) + gt.reshape(-1), minlength=self._N ** 2 87 | ).reshape(self._N, self._N) 88 | 89 | self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) 90 | 91 | def evaluate(self): 92 | """ 93 | Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): 94 | 95 | * Mean intersection-over-union averaged across classes (mIoU) 96 | * Frequency Weighted IoU (fwIoU) 97 | * Mean pixel accuracy averaged across classes (mACC) 98 | * Pixel Accuracy (pACC) 99 | """ 100 | if self._distributed: 101 | synchronize() 102 | conf_matrix_list = all_gather(self._conf_matrix) 103 | self._predictions = all_gather(self._predictions) 104 | self._predictions = list(itertools.chain(*self._predictions)) 105 | if not is_main_process(): 106 | return 107 | 108 | self._conf_matrix = np.zeros_like(self._conf_matrix) 109 | for conf_matrix in conf_matrix_list: 110 | self._conf_matrix += conf_matrix 111 | 112 | if self._output_dir: 113 | PathManager.mkdirs(self._output_dir) 114 | file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") 115 | with PathManager.open(file_path, "w") as f: 116 | f.write(json.dumps(self._predictions)) 117 | 118 | acc = np.zeros(self._num_classes, dtype=np.float) 119 | iou = np.zeros(self._num_classes, dtype=np.float) 120 | tp = self._conf_matrix.diagonal()[:-1].astype(np.float) 121 | pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) 122 | class_weights = pos_gt / np.sum(pos_gt) 123 | pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) 124 | acc_valid = pos_gt > 0 125 | acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] 126 | iou_valid = (pos_gt + pos_pred) > 0 127 | union = pos_gt + pos_pred - tp 128 | iou[acc_valid] = tp[acc_valid] / union[acc_valid] 129 | macc = np.sum(acc) / np.sum(acc_valid) 130 | miou = np.sum(iou) / np.sum(iou_valid) 131 | fiou = np.sum(iou * class_weights) 132 | pacc = np.sum(tp) / np.sum(pos_gt) 133 | 134 | res = {} 135 | res["mIoU"] = 100 * miou 136 | res["fwIoU"] = 100 * fiou 137 | res["mACC"] = 100 * macc 138 | res["pACC"] = 100 * pacc 139 | 140 | if self._output_dir: 141 | file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") 142 | with PathManager.open(file_path, "wb") as f: 143 | torch.save(res, f) 144 | results = OrderedDict({"edge_map": res}) 145 | self._logger.info(results) 146 | return results 147 | 148 | def encode_json_sem_seg(self, sem_seg, input_file_name): 149 | """ 150 | Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. 
151 | See http://cocodataset.org/#format-results 152 | """ 153 | json_list = [] 154 | for label in np.unique(sem_seg): 155 | if self._contiguous_id_to_dataset_id is not None: 156 | assert ( 157 | label in self._contiguous_id_to_dataset_id 158 | ), "Label {} is not in the metadata info for {}".format( 159 | label, self._dataset_name 160 | ) 161 | dataset_id = self._contiguous_id_to_dataset_id[label] 162 | else: 163 | dataset_id = int(label) 164 | mask = (sem_seg == label).astype(np.uint8) 165 | mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] 166 | mask_rle["counts"] = mask_rle["counts"].decode("utf-8") 167 | json_list.append( 168 | { 169 | "file_name": input_file_name, 170 | "category_id": dataset_id, 171 | "segmentation": mask_rle, 172 | } 173 | ) 174 | return json_list 175 | -------------------------------------------------------------------------------- /core/evaluation/evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import datetime 3 | import logging 4 | import time 5 | from collections import OrderedDict 6 | from contextlib import contextmanager 7 | import torch 8 | 9 | from detectron2.utils.comm import is_main_process 10 | from detectron2.utils.logger import log_every_n_seconds 11 | 12 | 13 | class DatasetEvaluator: 14 | """ 15 | Base class for a dataset evaluator. 16 | 17 | The function :func:`inference_on_dataset` runs the model over 18 | all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. 19 | 20 | This class will accumulate information of the inputs/outputs (by :meth:`process`), 21 | and produce evaluation results in the end (by :meth:`evaluate`). 22 | """ 23 | 24 | def reset(self): 25 | """ 26 | Preparation for a new round of evaluation. 27 | Should be called before starting a round of evaluation. 28 | """ 29 | pass 30 | 31 | def process(self, input, output): 32 | """ 33 | Process an input/output pair. 34 | 35 | Args: 36 | input: the input that's used to call the model. 37 | output: the return value of `model(input)` 38 | """ 39 | pass 40 | 41 | def evaluate(self): 42 | """ 43 | Evaluate/summarize the performance, after processing all input/output pairs. 44 | 45 | Returns: 46 | dict: 47 | A new evaluator class can return a dict of arbitrary format 48 | as long as the user can process the results. 49 | In our train_net.py, we expect the following format: 50 | 51 | * key: the name of the task (e.g., bbox) 52 | * value: a dict of {metric name: score}, e.g.: {"AP50": 80} 53 | """ 54 | pass 55 | 56 | 57 | class DatasetEvaluators(DatasetEvaluator): 58 | def __init__(self, evaluators): 59 | assert len(evaluators) 60 | super().__init__() 61 | self._evaluators = evaluators 62 | 63 | def reset(self): 64 | for evaluator in self._evaluators: 65 | evaluator.reset() 66 | 67 | def process(self, input, output): 68 | for evaluator in self._evaluators: 69 | evaluator.process(input, output) 70 | 71 | def evaluate(self): 72 | results = OrderedDict() 73 | for evaluator in self._evaluators: 74 | result = evaluator.evaluate() 75 | if is_main_process() and result is not None: 76 | for k, v in result.items(): 77 | assert ( 78 | k not in results 79 | ), "Different evaluators produce results with the same key {}".format(k) 80 | results[k] = v 81 | return results 82 | 83 | 84 | def inference_on_dataset(model, data_loader, evaluator): 85 | """ 86 | Run model on the data_loader and evaluate the metrics with evaluator. 
87 | Also benchmark the inference speed of `model.forward` accurately. 88 | The model will be used in eval mode. 89 | 90 | Args: 91 | model (nn.Module): a module which accepts an object from 92 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. 93 | 94 | If you wish to evaluate a model in `training` mode instead, you can 95 | wrap the given model and override its behavior of `.eval()` and `.train()`. 96 | data_loader: an iterable object with a length. 97 | The elements it generates will be the inputs to the model. 98 | evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want 99 | to benchmark, but don't want to do any evaluation. 100 | 101 | Returns: 102 | The return value of `evaluator.evaluate()` 103 | """ 104 | num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1 105 | logger = logging.getLogger(__name__) 106 | logger.info("Start inference on {} images".format(len(data_loader))) 107 | 108 | total = len(data_loader) # inference data loader must have a fixed length 109 | if evaluator is None: 110 | # create a no-op evaluator 111 | evaluator = DatasetEvaluators([]) 112 | evaluator.reset() 113 | 114 | num_warmup = min(5, total - 1) 115 | start_time = time.perf_counter() 116 | total_compute_time = 0 117 | with inference_context(model), torch.no_grad(): 118 | for idx, inputs in enumerate(data_loader): 119 | if idx == num_warmup: 120 | start_time = time.perf_counter() 121 | total_compute_time = 0 122 | 123 | start_compute_time = time.perf_counter() 124 | outputs = model(inputs) 125 | if torch.cuda.is_available(): 126 | torch.cuda.synchronize() 127 | total_compute_time += time.perf_counter() - start_compute_time 128 | evaluator.process(inputs, outputs) 129 | 130 | iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) 131 | seconds_per_img = total_compute_time / iters_after_start 132 | if idx >= num_warmup * 2 or seconds_per_img > 5: 133 | total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start 134 | eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) 135 | log_every_n_seconds( 136 | logging.INFO, 137 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 138 | idx + 1, total, seconds_per_img, str(eta) 139 | ), 140 | n=5, 141 | ) 142 | 143 | # Measure the time only for this worker (before the synchronization barrier) 144 | total_time = time.perf_counter() - start_time 145 | total_time_str = str(datetime.timedelta(seconds=total_time)) 146 | # NOTE this format is parsed by grep 147 | logger.info( 148 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( 149 | total_time_str, total_time / (total - num_warmup), num_devices 150 | ) 151 | ) 152 | total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) 153 | logger.info( 154 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( 155 | total_compute_time_str, total_compute_time / (total - num_warmup), num_devices 156 | ) 157 | ) 158 | 159 | results = evaluator.evaluate() 160 | # An evaluator may return None when not in main process. 161 | # Replace it by an empty dict instead to make it easier for downstream code to handle 162 | if results is None: 163 | results = {} 164 | return results 165 | 166 | 167 | @contextmanager 168 | def inference_context(model): 169 | """ 170 | A context where the model is temporarily changed to eval mode, 171 | and restored to previous mode afterwards. 
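
    Example (a minimal usage sketch; this mirrors how `inference_on_dataset` above wraps the model):

        with inference_context(model), torch.no_grad():
            outputs = model(inputs)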
172 | 173 | Args: 174 | model: a torch Module 175 | """ 176 | training_mode = model.training 177 | model.eval() 178 | yield 179 | model.train(training_mode) 180 | -------------------------------------------------------------------------------- /core/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import EXTLoss, DiceLoss, SmoothL1Loss, IOULoss 2 | from .deform_conv import DFConv2d 3 | from .ml_nms import ml_nms 4 | from .extreme_utils import _ext as extreme_utils 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /core/layers/deform_conv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Codes from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/deform_conv.py) 3 | """ 4 | import torch 5 | from torch import nn 6 | 7 | from detectron2.layers import Conv2d 8 | 9 | 10 | class _NewEmptyTensorOp(torch.autograd.Function): 11 | @staticmethod 12 | def forward(ctx, x, new_shape): 13 | ctx.shape = x.shape 14 | return x.new_empty(new_shape) 15 | 16 | @staticmethod 17 | def backward(ctx, grad): 18 | shape = ctx.shape 19 | return _NewEmptyTensorOp.apply(grad, shape), None 20 | 21 | 22 | class DFConv2d(nn.Module): 23 | """ 24 | Deformable convolutional layer with configurable 25 | deformable groups, dilations and groups. 26 | 27 | Code is from: 28 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py 29 | 30 | 31 | """ 32 | 33 | def __init__( 34 | self, 35 | in_channels, 36 | out_channels, 37 | with_modulated_dcn=True, 38 | kernel_size=3, 39 | stride=1, 40 | groups=1, 41 | dilation=1, 42 | deformable_groups=1, 43 | bias=False, 44 | padding=None 45 | ): 46 | super(DFConv2d, self).__init__() 47 | if isinstance(kernel_size, (list, tuple)): 48 | assert isinstance(stride, (list, tuple)) 49 | assert isinstance(dilation, (list, tuple)) 50 | assert len(kernel_size) == 2 51 | assert len(stride) == 2 52 | assert len(dilation) == 2 53 | padding = ( 54 | dilation[0] * (kernel_size[0] - 1) // 2, 55 | dilation[1] * (kernel_size[1] - 1) // 2 56 | ) 57 | offset_base_channels = kernel_size[0] * kernel_size[1] 58 | else: 59 | padding = dilation * (kernel_size - 1) // 2 60 | offset_base_channels = kernel_size * kernel_size 61 | if with_modulated_dcn: 62 | from detectron2.layers.deform_conv import ModulatedDeformConv 63 | offset_channels = offset_base_channels * 3 # default: 27 64 | conv_block = ModulatedDeformConv 65 | else: 66 | from detectron2.layers.deform_conv import DeformConv 67 | offset_channels = offset_base_channels * 2 # default: 18 68 | conv_block = DeformConv 69 | self.offset = Conv2d( 70 | in_channels, 71 | deformable_groups * offset_channels, 72 | kernel_size=kernel_size, 73 | stride=stride, 74 | padding=padding, 75 | groups=1, 76 | dilation=dilation 77 | ) 78 | for l in [self.offset, ]: 79 | nn.init.kaiming_uniform_(l.weight, a=1) 80 | torch.nn.init.constant_(l.bias, 0.) 
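# Added note: the deformable convolution constructed below consumes the offsets (and, for modulated DCN,
# the sigmoid attention masks) that self.offset predicts in forward().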
81 | self.conv = conv_block( 82 | in_channels, 83 | out_channels, 84 | kernel_size=kernel_size, 85 | stride=stride, 86 | padding=padding, 87 | dilation=dilation, 88 | groups=groups, 89 | deformable_groups=deformable_groups, 90 | bias=bias 91 | ) 92 | self.with_modulated_dcn = with_modulated_dcn 93 | self.kernel_size = kernel_size 94 | self.stride = stride 95 | self.padding = padding 96 | self.dilation = dilation 97 | self.offset_split = offset_base_channels * deformable_groups * 2 98 | 99 | def forward(self, x, return_offset=False): 100 | if x.numel() > 0: 101 | if not self.with_modulated_dcn: 102 | offset_mask = self.offset(x) 103 | x = self.conv(x, offset_mask) 104 | else: 105 | offset_mask = self.offset(x) 106 | offset = offset_mask[:, :self.offset_split, :, :] 107 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid() 108 | x = self.conv(x, offset, mask) 109 | if return_offset: 110 | return x, offset_mask 111 | return x 112 | # get output shape 113 | output_shape = [ 114 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 115 | for i, p, di, k, d in zip( 116 | x.shape[-2:], 117 | self.padding, 118 | self.dilation, 119 | self.kernel_size, 120 | self.stride 121 | ) 122 | ] 123 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape 124 | return _NewEmptyTensorOp.apply(x, output_shape) 125 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/core/layers/extreme_utils/__init__.py -------------------------------------------------------------------------------- /core/layers/extreme_utils/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 3 | import os 4 | import glob 5 | 6 | 7 | def get_extensions(): 8 | this_dir = os.path.dirname(os.path.abspath(__file__)) 9 | main_file = glob.glob(os.path.join(this_dir, '*.cpp')) 10 | source_cuda = glob.glob(os.path.join(this_dir, 'src', '*.cu')) 11 | sources = main_file + source_cuda 12 | include_dirs = [this_dir] 13 | ext_modules = [ 14 | CUDAExtension( 15 | name='_ext', 16 | sources=sources, 17 | include_dirs=include_dirs 18 | ) 19 | ] 20 | return ext_modules 21 | 22 | 23 | setup( 24 | ext_modules=get_extensions(), 25 | cmdclass={'build_ext': BuildExtension} 26 | ) 27 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/src/cuda_common.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifndef CUDA_COMMON_H_ 9 | #define CUDA_COMMON_H_ 10 | 11 | #define DIST(x1,y1,z1,x2,y2,z2) (((x1)-(x2))*((x1)-(x2))+((y1)-(y2))*((y1)-(y2))+((z1)-(z2))*((z1)-(z2))) 12 | #define DIST2D(x1,y1,x2,y2) (((x1)-(x2))*((x1)-(x2))+((y1)-(y2))*((y1)-(y2))) 13 | 14 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 15 | 16 | void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 17 | { 18 | if (code != cudaSuccess) 19 | { 20 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 21 | if (abort) exit(code); 22 | } 23 | } 24 | 25 | int infTwoExp(int val) 26 | { 27 | int inf=1; 28 | while(val>inf) inf<<=1; 29 | return inf; 30 | } 31 | 32 | void 
getGPULayout( 33 | int dim0,int dim1,int dim2, 34 | int* bdim0,int* bdim1,int* bdim2, 35 | int* tdim0,int* tdim1,int* tdim2 36 | ) 37 | { 38 | (*tdim2)=64; 39 | if(dim2<(*tdim2)) (*tdim2)=infTwoExp(dim2); 40 | (*bdim2)=dim2/(*tdim2); 41 | if(dim2%(*tdim2)>0) (*bdim2)++; 42 | 43 | (*tdim1)=1024/(*tdim2); 44 | if(dim1<(*tdim1)) (*tdim1)=infTwoExp(dim1); 45 | (*bdim1)=dim1/(*tdim1); 46 | if(dim1%(*tdim1)>0) (*bdim1)++; 47 | 48 | (*tdim0)=1024/((*tdim1)*(*tdim2)); 49 | if(dim0<(*tdim0)) (*tdim0)=infTwoExp(dim0); 50 | (*bdim0)=dim0/(*tdim0); 51 | if(dim0%(*tdim0)>0) (*bdim0)++; 52 | } 53 | #endif 54 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/src/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* 
descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)}).sort(0, false)); 128 | } 129 | 130 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/src/nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 5 | 6 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("collect_extreme_point", &collect_extreme_point, "collect_extreme_point"); 6 | m.def("calculate_edge_num", &calculate_edge_num, "calculate_edge_num"); 7 | m.def("calculate_wnp", &calculate_wnp, "calculate_wnp"); 8 | m.def("roll_array", &roll_array, "roll_array"); 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | } 11 | -------------------------------------------------------------------------------- /core/layers/extreme_utils/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "src/nms.h" 4 | 5 | 6 | at::Tensor collect_extreme_point( 7 | const at::Tensor& ext_hm, 8 | const at::Tensor& bbox, 9 | const at::Tensor& radius, 10 | const at::Tensor& vote, 11 | const at::Tensor& ct 12 | ); 13 | 14 | 15 | void calculate_edge_num( 16 | at::Tensor& edge_num, 17 | const at::Tensor& edge_num_sum, 18 | const at::Tensor& edge_idx_sort, 19 | const int p_num 20 | ); 21 | 22 | 23 | std::tuple calculate_wnp( 24 | const 
at::Tensor& edge_num, 25 | const at::Tensor& edge_start_idx, 26 | const int p_num 27 | ); 28 | 29 | 30 | at::Tensor roll_array( 31 | const at::Tensor& array, 32 | const at::Tensor& step 33 | ); 34 | 35 | 36 | at::Tensor nms(const at::Tensor& dets, 37 | const at::Tensor& scores, 38 | const float threshold) { 39 | if (dets.numel() == 0) 40 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 41 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 42 | return nms_cuda(b, threshold); 43 | } 44 | 45 | -------------------------------------------------------------------------------- /core/layers/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | import functools 5 | 6 | import torch.nn.functional as F 7 | 8 | 9 | def reduce_loss(loss, reduction): 10 | reduction_enum = F._Reduction.get_enum(reduction) 11 | # none: 0, elementwise_mean:1, sum: 2 12 | if reduction_enum == 0: 13 | return loss 14 | elif reduction_enum == 1: 15 | return loss.mean() 16 | elif reduction_enum == 2: 17 | return loss.sum() 18 | 19 | 20 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 21 | # if weight is specified, apply element-wise weight 22 | if weight is not None: 23 | loss = loss * weight 24 | 25 | # if avg_factor is not specified, just reduce the loss 26 | if avg_factor is None: 27 | loss = reduce_loss(loss, reduction) 28 | else: 29 | # if reduction is mean, then average the loss by avg_factor 30 | if reduction == 'mean': 31 | loss = loss.sum() / avg_factor 32 | # if reduction is 'none', then do nothing, otherwise raise an error 33 | elif reduction != 'none': 34 | raise ValueError('avg_factor can not be used with reduction="sum"') 35 | return loss 36 | 37 | 38 | def weighted_loss(loss_func): 39 | @functools.wraps(loss_func) 40 | def wrapper(pred, 41 | target, 42 | weight=None, 43 | reduction='mean', 44 | avg_factor=None, 45 | **kwargs): 46 | # get element-wise loss 47 | loss = loss_func(pred, target, **kwargs) 48 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 49 | return loss 50 | 51 | return wrapper 52 | 53 | 54 | @weighted_loss 55 | def smooth_l1_loss(pred, target, beta=1.0): 56 | assert beta > 0 57 | assert pred.size() == target.size() and target.numel() > 0 58 | diff = torch.abs(pred - target) 59 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 60 | diff - 0.5 * beta) 61 | return loss 62 | 63 | 64 | class SmoothL1Loss(nn.Module): 65 | 66 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 67 | super(SmoothL1Loss, self).__init__() 68 | self.beta = beta 69 | self.reduction = reduction 70 | self.loss_weight = loss_weight 71 | 72 | def forward(self, 73 | pred, 74 | target, 75 | weight=None, 76 | avg_factor=None, 77 | reduction_override=None, 78 | **kwargs): 79 | assert reduction_override in (None, 'none', 'mean', 'sum') 80 | reduction = ( 81 | reduction_override if reduction_override else self.reduction) 82 | loss = self.loss_weight * smooth_l1_loss( 83 | pred, 84 | target, 85 | weight, 86 | beta=self.beta, 87 | reduction=reduction, 88 | avg_factor=avg_factor, 89 | **kwargs) 90 | return loss 91 | 92 | 93 | class EXTLoss(nn.Module): 94 | def __init__(self, ext_loss_type='smoothl1'): 95 | super(EXTLoss, self).__init__() 96 | self.ext_loss_type = ext_loss_type 97 | if ext_loss_type == 'smoothl1': 98 | self.loss_func = nn.SmoothL1Loss(reduction='none') 99 | 100 | def forward(self, pred, target, weight=None): 101 | losses = 
self.loss_func(pred, target).sum(dim=1) 102 | if weight is not None: 103 | return (losses * weight).sum() 104 | else: 105 | return losses.sum() 106 | 107 | 108 | class DiceLoss(nn.Module): 109 | def __init__(self, 110 | bce_weight=0, 111 | ignore_value=255): 112 | super(DiceLoss, self).__init__() 113 | self.ignore_value = ignore_value 114 | if bce_weight != 0: 115 | self.bce_crit = nn.BCELoss() 116 | else: 117 | self.bce_crit = None 118 | self.bce_weight = bce_weight 119 | 120 | def forward(self, pred, target): 121 | if len(target.size()) == 3: 122 | target = target.unsqueeze(1) 123 | assert pred.size() == target.size() 124 | 125 | target = target.float() 126 | 127 | if self.ignore_value: 128 | mask = torch.ne(target, self.ignore_value).float() 129 | pred *= mask 130 | target *= mask 131 | 132 | p2 = pred * pred 133 | g2 = target * target 134 | pg = pred * target 135 | 136 | p2 = torch.sum(p2, (3, 2, 1)) 137 | g2 = torch.sum(g2, (3, 2, 1)) 138 | pg = torch.sum(pg, (3, 2, 1)) 139 | 140 | dice_coef = (2 * pg) / (p2 + g2 + 0.0001) 141 | 142 | dice_loss = (1.0 - dice_coef).sum() 143 | dice_loss /= target.size(0) 144 | 145 | if self.bce_crit is not None: 146 | bce_loss = self.bce_crit(pred, target) 147 | dice_loss += self.bce_weight * bce_loss 148 | 149 | return dice_loss 150 | 151 | 152 | class IOULoss(nn.Module): 153 | """ 154 | Codes from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/iou_loss.py) 155 | """ 156 | def __init__(self, loc_loss_type='iou'): 157 | super(IOULoss, self).__init__() 158 | self.loc_loss_type = loc_loss_type 159 | 160 | def forward(self, pred, target, weight=None): 161 | pred_left = pred[:, 0] 162 | pred_top = pred[:, 1] 163 | pred_right = pred[:, 2] 164 | pred_bottom = pred[:, 3] 165 | 166 | target_left = target[:, 0] 167 | target_top = target[:, 1] 168 | target_right = target[:, 2] 169 | target_bottom = target[:, 3] 170 | 171 | target_aera = (target_left + target_right) * \ 172 | (target_top + target_bottom) 173 | pred_aera = (pred_left + pred_right) * \ 174 | (pred_top + pred_bottom) 175 | 176 | w_intersect = torch.min(pred_left, target_left) + \ 177 | torch.min(pred_right, target_right) 178 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 179 | torch.min(pred_top, target_top) 180 | 181 | g_w_intersect = torch.max(pred_left, target_left) + \ 182 | torch.max(pred_right, target_right) 183 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 184 | torch.max(pred_top, target_top) 185 | ac_uion = g_w_intersect * g_h_intersect 186 | 187 | area_intersect = w_intersect * h_intersect 188 | area_union = target_aera + pred_aera - area_intersect 189 | 190 | ious = (area_intersect + 1.0) / (area_union + 1.0) 191 | gious = ious - (ac_uion - area_union) / ac_uion 192 | if self.loc_loss_type == 'iou': 193 | losses = -torch.log(ious) 194 | elif self.loc_loss_type == 'linear_iou': 195 | losses = 1 - ious 196 | elif self.loc_loss_type == 'giou': 197 | losses = 1 - gious 198 | else: 199 | raise NotImplementedError 200 | 201 | if weight is not None: 202 | return (losses * weight).sum() 203 | else: 204 | return losses.sum() 205 | -------------------------------------------------------------------------------- /core/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | Codes from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/ml_nms.py) 3 | """ 4 | 5 | from detectron2.layers import batched_nms 6 | 7 | 8 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 9 | 
score_field="scores", label_field="labels"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | Arguments: 14 | boxlist(BoxList) 15 | nms_thresh (float) 16 | max_proposals (int): if > 0, then only the top max_proposals are kept 17 | after non-maximum suppression 18 | score_field (str) 19 | """ 20 | if nms_thresh <= 0: 21 | return boxlist 22 | boxes = boxlist.pred_boxes.tensor 23 | scores = boxlist.scores 24 | labels = boxlist.pred_classes 25 | keep = batched_nms(boxes, scores, labels, nms_thresh) 26 | if max_proposals > 0: 27 | keep = keep[: max_proposals] 28 | boxlist = boxlist[keep] 29 | return boxlist 30 | -------------------------------------------------------------------------------- /core/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | from .fcose import FCOSE, ExtremeDetector 3 | from .dsnake_baseline import FcosSnake 4 | from .backbone import build_fcos_resnet_fpn_backbone 5 | from .one_stage_detector import OneStageDetector 6 | from .edge_snake import Dance -------------------------------------------------------------------------------- /core/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import build_fcos_resnet_fpn_backbone 2 | from .vovnet import build_vovnet_backbone 3 | from .dla import build_fcos_dla_fpn_backbone 4 | -------------------------------------------------------------------------------- /core/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | import fvcore.nn.weight_init as weight_init 2 | import torch.nn.functional as F 3 | from detectron2.layers import ShapeSpec 4 | from detectron2.modeling.backbone import FPN, build_resnet_backbone 5 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 6 | from torch import nn 7 | 8 | from .mobilenet import build_mnv2_backbone 9 | from .vovnet import build_vovnet_backbone 10 | 11 | 12 | class LastLevelP6P7(nn.Module): 13 | """ 14 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from 15 | C5 or P5 feature. 16 | """ 17 | 18 | def __init__(self, in_channels, out_channels, in_features="res5"): 19 | super().__init__() 20 | self.num_levels = 2 21 | self.in_feature = in_features 22 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 23 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 24 | for module in [self.p6, self.p7]: 25 | weight_init.c2_xavier_fill(module) 26 | 27 | def forward(self, x): 28 | p6 = self.p6(x) 29 | p7 = self.p7(F.relu(p6)) 30 | return [p6, p7] 31 | 32 | 33 | class LastLevelP6(nn.Module): 34 | """ 35 | This module is used in FCOS to generate extra layers 36 | """ 37 | 38 | def __init__(self, in_channels, out_channels, in_features="res5"): 39 | super().__init__() 40 | self.num_levels = 1 41 | self.in_feature = in_features 42 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 43 | for module in [self.p6]: 44 | weight_init.c2_xavier_fill(module) 45 | 46 | def forward(self, x): 47 | p6 = self.p6(x) 48 | return [p6] 49 | 50 | 51 | @BACKBONE_REGISTRY.register() 52 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): 53 | """ 54 | Args: 55 | cfg: a detectron2 CfgNode 56 | 57 | Returns: 58 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
59 | """ 60 | if cfg.MODEL.MOBILENET: 61 | bottom_up = build_mnv2_backbone(cfg, input_shape) 62 | elif cfg.MODEL.USE_VOVNET: 63 | bottom_up = build_vovnet_backbone(cfg, input_shape) 64 | else: 65 | bottom_up = build_resnet_backbone(cfg, input_shape) 66 | in_features = cfg.MODEL.FPN.IN_FEATURES 67 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 68 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 69 | in_channels_top = out_channels 70 | if top_levels == 2: 71 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 72 | if top_levels == 1: 73 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 74 | elif top_levels == 0: 75 | top_block = None 76 | backbone = FPN( 77 | bottom_up=bottom_up, 78 | in_features=in_features, 79 | out_channels=out_channels, 80 | norm=cfg.MODEL.FPN.NORM, 81 | top_block=top_block, 82 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 83 | ) 84 | return backbone 85 | -------------------------------------------------------------------------------- /core/modeling/backbone/mobilenet.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/tonylins/pytorch-mobilenet-v2/ 2 | # Published by Ji Lin, tonylins 3 | # licensed under the Apache License, Version 2.0, January 2004 4 | 5 | from torch import nn 6 | from torch.nn import BatchNorm2d 7 | 8 | # from detectron2.layers.batch_norm import NaiveSyncBatchNorm as BatchNorm2d 9 | from detectron2.layers import Conv2d 10 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 11 | from detectron2.modeling.backbone import Backbone 12 | 13 | 14 | def conv_bn(inp, oup, stride): 15 | return nn.Sequential( 16 | Conv2d(inp, oup, 3, stride, 1, bias=False), 17 | BatchNorm2d(oup), 18 | nn.ReLU6(inplace=True), 19 | ) 20 | 21 | 22 | def conv_1x1_bn(inp, oup): 23 | return nn.Sequential( 24 | Conv2d(inp, oup, 1, 1, 0, bias=False), BatchNorm2d(oup), nn.ReLU6(inplace=True) 25 | ) 26 | 27 | 28 | class InvertedResidual(nn.Module): 29 | def __init__(self, inp, oup, stride, expand_ratio): 30 | super(InvertedResidual, self).__init__() 31 | self.stride = stride 32 | assert stride in [1, 2] 33 | 34 | hidden_dim = int(round(inp * expand_ratio)) 35 | self.use_res_connect = self.stride == 1 and inp == oup 36 | 37 | if expand_ratio == 1: 38 | self.conv = nn.Sequential( 39 | # dw 40 | Conv2d( 41 | hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False 42 | ), 43 | BatchNorm2d(hidden_dim), 44 | nn.ReLU6(inplace=True), 45 | # pw-linear 46 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 47 | BatchNorm2d(oup), 48 | ) 49 | else: 50 | self.conv = nn.Sequential( 51 | # pw 52 | Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 53 | BatchNorm2d(hidden_dim), 54 | nn.ReLU6(inplace=True), 55 | # dw 56 | Conv2d( 57 | hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False 58 | ), 59 | BatchNorm2d(hidden_dim), 60 | nn.ReLU6(inplace=True), 61 | # pw-linear 62 | Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 63 | BatchNorm2d(oup), 64 | ) 65 | 66 | def forward(self, x): 67 | if self.use_res_connect: 68 | return x + self.conv(x) 69 | else: 70 | return self.conv(x) 71 | 72 | 73 | class MobileNetV2(Backbone): 74 | """ 75 | Should freeze bn 76 | """ 77 | 78 | def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.0): 79 | super(MobileNetV2, self).__init__() 80 | block = InvertedResidual 81 | input_channel = 32 82 | interverted_residual_setting = [ 83 | # t, c, n, s 84 | [1, 16, 1, 1], 85 | [6, 24, 2, 2], 86 | [6, 32, 3, 2], 87 | [6, 64, 4, 2], 88 | [6, 96, 3, 1], 89 | [6, 160, 3, 
2], 90 | [6, 320, 1, 1], 91 | ] 92 | 93 | # building first layer 94 | assert input_size % 32 == 0 95 | input_channel = int(input_channel * width_mult) 96 | self.return_features_indices = [3, 6, 13, 17] 97 | self.return_features_num_channels = [] 98 | self.features = nn.ModuleList([conv_bn(3, input_channel, 2)]) 99 | # building inverted residual blocks 100 | for t, c, n, s in interverted_residual_setting: 101 | output_channel = int(c * width_mult) 102 | for i in range(n): 103 | if i == 0: 104 | self.features.append( 105 | block(input_channel, output_channel, s, expand_ratio=t) 106 | ) 107 | else: 108 | self.features.append( 109 | block(input_channel, output_channel, 1, expand_ratio=t) 110 | ) 111 | input_channel = output_channel 112 | if len(self.features) - 1 in self.return_features_indices: 113 | self.return_features_num_channels.append(output_channel) 114 | 115 | self._initialize_weights() 116 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) 117 | 118 | def _freeze_backbone(self, freeze_at): 119 | for layer_index in range(freeze_at): 120 | for p in self.features[layer_index].parameters(): 121 | p.requires_grad = False 122 | 123 | def forward(self, x): 124 | res = [] 125 | for i, m in enumerate(self.features): 126 | x = m(x) 127 | if i in self.return_features_indices: 128 | res.append(x) 129 | return {"res{}".format(i + 2): r for i, r in enumerate(res)} 130 | 131 | def _initialize_weights(self): 132 | for m in self.modules(): 133 | if isinstance(m, Conv2d): 134 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 135 | m.weight.data.normal_(0, (2.0 / n) ** 0.5) 136 | if m.bias is not None: 137 | m.bias.data.zero_() 138 | elif isinstance(m, BatchNorm2d): 139 | m.weight.data.fill_(1) 140 | m.bias.data.zero_() 141 | elif isinstance(m, nn.Linear): 142 | n = m.weight.size(1) 143 | m.weight.data.normal_(0, 0.01) 144 | m.bias.data.zero_() 145 | 146 | 147 | @BACKBONE_REGISTRY.register() 148 | def build_mnv2_backbone(cfg, input_shape): 149 | """ 150 | Create a ResNet instance from config. 151 | 152 | Returns: 153 | ResNet: a :class:`ResNet` instance. 154 | """ 155 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 156 | 157 | out_feature_channels = {"res2": 24, "res3": 32, "res4": 96, "res5": 320} 158 | out_feature_strides = {"res2": 4, "res3": 8, "res4": 16, "res5": 32} 159 | model = MobileNetV2(cfg) 160 | model._out_features = out_features 161 | model._out_feature_channels = out_feature_channels 162 | model._out_feature_strides = out_feature_strides 163 | return model 164 | -------------------------------------------------------------------------------- /core/modeling/backbone/vovnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Youngwan Lee (ETRI) All Rights Reserved. 
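# Descriptive note (added): VoVNet backbone built from OSA (One-Shot Aggregation) blocks.
# Each block below runs a short chain of 3x3 convs and concatenates every intermediate
# feature map once at the end, then applies a 1x1 "concat" projection and an eSE
# channel-attention module; the _STAGE_SPECS dicts only differ in depth
# (layers per block and blocks per stage).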
2 | from collections import OrderedDict 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from detectron2.layers import FrozenBatchNorm2d, ShapeSpec, get_norm 8 | from detectron2.modeling.backbone import Backbone 9 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 10 | from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool 11 | 12 | __all__ = ["VoVNet", "build_vovnet_backbone", "build_vovnet_fpn_backbone"] 13 | 14 | _NORM = False 15 | 16 | VoVNet19_eSE = { 17 | "stage_conv_ch": [128, 160, 192, 224], 18 | "stage_out_ch": [256, 512, 768, 1024], 19 | "layer_per_block": 3, 20 | "block_per_stage": [1, 1, 1, 1], 21 | "eSE": True, 22 | } 23 | 24 | VoVNet39_eSE = { 25 | "stage_conv_ch": [128, 160, 192, 224], 26 | "stage_out_ch": [256, 512, 768, 1024], 27 | "layer_per_block": 5, 28 | "block_per_stage": [1, 1, 2, 2], 29 | "eSE": True, 30 | } 31 | 32 | VoVNet57_eSE = { 33 | "stage_conv_ch": [128, 160, 192, 224], 34 | "stage_out_ch": [256, 512, 768, 1024], 35 | "layer_per_block": 5, 36 | "block_per_stage": [1, 1, 4, 3], 37 | "eSE": True, 38 | } 39 | 40 | VoVNet99_eSE = { 41 | "stage_conv_ch": [128, 160, 192, 224], 42 | "stage_out_ch": [256, 512, 768, 1024], 43 | "layer_per_block": 5, 44 | "block_per_stage": [1, 3, 9, 3], 45 | "eSE": True, 46 | } 47 | 48 | _STAGE_SPECS = { 49 | "V-19-eSE": VoVNet19_eSE, 50 | "V-39-eSE": VoVNet39_eSE, 51 | "V-57-eSE": VoVNet57_eSE, 52 | "V-99-eSE": VoVNet99_eSE, 53 | } 54 | 55 | 56 | def conv3x3( 57 | in_channels, 58 | out_channels, 59 | module_name, 60 | postfix, 61 | stride=1, 62 | groups=1, 63 | kernel_size=3, 64 | padding=1, 65 | ): 66 | """3x3 convolution with padding""" 67 | return [ 68 | ( 69 | f"{module_name}_{postfix}/conv", 70 | nn.Conv2d( 71 | in_channels, 72 | out_channels, 73 | kernel_size=kernel_size, 74 | stride=stride, 75 | padding=padding, 76 | groups=groups, 77 | bias=False, 78 | ), 79 | ), 80 | (f"{module_name}_{postfix}/norm", get_norm(_NORM, out_channels)), 81 | (f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)), 82 | ] 83 | 84 | 85 | def conv1x1( 86 | in_channels, 87 | out_channels, 88 | module_name, 89 | postfix, 90 | stride=1, 91 | groups=1, 92 | kernel_size=1, 93 | padding=0, 94 | ): 95 | """1x1 convolution with padding""" 96 | return [ 97 | ( 98 | f"{module_name}_{postfix}/conv", 99 | nn.Conv2d( 100 | in_channels, 101 | out_channels, 102 | kernel_size=kernel_size, 103 | stride=stride, 104 | padding=padding, 105 | groups=groups, 106 | bias=False, 107 | ), 108 | ), 109 | (f"{module_name}_{postfix}/norm", get_norm(_NORM, out_channels)), 110 | (f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)), 111 | ] 112 | 113 | 114 | class Hsigmoid(nn.Module): 115 | def __init__(self, inplace=True): 116 | super(Hsigmoid, self).__init__() 117 | self.inplace = inplace 118 | 119 | def forward(self, x): 120 | return F.relu6(x + 3.0, inplace=self.inplace) / 6.0 121 | 122 | 123 | class eSEModule(nn.Module): 124 | def __init__(self, channel, reduction=4): 125 | super(eSEModule, self).__init__() 126 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 127 | self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0) 128 | self.hsigmoid = Hsigmoid() 129 | 130 | def forward(self, x): 131 | input = x 132 | x = self.avg_pool(x) 133 | x = self.fc(x) 134 | x = self.hsigmoid(x) 135 | return input * x 136 | 137 | 138 | class _OSA_module(nn.Module): 139 | def __init__( 140 | self, 141 | in_ch, 142 | stage_ch, 143 | concat_ch, 144 | layer_per_block, 145 | module_name, 146 | SE=False, 147 | identity=False, 148 
| ): 149 | 150 | super(_OSA_module, self).__init__() 151 | 152 | self.identity = identity 153 | self.layers = nn.ModuleList() 154 | in_channel = in_ch 155 | for i in range(layer_per_block): 156 | self.layers.append( 157 | nn.Sequential( 158 | OrderedDict(conv3x3(in_channel, stage_ch, module_name, i)) 159 | ) 160 | ) 161 | in_channel = stage_ch 162 | 163 | # feature aggregation 164 | in_channel = in_ch + layer_per_block * stage_ch 165 | self.concat = nn.Sequential( 166 | OrderedDict(conv1x1(in_channel, concat_ch, module_name, "concat")) 167 | ) 168 | 169 | self.ese = eSEModule(concat_ch) 170 | 171 | def forward(self, x): 172 | 173 | identity_feat = x 174 | 175 | output = [] 176 | output.append(x) 177 | for layer in self.layers: 178 | x = layer(x) 179 | output.append(x) 180 | 181 | x = torch.cat(output, dim=1) 182 | xt = self.concat(x) 183 | 184 | xt = self.ese(xt) 185 | 186 | if self.identity: 187 | xt = xt + identity_feat 188 | 189 | return xt 190 | 191 | 192 | class _OSA_stage(nn.Sequential): 193 | def __init__( 194 | self, 195 | in_ch, 196 | stage_ch, 197 | concat_ch, 198 | block_per_stage, 199 | layer_per_block, 200 | stage_num, 201 | SE=False, 202 | ): 203 | super(_OSA_stage, self).__init__() 204 | 205 | if not stage_num == 2: 206 | self.add_module( 207 | "Pooling", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) 208 | ) 209 | 210 | if block_per_stage != 1: 211 | SE = False 212 | module_name = f"OSA{stage_num}_1" 213 | self.add_module( 214 | module_name, 215 | _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE), 216 | ) 217 | for i in range(block_per_stage - 1): 218 | if i != block_per_stage - 2: # last block 219 | SE = False 220 | module_name = f"OSA{stage_num}_{i + 2}" 221 | self.add_module( 222 | module_name, 223 | _OSA_module( 224 | concat_ch, 225 | stage_ch, 226 | concat_ch, 227 | layer_per_block, 228 | module_name, 229 | SE, 230 | identity=True, 231 | ), 232 | ) 233 | 234 | 235 | class VoVNet(Backbone): 236 | def __init__(self, cfg, input_ch, out_features=None): 237 | """ 238 | Args: 239 | input_ch(int) : the number of input channel 240 | out_features (list[str]): name of the layers whose outputs should 241 | be returned in forward. Can be anything in "stem", "stage2" ... 242 | """ 243 | super(VoVNet, self).__init__() 244 | 245 | global _NORM 246 | _NORM = cfg.MODEL.VOVNET.NORM 247 | 248 | stage_specs = _STAGE_SPECS[cfg.MODEL.VOVNET.CONV_BODY] 249 | 250 | config_stage_ch = stage_specs["stage_conv_ch"] 251 | config_concat_ch = stage_specs["stage_out_ch"] 252 | block_per_stage = stage_specs["block_per_stage"] 253 | layer_per_block = stage_specs["layer_per_block"] 254 | SE = stage_specs["eSE"] 255 | 256 | self._out_features = out_features 257 | 258 | # Stem module 259 | stem = conv3x3(input_ch, 64, "stem", "1", 2) 260 | stem += conv3x3(64, 64, "stem", "2", 1) 261 | stem += conv3x3(64, 128, "stem", "3", 2) 262 | self.add_module("stem", nn.Sequential((OrderedDict(stem)))) 263 | current_stirde = 4 264 | self._out_feature_strides = {"stem": current_stirde, "stage2": current_stirde} 265 | self._out_feature_channels = {"stem": 128} 266 | 267 | stem_out_ch = [128] 268 | in_ch_list = stem_out_ch + config_concat_ch[:-1] 269 | # OSA stages 270 | self.stage_names = [] 271 | for i in range(4): # num_stages 272 | name = "stage%d" % (i + 2) # stage 2 ... 
stage 5 273 | self.stage_names.append(name) 274 | self.add_module( 275 | name, 276 | _OSA_stage( 277 | in_ch_list[i], 278 | config_stage_ch[i], 279 | config_concat_ch[i], 280 | block_per_stage[i], 281 | layer_per_block, 282 | i + 2, 283 | SE, 284 | ), 285 | ) 286 | 287 | self._out_feature_channels[name] = config_concat_ch[i] 288 | if not i == 0: 289 | self._out_feature_strides[name] = current_stirde = int( 290 | current_stirde * 2 291 | ) 292 | 293 | # initialize weights 294 | self._initialize_weights() 295 | # Optionally freeze (requires_grad=False) parts of the backbone 296 | self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_AT) 297 | 298 | def _initialize_weights(self): 299 | for m in self.modules(): 300 | if isinstance(m, nn.Conv2d): 301 | nn.init.kaiming_normal_(m.weight) 302 | 303 | def _freeze_backbone(self, freeze_at): 304 | if freeze_at < 0: 305 | return 306 | 307 | for stage_index in range(freeze_at): 308 | if stage_index == 0: 309 | m = self.stem # stage 0 is the stem 310 | else: 311 | m = getattr(self, "stage" + str(stage_index + 1)) 312 | for p in m.parameters(): 313 | p.requires_grad = False 314 | FrozenBatchNorm2d.convert_frozen_batchnorm(self) 315 | 316 | def forward(self, x): 317 | outputs = {} 318 | x = self.stem(x) 319 | if "stem" in self._out_features: 320 | outputs["stem"] = x 321 | for name in self.stage_names: 322 | x = getattr(self, name)(x) 323 | if name in self._out_features: 324 | outputs[name] = x 325 | 326 | return outputs 327 | 328 | def output_shape(self): 329 | return { 330 | name: ShapeSpec( 331 | channels=self._out_feature_channels[name], 332 | stride=self._out_feature_strides[name], 333 | ) 334 | for name in self._out_features 335 | } 336 | 337 | 338 | @BACKBONE_REGISTRY.register() 339 | def build_vovnet_backbone(cfg, input_shape): 340 | """ 341 | Create a VoVNet instance from config. 342 | 343 | Returns: 344 | VoVNet: a :class:`VoVNet` instance. 345 | """ 346 | out_features = cfg.MODEL.VOVNET.OUT_FEATURES 347 | return VoVNet(cfg, input_shape.channels, out_features=out_features) 348 | 349 | 350 | @BACKBONE_REGISTRY.register() 351 | def build_vovnet_fpn_backbone(cfg, input_shape: ShapeSpec): 352 | """ 353 | Args: 354 | cfg: a detectron2 CfgNode 355 | 356 | Returns: 357 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
358 | """ 359 | bottom_up = build_vovnet_backbone(cfg, input_shape) 360 | in_features = cfg.MODEL.FPN.IN_FEATURES 361 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 362 | backbone = FPN( 363 | bottom_up=bottom_up, 364 | in_features=in_features, 365 | out_channels=out_channels, 366 | norm=cfg.MODEL.FPN.NORM, 367 | top_block=LastLevelMaxPool(), 368 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 369 | ) 370 | return backbone 371 | -------------------------------------------------------------------------------- /core/modeling/dsnake_baseline/__init__.py: -------------------------------------------------------------------------------- 1 | from .af_two_stage import FcosSnake -------------------------------------------------------------------------------- /core/modeling/dsnake_baseline/af_two_stage.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.structures import ImageList 6 | from detectron2.utils.logger import log_first_n 7 | 8 | from detectron2.modeling.backbone import build_backbone 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 11 | 12 | from .postprocessing import ( 13 | detector_postprocess 14 | ) 15 | 16 | from detectron2.structures import Instances, Boxes 17 | from core.structures import ExtremePoints 18 | 19 | from .dsnake_head import SnakeFPNHead 20 | 21 | 22 | @META_ARCH_REGISTRY.register() 23 | class FcosSnake(nn.Module): 24 | def __init__(self, cfg): 25 | super().__init__() 26 | self.device = torch.device(cfg.MODEL.DEVICE) 27 | 28 | self.backbone = build_backbone(cfg) 29 | self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) 30 | self.refinement_head = SnakeFPNHead(cfg, self.backbone.output_shape()) 31 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 32 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 33 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 34 | self.to(self.device) 35 | 36 | self.gt_input = cfg.TEST.GT_IN.WHAT if cfg.TEST.GT_IN.ON else (None,) 37 | 38 | def forward(self, batched_inputs): 39 | images = [x["image"].to(self.device) for x in batched_inputs] 40 | images = [self.normalizer(x) for x in images] 41 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 42 | 43 | features = self.backbone(images.tensor) 44 | 45 | if "instances" in batched_inputs[0] : 46 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 47 | elif "targets" in batched_inputs[0]: 48 | log_first_n( 49 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 50 | ) 51 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 52 | else: 53 | gt_instances = None 54 | 55 | proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) 56 | 57 | if not self.training: 58 | if 'instance' in self.gt_input: 59 | assert gt_instances is not None 60 | 61 | for im_i in range(len(gt_instances)): 62 | gt_instances_per_im = gt_instances[im_i] 63 | bboxes = gt_instances_per_im.gt_boxes.tensor 64 | instances_per_im = Instances(proposals[im_i]._image_size) 65 | instances_per_im.pred_boxes = Boxes(bboxes) 66 | instances_per_im.pred_classes = gt_instances_per_im.gt_classes 67 | instances_per_im.scores = torch.ones_like(gt_instances_per_im.gt_classes).to(bboxes.device) 68 | 69 | if 
gt_instances_per_im.has("gt_masks"): 70 | gt_masks = gt_instances_per_im.gt_masks 71 | ext_pts_off = self.refinement_head.refine_head.get_simple_extreme_points( 72 | gt_masks.polygons).to(bboxes.device) 73 | ex_t = torch.stack([ext_pts_off[:, None, 0], bboxes[:, None, 1]], dim=2) 74 | ex_l = torch.stack([bboxes[:, None, 0], ext_pts_off[:, None, 1]], dim=2) 75 | ex_b = torch.stack([ext_pts_off[:, None, 2], bboxes[:, None, 3]], dim=2) 76 | ex_r = torch.stack([bboxes[:, None, 2], ext_pts_off[:, None, 3]], dim=2) 77 | instances_per_im.ext_points = ExtremePoints( 78 | torch.cat([ex_t, ex_l, ex_b, ex_r], dim=1)) 79 | else: 80 | quad = self.refinement_head.refine_head.get_quadrangle(bboxes).view(-1, 4, 2) 81 | instances_per_im.ext_points = ExtremePoints(quad) 82 | 83 | proposals[im_i] = instances_per_im 84 | 85 | head_losses, proposals = self.refinement_head(features, proposals, gt_instances) 86 | 87 | # In training, the proposals are not useful at all in RPN models; but not here 88 | # This makes RPN-only models about 5% slower. 89 | if self.training: 90 | proposal_losses.update(head_losses) 91 | return proposal_losses 92 | 93 | processed_results = [] 94 | for results_per_image, input_per_image, image_size in zip( 95 | proposals, batched_inputs, images.image_sizes 96 | ): 97 | height = input_per_image.get("height", image_size[0]) 98 | width = input_per_image.get("width", image_size[1]) 99 | instance_r = detector_postprocess(results_per_image, 100 | height, 101 | width) 102 | processed_results.append( 103 | {"instances": instance_r} 104 | ) 105 | 106 | return processed_results 107 | -------------------------------------------------------------------------------- /core/modeling/dsnake_baseline/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | import numpy as np 4 | from torch.nn import functional as F 5 | import functools 6 | import multiprocessing as mp 7 | from detectron2.layers import ROIAlign 8 | from detectron2.structures import Instances, polygons_to_bitmask 9 | import pycocotools.mask as mask_util 10 | from core.structures import PolygonPoints 11 | 12 | 13 | def get_polygon_rles(polygons, image_shape): 14 | # input: N x (p*2) 15 | polygons = polygons.cpu().numpy() 16 | h, w = image_shape 17 | rles = [ 18 | mask_util.merge(mask_util.frPyObjects([p.tolist()], h, w)) 19 | for p in polygons 20 | ] 21 | return rles 22 | 23 | 24 | def detector_postprocess(results, 25 | output_height, 26 | output_width): 27 | """ 28 | Resize the output instances. 29 | The input images are often resized when entering an object detector. 30 | As a result, we often need the outputs of the detector in a different 31 | resolution from its inputs. 32 | 33 | This function will resize the raw outputs of an R-CNN detector 34 | to produce outputs according to the desired output resolution. 35 | 36 | Args: 37 | results (Instances): the raw outputs from the detector. 38 | `results.image_size` contains the input image resolution the detector sees. 39 | This object might be modified in-place. 40 | output_height, output_width: the desired output resolution. 
41 | 42 | Returns: 43 | Instances: the resized output from the model, based on the output resolution 44 | """ 45 | # the results.image_size here is the one the model saw, typically (800, xxxx) 46 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 47 | results = Instances((output_height, output_width), **results.get_fields()) 48 | 49 | if results.has("pred_boxes"): 50 | output_boxes = results.pred_boxes 51 | elif results.has("proposal_boxes"): 52 | output_boxes = results.proposal_boxes 53 | 54 | output_boxes.scale(scale_x, scale_y) 55 | # now the results.image_size is the one of raw input image 56 | output_boxes.clip(results.image_size) 57 | 58 | results = results[output_boxes.nonempty()] 59 | 60 | if results.has("pred_polys"): 61 | if results.has("pred_path"): 62 | snake_path = results.pred_path 63 | for i in range(snake_path.size(1)): # number of evolution 64 | current_poly = PolygonPoints(snake_path[:, i, :, :]) 65 | current_poly.scale(scale_x, scale_y) 66 | current_poly.clip(results.image_size) 67 | snake_path[:, i, :, :] = current_poly.tensor 68 | 69 | results.pred_polys.scale(scale_x, scale_y) 70 | results.pred_polys.clip(results.image_size) 71 | results.pred_masks = get_polygon_rles(results.pred_polys.flatten(), 72 | (output_height, output_width)) 73 | return results 74 | 75 | else: 76 | raise ValueError('No pred_polys in instance prediction!') 77 | -------------------------------------------------------------------------------- /core/modeling/edge_snake/__init__.py: -------------------------------------------------------------------------------- 1 | from .dance import Dance 2 | from .edge_det import build_edge_det_head 3 | -------------------------------------------------------------------------------- /core/modeling/edge_snake/dance.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.structures import ImageList 6 | from detectron2.utils.logger import log_first_n 7 | 8 | from detectron2.modeling.backbone import build_backbone 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 11 | 12 | from .edge_det import build_edge_det_head 13 | from core.modeling.postprocessing import detector_postprocess, edge_map_postprocess 14 | 15 | from core.utils import timer 16 | 17 | 18 | @META_ARCH_REGISTRY.register() 19 | class Dance(nn.Module): 20 | def __init__(self, cfg): 21 | super().__init__() 22 | self.device = torch.device(cfg.MODEL.DEVICE) 23 | 24 | self.backbone = build_backbone(cfg) 25 | self.proposal_generator = build_proposal_generator( 26 | cfg, self.backbone.output_shape() 27 | ) 28 | 29 | self.refinement_head = build_edge_det_head(cfg, self.backbone.output_shape()) 30 | 31 | self.mask_result_src = cfg.MODEL.DANCE.MASK_IN 32 | 33 | self.semantic_filter = cfg.MODEL.DANCE.SEMANTIC_FILTER 34 | self.semantic_filter_th = cfg.MODEL.DANCE.SEMANTIC_FILTER_TH 35 | 36 | self.need_concave_hull = ( 37 | True if cfg.MODEL.SNAKE_HEAD.LOSS_TYPE == "chamfer" else False 38 | ) 39 | 40 | self.roi_size = cfg.MODEL.DANCE.ROI_SIZE 41 | 42 | self.re_compute_box = cfg.MODEL.DANCE.RE_COMP_BOX 43 | 44 | self.visualize_path = cfg.MODEL.SNAKE_HEAD.VIS_PATH 45 | 46 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 47 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 48 | self.normalizer = lambda 
x: (x - pixel_mean) / pixel_std 49 | self.to(self.device) 50 | 51 | def single_test(self, batched_inputs): 52 | assert len(batched_inputs) == 1 53 | with timer.env("preprocess"): 54 | images = batched_inputs[0]["image"].to(self.device) 55 | images = self.normalizer(images) 56 | images = ImageList.from_tensors([images], self.backbone.size_divisibility) 57 | 58 | with timer.env("backbone"): 59 | features = self.backbone(images.tensor) 60 | 61 | gt_instances = None 62 | gt_sem_seg = None 63 | 64 | with timer.env("fcose"): 65 | proposals, proposal_losses = self.proposal_generator( 66 | images, features, gt_instances 67 | ) 68 | 69 | if self.mask_result_src != "BOX": 70 | edge_map, head_losses, proposals = self.refinement_head( 71 | features, proposals, (gt_sem_seg, [gt_instances, images.image_sizes]) 72 | ) 73 | 74 | with timer.env("postprocess"): 75 | height = batched_inputs[0].get("height", images.image_sizes[0][0]) 76 | width = batched_inputs[0].get("width", images.image_sizes[0][1]) 77 | instance_r = detector_postprocess( 78 | self.semantic_filter, 79 | self.semantic_filter_th, 80 | self.mask_result_src, 81 | proposals[0], 82 | height, 83 | width, 84 | self.roi_size, 85 | self.need_concave_hull, 86 | self.re_compute_box, 87 | ) 88 | processed_results = [{"instances": instance_r}] 89 | return processed_results 90 | 91 | def forward(self, batched_inputs): 92 | """ 93 | Args: 94 | Same as in :class:`GeneralizedRCNN.forward` 95 | 96 | Returns: 97 | list[dict]: 98 | Each dict is the output for one input image. 99 | The dict contains one key "proposals" whose value is a 100 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 101 | """ 102 | if not self.training and not self.visualize_path: 103 | return self.single_test(batched_inputs) 104 | 105 | with timer.env("preprocess"): 106 | images = [x["image"].to(self.device) for x in batched_inputs] 107 | images = [self.normalizer(x) for x in images] 108 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 109 | 110 | with timer.env("backbone"): 111 | features = self.backbone(images.tensor) 112 | 113 | if "instances" in batched_inputs[0]: 114 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 115 | elif "targets" in batched_inputs[0]: 116 | log_first_n( 117 | logging.WARN, 118 | "'targets' in the model inputs is now renamed to 'instances'!", 119 | n=10, 120 | ) 121 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 122 | else: 123 | gt_instances = None 124 | 125 | if "sem_seg" in batched_inputs[0]: 126 | gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] 127 | gt_sem_seg = ImageList.from_tensors( 128 | gt_sem_seg, 129 | self.backbone.size_divisibility, 130 | self.refinement_head.ignore_value, 131 | ).tensor 132 | else: 133 | gt_sem_seg = None 134 | 135 | with timer.env("fcose"): 136 | proposals, proposal_losses = self.proposal_generator( 137 | images, features, gt_instances 138 | ) 139 | edge_map, head_losses, proposals = self.refinement_head( 140 | features, proposals, (gt_sem_seg, [gt_instances, images.image_sizes]) 141 | ) 142 | 143 | # In training, the proposals are not useful at all in RPN models; but not here 144 | # This makes RPN-only models about 5% slower. 
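        # During training only the merged loss dict (proposal + refinement-head losses) is
        # returned; this inference branch is reached only when contour-path visualization is
        # enabled, otherwise single_test() above already handled the forward pass.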
145 | if self.training: 146 | timer.reset() 147 | proposal_losses.update(head_losses) 148 | return proposal_losses 149 | 150 | processed_results = [] 151 | 152 | with timer.env("postprocess"): 153 | for per_edge_map, results_per_image, input_per_image, image_size in zip( 154 | edge_map, proposals, batched_inputs, images.image_sizes 155 | ): 156 | height = input_per_image.get("height", image_size[0]) 157 | width = input_per_image.get("width", image_size[1]) 158 | # TODO (OPT): NO need for interpolate then back for real speed test 159 | with timer.env("extra"): 160 | edge_map_r = edge_map_postprocess( 161 | per_edge_map, image_size, height, width 162 | ) 163 | instance_r = detector_postprocess( 164 | self.semantic_filter, 165 | self.semantic_filter_th, 166 | self.mask_result_src, 167 | results_per_image, 168 | height, 169 | width, 170 | self.roi_size, 171 | self.need_concave_hull, 172 | self.re_compute_box, 173 | ) 174 | processed_results.append( 175 | {"instances": instance_r, "edge_map": edge_map_r}, 176 | ) 177 | return processed_results 178 | -------------------------------------------------------------------------------- /core/modeling/edge_snake/draft.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from skimage import measure 6 | from shapely.geometry import Polygon 7 | import pycocotools.mask as mask_util 8 | 9 | from detectron2.layers import Conv2d, DeformConv, cat, ModulatedDeformConv 10 | from core.structures import ExtremePoints, PolygonPoints 11 | 12 | from core.layers import DFConv2d, SmoothL1Loss, ChamferLoss, extreme_utils 13 | 14 | from core.modeling.fcose.utils import get_extreme_points 15 | 16 | from .deform_head import DeformNet 17 | 18 | from detectron2.utils import timer 19 | 20 | def sample_octagons(self, pred_instances): 21 | poly_sample_locations = [] 22 | image_index = [] 23 | for im_i in range(len(pred_instances)): 24 | instance_per_im = pred_instances[im_i] 25 | ext_points = instance_per_im.ext_points 26 | octagons_per_im = ext_points.get_octagons().cpu().numpy().reshape(-1, 8, 2) 27 | for oct in octagons_per_im: 28 | # sampling from octagon 29 | oct_sampled_pts = self.uniform_sample(oct, self.num_sampling) 30 | 31 | oct_sampled_pts = oct_sampled_pts[::-1] if Polygon( 32 | oct_sampled_pts).exterior.is_ccw else oct_sampled_pts 33 | assert not Polygon(oct_sampled_pts).exterior.is_ccw, '1) contour must be clock-wise!' 34 | 35 | poly_sample_locations.append(torch.tensor(oct_sampled_pts, device=ext_points.device)) 36 | image_index.append(im_i) 37 | 38 | if not poly_sample_locations: 39 | return poly_sample_locations, image_index 40 | 41 | poly_sample_locations = torch.stack(poly_sample_locations, dim=0) 42 | image_index = torch.tensor(image_index) 43 | return poly_sample_locations, image_index 44 | 45 | 46 | def compute_loss_for_maskious(self, classes, targets, location_preds, scores): 47 | if isinstance(location_preds, list): 48 | # e.g. 
4*sum{k}, 128, 2 49 | classes = classes.repeat(len(location_preds)) 50 | targets = targets.repeat(len(location_preds), 1, 1) 51 | location_preds = torch.cat(location_preds, dim=0) 52 | elif len(location_preds) % len(classes) == 0: 53 | ratio = int(len(location_preds) / len(classes)) 54 | classes = classes.repeat(ratio) 55 | targets = targets.repeat(ratio, 1, 1) 56 | else: 57 | raise ValueError('Number of pairs not match!') 58 | 59 | targets_np = targets.cpu().numpy().reshape(targets.size(0), -1) 60 | location_preds_np = location_preds.cpu().numpy().reshape(location_preds.size(0), -1) 61 | ious_w_valid = [] 62 | for (t, l) in zip(targets_np, location_preds_np): 63 | ious_w_valid.append(_compute_iou_coco(t, l, self.ms_min_area)) 64 | ious_w_valid = torch.tensor(ious_w_valid, device=targets.device) 65 | select = ious_w_valid[:, 0].bool() 66 | ious = ious_w_valid[:, 1] 67 | 68 | maskiou_t = ious[select] 69 | classes = classes[select] 70 | scores = scores[select] 71 | 72 | if len(scores) == 0: 73 | return maskiou_t.sum() * 0 74 | 75 | maskiou_p = torch.gather(scores, dim=1, index=classes[:, None]).view(-1) 76 | return F.smooth_l1_loss(maskiou_p, maskiou_t, reduction='mean') 77 | 78 | 79 | def single_segment_matching(num_sampling, dense_targets, sampled_pts, edge_idx): 80 | ext_idx = edge_idx[::3] # try ext first, if work then consider finer segments 81 | aug_ext_idx = torch.cat([ext_idx, torch.tensor([num_sampling], device=ext_idx.device)], dim=0) 82 | ch_pts = sampled_pts[ext_idx] # characteristic points 83 | diff = (ch_pts[:, None, :] - dense_targets[None, :, :]).pow(2).sum(2) 84 | min_idx = torch.argmin(diff, dim=1) 85 | # TODO: hard-code 3x. 86 | aug_min_idx = torch.cat([min_idx, torch.tensor([num_sampling * 3], device=min_idx.device)], dim=0) 87 | 88 | # estimate curvature 89 | shift_d_l = torch.cat([dense_targets[1:], dense_targets[:1]], dim=0) 90 | shift_d_r = torch.cat([dense_targets[-1:], dense_targets[:-1]], dim=0) 91 | cur = ((shift_d_l + shift_d_r) / 2 - dense_targets).pow(2).sum(1) 92 | 93 | cur[::3] += 1e-9 # regular pulses. 94 | 95 | segments = [] 96 | for i in range(4): 97 | mask = torch.zeros_like(cur) 98 | mask[aug_min_idx[i]:aug_min_idx[i + 1]] = 1 99 | interest_idx = torch.argsort(mask * cur, descending=True)[:aug_ext_idx[i + 1] - aug_ext_idx[i]] 100 | segments.append(torch.sort(interest_idx)[0]) 101 | segments = torch.cat(segments) 102 | return dense_targets[segments] 103 | 104 | def single_uniform_segment_matching(self, dense_targets, sampled_pts, edge_idx): 105 | ext_idx = edge_idx[::3] # try ext first, if work then consider finer segments 106 | aug_ext_idx = torch.cat([ext_idx, torch.tensor([self.num_sampling - 1], device=ext_idx.device)], dim=0) 107 | ch_pts = sampled_pts[ext_idx] # characteristic points 108 | diff = (ch_pts[:, None, :] - dense_targets[None, :, :]).pow(2).sum(2) 109 | min_idx = torch.argmin(diff, dim=1) 110 | # TODO: hard-code 3x. 
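    # dense_targets is assumed to be sampled at 3 * num_sampling points along the ground-truth
    # contour, hence the closing sentinel index num_sampling * 3 - 1 appended below.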
111 | aug_min_idx = torch.cat([min_idx, torch.tensor([self.num_sampling * 3 - 1], device=min_idx.device)], dim=0) 112 | 113 | before_i = 0 114 | after_i = 1 115 | 116 | segments = [] 117 | for i in range(4): 118 | original_len = aug_min_idx[after_i] - aug_min_idx[before_i] 119 | assert original_len >= 0 120 | if original_len == 0: 121 | after_i += 1 122 | continue 123 | 124 | desired_num_seg = aug_ext_idx[after_i] - aug_ext_idx[before_i] 125 | assert desired_num_seg >= 0 126 | if desired_num_seg == 0: 127 | before_i += 1 128 | after_i += 1 129 | continue 130 | 131 | re_sampled_pts = self.uniform_sample_1d( 132 | dense_targets[aug_min_idx[before_i]: aug_min_idx[after_i]], 133 | desired_num_seg) 134 | 135 | segments.append(re_sampled_pts) 136 | 137 | segments = np.concatenate(segments, axis=0) 138 | assert len(segments) == self.num_sampling 139 | return segments 140 | 141 | 142 | def segment_matching(dense_targets, sampled_pts, edge_idx): 143 | ext_idx = edge_idx[:, ::3] # try ext first, if work then consider finer segments 144 | seq_idx = torch.arange(ext_idx.size(0)).repeat_interleave(ext_idx.size(1)).to(ext_idx.device) 145 | ch_pts = sampled_pts[seq_idx, ext_idx.view(-1)].reshape(ext_idx.size(0), ext_idx.size(1), 2) # characteristic points 146 | diffs = (ch_pts[:, :, None, :] - dense_targets[:, None, :, :]).pow(2).sum(3) 147 | min_idx = torch.argmin(diffs, dim=2) 148 | 149 | 150 | def uniform_sample_1d(pts, new_n): 151 | n = pts.shape[0] 152 | if n == new_n: 153 | return pts 154 | # len: n - 1 155 | segment_len = np.sqrt(np.sum((pts[1:] - pts[:-1]) ** 2, axis=1)) 156 | 157 | # down-sample or up-sample 158 | # n 159 | start_node = np.cumsum(np.concatenate([np.array([0]), segment_len])) 160 | total_len = np.sum(segment_len) 161 | 162 | new_per_len = total_len / new_n 163 | 164 | mark_1d = ((np.arange(new_n-1) + 1) * new_per_len).reshape(-1, 1) 165 | locate = (start_node.reshape(1, -1) - mark_1d) 166 | iss, jss = np.where(locate > 0) 167 | cut_idx = np.cumsum(np.unique(iss, return_counts=True)[1]) 168 | cut_idx = np.concatenate([np.array([0]), cut_idx[:-1]]) 169 | 170 | after_idx = jss[cut_idx] 171 | before_idx = after_idx - 1 172 | 173 | after_idx[after_idx < 0] = 0 174 | 175 | before = locate[np.arange(new_n-1), before_idx] 176 | after = locate[np.arange(new_n-1), after_idx] 177 | 178 | w = (- before / (after - before)).reshape(-1, 1) 179 | 180 | sampled_pts = (1 - w) * pts[before_idx] + w * pts[after_idx] 181 | 182 | return np.concatenate([pts[:1], sampled_pts, pts[-1:]], axis=0) 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | -------------------------------------------------------------------------------- /core/modeling/fcos/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | -------------------------------------------------------------------------------- /core/modeling/fcos/fcos.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Dict 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from detectron2.layers import ShapeSpec 8 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 9 | 10 | from core.layers import DFConv2d, IOULoss 11 | from .fcos_outputs import FCOSOutputs 12 | 13 | 14 | __all__ = ["FCOS"] 15 | 16 | INF = 100000000 17 | 18 | 19 | class Scale(nn.Module): 20 | def __init__(self, init_value=1.0): 21 | super(Scale, self).__init__() 22 | 
self.scale = nn.Parameter(torch.FloatTensor([init_value])) 23 | 24 | def forward(self, input): 25 | return input * self.scale 26 | 27 | 28 | @PROPOSAL_GENERATOR_REGISTRY.register() 29 | class FCOS(nn.Module): 30 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 31 | super().__init__() 32 | # fmt: off 33 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 34 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 35 | self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA 36 | self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA 37 | self.center_sample = cfg.MODEL.FCOS.CENTER_SAMPLE 38 | self.strides = cfg.MODEL.FCOS.FPN_STRIDES 39 | self.radius = cfg.MODEL.FCOS.POS_RADIUS 40 | self.pre_nms_thresh_train = cfg.MODEL.FCOS.INFERENCE_TH_TRAIN 41 | self.pre_nms_thresh_test = cfg.MODEL.FCOS.INFERENCE_TH_TEST 42 | self.pre_nms_topk_train = cfg.MODEL.FCOS.PRE_NMS_TOPK_TRAIN 43 | self.pre_nms_topk_test = cfg.MODEL.FCOS.PRE_NMS_TOPK_TEST 44 | self.nms_thresh = cfg.MODEL.FCOS.NMS_TH 45 | self.post_nms_topk_train = cfg.MODEL.FCOS.POST_NMS_TOPK_TRAIN 46 | self.post_nms_topk_test = cfg.MODEL.FCOS.POST_NMS_TOPK_TEST 47 | self.thresh_with_ctr = cfg.MODEL.FCOS.THRESH_WITH_CTR 48 | # fmt: on 49 | self.iou_loss = IOULoss(cfg.MODEL.FCOS.LOC_LOSS_TYPE) 50 | # generate sizes of interest 51 | soi = [] 52 | prev_size = -1 53 | for s in cfg.MODEL.FCOS.SIZES_OF_INTEREST: 54 | soi.append([prev_size, s]) 55 | prev_size = s 56 | soi.append([prev_size, INF]) 57 | self.sizes_of_interest = soi 58 | self.fcos_head = FCOSHead(cfg, [input_shape[f] for f in self.in_features]) 59 | 60 | def forward(self, images, features, gt_instances): 61 | """ 62 | Arguments: 63 | images (list[Tensor] or ImageList): images to be processed 64 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 65 | 66 | Returns: 67 | result (list[BoxList] or dict[Tensor]): the output from the model. 68 | During training, it returns a dict[Tensor] which contains the losses. 69 | During testing, it returns list[BoxList] contains additional fields 70 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 
71 | 72 | """ 73 | features = [features[f] for f in self.in_features] 74 | locations = self.compute_locations(features) 75 | logits_pred, reg_pred, ctrness_pred, bbox_towers = self.fcos_head(features) 76 | 77 | if self.training: 78 | pre_nms_thresh = self.pre_nms_thresh_train 79 | pre_nms_topk = self.pre_nms_topk_train 80 | post_nms_topk = self.post_nms_topk_train 81 | else: 82 | pre_nms_thresh = self.pre_nms_thresh_test 83 | pre_nms_topk = self.pre_nms_topk_test 84 | post_nms_topk = self.post_nms_topk_test 85 | 86 | outputs = FCOSOutputs( 87 | images, 88 | locations, 89 | logits_pred, 90 | reg_pred, 91 | ctrness_pred, 92 | self.focal_loss_alpha, 93 | self.focal_loss_gamma, 94 | self.iou_loss, 95 | self.center_sample, 96 | self.sizes_of_interest, 97 | self.strides, 98 | self.radius, 99 | self.fcos_head.num_classes, 100 | pre_nms_thresh, 101 | pre_nms_topk, 102 | self.nms_thresh, 103 | post_nms_topk, 104 | self.thresh_with_ctr, 105 | gt_instances 106 | ) 107 | 108 | if self.training: 109 | losses, _ = outputs.losses() 110 | return None, losses 111 | else: 112 | proposals = outputs.predict_proposals() 113 | return proposals, {} 114 | 115 | def compute_locations(self, features): 116 | locations = [] 117 | for level, feature in enumerate(features): 118 | h, w = feature.size()[-2:] 119 | locations_per_level = self.compute_locations_per_level( 120 | h, w, self.fpn_strides[level], 121 | feature.device 122 | ) 123 | locations.append(locations_per_level) 124 | return locations 125 | 126 | def compute_locations_per_level(self, h, w, stride, device): 127 | shifts_x = torch.arange( 128 | 0, w * stride, step=stride, 129 | dtype=torch.float32, device=device 130 | ) 131 | shifts_y = torch.arange( 132 | 0, h * stride, step=stride, 133 | dtype=torch.float32, device=device 134 | ) 135 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 136 | shift_x = shift_x.reshape(-1) 137 | shift_y = shift_y.reshape(-1) 138 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 139 | return locations 140 | 141 | 142 | class FCOSHead(nn.Module): 143 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 144 | """ 145 | Arguments: 146 | in_channels (int): number of channels of the input feature 147 | """ 148 | super().__init__() 149 | # TODO: Implement the sigmoid version first. 150 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 151 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 152 | head_configs = {"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, 153 | False), 154 | "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, 155 | cfg.MODEL.FCOS.USE_DEFORMABLE), 156 | "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, 157 | cfg.MODEL.FCOS.USE_DEFORMABLE)} 158 | norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM 159 | 160 | in_channels = [s.channels for s in input_shape] 161 | assert len(set(in_channels)) == 1, "Each level must have the same channel!" 
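        # The same towers and predictors built below are shared across all FPN levels,
        # which is why every level must provide the same number of channels.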
162 | in_channels = in_channels[0] 163 | 164 | for head in head_configs: 165 | tower = [] 166 | num_convs, use_deformable = head_configs[head] 167 | if use_deformable: 168 | conv_func = DFConv2d 169 | else: 170 | conv_func = nn.Conv2d 171 | for i in range(num_convs): 172 | tower.append(conv_func( 173 | in_channels, in_channels, 174 | kernel_size=3, stride=1, 175 | padding=1, bias=True 176 | )) 177 | if norm == "GN": 178 | tower.append(nn.GroupNorm(32, in_channels)) 179 | tower.append(nn.ReLU()) 180 | self.add_module('{}_tower'.format(head), 181 | nn.Sequential(*tower)) 182 | 183 | self.cls_logits = nn.Conv2d( 184 | in_channels, self.num_classes, 185 | kernel_size=3, stride=1, 186 | padding=1 187 | ) 188 | self.bbox_pred = nn.Conv2d( 189 | in_channels, 4, kernel_size=3, 190 | stride=1, padding=1 191 | ) 192 | self.ctrness = nn.Conv2d( 193 | in_channels, 1, kernel_size=3, 194 | stride=1, padding=1 195 | ) 196 | 197 | if cfg.MODEL.FCOS.USE_SCALE: 198 | self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in self.fpn_strides]) 199 | else: 200 | self.scales = None 201 | 202 | for modules in [ 203 | self.cls_tower, self.bbox_tower, 204 | self.share_tower, self.cls_logits, 205 | self.bbox_pred, self.ctrness 206 | ]: 207 | for l in modules.modules(): 208 | if isinstance(l, nn.Conv2d): 209 | torch.nn.init.normal_(l.weight, std=0.01) 210 | torch.nn.init.constant_(l.bias, 0) 211 | 212 | # initialize the bias for focal loss 213 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 214 | bias_value = -math.log((1 - prior_prob) / prior_prob) 215 | torch.nn.init.constant_(self.cls_logits.bias, bias_value) 216 | 217 | def forward(self, x): 218 | logits = [] 219 | bbox_reg = [] 220 | ctrness = [] 221 | bbox_towers = [] 222 | for l, feature in enumerate(x): 223 | feature = self.share_tower(feature) 224 | cls_tower = self.cls_tower(feature) 225 | bbox_tower = self.bbox_tower(feature) 226 | 227 | logits.append(self.cls_logits(cls_tower)) 228 | ctrness.append(self.ctrness(bbox_tower)) 229 | reg = self.bbox_pred(bbox_tower) 230 | if self.scales is not None: 231 | reg = self.scales[l](reg) 232 | # Note that we use relu, as in the improved FCOS, instead of exp. 
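                # relu keeps the predicted l/t/r/b distances non-negative; the optional
                # per-level Scale module applied above lets each FPN level learn its own
                # output magnitude instead of relying on an exp transform.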
233 | bbox_reg.append(F.relu(reg)) 234 | 235 | return logits, bbox_reg, ctrness, bbox_towers 236 | -------------------------------------------------------------------------------- /core/modeling/fcose/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcose import FCOSE 2 | from .fcose_outputs import FCOSEOutputs 3 | from .extreme_detector import ExtremeDetector -------------------------------------------------------------------------------- /core/modeling/fcose/dextr.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from torch.nn.functional import upsample 3 | from deeplab_resnet import resnet101 4 | from dextr_helper import * 5 | import os 6 | import torch 7 | import numpy as np 8 | 9 | 10 | class Dextr(object): 11 | def __init__(self, model_path='', 12 | gpu_id=0, flip_test=True): 13 | if model_path == '': 14 | model_path = os.path.join( 15 | 'cache', 'dextr_pascal-sbd.pth') 16 | self.pad = 50 17 | self.thres = 0.8 18 | self.device = torch.device( 19 | "cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu") 20 | self.flip_test = flip_test 21 | 22 | # Create the network and load the weights 23 | self.net = resnet101(1, nInputChannels=4, classifier='psp') 24 | print("Initializing weights from: {}".format(model_path)) 25 | state_dict_checkpoint = torch.load( 26 | model_path, map_location=lambda storage, loc: storage) 27 | # Remove the prefix .module from the model when it is trained using DataParallel 28 | if 'module.' in list(state_dict_checkpoint.keys())[0]: 29 | new_state_dict = OrderedDict() 30 | for k, v in state_dict_checkpoint.items(): 31 | name = k[7:] # remove `module.` from multi-gpu training 32 | new_state_dict[name] = v 33 | else: 34 | new_state_dict = state_dict_checkpoint 35 | self.net.load_state_dict(new_state_dict) 36 | self.net.eval() 37 | self.net.to(self.device) 38 | 39 | def segment(self, image, extreme_points_ori): 40 | # Crop image to the bounding box from the extreme points and resize 41 | bbox = get_bbox(image, points=extreme_points_ori, pad=self.pad, zero_pad=True) 42 | crop_image = crop_from_bbox(image, bbox, zero_pad=True) 43 | resize_image = fixed_resize(crop_image, (512, 512)).astype(np.float32) 44 | 45 | # Generate extreme point heat map normalized to image values 46 | extreme_points = extreme_points_ori - [np.min(extreme_points_ori[:, 0]), np.min(extreme_points_ori[:, 1])] + [ 47 | self.pad, 48 | self.pad] 49 | extreme_points = (512 * extreme_points * [1 / crop_image.shape[1], 1 / crop_image.shape[0]]).astype(np.int) 50 | extreme_heatmap = make_gt(resize_image, extreme_points, sigma=10) 51 | extreme_heatmap = cstm_normalize(extreme_heatmap, 255) 52 | 53 | # Concatenate inputs and convert to tensor 54 | input_dextr = np.concatenate((resize_image, extreme_heatmap[:, :, np.newaxis]), axis=2) 55 | inputs = input_dextr.transpose((2, 0, 1))[np.newaxis, ...] 
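        # inputs is a 1 x 4 x 512 x 512 batch: the resized RGB crop plus the extreme-point
        # heatmap as a fourth channel. With flip_test, a horizontally flipped copy is stacked
        # on and the two predictions are averaged after the forward pass.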
56 | # import pdb; pdb.set_trace() 57 | if self.flip_test: 58 | inputs = np.concatenate([inputs, inputs[:, :, :, ::-1]], axis=0) 59 | inputs = torch.from_numpy(inputs) 60 | # Run a forward pass 61 | inputs = inputs.to(self.device) 62 | outputs = self.net.forward(inputs) 63 | outputs = upsample(outputs, size=(512, 512), mode='bilinear', align_corners=True) 64 | outputs = outputs.to(torch.device('cpu')) 65 | outputs = outputs.data.numpy() 66 | if self.flip_test: 67 | outputs = (outputs[:1] + outputs[1:, :, :, ::-1]) / 2 68 | 69 | pred = np.transpose(outputs[0, ...], (1, 2, 0)) 70 | pred = 1 / (1 + np.exp(-pred)) 71 | pred = np.squeeze(pred) 72 | result = crop2fullmask(pred, bbox, im_size=image.shape[:2], zero_pad=True, relax=self.pad) > self.thres 73 | return result 74 | -------------------------------------------------------------------------------- /core/modeling/fcose/dextr_eval.py: -------------------------------------------------------------------------------- 1 | from dextr import Dextr 2 | import pycocotools.coco as cocoapi 3 | from pycocotools.cocoeval import COCOeval 4 | from pycocotools import mask as COCOmask 5 | import numpy as np 6 | import sys 7 | import cv2 8 | import json 9 | from progress.bar import Bar 10 | 11 | DEBUG = False 12 | ANN_PATH = '/ldap_home/zichen.liu/data/coco/annotations/instances_val2017.json' 13 | IMG_DIR = '/ldap_home/zichen.liu/data/coco/val2017/' 14 | 15 | if __name__ == '__main__': 16 | dextr = Dextr() 17 | coco = cocoapi.COCO(ANN_PATH) 18 | pred_path = sys.argv[1] 19 | out_path = pred_path[:-5] + '_segm.json' 20 | anns = json.load(open(pred_path, 'r')) 21 | results = [] 22 | score_thresh = 0.2 23 | num_boxes = 0 24 | for i, ann in enumerate(anns): 25 | if ann['score'] >= score_thresh: 26 | num_boxes += 1 27 | 28 | bar = Bar('Pred + Dextr', max=num_boxes) 29 | for i, ann in enumerate(anns): 30 | if ann['score'] < score_thresh: 31 | continue 32 | ex = np.array(ann['extreme_points'], dtype=np.int32).reshape(4, 2) 33 | img_id = ann['image_id'] 34 | img_info = coco.loadImgs(ids=[img_id])[0] 35 | img_path = IMG_DIR + img_info['file_name'] 36 | img = cv2.imread(img_path) 37 | mask = dextr.segment(img[:, :, ::-1], ex) 38 | mask = np.asfortranarray(mask.astype(np.uint8)) 39 | if DEBUG: 40 | if ann['score'] < 0.1: 41 | continue 42 | print(ann['score']) 43 | img = (0.4 * img + 0.6 * mask.reshape( 44 | mask.shape[0], mask.shape[1], 1) * 255).astype(np.uint8) 45 | cv2.imshow('img', img) 46 | cv2.waitKey() 47 | encode = COCOmask.encode(mask) 48 | if 'counts' in encode: 49 | encode['counts'] = encode['counts'].decode("utf8") 50 | pred = {'image_id': ann['image_id'], 51 | 'category_id': ann['category_id'], 52 | 'score': ann['score'], 53 | 'segmentation': encode, 54 | 'extreme_points': ann['extreme_points']} 55 | results.append(pred) 56 | Bar.suffix = '[{0}/{1}]| Total: {total:} | ETA: {eta:} |'.format( 57 | i, num_boxes, total=bar.elapsed_td, eta=bar.eta_td) 58 | bar.next() 59 | bar.finish() 60 | json.dump(results, open(out_path, 'w')) 61 | 62 | dets = coco.loadRes(out_path) 63 | coco_eval = COCOeval(coco, dets, "segm") 64 | coco_eval.evaluate() 65 | coco_eval.accumulate() 66 | coco_eval.summarize() 67 | -------------------------------------------------------------------------------- /core/modeling/fcose/dextr_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch, cv2 4 | import random 5 | import numpy as np 6 | 7 | 8 | def tens2image(im): 9 | if im.size()[0] == 1: 10 | tmp = 
np.squeeze(im.numpy(), axis=0) 11 | else: 12 | tmp = im.numpy() 13 | if tmp.ndim == 2: 14 | return tmp 15 | else: 16 | return tmp.transpose((1, 2, 0)) 17 | 18 | 19 | def crop2fullmask(crop_mask, bbox, im=None, im_size=None, zero_pad=False, relax=0, mask_relax=True, 20 | interpolation=cv2.INTER_CUBIC, scikit=False): 21 | if scikit: 22 | from skimage.transform import resize as sk_resize 23 | assert (not (im is None and im_size is None)), 'You have to provide an image or the image size' 24 | if im is None: 25 | im_si = im_size 26 | else: 27 | im_si = im.shape 28 | # Borers of image 29 | bounds = (0, 0, im_si[1] - 1, im_si[0] - 1) 30 | 31 | # Valid bounding box locations as (x_min, y_min, x_max, y_max) 32 | bbox_valid = (max(bbox[0], bounds[0]), 33 | max(bbox[1], bounds[1]), 34 | min(bbox[2], bounds[2]), 35 | min(bbox[3], bounds[3])) 36 | 37 | # Bounding box of initial mask 38 | bbox_init = (bbox[0] + relax, 39 | bbox[1] + relax, 40 | bbox[2] - relax, 41 | bbox[3] - relax) 42 | 43 | if zero_pad: 44 | # Offsets for x and y 45 | offsets = (-bbox[0], -bbox[1]) 46 | else: 47 | assert ((bbox == bbox_valid).all()) 48 | offsets = (-bbox_valid[0], -bbox_valid[1]) 49 | 50 | # Simple per element addition in the tuple 51 | inds = tuple(map(sum, zip(bbox_valid, offsets + offsets))) 52 | 53 | if scikit: 54 | crop_mask = sk_resize(crop_mask, (bbox[3] - bbox[1] + 1, bbox[2] - bbox[0] + 1), order=0, 55 | mode='constant').astype(crop_mask.dtype) 56 | else: 57 | crop_mask = cv2.resize(crop_mask, (bbox[2] - bbox[0] + 1, bbox[3] - bbox[1] + 1), interpolation=interpolation) 58 | result_ = np.zeros(im_si) 59 | result_[bbox_valid[1]:bbox_valid[3] + 1, bbox_valid[0]:bbox_valid[2] + 1] = \ 60 | crop_mask[inds[1]:inds[3] + 1, inds[0]:inds[2] + 1] 61 | 62 | result = np.zeros(im_si) 63 | if mask_relax: 64 | result[bbox_init[1]:bbox_init[3] + 1, bbox_init[0]:bbox_init[2] + 1] = \ 65 | result_[bbox_init[1]:bbox_init[3] + 1, bbox_init[0]:bbox_init[2] + 1] 66 | else: 67 | result = result_ 68 | 69 | return result 70 | 71 | 72 | def overlay_mask(im, ma, colors=None, alpha=0.5): 73 | assert np.max(im) <= 1.0 74 | if colors is None: 75 | colors = np.load(os.path.join(os.path.dirname(__file__), 'pascal_map.npy')) / 255. 76 | else: 77 | colors = np.append([[0., 0., 0.]], colors, axis=0); 78 | 79 | if ma.ndim == 3: 80 | assert len(colors) >= ma.shape[0], 'Not enough colors' 81 | ma = ma.astype(np.bool) 82 | im = im.astype(np.float32) 83 | 84 | if ma.ndim == 2: 85 | fg = im * alpha + np.ones(im.shape) * (1 - alpha) * colors[1, :3] # np.array([0,0,255])/255.0 86 | else: 87 | fg = [] 88 | for n in range(ma.ndim): 89 | fg.append(im * alpha + np.ones(im.shape) * (1 - alpha) * colors[1 + n, :3]) 90 | # Whiten background 91 | bg = im.copy() 92 | if ma.ndim == 2: 93 | bg[ma == 0] = im[ma == 0] 94 | bg[ma == 1] = fg[ma == 1] 95 | total_ma = ma 96 | else: 97 | total_ma = np.zeros([ma.shape[1], ma.shape[2]]) 98 | for n in range(ma.shape[0]): 99 | tmp_ma = ma[n, :, :] 100 | total_ma = np.logical_or(tmp_ma, total_ma) 101 | tmp_fg = fg[n] 102 | bg[tmp_ma == 1] = tmp_fg[tmp_ma == 1] 103 | bg[total_ma == 0] = im[total_ma == 0] 104 | 105 | # [-2:] is s trick to be compatible both with opencv 2 and 3 106 | contours = cv2.findContours(total_ma.copy().astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:] 107 | cv2.drawContours(bg, contours[0], -1, (0.0, 0.0, 0.0), 1) 108 | 109 | return bg 110 | 111 | 112 | def overlay_masks(im, masks, alpha=0.5): 113 | colors = np.load(os.path.join(os.path.dirname(__file__), 'pascal_map.npy')) / 255. 
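    # Blend every mask onto `im` with its own colour from the palette loaded
    # above and outline it in black; pixels outside all masks keep the original
    # image values.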
114 | 115 | if isinstance(masks, np.ndarray): 116 | masks = [masks] 117 | 118 | assert len(colors) >= len(masks), 'Not enough colors' 119 | 120 | ov = im.copy() 121 | im = im.astype(np.float32) 122 | total_ma = np.zeros([im.shape[0], im.shape[1]]) 123 | i = 1 124 | for ma in masks: 125 | ma = ma.astype(np.bool) 126 | fg = im * alpha + np.ones(im.shape) * (1 - alpha) * colors[i, :3] # np.array([0,0,255])/255.0 127 | i = i + 1 128 | ov[ma == 1] = fg[ma == 1] 129 | total_ma += ma 130 | 131 | # [-2:] is s trick to be compatible both with opencv 2 and 3 132 | contours = cv2.findContours(ma.copy().astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:] 133 | cv2.drawContours(ov, contours[0], -1, (0.0, 0.0, 0.0), 1) 134 | ov[total_ma == 0] = im[total_ma == 0] 135 | 136 | return ov 137 | 138 | 139 | def extreme_points(mask, pert): 140 | def find_point(id_x, id_y, ids): 141 | sel_id = ids[0][random.randint(0, len(ids[0]) - 1)] 142 | return [id_x[sel_id], id_y[sel_id]] 143 | 144 | # List of coordinates of the mask 145 | inds_y, inds_x = np.where(mask > 0.5) 146 | 147 | # Find extreme points 148 | return np.array([find_point(inds_x, inds_y, np.where(inds_x <= np.min(inds_x) + pert)), # left 149 | find_point(inds_x, inds_y, np.where(inds_x >= np.max(inds_x) - pert)), # right 150 | find_point(inds_x, inds_y, np.where(inds_y <= np.min(inds_y) + pert)), # top 151 | find_point(inds_x, inds_y, np.where(inds_y >= np.max(inds_y) - pert)) # bottom 152 | ]) 153 | 154 | 155 | def get_bbox(mask, points=None, pad=0, zero_pad=False): 156 | if points is not None: 157 | inds = np.flip(points.transpose(), axis=0) 158 | else: 159 | inds = np.where(mask > 0) 160 | 161 | if inds[0].shape[0] == 0: 162 | return None 163 | 164 | if zero_pad: 165 | x_min_bound = -np.inf 166 | y_min_bound = -np.inf 167 | x_max_bound = np.inf 168 | y_max_bound = np.inf 169 | else: 170 | x_min_bound = 0 171 | y_min_bound = 0 172 | x_max_bound = mask.shape[1] - 1 173 | y_max_bound = mask.shape[0] - 1 174 | 175 | x_min = max(inds[1].min() - pad, x_min_bound) 176 | y_min = max(inds[0].min() - pad, y_min_bound) 177 | x_max = min(inds[1].max() + pad, x_max_bound) 178 | y_max = min(inds[0].max() + pad, y_max_bound) 179 | 180 | return x_min, y_min, x_max, y_max 181 | 182 | 183 | def crop_from_bbox(img, bbox, zero_pad=False): 184 | # Borders of image 185 | bounds = (0, 0, img.shape[1] - 1, img.shape[0] - 1) 186 | 187 | # Valid bounding box locations as (x_min, y_min, x_max, y_max) 188 | bbox_valid = (max(bbox[0], bounds[0]), 189 | max(bbox[1], bounds[1]), 190 | min(bbox[2], bounds[2]), 191 | min(bbox[3], bounds[3])) 192 | 193 | if zero_pad: 194 | # Initialize crop size (first 2 dimensions) 195 | crop = np.zeros((bbox[3] - bbox[1] + 1, bbox[2] - bbox[0] + 1), dtype=img.dtype) 196 | 197 | # Offsets for x and y 198 | offsets = (-bbox[0], -bbox[1]) 199 | 200 | else: 201 | assert (bbox == bbox_valid) 202 | crop = np.zeros((bbox_valid[3] - bbox_valid[1] + 1, bbox_valid[2] - bbox_valid[0] + 1), dtype=img.dtype) 203 | offsets = (-bbox_valid[0], -bbox_valid[1]) 204 | 205 | # Simple per element addition in the tuple 206 | inds = tuple(map(sum, zip(bbox_valid, offsets + offsets))) 207 | 208 | img = np.squeeze(img) 209 | if img.ndim == 2: 210 | crop[inds[1]:inds[3] + 1, inds[0]:inds[2] + 1] = \ 211 | img[bbox_valid[1]:bbox_valid[3] + 1, bbox_valid[0]:bbox_valid[2] + 1] 212 | else: 213 | crop = np.tile(crop[:, :, np.newaxis], [1, 1, 3]) # Add 3 RGB Channels 214 | crop[inds[1]:inds[3] + 1, inds[0]:inds[2] + 1, :] = \ 215 | img[bbox_valid[1]:bbox_valid[3] + 
1, bbox_valid[0]:bbox_valid[2] + 1, :] 216 | 217 | return crop 218 | 219 | 220 | def fixed_resize(sample, resolution, flagval=None): 221 | if flagval is None: 222 | if ((sample == 0) | (sample == 1)).all(): 223 | flagval = cv2.INTER_NEAREST 224 | else: 225 | flagval = cv2.INTER_CUBIC 226 | 227 | if isinstance(resolution, int): 228 | tmp = [resolution, resolution] 229 | tmp[np.argmax(sample.shape[:2])] = int( 230 | round(float(resolution) / np.min(sample.shape[:2]) * np.max(sample.shape[:2]))) 231 | resolution = tuple(tmp) 232 | 233 | if sample.ndim == 2 or (sample.ndim == 3 and sample.shape[2] == 3): 234 | sample = cv2.resize(sample, resolution[::-1], interpolation=flagval) 235 | else: 236 | tmp = sample 237 | sample = np.zeros(np.append(resolution, tmp.shape[2]), dtype=np.float32) 238 | for ii in range(sample.shape[2]): 239 | sample[:, :, ii] = cv2.resize(tmp[:, :, ii], resolution[::-1], interpolation=flagval) 240 | return sample 241 | 242 | 243 | def crop_from_mask(img, mask, relax=0, zero_pad=False): 244 | if mask.shape[:2] != img.shape[:2]: 245 | mask = cv2.resize(mask, dsize=tuple(reversed(img.shape[:2])), interpolation=cv2.INTER_NEAREST) 246 | 247 | assert (mask.shape[:2] == img.shape[:2]) 248 | 249 | bbox = get_bbox(mask, pad=relax, zero_pad=zero_pad) 250 | 251 | if bbox is None: 252 | return None 253 | 254 | crop = crop_from_bbox(img, bbox, zero_pad) 255 | 256 | return crop 257 | 258 | 259 | def make_gaussian(size, sigma=10, center=None, d_type=np.float64): 260 | """ Make a square gaussian kernel. 261 | size: is the dimensions of the output gaussian 262 | sigma: is full-width-half-maximum, which 263 | can be thought of as an effective radius. 264 | """ 265 | 266 | x = np.arange(0, size[1], 1, float) 267 | y = np.arange(0, size[0], 1, float) 268 | y = y[:, np.newaxis] 269 | 270 | if center is None: 271 | x0 = y0 = size[0] // 2 272 | else: 273 | x0 = center[0] 274 | y0 = center[1] 275 | 276 | return np.exp(-4 * np.log(2) * ((x - x0) ** 2 + (y - y0) ** 2) / sigma ** 2).astype(d_type) 277 | 278 | 279 | def make_gt(img, labels, sigma=10, one_mask_per_point=False): 280 | """ Make the ground-truth for landmark. 281 | img: the original color image 282 | labels: label with the Gaussian center(s) [[x0, y0],[x1, y1],...] 283 | sigma: sigma of the Gaussian. 284 | one_mask_per_point: masks for each point in different channels? 
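    For example, make_gt(img, [[30, 40], [80, 90]], sigma=10) returns a single
    (h, w) map that is the pixel-wise maximum of one Gaussian per point, cast to
    img's dtype; with one_mask_per_point=True each point instead gets its own
    channel.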
285 | """ 286 | h, w = img.shape[:2] 287 | if labels is None: 288 | gt = make_gaussian((h, w), center=(h // 2, w // 2), sigma=sigma) 289 | else: 290 | labels = np.array(labels) 291 | if labels.ndim == 1: 292 | labels = labels[np.newaxis] 293 | if one_mask_per_point: 294 | gt = np.zeros(shape=(h, w, labels.shape[0])) 295 | for ii in range(labels.shape[0]): 296 | gt[:, :, ii] = make_gaussian((h, w), center=labels[ii, :], sigma=sigma) 297 | else: 298 | gt = np.zeros(shape=(h, w), dtype=np.float64) 299 | for ii in range(labels.shape[0]): 300 | gt = np.maximum(gt, make_gaussian((h, w), center=labels[ii, :], sigma=sigma)) 301 | 302 | gt = gt.astype(dtype=img.dtype) 303 | 304 | return gt 305 | 306 | 307 | def cstm_normalize(im, max_value): 308 | """ 309 | Normalize image to range 0 - max_value 310 | """ 311 | imn = max_value * (im - im.min()) / max((im.max() - im.min()), 1e-8) 312 | return imn 313 | 314 | 315 | def generate_param_report(logfile, param): 316 | log_file = open(logfile, 'w') 317 | for key, val in param.items(): 318 | log_file.write(key + ':' + str(val) + '\n') 319 | log_file.close() -------------------------------------------------------------------------------- /core/modeling/fcose/extreme_detector.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn 4 | 5 | from detectron2.structures import ImageList 6 | from detectron2.utils.logger import log_first_n 7 | 8 | from detectron2.modeling.backbone import build_backbone 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 11 | 12 | from core.modeling.postprocessing import detector_postprocess 13 | 14 | 15 | @META_ARCH_REGISTRY.register() 16 | class ExtremeDetector(nn.Module): 17 | def __init__(self, cfg): 18 | super().__init__() 19 | self.device = torch.device(cfg.MODEL.DEVICE) 20 | 21 | self.backbone = build_backbone(cfg) 22 | self.proposal_generator = build_proposal_generator( 23 | cfg, self.backbone.output_shape() 24 | ) 25 | 26 | self.mask_result_src = cfg.MODEL.DANCE.MASK_IN 27 | 28 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 29 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 30 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 31 | self.to(self.device) 32 | 33 | def forward(self, batched_inputs): 34 | """ 35 | Args: 36 | Same as in :class:`GeneralizedRCNN.forward` 37 | 38 | Returns: 39 | list[dict]: 40 | Each dict is the output for one input image. 41 | The dict contains one key "proposals" whose value is a 42 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 
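                (In this detector the post-processed instances are returned under
                the key "instances" rather than "proposals"; see the loop at the
                end of this method.)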
43 | """ 44 | images = [x["image"].to(self.device) for x in batched_inputs] 45 | images = [self.normalizer(x) for x in images] 46 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 47 | features = self.backbone(images.tensor) 48 | 49 | if "instances" in batched_inputs[0]: 50 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 51 | elif "targets" in batched_inputs[0]: 52 | log_first_n( 53 | logging.WARN, 54 | "'targets' in the model inputs is now renamed to 'instances'!", 55 | n=10, 56 | ) 57 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 58 | else: 59 | gt_instances = None 60 | proposals, proposal_losses = self.proposal_generator( 61 | images, features, gt_instances 62 | ) 63 | # In training, the proposals are not useful at all in RPN models; but not here 64 | # This makes RPN-only models about 5% slower. 65 | if self.training: 66 | return proposal_losses 67 | 68 | processed_results = [] 69 | for results_per_image, input_per_image, image_size in zip( 70 | proposals, batched_inputs, images.image_sizes 71 | ): 72 | height = input_per_image.get("height", image_size[0]) 73 | width = input_per_image.get("width", image_size[1]) 74 | r = detector_postprocess( 75 | self.mask_result_src, results_per_image, height, width 76 | ) 77 | processed_results.append({"instances": r}) 78 | 79 | return processed_results 80 | -------------------------------------------------------------------------------- /core/modeling/fcose/fcose.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Dict 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | from detectron2.layers import ShapeSpec 8 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 9 | 10 | from core.layers import DFConv2d, IOULoss, EXTLoss 11 | from .fcose_outputs import FCOSEOutputs 12 | 13 | 14 | __all__ = ["FCOS"] 15 | 16 | INF = 100000000 17 | 18 | 19 | class Scale(nn.Module): 20 | def __init__(self, init_value=1.0): 21 | super(Scale, self).__init__() 22 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 23 | 24 | def forward(self, input): 25 | return input * self.scale 26 | 27 | 28 | @PROPOSAL_GENERATOR_REGISTRY.register() 29 | class FCOSE(nn.Module): 30 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 31 | super().__init__() 32 | # fmt: off 33 | self.in_features = cfg.MODEL.FCOS.IN_FEATURES 34 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 35 | self.focal_loss_alpha = cfg.MODEL.FCOS.LOSS_ALPHA 36 | self.focal_loss_gamma = cfg.MODEL.FCOS.LOSS_GAMMA 37 | self.center_sample = cfg.MODEL.FCOS.CENTER_SAMPLE 38 | self.strides = cfg.MODEL.FCOS.FPN_STRIDES 39 | self.radius = cfg.MODEL.FCOS.POS_RADIUS 40 | self.pre_nms_thresh_train = cfg.MODEL.FCOS.INFERENCE_TH_TRAIN 41 | self.pre_nms_thresh_test = cfg.MODEL.FCOS.INFERENCE_TH_TEST 42 | self.pre_nms_topk_train = cfg.MODEL.FCOS.PRE_NMS_TOPK_TRAIN 43 | self.pre_nms_topk_test = cfg.MODEL.FCOS.PRE_NMS_TOPK_TEST 44 | self.nms_thresh = cfg.MODEL.FCOS.NMS_TH 45 | self.post_nms_topk_train = cfg.MODEL.FCOS.POST_NMS_TOPK_TRAIN 46 | self.post_nms_topk_test = cfg.MODEL.FCOS.POST_NMS_TOPK_TEST 47 | self.thresh_with_ctr = cfg.MODEL.FCOS.THRESH_WITH_CTR 48 | # fmt: on 49 | self.iou_loss = IOULoss(cfg.MODEL.FCOS.LOC_LOSS_TYPE) 50 | self.ext_loss = EXTLoss(cfg.MODEL.FCOS.EXT_LOSS_TYPE) 51 | # generate sizes of interest 52 | soi = [] 53 | prev_size = -1 54 | for s in 
cfg.MODEL.FCOS.SIZES_OF_INTEREST: 55 | soi.append([prev_size, s]) 56 | prev_size = s 57 | soi.append([prev_size, INF]) 58 | self.sizes_of_interest = soi 59 | self.fcose_head = FCOSEHead(cfg, [input_shape[f] for f in self.in_features]) 60 | 61 | def forward(self, images, features, gt_instances): 62 | """ 63 | Arguments: 64 | images (list[Tensor] or ImageList): images to be processed 65 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 66 | 67 | Returns: 68 | result (list[BoxList] or dict[Tensor]): the output from the model. 69 | During training, it returns a dict[Tensor] which contains the losses. 70 | During testing, it returns list[BoxList] contains additional fields 71 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 72 | 73 | """ 74 | features = [features[f] for f in self.in_features] 75 | locations = self.compute_locations(features) 76 | logits_pred, reg_pred, ex_pred, ctrness_pred, bbox_towers = self.fcose_head(features) 77 | 78 | if self.training: 79 | pre_nms_thresh = self.pre_nms_thresh_train 80 | pre_nms_topk = self.pre_nms_topk_train 81 | post_nms_topk = self.post_nms_topk_train 82 | else: 83 | pre_nms_thresh = self.pre_nms_thresh_test 84 | pre_nms_topk = self.pre_nms_topk_test 85 | post_nms_topk = self.post_nms_topk_test 86 | 87 | outputs = FCOSEOutputs( 88 | images, 89 | locations, 90 | logits_pred, 91 | reg_pred, 92 | ex_pred, 93 | ctrness_pred, 94 | self.focal_loss_alpha, 95 | self.focal_loss_gamma, 96 | self.iou_loss, 97 | self.ext_loss, 98 | self.center_sample, 99 | self.sizes_of_interest, 100 | self.strides, 101 | self.radius, 102 | self.fcose_head.num_classes, 103 | pre_nms_thresh, 104 | pre_nms_topk, 105 | self.nms_thresh, 106 | post_nms_topk, 107 | self.thresh_with_ctr, 108 | gt_instances 109 | ) 110 | 111 | if self.training: 112 | losses, _ = outputs.losses() 113 | return None, losses 114 | else: 115 | proposals = outputs.predict_proposals() 116 | return proposals, {} 117 | 118 | def compute_locations(self, features): 119 | locations = [] 120 | for level, feature in enumerate(features): 121 | h, w = feature.size()[-2:] 122 | locations_per_level = self.compute_locations_per_level( 123 | h, w, self.fpn_strides[level], 124 | feature.device 125 | ) 126 | locations.append(locations_per_level) 127 | return locations 128 | 129 | def compute_locations_per_level(self, h, w, stride, device): 130 | shifts_x = torch.arange( 131 | 0, w * stride, step=stride, 132 | dtype=torch.float32, device=device 133 | ) 134 | shifts_y = torch.arange( 135 | 0, h * stride, step=stride, 136 | dtype=torch.float32, device=device 137 | ) 138 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 139 | shift_x = shift_x.reshape(-1) 140 | shift_y = shift_y.reshape(-1) 141 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 142 | return locations 143 | 144 | 145 | class FCOSEHead(nn.Module): 146 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 147 | """ 148 | Arguments: 149 | in_channels (int): number of channels of the input feature 150 | """ 151 | super().__init__() 152 | # TODO: Implement the sigmoid version first. 
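        # Head layout: three 3x3-conv towers (shared, cls, bbox) keep the channel
        # count unchanged, then four 3x3 predictors produce class logits
        # (num_classes channels), box regression (4), extreme-point offsets (4)
        # and centerness (1). E.g. a 256-channel FPN map stays (N, 256, H, W)
        # through each tower before these final predictors.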
153 | self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES 154 | self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES 155 | head_configs = {"cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, 156 | False), 157 | "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, 158 | cfg.MODEL.FCOS.USE_DEFORMABLE), 159 | "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, 160 | cfg.MODEL.FCOS.USE_DEFORMABLE)} 161 | norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM 162 | 163 | in_channels = [s.channels for s in input_shape] 164 | assert len(set(in_channels)) == 1, "Each level must have the same channel!" 165 | in_channels = in_channels[0] 166 | 167 | for head in head_configs: 168 | tower = [] 169 | num_convs, use_deformable = head_configs[head] 170 | if use_deformable: 171 | conv_func = DFConv2d 172 | else: 173 | conv_func = nn.Conv2d 174 | for i in range(num_convs): 175 | tower.append(conv_func( 176 | in_channels, in_channels, 177 | kernel_size=3, stride=1, 178 | padding=1, bias=True 179 | )) 180 | if norm == "GN": 181 | tower.append(nn.GroupNorm(32, in_channels)) 182 | tower.append(nn.ReLU()) 183 | self.add_module('{}_tower'.format(head), 184 | nn.Sequential(*tower)) 185 | 186 | self.cls_logits = nn.Conv2d( 187 | in_channels, self.num_classes, 188 | kernel_size=3, stride=1, 189 | padding=1 190 | ) 191 | self.bbox_pred = nn.Conv2d( 192 | in_channels, 4, kernel_size=3, 193 | stride=1, padding=1 194 | ) 195 | self.extrm_pred = nn.Conv2d( 196 | in_channels, 4, kernel_size=3, 197 | stride=1, padding=1 198 | ) 199 | self.ctrness = nn.Conv2d( 200 | in_channels, 1, kernel_size=3, 201 | stride=1, padding=1 202 | ) 203 | 204 | if cfg.MODEL.FCOS.USE_SCALE: 205 | self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in self.fpn_strides]) 206 | else: 207 | self.scales = None 208 | 209 | for modules in [ 210 | self.cls_tower, self.bbox_tower, 211 | self.share_tower, self.cls_logits, 212 | self.bbox_pred, self.extrm_pred, 213 | self.ctrness 214 | ]: 215 | for l in modules.modules(): 216 | if isinstance(l, nn.Conv2d): 217 | torch.nn.init.normal_(l.weight, std=0.01) 218 | torch.nn.init.constant_(l.bias, 0) 219 | 220 | # initialize the bias for focal loss 221 | prior_prob = cfg.MODEL.FCOS.PRIOR_PROB 222 | bias_value = -math.log((1 - prior_prob) / prior_prob) 223 | torch.nn.init.constant_(self.cls_logits.bias, bias_value) 224 | 225 | self.output_ex = False if cfg.MODEL.SNAKE_HEAD.INITIAL == 'box' else True 226 | 227 | def forward(self, x): 228 | logits = [] 229 | bbox_reg = [] 230 | ex_reg = [] 231 | ctrness = [] 232 | bbox_towers = [] 233 | for l, feature in enumerate(x): 234 | feature = self.share_tower(feature) 235 | cls_tower = self.cls_tower(feature) 236 | bbox_tower = self.bbox_tower(feature) 237 | 238 | logits.append(self.cls_logits(cls_tower)) 239 | ctrness.append(self.ctrness(bbox_tower)) 240 | if self.training or self.output_ex: 241 | ex_reg.append(torch.tanh(self.extrm_pred(bbox_tower))) 242 | else: 243 | ex_reg.append(None) 244 | reg = self.bbox_pred(bbox_tower) 245 | if self.scales is not None: 246 | reg = self.scales[l](reg) 247 | # Note that we use relu, as in the improved FCOS, instead of exp. 
248 | bbox_reg.append(F.relu(reg)) 249 | 250 | return logits, bbox_reg, ex_reg, ctrness, bbox_towers 251 | -------------------------------------------------------------------------------- /core/modeling/fcose/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from core.structures.points_set import ExtremePoints 4 | from detectron2.structures.boxes import Boxes 5 | import pycocotools.mask as mask_util 6 | 7 | 8 | # unused 9 | def get_octagon(ex): 10 | ex = np.array(ex).reshape(4, 2) 11 | w, h = ex[3][0] - ex[1][0], ex[2][1] - ex[0][1] 12 | t, l, b, r = ex[0][1], ex[1][0], ex[2][1], ex[3][0] 13 | x = 8. 14 | octagon = [[min(ex[0][0] + w / x, r), ex[0][1], \ 15 | max(ex[0][0] - w / x, l), ex[0][1], \ 16 | ex[1][0], max(ex[1][1] - h / x, t), \ 17 | ex[1][0], min(ex[1][1] + h / x, b), \ 18 | max(ex[2][0] - w / x, l), ex[2][1], \ 19 | min(ex[2][0] + w / x, r), ex[2][1], \ 20 | ex[3][0], min(ex[3][1] + h / x, b), \ 21 | ex[3][0], max(ex[3][1] - h / x, t) 22 | ]] 23 | return octagon 24 | 25 | 26 | def extreme_point_to_octagon_mask(extreme_points, h, w): 27 | octagon = get_octagon(extreme_points) 28 | rles = mask_util.frPyObjects(octagon, h, w) 29 | rle = mask_util.merge(rles) 30 | mask = mask_util.decode(rle) 31 | return mask 32 | 33 | 34 | def get_extreme_points(pts): 35 | num_pt = pts.shape[0] 36 | l, t = min(pts[:, 0]), min(pts[:, 1]) 37 | r, b = max(pts[:, 0]), max(pts[:, 1]) 38 | # 3 degrees 39 | thresh = 0.02 40 | w = r - l + 1 41 | h = b - t + 1 42 | 43 | t_idx = np.argmin(pts[:, 1]) 44 | t_idxs = [t_idx] 45 | tmp = (t_idx + 1) % num_pt 46 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 47 | t_idxs.append(tmp) 48 | tmp = (tmp + 1) % num_pt 49 | tmp = (t_idx - 1) % num_pt 50 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 51 | t_idxs.append(tmp) 52 | tmp = (tmp - 1) % num_pt 53 | tt = (max(pts[t_idxs, 0]) + min(pts[t_idxs, 0])) / 2 54 | 55 | b_idx = np.argmax(pts[:, 1]) 56 | b_idxs = [b_idx] 57 | tmp = (b_idx + 1) % num_pt 58 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 59 | b_idxs.append(tmp) 60 | tmp = (tmp + 1) % num_pt 61 | tmp = (b_idx - 1) % num_pt 62 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 63 | b_idxs.append(tmp) 64 | tmp = (tmp - 1) % num_pt 65 | bb = (max(pts[b_idxs, 0]) + min(pts[b_idxs, 0])) / 2 66 | 67 | l_idx = np.argmin(pts[:, 0]) 68 | l_idxs = [l_idx] 69 | tmp = (l_idx + 1) % num_pt 70 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 71 | l_idxs.append(tmp) 72 | tmp = (tmp + 1) % num_pt 73 | tmp = (l_idx - 1) % num_pt 74 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 75 | l_idxs.append(tmp) 76 | tmp = (tmp - 1) % num_pt 77 | ll = (max(pts[l_idxs, 1]) + min(pts[l_idxs, 1])) / 2 78 | 79 | r_idx = np.argmax(pts[:, 0]) 80 | r_idxs = [r_idx] 81 | tmp = (r_idx + 1) % num_pt 82 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 83 | r_idxs.append(tmp) 84 | tmp = (tmp + 1) % num_pt 85 | tmp = (r_idx - 1) % num_pt 86 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 87 | r_idxs.append(tmp) 88 | tmp = (tmp - 1) % num_pt 89 | rr = (max(pts[r_idxs, 1]) + min(pts[r_idxs, 1])) / 2 90 | 91 | return np.array([tt, ll, bb, rr]) 92 | 93 | 94 | def get_aux_extreme_points(pts): 95 | num_pt = pts.shape[0] 96 | 97 | aux_ext_pts = [] 98 | 99 | l, t = min(pts[:, 0]), min(pts[:, 1]) 100 | r, b = max(pts[:, 0]), max(pts[:, 1]) 101 | # 3 degrees 102 | thresh 
= 0.02 103 | band_thresh = 0.02 104 | w = r - l + 1 105 | h = b - t + 1 106 | 107 | t_band = np.where((pts[:, 1] - t) <= band_thresh * h)[0].tolist() 108 | while t_band: 109 | t_idx = t_band[np.argmin(pts[t_band, 1])] 110 | t_idxs = [t_idx] 111 | tmp = (t_idx + 1) % num_pt 112 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 113 | t_idxs.append(tmp) 114 | tmp = (tmp + 1) % num_pt 115 | tmp = (t_idx - 1) % num_pt 116 | while tmp != t_idx and pts[tmp, 1] - pts[t_idx, 1] <= thresh * h: 117 | t_idxs.append(tmp) 118 | tmp = (tmp - 1) % num_pt 119 | tt = (max(pts[t_idxs, 0]) + min(pts[t_idxs, 0])) / 2 120 | aux_ext_pts.append(np.array([tt, t])) 121 | t_band = [item for item in t_band if item not in t_idxs] 122 | 123 | b_band = np.where((b - pts[:, 1]) <= band_thresh * h)[0].tolist() 124 | while b_band: 125 | b_idx = b_band[np.argmax(pts[b_band, 1])] 126 | b_idxs = [b_idx] 127 | tmp = (b_idx + 1) % num_pt 128 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 129 | b_idxs.append(tmp) 130 | tmp = (tmp + 1) % num_pt 131 | tmp = (b_idx - 1) % num_pt 132 | while tmp != b_idx and pts[b_idx, 1] - pts[tmp, 1] <= thresh * h: 133 | b_idxs.append(tmp) 134 | tmp = (tmp - 1) % num_pt 135 | bb = (max(pts[b_idxs, 0]) + min(pts[b_idxs, 0])) / 2 136 | aux_ext_pts.append(np.array([bb, b])) 137 | b_band = [item for item in b_band if item not in b_idxs] 138 | 139 | l_band = np.where((pts[:, 0] - l) <= band_thresh * w)[0].tolist() 140 | while l_band: 141 | l_idx = l_band[np.argmin(pts[l_band, 0])] 142 | l_idxs = [l_idx] 143 | tmp = (l_idx + 1) % num_pt 144 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 145 | l_idxs.append(tmp) 146 | tmp = (tmp + 1) % num_pt 147 | tmp = (l_idx - 1) % num_pt 148 | while tmp != l_idx and pts[tmp, 0] - pts[l_idx, 0] <= thresh * w: 149 | l_idxs.append(tmp) 150 | tmp = (tmp - 1) % num_pt 151 | ll = (max(pts[l_idxs, 1]) + min(pts[l_idxs, 1])) / 2 152 | aux_ext_pts.append(np.array([l, ll])) 153 | l_band = [item for item in l_band if item not in l_idxs] 154 | 155 | r_band = np.where((r - pts[:, 0]) <= band_thresh * w)[0].tolist() 156 | while r_band: 157 | r_idx = r_band[np.argmax(pts[r_band, 0])] 158 | r_idxs = [r_idx] 159 | tmp = (r_idx + 1) % num_pt 160 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 161 | r_idxs.append(tmp) 162 | tmp = (tmp + 1) % num_pt 163 | tmp = (r_idx - 1) % num_pt 164 | while tmp != r_idx and pts[r_idx, 0] - pts[tmp, 0] <= thresh * w: 165 | r_idxs.append(tmp) 166 | tmp = (tmp - 1) % num_pt 167 | rr = (max(pts[r_idxs, 1]) + min(pts[r_idxs, 1])) / 2 168 | aux_ext_pts.append(np.array([r, rr])) 169 | r_band = [item for item in r_band if item not in r_idxs] 170 | 171 | # assert len(aux_ext_pts) >= 4 172 | pt0 = aux_ext_pts[0] 173 | 174 | # collecting 175 | aux_ext_pts = np.stack(aux_ext_pts, axis=0) 176 | 177 | # ordering 178 | shift_idx = np.argmin(np.power(pts - pt0, 2).sum(axis=1)) 179 | re_ordered_pts = np.roll(pts, -shift_idx, axis=0) 180 | 181 | # indexing 182 | ext_idxs = np.argmin(np.sum( 183 | (aux_ext_pts[:, np.newaxis, :] - re_ordered_pts[np.newaxis, ...]) ** 2, axis=2), 184 | axis=1) 185 | ext_idxs[0] = 0 186 | 187 | ext_idxs = np.sort(np.unique(ext_idxs)) 188 | 189 | return re_ordered_pts, ext_idxs 190 | 191 | def vis_training_targets(cfg, fcose_outputs, image_list, idx=0): 192 | import matplotlib.pyplot as plt 193 | import matplotlib.patches as patches 194 | import numpy as np 195 | 196 | colors = np.array([[1, 1, 198], 197 | [51, 1, 148], 198 | [101, 1, 98], 199 | [151, 1, 48], 
200 | [201, 1, 8]]) / 255. 201 | 202 | num_loc_list = [len(loc) for loc in fcose_outputs.locations] 203 | fcose_outputs.num_loc_list = num_loc_list 204 | 205 | # compute locations to size ranges 206 | loc_to_size_range = [] 207 | for l, loc_per_level in enumerate(fcose_outputs.locations): 208 | loc_to_size_range_per_level = loc_per_level.new_tensor(fcose_outputs.sizes_of_interest[l]) 209 | loc_to_size_range.append( 210 | loc_to_size_range_per_level[None].expand(num_loc_list[l], -1) 211 | ) 212 | 213 | # (Sigma_{levels_points}, 2) 214 | loc_to_size_range = torch.cat(loc_to_size_range, dim=0) 215 | locations = torch.cat(fcose_outputs.locations, dim=0) 216 | 217 | training_targets = fcose_outputs.compute_targets_for_locations( 218 | locations, fcose_outputs.gt_instances, loc_to_size_range 219 | ) 220 | 221 | training_target = {k: v[idx] for k, v in training_targets.items()} 222 | 223 | fig, ax = plt.subplots(1, figsize=(20, 10)) 224 | fig.tight_layout() 225 | 226 | labels = training_target['labels'] 227 | reg_targets = training_target['reg_targets'] 228 | ext_targets = training_target['ext_targets'] 229 | 230 | idxOfloc_of_interest = torch.where(labels != 20)[0] 231 | 232 | global locxys, reg_targets_oi, ext_targets_oi, detections 233 | 234 | locxys = locations[idxOfloc_of_interest] 235 | 236 | reg_targets_oi = reg_targets[idxOfloc_of_interest] 237 | ext_targets_oi = ext_targets[idxOfloc_of_interest] 238 | 239 | detections = torch.stack([ 240 | locxys[:, 0] - reg_targets_oi[:, 0], 241 | locxys[:, 1] - reg_targets_oi[:, 1], 242 | locxys[:, 0] + reg_targets_oi[:, 2], 243 | locxys[:, 1] + reg_targets_oi[:, 3], 244 | ], dim=1) 245 | 246 | global tmp, ext_points 247 | 248 | ext_points = ExtremePoints.from_boxes(Boxes(detections), 249 | ext_targets_oi, 250 | locxys).tensor.cpu().numpy() 251 | 252 | tmp = ext_points 253 | 254 | im = image_list.tensor[idx] 255 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(im.device).view(-1, 1, 1) 256 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(im.device).view(-1, 1, 1) 257 | im_norm = ((im * pixel_std) + pixel_mean).cpu().numpy().transpose(1, 2, 0).astype(np.uint8) 258 | 259 | ax.imshow(im_norm) 260 | locxys_np = locxys.cpu().numpy() 261 | reg_targets_oi_np = reg_targets_oi.cpu().numpy() 262 | ext_targets_oi_np = ext_targets_oi.cpu().numpy() 263 | detections_np = detections.cpu().numpy() 264 | 265 | for i in range(len(locxys_np)): 266 | ax.scatter(locxys_np[i, 0], locxys_np[i, 1], color=colors[i % len(colors)].tolist(), marker='*') 267 | x1, y1, x2, y2 = detections_np[i, :] 268 | 269 | rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor=colors[i % len(colors)].tolist(), 270 | facecolor='none', fill=False) 271 | ax.add_patch(rect) 272 | 273 | ax.scatter(ext_points[i][:, 0], ext_points[i][:, 1], color=colors[i % len(colors)].tolist(), marker='+') 274 | 275 | plt.show() 276 | -------------------------------------------------------------------------------- /core/modeling/one_stage_detector.py: -------------------------------------------------------------------------------- 1 | 2 | # from detectron2.modeling import ProposalNetwork 3 | import torch 4 | import torch.nn as nn 5 | 6 | from detectron2.modeling.backbone import build_backbone 7 | from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY 8 | from detectron2.modeling.postprocessing import detector_postprocess 9 | from detectron2.modeling.proposal_generator import build_proposal_generator 10 | from detectron2.structures import ImageList 11 | from 
detectron2.utils.logger import log_first_n 12 | 13 | import logging 14 | from core.utils import timer 15 | 16 | 17 | class ProposalNetwork(nn.Module): 18 | def __init__(self, cfg): 19 | super().__init__() 20 | self.device = torch.device(cfg.MODEL.DEVICE) 21 | 22 | self.backbone = build_backbone(cfg) 23 | self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) 24 | 25 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(-1, 1, 1) 26 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(-1, 1, 1) 27 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 28 | self.to(self.device) 29 | 30 | def forward(self, batched_inputs): 31 | """ 32 | Args: 33 | Same as in :class:`GeneralizedRCNN.forward` 34 | 35 | Returns: 36 | list[dict]: 37 | Each dict is the output for one input image. 38 | The dict contains one key "proposals" whose value is a 39 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 40 | """ 41 | with timer.env("pre_process"): 42 | images = [x["image"].to(self.device) for x in batched_inputs] 43 | images = [self.normalizer(x) for x in images] 44 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 45 | 46 | with timer.env('backbone'): 47 | features = self.backbone(images.tensor) 48 | 49 | if "instances" in batched_inputs[0]: 50 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 51 | elif "targets" in batched_inputs[0]: 52 | log_first_n( 53 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 54 | ) 55 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 56 | else: 57 | gt_instances = None 58 | 59 | with timer.env('fcos'): 60 | proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) 61 | # In training, the proposals are not useful at all but we generate them anyway. 62 | # This makes RPN-only models about 5% slower. 
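        # In training only the loss dict is returned; at inference the proposals
        # are rescaled back to the original image size by detector_postprocess in
        # the post_process block below.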
63 | if self.training: 64 | return proposal_losses 65 | 66 | processed_results = [] 67 | with timer.env('post_process'): 68 | for results_per_image, input_per_image, image_size in zip( 69 | proposals, batched_inputs, images.image_sizes 70 | ): 71 | height = input_per_image.get("height", image_size[0]) 72 | width = input_per_image.get("width", image_size[1]) 73 | r = detector_postprocess(results_per_image, height, width) 74 | processed_results.append({"proposals": r}) 75 | return processed_results 76 | 77 | 78 | @META_ARCH_REGISTRY.register() 79 | class OneStageDetector(ProposalNetwork): 80 | def forward(self, batched_inputs): 81 | if self.training: 82 | return super().forward(batched_inputs) 83 | processed_results = super().forward(batched_inputs) 84 | processed_results = [{"instances": r["proposals"]} for r in processed_results] 85 | return processed_results 86 | -------------------------------------------------------------------------------- /core/modeling/poolers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from detectron2.layers import cat 4 | 5 | from detectron2.modeling.poolers import ( 6 | ROIPooler, convert_boxes_to_pooler_format, assign_boxes_to_levels 7 | ) 8 | 9 | 10 | def _box_max_size(boxes): 11 | box = boxes.tensor 12 | max_size = torch.max(box[:, 2] - box[:, 0], box[:, 3] - box[:, 1]) 13 | return max_size 14 | 15 | 16 | def assign_boxes_to_levels_by_length( 17 | box_lists, min_level, max_level, canonical_box_size, canonical_level): 18 | """ 19 | Map each box in `box_lists` to a feature map level index and return the assignment 20 | vector. 21 | 22 | Args: 23 | box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, 24 | where N is the number of images in the batch. 25 | min_level (int): Smallest feature map level index. The input is considered index 0, 26 | the output of stage 1 is index 1, and so. 27 | max_level (int): Largest feature map level index. 28 | canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). 29 | canonical_level (int): The feature map level index on which a canonically-sized box 30 | should be placed. 31 | 32 | Returns: 33 | A tensor of length M, where M is the total number of boxes aggregated over all 34 | N batch images. The memory layout corresponds to the concatenation of boxes 35 | from all images. Each element is the feature map index, as an offset from 36 | `self.min_level`, for the corresponding box (so value i means the box is at 37 | `self.min_level + i`). 38 | """ 39 | eps = sys.float_info.epsilon 40 | box_sizes = cat([_box_max_size(boxes) for boxes in box_lists]) 41 | # Eqn.(1) in FPN paper 42 | level_assignments = torch.floor( 43 | canonical_level + torch.log2(box_sizes / canonical_box_size + eps) 44 | ) 45 | level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) 46 | return level_assignments.to(torch.int64) - min_level 47 | 48 | 49 | class TopPooler(ROIPooler): 50 | """ 51 | ROIPooler with option to assign level by max length. Used by top modules. 
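    With assign_crit="length", boxes are assigned to FPN levels using their
    longest side max(w, h) instead of sqrt(area), so an elongated box lands on
    the same or a coarser level than the default area-based criterion would pick.

    Example (illustrative argument values)::

        pooler = TopPooler(output_size=14,
                           scales=(1/8, 1/16, 1/32, 1/64, 1/128),
                           sampling_ratio=2, pooler_type="ROIAlign",
                           assign_crit="length")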
52 | """ 53 | def __init__(self, 54 | output_size, 55 | scales, 56 | sampling_ratio, 57 | pooler_type, 58 | canonical_box_size=224, 59 | canonical_level=4, 60 | assign_crit="area",): 61 | super().__init__(output_size, scales, sampling_ratio, pooler_type, 62 | canonical_box_size=canonical_box_size, 63 | canonical_level=canonical_level) 64 | self.assign_crit = assign_crit 65 | 66 | def forward(self, x, box_lists): 67 | """ 68 | Args: 69 | x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those 70 | used to construct this module. 71 | box_lists (list[Boxes] | list[RotatedBoxes]): 72 | A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. 73 | The box coordinates are defined on the original image and 74 | will be scaled by the `scales` argument of :class:`ROIPooler`. 75 | 76 | Returns: 77 | Tensor: 78 | A tensor of shape (M, C, output_size, output_size) where M is the total number of 79 | boxes aggregated over all N batch images and C is the number of channels in `x`. 80 | """ 81 | num_level_assignments = len(self.level_poolers) 82 | 83 | assert isinstance(x, list) and isinstance( 84 | box_lists, list 85 | ), "Arguments to pooler must be lists" 86 | assert ( 87 | len(x) == num_level_assignments 88 | ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( 89 | num_level_assignments, len(x) 90 | ) 91 | 92 | assert len(box_lists) == x[0].size( 93 | 0 94 | ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( 95 | x[0].size(0), len(box_lists) 96 | ) 97 | 98 | pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) 99 | 100 | if num_level_assignments == 1: 101 | return self.level_poolers[0](x[0], pooler_fmt_boxes) 102 | 103 | if self.assign_crit == "length": 104 | assign_method = assign_boxes_to_levels_by_length 105 | else: 106 | assign_method = assign_boxes_to_levels 107 | 108 | level_assignments = assign_method( 109 | box_lists, self.min_level, self.max_level, 110 | self.canonical_box_size, self.canonical_level) 111 | 112 | num_boxes = len(pooler_fmt_boxes) 113 | num_channels = x[0].shape[1] 114 | output_size = self.output_size[0] 115 | 116 | dtype, device = x[0].dtype, x[0].device 117 | output = torch.zeros( 118 | (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device 119 | ) 120 | 121 | for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): 122 | inds = torch.nonzero(level_assignments == level).squeeze(1) 123 | pooler_fmt_boxes_level = pooler_fmt_boxes[inds] 124 | output[inds] = pooler(x_level, pooler_fmt_boxes_level) 125 | 126 | return output 127 | -------------------------------------------------------------------------------- /core/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointset import PolygonPoints, ExtremePoints -------------------------------------------------------------------------------- /core/structures/pointset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Tuple 2 | import torch 3 | from detectron2.structures import Boxes 4 | 5 | from detectron2.layers import cat 6 | 7 | 8 | class ExtremePoints: 9 | def __init__(self, tensor: torch.Tensor): 10 | """ 11 | :param tensor (Tensor[float]): a Nx4x2 tensor. 
Last dim is (x, y); second last follows [tt, ll, bb, rr]: 12 | """ 13 | device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") 14 | tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) 15 | if tensor.numel() == 0: 16 | tensor = torch.zeros(0, 4, 2, dtype=torch.float32, device=device) 17 | assert tensor.dim() == 3 and tensor.size(-1) == 2, tensor.size() 18 | 19 | self.tensor = tensor 20 | self.spanned_nodes = [] 21 | self.box = None 22 | 23 | def clone(self) -> "ExtremePoints": 24 | """ 25 | Clone the Boxes. 26 | 27 | Returns: 28 | Boxes 29 | """ 30 | return ExtremePoints(self.tensor.clone()) 31 | 32 | def to(self, device: str) -> "ExtremePoints": 33 | return ExtremePoints(self.tensor.to(device)) 34 | 35 | def get_boxes(self) -> Boxes: 36 | bboxes = torch.stack([ 37 | self.tensor[:, 1, 0], 38 | self.tensor[:, 0, 1], 39 | self.tensor[:, 3, 0], 40 | self.tensor[:, 2, 1], 41 | ], dim=1) 42 | return Boxes(bboxes) 43 | 44 | def compute_on_ext_centered_masks(self, N, edge_map, radius, mode, image_shape): 45 | self.spread(N, radius, mode, image_shape) 46 | m = torch.zeros((N,) + image_shape, device=self.device) 47 | num_nodes = [] 48 | for i, node in enumerate(self.spanned_nodes): 49 | node = node.long() 50 | m[i, node[:, 1], node[:, 0]] = 1 # TODO: trigger CUDA assert, ...? 51 | num_nodes.append(node.size(0)) 52 | 53 | edge_map_m = edge_map.unsqueeze(0) * m 54 | instance_score = edge_map_m.sum(dim=1).sum(dim=1) 55 | num_nodes = torch.tensor(num_nodes, device=self.device) 56 | return instance_score / num_nodes 57 | 58 | def compute_by_grid_sample(self, N, edge_map, radius, mode, image_shape): 59 | self.spread(N, radius, mode, image_shape) 60 | mean_scores = [] 61 | for i, node in enumerate(self.spanned_nodes): 62 | sampled_nodes = torch.nn.functional.grid_sample(edge_map.unsqueeze(0).unsqueeze(0), 63 | node.unsqueeze(0).unsqueeze(0)) 64 | mean_scores.append(sampled_nodes.mean()) 65 | return torch.stack(mean_scores) 66 | 67 | def spread(self, N, radius, mode, image_shape): 68 | """ 69 | Spreads the extreme points for robustness. 
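        Each of the four extreme points is expanded into a small axis-aligned
        window whose half-width / half-height are `radius` fractions of the box
        width / height, and the spanned pixel coordinates are clamped to the
        image bounds.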
70 | :param N: (int) number of instances 71 | :param radius: (int) circle radius 72 | :param mode: (str) 'linear' or 'gaussian', # TODO now only support linear 73 | """ 74 | if len(self.spanned_nodes) == N: 75 | # avoid re-compute 76 | return 77 | assert mode == 'linear', 'unsupported mode' 78 | h, w = image_shape 79 | box = self.get_boxes().tensor 80 | whs = torch.stack([(box[:, 2] - box[:, 0]), (box[:, 3] - box[:, 1])], dim=1) 81 | num_pix_r = (whs * radius).floor() 82 | ext_pts = self.tensor 83 | for i in range(N): 84 | per_num_pix_r = num_pix_r[i] 85 | per_ext_pts = ext_pts[i] 86 | square_area = int((per_num_pix_r[0] * 2 + 1) * (per_num_pix_r[1] * 2 + 1)) 87 | per_spanned_pts = per_ext_pts.repeat_interleave(int(square_area), dim=0) 88 | span_xs = torch.arange(-int(per_num_pix_r[0]), int(per_num_pix_r[0]) + 1, 89 | step=1, dtype=torch.float32, device=self.device) 90 | span_ys = torch.arange(-int(per_num_pix_r[1]), int(per_num_pix_r[1]) + 1, 91 | step=1, dtype=torch.float32, device=self.device) 92 | span_y, span_x = torch.meshgrid(span_ys, span_xs) 93 | span_xy = torch.stack([span_x.reshape(-1), span_y.reshape(-1)], dim=1) 94 | # (4 * square_area, 2) 95 | per_spanned_nodes = (per_spanned_pts + span_xy.repeat(4, 1)).floor() 96 | per_spanned_nodes[:, 0].clamp_(min=0, max=w - 1) 97 | per_spanned_nodes[:, 1].clamp_(min=0, max=h - 1) 98 | self.spanned_nodes.append(per_spanned_nodes) 99 | 100 | def align(self, pooler_resolution): 101 | box = self.get_boxes().tensor 102 | w = box[:, 2] - box[:, 0] + 1 103 | h = box[:, 3] - box[:, 1] + 1 104 | de_location = self.tensor - box[:, None, :2] 105 | de_location[:, :, 0] /= w[:, None] / pooler_resolution # x 106 | de_location[:, :, 1] /= h[:, None] / pooler_resolution # y 107 | return de_location.int() 108 | 109 | @staticmethod 110 | def from_boxes(boxes: Boxes, offsets: torch.Tensor, locations: torch.Tensor) -> "ExtremePoints": 111 | """ 112 | Generate the ExtremePoints from a box and offset along each edge, with locations bing origins; 113 | the outputs will correspond to the input boxes 114 | :param boxes (Boxes): from Nx4 tensor matrix. 115 | :param offsets (torch.Tensor): float matrix of Nx4. 
116 | :param locations (torch.Tensor): float matrix of Nx2, indicating corresponding locations 117 | :return: ExtremePoints 118 | """ 119 | x1 = boxes.tensor[:, 0] # ll_x 120 | y1 = boxes.tensor[:, 1] # tt_y 121 | x2 = boxes.tensor[:, 2] # rr_x 122 | y2 = boxes.tensor[:, 3] # bb_y 123 | w = x2 - x1 124 | h = y2 - y1 125 | tt_x = (locations[:, 0] + w * offsets[:, 0]) 126 | ll_y = (locations[:, 1] + h * offsets[:, 1]) 127 | bb_x = (locations[:, 0] + w * offsets[:, 2]) 128 | rr_y = (locations[:, 1] + h * offsets[:, 3]) 129 | 130 | return ExtremePoints(torch.stack([tt_x, y1, x1, ll_y, bb_x, y2, x2, rr_y], dim=1).view(-1, 4, 2)) 131 | 132 | def fit_to_box(self): 133 | box = self.get_boxes().tensor 134 | n = box.size(0) 135 | lower_bound = box.view(-1, 2, 2)[:, :1, :] 136 | upper_bound = box.view(-1, 2, 2)[:, 1:, :] 137 | beyond_lower = self.tensor < lower_bound 138 | beyond_upper = self.tensor > upper_bound 139 | if beyond_lower.any(): 140 | self.tensor[beyond_lower] = lower_bound.expand(n, 4, 2)[beyond_lower] 141 | if beyond_upper.any(): 142 | self.tensor[beyond_upper] = upper_bound.expand(n, 4, 2)[beyond_upper] 143 | 144 | def scale(self, scale_x: float, scale_y: float) -> None: 145 | self.tensor[:, :, 0] *= scale_x 146 | self.tensor[:, :, 1] *= scale_y 147 | 148 | def get_octagons(self, frac=8.): 149 | # counter clock wise 150 | ext_pts = self.tensor # N x 4 x 2 151 | N = len(ext_pts) 152 | if N == 0: 153 | return ext_pts.new_empty(0, 16) 154 | w, h = ext_pts[:, 3, 0] - ext_pts[:, 1, 0], ext_pts[:, 2, 1] - ext_pts[:, 0, 1] 155 | t, l, b, r = ext_pts[:, 0, 1], ext_pts[:, 1, 0], ext_pts[:, 2, 1], ext_pts[:, 3, 0] 156 | x1, y1 = torch.min(ext_pts[:, 0, 0] + w / frac, r), ext_pts[:, 0, 1] 157 | x2, y2 = torch.max(ext_pts[:, 0, 0] - w / frac, l), ext_pts[:, 0, 1] 158 | x3, y3 = ext_pts[:, 1, 0], torch.max(ext_pts[:, 1, 1] - h / frac, t) 159 | x4, y4 = ext_pts[:, 1, 0], torch.min(ext_pts[:, 1, 1] + h / frac, b) 160 | x5, y5 = torch.max(ext_pts[:, 2, 0] - w / frac, l), ext_pts[:, 2, 1] 161 | x6, y6 = torch.min(ext_pts[:, 2, 0] + w / frac, r), ext_pts[:, 2, 1] 162 | x7, y7 = ext_pts[:, 3, 0], torch.min(ext_pts[:, 3, 1] + h / frac, b) 163 | x8, y8 = ext_pts[:, 3, 0], torch.max(ext_pts[:, 3, 1] - h / frac, t) 164 | octagons = torch.stack([x1, y1, x2, y2, x3, y3, x4, y4, 165 | x5, y5, x6, y6, x7, y7, x8, y8], dim=1) 166 | return octagons 167 | 168 | def area(self) -> torch.Tensor: 169 | return self.get_boxes().area() 170 | 171 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "ExtremePoints": 172 | """ 173 | Returns: 174 | ExtremePoints: Create a new :class:`ExtremePoints` by indexing. 175 | 176 | The following usage are allowed: 177 | 178 | 1. `new_exts = exts[3]`: return a `ExtremePoints` which contains only one box. 179 | 2. `new_exts = exts[2:10]`: return a slice of extreme points. 180 | 3. `new_exts = exts[vector]`, where vector is a torch.BoolTensor 181 | with `length = len(exts)`. Nonzero elements in the vector will be selected. 182 | 183 | Note that the returned ExtremePoints might share storage with this ExtremePoints, 184 | subject to Pytorch's indexing semantics. 
185 | """ 186 | if isinstance(item, int): 187 | return ExtremePoints(self.tensor[item].view(1, -1)) 188 | b = self.tensor[item] 189 | assert b.dim() == 3, "Indexing on ExtremePoints with {} failed to return a matrix!".format(item) 190 | return ExtremePoints(b) 191 | 192 | def __len__(self) -> int: 193 | return self.tensor.shape[0] 194 | 195 | def __repr__(self) -> str: 196 | return "ExtPts(" + str(self.tensor) + ")" 197 | 198 | @staticmethod 199 | def cat(pts_list: List["ExtremePoints"]) -> "ExtremePoints": 200 | """ 201 | Concatenates a list of ExtremePoints into a single ExtremePoints 202 | 203 | Arguments: 204 | pts_list (list[ExtremePoints]) 205 | 206 | Returns: 207 | pts: the concatenated ExtremePoints 208 | """ 209 | assert isinstance(pts_list, (list, tuple)) 210 | assert len(pts_list) > 0 211 | assert all(isinstance(pts, ExtremePoints) for pts in pts_list) 212 | 213 | cat_pts = type(pts_list[0])(cat([p.tensor for p in pts_list], dim=0)) 214 | return cat_pts 215 | 216 | @property 217 | def device(self) -> torch.device: 218 | return self.tensor.device 219 | 220 | 221 | class PolygonPoints: 222 | BoxSizeType = Union[List[int], Tuple[int, int]] 223 | 224 | def __init__(self, tensor: torch.Tensor): 225 | """ 226 | :param tensor (Tensor[float]): a Nxkx2 tensor. Last dim is (x, y); 227 | """ 228 | device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") 229 | tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) 230 | if tensor.numel() == 0: 231 | tensor = torch.zeros(0, 128, 2, dtype=torch.float32, device=device) 232 | assert tensor.dim() == 3 and tensor.size(-1) == 2, tensor.size() 233 | 234 | self.tensor = tensor 235 | 236 | def clone(self) -> "PolygonPoints": 237 | 238 | return PolygonPoints(self.tensor.clone()) 239 | 240 | def to(self, device: str) -> "PolygonPoints": 241 | return PolygonPoints(self.tensor.to(device)) 242 | 243 | def scale(self, scale_x: float, scale_y: float) -> None: 244 | self.tensor[:, :, 0] *= scale_x 245 | self.tensor[:, :, 1] *= scale_y 246 | 247 | def clip(self, box_size: BoxSizeType) -> None: 248 | assert torch.isfinite(self.tensor).all(), "Polygon tensor contains infinite or NaN!" 249 | h, w = box_size 250 | self.tensor[:, :, 0].clamp_(min=0, max=w) 251 | self.tensor[:, :, 1].clamp_(min=0, max=h) 252 | 253 | def flatten(self): 254 | n = self.tensor.size(0) 255 | if n == 0: 256 | return self.tensor 257 | return self.tensor.reshape(n, -1) 258 | 259 | def get_box(self): 260 | return torch.cat([self.tensor.min(1)[0], self.tensor.max(1)[0]], dim=1) 261 | 262 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "PolygonPoints": 263 | """ 264 | Returns: 265 | ExtremePoints: Create a new :class:`ExtremePoints` by indexing. 266 | 267 | The following usage are allowed: 268 | 269 | 1. `new_exts = exts[3]`: return a `ExtremePoints` which contains only one box. 270 | 2. `new_exts = exts[2:10]`: return a slice of extreme points. 271 | 3. `new_exts = exts[vector]`, where vector is a torch.BoolTensor 272 | with `length = len(exts)`. Nonzero elements in the vector will be selected. 273 | 274 | Note that the returned ExtremePoints might share storage with this ExtremePoints, 275 | subject to Pytorch's indexing semantics. 
276 | """ 277 | if isinstance(item, int): 278 | return PolygonPoints(self.tensor[item].view(1, -1)) 279 | b = self.tensor[item] 280 | assert b.dim() == 3, "Indexing on PolygonPoints with {} failed to return a matrix!".format(item) 281 | return PolygonPoints(b) 282 | 283 | def __len__(self) -> int: 284 | return self.tensor.shape[0] 285 | 286 | def __repr__(self) -> str: 287 | return "PolyPts(" + str(self.tensor) + ")" 288 | 289 | @staticmethod 290 | def cat(pts_list: List["PolygonPoints"]) -> "PolygonPoints": 291 | """ 292 | Concatenates a list of ExtremePoints into a single ExtremePoints 293 | 294 | Arguments: 295 | pts_list (list[PolygonPoints]) 296 | 297 | Returns: 298 | pts: the concatenated PolygonPoints 299 | """ 300 | assert isinstance(pts_list, (list, tuple)) 301 | assert len(pts_list) > 0 302 | assert all(isinstance(pts, PolygonPoints) for pts in pts_list) 303 | 304 | cat_pts = type(pts_list[0])(cat([p.tensor for p in pts_list], dim=0)) 305 | return cat_pts 306 | 307 | @property 308 | def device(self) -> torch.device: 309 | return self.tensor.device 310 | -------------------------------------------------------------------------------- /core/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/core/utils/__init__.py -------------------------------------------------------------------------------- /core/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is borrowed from Adet (https://github.com/aim-uofa/AdelaiDet/blob/master/adet/utils/comm.py) 3 | """ 4 | import torch.distributed as dist 5 | from detectron2.utils.comm import get_world_size 6 | 7 | 8 | def reduce_sum(tensor): 9 | world_size = get_world_size() 10 | if world_size < 2: 11 | return tensor 12 | tensor = tensor.clone() 13 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 14 | return tensor 15 | -------------------------------------------------------------------------------- /core/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | 4 | _total_times = defaultdict(lambda: 0) 5 | _start_times = defaultdict(lambda: -1) 6 | _disabled_names = set() 7 | _timer_stack = [] 8 | _running_timer = None 9 | _disable_all = False 10 | 11 | 12 | def disable_all(): 13 | global _disable_all 14 | _disable_all = True 15 | 16 | 17 | def enable_all(): 18 | global _disable_all 19 | _disable_all = False 20 | 21 | 22 | def disable(fn_name): 23 | """ Disables the given function name fom being considered for the average or outputted in print_stats. """ 24 | _disabled_names.add(fn_name) 25 | 26 | 27 | def enable(fn_name): 28 | """ Enables function names disabled by disable. """ 29 | _disabled_names.remove(fn_name) 30 | 31 | 32 | def reset(): 33 | """ Resets the current timer. Call this at the start of an iteration. """ 34 | global _running_timer 35 | _total_times.clear() 36 | _start_times.clear() 37 | _timer_stack.clear() 38 | _running_timer = None 39 | 40 | 41 | def start(fn_name, use_stack=True): 42 | """ 43 | Start timing the specific function. 44 | Note: If use_stack is True, only one timer can be active at a time. 45 | Once you stop this timer, the previous one will start again. 
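    With the stack enabled, each named section therefore accumulates only the
    time during which it is the innermost (active) timer, i.e. exclusive rather
    than inclusive time.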
46 | """ 47 | global _running_timer, _disable_all 48 | 49 | if _disable_all: 50 | return 51 | 52 | if use_stack: 53 | if _running_timer is not None: 54 | stop(_running_timer, use_stack=False) 55 | _timer_stack.append(_running_timer) 56 | start(fn_name, use_stack=False) 57 | _running_timer = fn_name 58 | else: 59 | _start_times[fn_name] = time.perf_counter() 60 | 61 | 62 | def stop(fn_name=None, use_stack=True): 63 | """ 64 | If use_stack is True, this will stop the currently running timer and restore 65 | the previous timer on the stack if that exists. Note if use_stack is True, 66 | fn_name will be ignored. 67 | 68 | If use_stack is False, this will just stop timing the timer fn_name. 69 | """ 70 | global _running_timer, _disable_all 71 | 72 | if _disable_all: 73 | return 74 | 75 | if use_stack: 76 | if _running_timer is not None: 77 | stop(_running_timer, use_stack=False) 78 | if len(_timer_stack) > 0: 79 | _running_timer = _timer_stack.pop() 80 | start(_running_timer, use_stack=False) 81 | else: 82 | _running_timer = None 83 | else: 84 | print('Warning: timer stopped with no timer running!') 85 | else: 86 | if _start_times[fn_name] > -1: 87 | _total_times[fn_name] += time.perf_counter() - _start_times[fn_name] 88 | else: 89 | print('Warning: timer for %s stopped before starting!' % fn_name) 90 | 91 | 92 | def print_stats(divider=5000): 93 | """ Prints the current timing information into a table. """ 94 | print() 95 | 96 | all_fn_names = [k for k in _total_times.keys() if k not in _disabled_names] 97 | 98 | max_name_width = max([len(k) for k in all_fn_names] + [4]) 99 | if max_name_width % 2 == 1: max_name_width += 1 100 | format_str = ' {:>%d} | {:>10.4f} ' % max_name_width 101 | 102 | header = (' {:^%d} | {:^10} ' % max_name_width).format('Name', 'Time (ms)') 103 | print(header) 104 | 105 | sep_idx = header.find('|') 106 | sep_text = ('-' * sep_idx) + '+' + '-' * (len(header) - sep_idx - 1) 107 | print(sep_text) 108 | 109 | for name in all_fn_names: 110 | print(format_str.format(name, _total_times[name] * 1000 / divider)) # val2017, 5k ims. 111 | 112 | print(sep_text) 113 | print(format_str.format('Total', total_time() * 1000 / divider)) # val2017, 5k ims. 114 | print() 115 | 116 | 117 | def total_time(): 118 | """ Returns the total amount accumulated across all functions in seconds. """ 119 | return sum([elapsed_time for name, elapsed_time in _total_times.items() if name not in _disabled_names]) 120 | 121 | 122 | class env(): 123 | """ 124 | A class that lets you go: 125 | with timer.env(fn_name): 126 | # (...) 127 | That automatically manages a timer start and stop for you. 
128 | """ 129 | 130 | def __init__(self, fn_name, use_stack=True): 131 | self.fn_name = fn_name 132 | self.use_stack = use_stack 133 | 134 | def __enter__(self): 135 | start(self.fn_name, use_stack=self.use_stack) 136 | 137 | def __exit__(self, e, ev, t): 138 | stop(self.fn_name, use_stack=self.use_stack) 139 | 140 | -------------------------------------------------------------------------------- /core/utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from detectron2.utils.visualizer import ( 3 | Visualizer, ColorMode, GenericMask, 4 | _create_text_labels, _SMALL_OBJECT_AREA_THRESH 5 | ) 6 | import pycocotools.mask as mask_util 7 | from detectron2.utils.colormap import random_color 8 | 9 | from core.structures.pointset import ExtremePoints 10 | 11 | 12 | def get_polygon_rles(polygons, image_shape): 13 | # input: N x (p*2) 14 | polygons = polygons.cpu().numpy() 15 | h, w = image_shape 16 | rles = [ 17 | mask_util.merge(mask_util.frPyObjects([p.tolist()], h, w)) 18 | for p in polygons 19 | ] 20 | return rles 21 | 22 | 23 | class ExVisualizer(Visualizer): 24 | def __init__(self, img_rgb, metadata, scale=1.0, instance_mode=ColorMode.IMAGE): 25 | super().__init__(img_rgb, metadata, scale=scale, instance_mode=instance_mode) 26 | 27 | def draw_instance_predictions(self, predictions): 28 | """ 29 | :param predictions: 30 | :return: Besides the functions of its mother class method, this method deals with extreme points. 31 | """ 32 | ext_points = predictions.ext_points if predictions.has("ext_points") else None 33 | pred_polys = predictions.pred_polys if predictions.has("pred_polys") else None 34 | if False: 35 | return super().draw_instance_predictions(predictions) 36 | else: 37 | boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None 38 | scores = predictions.scores if predictions.has("scores") else None 39 | classes = predictions.pred_classes if predictions.has("pred_classes") else None 40 | labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) 41 | keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None 42 | 43 | if predictions.has("pred_masks"): 44 | masks = np.asarray(predictions.pred_masks) 45 | masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] 46 | else: 47 | if predictions.has("pred_polys"): 48 | output_height = predictions.image_size[0] 49 | output_width = predictions.image_size[1] 50 | pred_masks = get_polygon_rles(predictions.pred_polys.flatten(), 51 | (output_height, output_width)) 52 | 53 | masks = np.asarray(pred_masks) 54 | masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] 55 | else: 56 | masks = None 57 | 58 | path = predictions.pred_path.numpy() if predictions.has("pred_path") else None 59 | 60 | if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): 61 | colors = [ 62 | self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes 63 | ] 64 | alpha = 0.8 65 | else: 66 | colors = None 67 | alpha = 0.5 68 | 69 | if self._instance_mode == ColorMode.IMAGE_BW: 70 | assert predictions.has("pred_masks"), "ColorMode.IMAGE_BW requires segmentations" 71 | self.output.img = self._create_grayscale_image( 72 | (predictions.pred_masks.any(dim=0) > 0).numpy() 73 | ) 74 | alpha = 0.3 75 | 76 | self.overlay_instances( 77 | masks=masks, 78 | boxes=boxes, 79 | labels=labels, 80 | ext_points=ext_points, 81 | path=path, 82 | 
keypoints=keypoints, 83 | assigned_colors=colors, 84 | alpha=alpha, 85 | ) 86 | return self.output 87 | 88 | def draw_extreme_pts(self, pts_coord, circle_color, radius=2): 89 | for pt in pts_coord: 90 | x, y = pt 91 | self.draw_circle([x, y], color=circle_color, radius=radius) 92 | return self.output 93 | 94 | def draw_snake_path(self, path, color, alpha=0.7): 95 | # path (4, num_points, 2) 96 | for i, poly in enumerate(path): 97 | if i > 0: 98 | prev_poly = path[i - 1] 99 | offsets = poly - prev_poly 100 | for j in range(len(offsets)): 101 | self.output.ax.arrow(prev_poly[j, 0], 102 | prev_poly[j, 1], 103 | offsets[j, 0], 104 | offsets[j, 1], 105 | linestyle='-', 106 | linewidth=1, 107 | alpha=alpha) 108 | self.output.ax.plot(poly[0:, 0], 109 | poly[0:, 1], 110 | color=color, 111 | marker='1', 112 | alpha=alpha) 113 | return self.output 114 | 115 | def _convert_ext_points(self, ext_points): 116 | if isinstance(ext_points, ExtremePoints): 117 | return ext_points.tensor.numpy() 118 | else: 119 | return np.asarray(ext_points) 120 | 121 | def overlay_instances( 122 | self, 123 | *, 124 | boxes=None, 125 | labels=None, 126 | masks=None, 127 | ext_points=None, 128 | path=None, 129 | keypoints=None, 130 | assigned_colors=None, 131 | alpha=0.5 132 | ): 133 | num_instances = None 134 | if boxes is not None: 135 | boxes = self._convert_boxes(boxes) 136 | num_instances = len(boxes) 137 | if masks is not None: 138 | masks = self._convert_masks(masks) 139 | if num_instances: 140 | assert len(masks) == num_instances 141 | else: 142 | num_instances = len(masks) 143 | if keypoints is not None: 144 | if num_instances: 145 | assert len(keypoints) == num_instances 146 | else: 147 | num_instances = len(keypoints) 148 | keypoints = self._convert_keypoints(keypoints) 149 | if ext_points is not None: 150 | ext_points = self._convert_ext_points(ext_points) 151 | if num_instances: 152 | assert len(ext_points) == num_instances 153 | else: 154 | num_instances = len(ext_points) 155 | if labels is not None: 156 | assert len(labels) == num_instances 157 | if assigned_colors is None: 158 | assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] 159 | if num_instances == 0: 160 | return self.output 161 | if boxes is not None and boxes.shape[1] == 5: 162 | return self.overlay_rotated_instances( 163 | boxes=boxes, labels=labels, assigned_colors=assigned_colors 164 | ) 165 | 166 | # Display in largest to smallest order to reduce occlusion. 167 | areas = None 168 | if boxes is not None: 169 | areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) 170 | elif masks is not None: 171 | areas = np.asarray([x.area() for x in masks]) 172 | 173 | if areas is not None: 174 | sorted_idxs = np.argsort(-areas).tolist() 175 | # Re-order overlapped instances in descending order. 
176 | boxes = boxes[sorted_idxs] if boxes is not None else None 177 | labels = [labels[k] for k in sorted_idxs] if labels is not None else None 178 | masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None 179 | assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] 180 | keypoints = keypoints[sorted_idxs] if keypoints is not None else None 181 | 182 | for i in range(num_instances): 183 | color = assigned_colors[i] 184 | if boxes is not None: 185 | self.draw_box(boxes[i], edge_color=color) 186 | 187 | if masks is not None: 188 | for segment in masks[i].polygons: 189 | self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) 190 | 191 | if ext_points is not None: 192 | self.draw_extreme_pts(ext_points[i], circle_color=color, radius=3) 193 | 194 | if path is not None: 195 | self.draw_snake_path(path[i], color=color) 196 | 197 | if labels is not None: 198 | # first get a box 199 | # boxes = None 200 | if boxes is not None: 201 | x0, y0, x1, y1 = boxes[i] 202 | text_pos = (x0, y0) # if drawing boxes, put text on the box corner. 203 | horiz_align = "left" 204 | elif masks is not None: 205 | x0, y0, x1, y1 = masks[i].bbox() 206 | 207 | # draw text in the center (defined by median) when box is not drawn 208 | # median is less sensitive to outliers. 209 | text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] 210 | horiz_align = "center" 211 | else: 212 | continue # drawing the box confidence for keypoints isn't very useful. 213 | # for small objects, draw text at the side to avoid occlusion 214 | 215 | text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] 216 | horiz_align = "center" 217 | 218 | instance_area = (y1 - y0) * (x1 - x0) 219 | if ( 220 | instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale 221 | or y1 - y0 < 40 * self.output.scale 222 | ): 223 | if y1 >= self.output.height - 5: 224 | text_pos = (x1, y0) 225 | else: 226 | text_pos = (x0, y1) 227 | 228 | height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) 229 | lighter_color = self._change_color_brightness(color, brightness_factor=0.7) 230 | font_size = ( 231 | np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) 232 | * 0.5 233 | * self._default_font_size 234 | ) 235 | self.draw_text( 236 | labels[i], 237 | text_pos, 238 | color=lighter_color, 239 | horizontal_alignment=horiz_align, 240 | font_size=font_size, 241 | ) 242 | 243 | # draw keypoints 244 | if keypoints is not None: 245 | for keypoints_per_instance in keypoints: 246 | self.draw_and_connect_keypoints(keypoints_per_instance) 247 | 248 | return self.output 249 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lkevinzc/dance/62ce83a07e5335c2a17944eeabf7eaffb3e59261/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/prepare_edge_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
3 | 
4 | # Adapted for edge map generation from panoptic segmentation data of COCO
5 | 
6 | import time
7 | import functools
8 | import json
9 | import multiprocessing as mp
10 | import numpy as np
11 | import os
12 | from PIL import Image
13 | import cv2
14 | 
15 | from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
16 | 
17 | from panopticapi.utils import rgb2id
18 | 
19 | EDGE_THICKNESS = 1
20 | 
21 | 
22 | def save_edge_map(instance_map, output_edge):
23 |     canvas = np.zeros_like(instance_map)
24 |     for i in range(np.max(instance_map)):
25 |         instance_idx = i + 1
26 |         contours, hierarchy = cv2.findContours(
27 |             (instance_map == instance_idx).astype(np.uint8),
28 |             cv2.RETR_EXTERNAL,
29 |             cv2.CHAIN_APPROX_NONE)
30 |         cv2.drawContours(canvas, contours, -1, 1, EDGE_THICKNESS)
31 |     cv2.imwrite(output_edge, canvas)
32 | 
33 | 
34 | def _process_panoptic_to_instance(input_panoptic, output_edge, segments, stuff_ids):
35 |     # assuming there are no more than 255 instances in one image;
36 |     # if violated, consider using RGB instead of gray-scale
37 |     panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32)
38 |     panoptic = rgb2id(panoptic)  # map to stuff/thing object ids.
39 |     instance_map = np.zeros_like(panoptic, dtype=np.uint8)
40 |     instance_count = 1
41 |     for seg in segments:
42 |         cat_id = seg["category_id"]
43 |         if cat_id in stuff_ids:
44 |             continue
45 |         else:
46 |             assert instance_count <= 255, 'Too many instances (>255)'
47 |             instance_map[panoptic == seg["id"]] = instance_count
48 |             instance_count += 1
49 |     save_edge_map(instance_map, output_edge)
50 | 
51 | 
52 | def separate_coco_edge_map_from_panoptic(panoptic_json, panoptic_root, edge_root, categories):
53 |     os.makedirs(edge_root, exist_ok=True)
54 | 
55 |     stuff_ids = [k["id"] for k in categories if k["isthing"] == 0]
56 | 
57 |     with open(panoptic_json) as f:
58 |         obj = json.load(f)
59 | 
60 |     pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4))
61 | 
62 |     def iter_annotations():
63 |         for anno in obj["annotations"]:
64 |             file_name = anno["file_name"]
65 |             segments = anno["segments_info"]
66 |             input = os.path.join(panoptic_root, file_name)
67 |             output = os.path.join(edge_root, file_name)
68 |             yield input, output, segments
69 | 
70 |     print("Start writing to {} ...".format(edge_root))
71 |     start = time.time()
72 |     pool.starmap(
73 |         functools.partial(_process_panoptic_to_instance, stuff_ids=stuff_ids),
74 |         iter_annotations(),
75 |         chunksize=100,
76 |     )
77 |     print("Finished. time: {:.2f}s".format(time.time() - start))
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     dataset_dir = os.path.join(os.path.dirname(__file__), "mycoco")
82 |     for s in ["val2017", "train2017"]:
83 |         separate_coco_edge_map_from_panoptic(
84 |             os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)),
85 |             os.path.join(dataset_dir, "panoptic_{}".format(s)),
86 |             os.path.join(dataset_dir, "edge_{}".format(s)),
87 |             COCO_CATEGORIES,
88 |         )
89 | 
--------------------------------------------------------------------------------
/datasets/prepare_edge_map_cityscapes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 3 | 4 | # Adapted for edge map generation from instance segmentation data of Cityscapes (in COCO format) 5 | 6 | import time 7 | import functools 8 | import multiprocessing as mp 9 | import numpy as np 10 | import os 11 | import cv2 12 | 13 | from pycocotools.coco import COCO 14 | 15 | 16 | EDGE_THICKNESS = 1 17 | 18 | 19 | def save_edge_map(edge_fn, mask, im_size): 20 | canvas = np.zeros(im_size) 21 | all_segs = list(map(lambda x: x['segmentation'], mask)) 22 | counters = [] 23 | for segs in all_segs: 24 | counters += [np.expand_dims(np.array(seg, dtype=np.int32).reshape(-1,2), 0) for seg in segs] 25 | cv2.drawContours(canvas, counters, -1, 1, EDGE_THICKNESS) 26 | cv2.imwrite(edge_fn, canvas) 27 | 28 | 29 | def _process_json_to_mask(file_name, height, width, ann, edge_root): 30 | edge_fn = os.path.join(edge_root, os.path.basename(file_name)) 31 | save_edge_map(edge_fn, ann, [height, width]) 32 | 33 | 34 | def generate_coco_edge_map_from_json(instance_json, edge_root): 35 | os.makedirs(edge_root, exist_ok=True) 36 | 37 | coco_api = COCO(instance_json) 38 | img_ids = sorted(coco_api.imgs.keys()) 39 | 40 | 41 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 42 | 43 | 44 | def iter_annotations(): 45 | count = 0 46 | for img_id in img_ids: 47 | img_info = coco_api.loadImgs(img_id)[0] 48 | file_name = img_info['file_name'] 49 | width = img_info['width'] 50 | height = img_info['height'] 51 | 52 | ann = coco_api.imgToAnns[img_id] 53 | count += 1 54 | yield file_name, height, width, ann 55 | print(count) 56 | 57 | print("Start writing to {} ...".format(edge_root)) 58 | start = time.time() 59 | pool.starmap( 60 | functools.partial(_process_json_to_mask, edge_root=edge_root), 61 | iter_annotations(), 62 | chunksize=100, 63 | ) 64 | print("Finished. time: {:.2f}s".format(time.time() - start)) 65 | 66 | 67 | if __name__ == "__main__": 68 | dataset_dir = os.path.join(os.path.dirname(__file__), "dance") 69 | for s in ["val", "train"]: 70 | generate_coco_edge_map_from_json( 71 | os.path.join(dataset_dir, "coco_ann/instance_{}.json".format(s)), 72 | os.path.join(dataset_dir, "edge_{}".format(s)), 73 | ) 74 | -------------------------------------------------------------------------------- /output/.gitignore: -------------------------------------------------------------------------------- 1 | *.pth -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Shapely==1.7.1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "adet", "layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv( 32 | "FORCE_CUDA", "0" 33 | ) == "1": 34 | extension = CUDAExtension 35 | sources += source_cuda 36 | define_macros += [("WITH_CUDA", None)] 37 | extra_compile_args["nvcc"] = [ 38 | "-DCUDA_HAS_FP16=1", 39 | "-D__CUDA_NO_HALF_OPERATORS__", 40 | "-D__CUDA_NO_HALF_CONVERSIONS__", 41 | "-D__CUDA_NO_HALF2_OPERATORS__", 42 | ] 43 | 44 | # It's better if pytorch can do this by default .. 45 | CC = os.environ.get("CC", None) 46 | if CC is not None: 47 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | 51 | include_dirs = [extensions_dir] 52 | 53 | ext_modules = [ 54 | extension( 55 | "core._C", 56 | sources, 57 | include_dirs=include_dirs, 58 | define_macros=define_macros, 59 | extra_compile_args=extra_compile_args, 60 | ) 61 | ] 62 | 63 | return ext_modules 64 | 65 | 66 | setup( 67 | name="Dance", 68 | version="1.0.0", 69 | author="liuzichen@u.nus.edu", 70 | url="https://github.com/lkevinzc/dance", 71 | description="A Deep Attentive Contour Model for Efficient Instance Segmentation", 72 | packages=find_packages(exclude=("configs", "tests", "detectron2")), 73 | python_requires=">=3.6", 74 | install_requires=[ 75 | "termcolor>=1.1", 76 | "Pillow>=6.0", 77 | "yacs>=0.1.6", 78 | "tabulate", 79 | "cloudpickle", 80 | "matplotlib", 81 | "tqdm>4.29.0", 82 | "tensorboard", 83 | "python-Levenshtein", 84 | "Polygon3", 85 | "shapely", 86 | ], 87 | extras_require={"all": ["psutil"]}, 88 | ext_modules=get_extensions(), 89 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 90 | ) 91 | -------------------------------------------------------------------------------- /train_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import detectron2.utils.comm as comm 4 | from detectron2.checkpoint import DetectionCheckpointer 5 | from detectron2.engine import default_argument_parser, default_setup, launch 6 | from detectron2.evaluation import ( 7 | DatasetEvaluators, 8 | verify_results 9 | ) 10 | from detectron2.data import MetadataCatalog 11 | from detectron2.engine import DefaultTrainer 12 | 13 | # for datasets registration 14 | import core.data # noqa 15 | 16 | from core.config import get_cfg 17 | from core.evaluation import ( 18 | COCOEvaluator, # to prevent redundant conversion 19 | ) 20 | 21 | 22 | class Trainer(DefaultTrainer): 23 | @classmethod 24 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 25 | if output_folder is None: 26 | output_folder = 
os.path.join(cfg.OUTPUT_DIR, "inference") 27 | evaluator_list = [] 28 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 29 | 30 | if "coco" in evaluator_type: 31 | evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) 32 | 33 | if len(evaluator_list) == 0: 34 | raise NotImplementedError( 35 | "no Evaluator for the dataset {} with the type {}".format( 36 | dataset_name, evaluator_type 37 | ) 38 | ) 39 | elif len(evaluator_list) == 1: 40 | return evaluator_list[0] 41 | 42 | return DatasetEvaluators(evaluator_list) 43 | 44 | 45 | def setup(args): 46 | cfg = get_cfg() 47 | cfg.merge_from_file(args.config_file) 48 | cfg.merge_from_list(args.opts) 49 | cfg.freeze() 50 | default_setup(cfg, args) 51 | return cfg 52 | 53 | 54 | def main(args): 55 | cfg = setup(args) 56 | 57 | if args.eval_only: 58 | model = Trainer.build_model(cfg) 59 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 60 | cfg.MODEL.WEIGHTS, resume=args.resume 61 | ) 62 | res = Trainer.test(cfg, model) 63 | if comm.is_main_process(): 64 | verify_results(cfg, res) 65 | return res 66 | 67 | trainer = Trainer(cfg) 68 | trainer.resume_or_load(resume=args.resume) 69 | 70 | return trainer.train() 71 | 72 | 73 | if __name__ == "__main__": 74 | args = default_argument_parser().parse_args() 75 | print("Command Line Args:", args) 76 | launch( 77 | main, 78 | args.num_gpus, 79 | num_machines=args.num_machines, 80 | machine_rank=args.machine_rank, 81 | dist_url=args.dist_url, 82 | args=(args,), 83 | ) 84 | --------------------------------------------------------------------------------
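A minimal usage sketch for `PolygonPoints` from `core/structures/pointset.py` above. It assumes the `core` package is importable (e.g. after installing the repo with `setup.py`); the contour values are arbitrary illustration data, not model outputs.

```python
import torch

from core.structures.pointset import PolygonPoints

# Two instances, each represented by a fixed-length contour of 128 (x, y) points.
contours = torch.rand(2, 128, 2) * 100.0

polys = PolygonPoints(contours)
assert len(polys) == 2

polys.scale(0.5, 0.5)      # rescale x and y coordinates in place
polys.clip((50, 50))       # clamp coordinates to an image of height 50, width 50

boxes = polys.get_box()    # (N, 4) tensor of [x_min, y_min, x_max, y_max]
flat = polys.flatten()     # (N, 128 * 2), the flattened layout used by the RLE helpers

first = polys[0:1]         # slicing returns a new PolygonPoints
merged = PolygonPoints.cat([polys, first])
assert len(merged) == 3
```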
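`core/utils/timer.py` keeps module-level accumulators, so it is used as a singleton: call `reset()` at the start of a measurement window, wrap regions of interest with `timer.env(...)`, and report with `print_stats()`. A small sketch under those assumptions; `preprocess` and `forward` are made-up stand-ins, and `divider` is set to the iteration count instead of the 5000-image COCO default used in the module.

```python
import time

from core.utils import timer


def preprocess():
    time.sleep(0.01)  # stand-in for real work


def forward():
    time.sleep(0.02)  # stand-in for real work


timer.reset()  # clear accumulated times before measuring
for _ in range(10):
    with timer.env('preprocess'):  # with use_stack=True (default), only one timer runs at a time
        preprocess()
    with timer.env('forward'):
        forward()

# print_stats divides the accumulated milliseconds by `divider`,
# so passing the iteration count yields an average time per iteration.
timer.print_stats(divider=10)
```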
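The `get_polygon_rles` helper in `core/utils/visualizer.py` turns flattened contours into COCO RLE masks via pycocotools. The snippet below sketches a round trip on a single hand-made square contour; the coordinates and image size are made up for illustration.

```python
import pycocotools.mask as mask_util
import torch

from core.utils.visualizer import get_polygon_rles

# One instance: a square contour with 4 points, flattened to shape (N, p * 2).
square = torch.tensor([[10.0, 10.0, 50.0, 10.0, 50.0, 50.0, 10.0, 50.0]])

rles = get_polygon_rles(square, (64, 64))  # image size given as (height, width)
area = mask_util.area(rles[0])             # rasterized area, roughly 40 * 40 pixels
mask = mask_util.decode(rles[0])           # back to a binary H x W numpy mask
print(area, mask.shape)
```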