├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── cfgs ├── cfg_coco_resnet101.py ├── cfg_coco_resnet18.py ├── cfg_coco_resnet34.py ├── cfg_coco_resnet50.py └── getcfg.py ├── demo.py ├── docs ├── Res101FPN_pytorch_epoch12.MD └── Res50FPN_pytorch_epoch12.MD ├── libs ├── cocoapi │ ├── .gitignore │ ├── .travis.yml │ ├── LuaAPI │ │ ├── CocoApi.lua │ │ ├── MaskApi.lua │ │ ├── cocoDemo.lua │ │ ├── env.lua │ │ ├── init.lua │ │ └── rocks │ │ │ └── coco-scm-1.rockspec │ ├── MatlabAPI │ │ ├── CocoApi.m │ │ ├── CocoEval.m │ │ ├── CocoUtils.m │ │ ├── MaskApi.m │ │ ├── cocoDemo.m │ │ ├── evalDemo.m │ │ ├── gason.m │ │ └── private │ │ │ ├── gasonMex.cpp │ │ │ ├── gasonMex.mexa64 │ │ │ ├── gasonMex.mexmaci64 │ │ │ ├── getPrmDflt.m │ │ │ └── maskApiMex.c │ ├── PythonAPI │ │ ├── Makefile │ │ ├── pycocoDemo.ipynb │ │ ├── pycocoEvalDemo.ipynb │ │ ├── pycocotools │ │ │ ├── __init__.py │ │ │ ├── _mask.pyx │ │ │ ├── coco.py │ │ │ ├── cocoeval.py │ │ │ └── mask.py │ │ └── setup.py │ ├── README.txt │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── license.txt │ └── results │ │ ├── captions_val2014_fakecap_results.json │ │ ├── instances_val2014_fakebbox100_results.json │ │ ├── instances_val2014_fakesegm100_results.json │ │ ├── person_keypoints_val2014_fakekeypoints100_results.json │ │ └── val2014_fake_eval_res.txt ├── dcn │ ├── __init__.py │ ├── deform_conv.py │ ├── deform_pool.py │ └── src │ │ ├── deform_conv_cuda.cpp │ │ ├── deform_conv_cuda_kernel.cu │ │ ├── deform_pool_cuda.cpp │ │ └── deform_pool_cuda_kernel.cu ├── font.TTF ├── make.sh ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ └── nms_kernel.cu ├── roi_align │ ├── __init__.py │ ├── gradcheck.py │ ├── roi_align.py │ └── src │ │ ├── roi_align_cuda.cpp │ │ └── roi_align_kernel.cu ├── roi_pool │ ├── __init__.py │ ├── gradcheck.py │ ├── roi_pool.py │ └── src │ │ ├── roi_pool_cuda.cpp │ │ └── roi_pool_kernel.cu └── setup.py ├── modules ├── RPN.py ├── backbones │ ├── FPNResNets.py │ └── __init__.py ├── fasterRCNN.py ├── losses │ ├── CELoss.py │ ├── IoULoss.py │ ├── __init__.py │ ├── focalLoss.py │ └── smoothL1.py └── utils │ ├── __init__.py │ ├── anchors.py │ ├── datasets │ ├── COCODataset.py │ ├── Custom.py │ └── __init__.py │ ├── initialization.py │ └── misc.py ├── names ├── coco.names └── voc.names ├── test.py └── train.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * linguist-language=python 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 DetectionBLWX 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FPN 2 | ``` 3 | Pytorch Implementation of "Feature Pyramid Networks for Object Detection" 4 | You can star this repository to keep track of the project if it's helpful for you, thank you for your support. 
5 | ``` 6 | 7 | 8 | # Environment 9 | ``` 10 | OS: Ubuntu 16.04 11 | Python: python3.x with torch==1.2.0, torchvision==0.4.0 12 | ``` 13 | 14 | 15 | # Performance 16 | | Backbone | Train | Test | Pretrained Model | Epochs | Learning Rate | RoI per image | AP | 17 | | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | 18 | | Res50-FPN | trainval35k | minival5k | Pytorch | 12 | 2e-2/2e-3/2e-4 | 512 | [35.5](docs/Res50FPN_pytorch_epoch12.MD) | 19 | | Res101-FPN | trainval35k | minival5k | Pytorch | 12 | 2e-2/2e-3/2e-4 | 512 | [37.4](docs/Res101FPN_pytorch_epoch12.MD) | 20 | 21 | 22 | # Trained models 23 | ``` 24 | You could get the trained models reported above at 25 | https://drive.google.com/open?id=1xm8z-EMbNG17sQzd-2FRRLVk_N7UIOhE 26 | ``` 27 | 28 | 29 | # Usage 30 | #### Setup 31 | ``` 32 | cd libs 33 | sh make.sh 34 | ``` 35 | #### Train 36 | ``` 37 | usage: train.py [-h] --datasetname DATASETNAME --backbonename BACKBONENAME 38 | [--checkpointspath CHECKPOINTSPATH] 39 | optional arguments: 40 | -h, --help show this help message and exit 41 | --datasetname DATASETNAME 42 | dataset for training. 43 | --backbonename BACKBONENAME 44 | backbone network for training. 45 | --checkpointspath CHECKPOINTSPATH 46 | checkpoints you want to use. 47 | cmd example: 48 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py --datasetname coco --backbonename resnet50 49 | ``` 50 | #### Test 51 | ``` 52 | usage: test.py [-h] --datasetname DATASETNAME [--annfilepath ANNFILEPATH] 53 | [--datasettype DATASETTYPE] --backbonename BACKBONENAME 54 | --checkpointspath CHECKPOINTSPATH [--nmsthresh NMSTHRESH] 55 | optional arguments: 56 | -h, --help show this help message and exit 57 | --datasetname DATASETNAME 58 | dataset for testing. 59 | --annfilepath ANNFILEPATH 60 | used to specify annfilepath. 61 | --datasettype DATASETTYPE 62 | used to specify datasettype. 63 | --backbonename BACKBONENAME 64 | backbone network for testing. 65 | --checkpointspath CHECKPOINTSPATH 66 | checkpoints you want to use. 67 | --nmsthresh NMSTHRESH 68 | thresh used in nms. 69 | cmd example: 70 | CUDA_VISIBLE_DEVICES=0 python test.py --checkpointspath fpn_res50_trainbackup_coco/epoch_12.pth --datasetname coco --backbonename resnet50 71 | ``` 72 | #### Demo 73 | ``` 74 | usage: demo.py [-h] --imagepath IMAGEPATH --backbonename BACKBONENAME 75 | --datasetname DATASETNAME --checkpointspath CHECKPOINTSPATH 76 | [--nmsthresh NMSTHRESH] [--confthresh CONFTHRESH] 77 | optional arguments: 78 | -h, --help show this help message and exit 79 | --imagepath IMAGEPATH 80 | image you want to detect. 81 | --backbonename BACKBONENAME 82 | backbone network for demo. 83 | --datasetname DATASETNAME 84 | dataset used to train. 85 | --checkpointspath CHECKPOINTSPATH 86 | checkpoints you want to use. 87 | --nmsthresh NMSTHRESH 88 | thresh used in nms. 89 | --confthresh CONFTHRESH 90 | thresh used in showing bounding box. 91 | cmd example: 92 | CUDA_VISIBLE_DEVICES=0 python demo.py --checkpointspath fpn_res50_trainbackup_coco/epoch_12.pth --datasetname coco --backbonename resnet50 --imagepath 000001.jpg 93 | ``` 94 | 95 | 96 | # Reference 97 | ``` 98 | [1]. https://github.com/jwyang/fpn.pytorch 99 | [2]. 
https://github.com/open-mmlab/mmdetection 100 | ``` -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet101.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet101' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res101_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res101_trainbackup_coco/train.log' 
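# Note on the record settings around this point: the TEST_* entries below mirror the TRAIN_* backup/log paths above.
# demo.py (included further down) creates TEST_BACKUPDIR and writes its log through TEST_LOGFILE; TEST_BBOXES_SAVE_PATH is,
# judging by its name, where detection results are dumped as a COCO-style results JSON for evaluation, and SAVE_INTERVAL is
# presumably the checkpoint-saving interval in epochs (train.py is not shown here, so treat these last two readings as assumptions).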
83 | TEST_BACKUPDIR = 'fpn_res101_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res101_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res101_testbackup_coco/fpn_res101_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet18.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet18' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | 
IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res18_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res18_trainbackup_coco/train.log' 83 | TEST_BACKUPDIR = 'fpn_res18_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res18_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res18_testbackup_coco/fpn_res18_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet34.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet34' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | 
MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res34_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res34_trainbackup_coco/train.log' 83 | TEST_BACKUPDIR = 'fpn_res34_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res34_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res34_testbackup_coco/fpn_res34_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet50.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet50' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | 
MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res50_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res50_trainbackup_coco/train.log' 83 | TEST_BACKUPDIR = 'fpn_res50_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res50_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res50_testbackup_coco/fpn_res50_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/getcfg.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | used to get config file for specified dataset and backbone. 4 | Author: 5 | Charles 6 | ''' 7 | def getCfgByDatasetAndBackbone(datasetname, backbonename): 8 | if [datasetname, backbonename] == ['coco', 'resnet101']: 9 | import cfgs.cfg_coco_resnet101 as cfg 10 | cfg_file_path = 'cfgs/cfg_coco_resnet101' 11 | elif [datasetname, backbonename] == ['coco', 'resnet50']: 12 | import cfgs.cfg_coco_resnet50 as cfg 13 | cfg_file_path = 'cfgs/cfg_coco_resnet50' 14 | elif [datasetname, backbonename] == ['coco', 'resnet34']: 15 | import cfgs.cfg_coco_resnet34 as cfg 16 | cfg_file_path = 'cfgs/cfg_coco_resnet34' 17 | elif [datasetname, backbonename] == ['coco', 'resnet18']: 18 | import cfgs.cfg_coco_resnet18 as cfg 19 | cfg_file_path = 'cfgs/cfg_coco_resnet18' 20 | else: 21 | raise ValueError('Can not find cfg file for dataset <%s> and backbone <%s>...' 
% (datasetname, backbonename)) 22 | return cfg, cfg_file_path -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | detect objects in one image 4 | Author: 5 | Charles 6 | ''' 7 | import os 8 | import torch 9 | import warnings 10 | import argparse 11 | import numpy as np 12 | from modules.utils import * 13 | from libs.nms.nms_wrapper import nms 14 | from PIL import Image, ImageDraw, ImageFont 15 | from cfgs.getcfg import getCfgByDatasetAndBackbone 16 | from modules.fasterRCNN import FasterRCNNFPNResNets 17 | warnings.filterwarnings("ignore") 18 | 19 | 20 | '''parse arguments for demo''' 21 | def parseArgs(): 22 | parser = argparse.ArgumentParser(description='Faster R-CNN with FPN') 23 | parser.add_argument('--imagepath', dest='imagepath', help='image you want to detect.', default='', type=str, required=True) 24 | parser.add_argument('--backbonename', dest='backbonename', help='backbone network for demo.', default='', type=str, required=True) 25 | parser.add_argument('--datasetname', dest='datasetname', help='dataset used to train.', default='', type=str, required=True) 26 | parser.add_argument('--checkpointspath', dest='checkpointspath', help='checkpoints you want to use.', default='', type=str, required=True) 27 | parser.add_argument('--nmsthresh', dest='nmsthresh', help='thresh used in nms.', default=0.5, type=float) 28 | parser.add_argument('--confthresh', dest='confthresh', help='thresh used in showing bounding box.', default=0.5, type=float) 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | '''detect objects in one image''' 34 | def demo(): 35 | # prepare base things 36 | args = parseArgs() 37 | cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename) 38 | checkDir(cfg.TEST_BACKUPDIR) 39 | logger_handle = Logger(cfg.TEST_LOGFILE) 40 | use_cuda = torch.cuda.is_available() 41 | clsnames = loadclsnames(cfg.CLSNAMESPATH) 42 | # prepare model 43 | if args.backbonename.find('resnet') != -1: 44 | model = FasterRCNNFPNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle) 45 | else: 46 | raise ValueError('Unsupport backbonename <%s> now...' % args.backbonename) 47 | if use_cuda: 48 | model = model.cuda() 49 | # load checkpoints 50 | checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) 51 | model.load_state_dict(checkpoints['model']) 52 | model.eval() 53 | # do detect 54 | FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 55 | img = Image.open(args.imagepath) 56 | if args.datasetname == 'coco': 57 | input_img, scale_factor, target_size = COCODataset.preprocessImage(img, use_color_jitter=False, image_size_dict=cfg.IMAGESIZE_DICT, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL) 58 | else: 59 | raise ValueError('Unsupport datasetname <%s> now...' 
% args.datasetname) 60 | input_img = input_img.unsqueeze(0).type(FloatTensor) 61 | gt_boxes = torch.FloatTensor([1, 1, 1, 1, 0]).unsqueeze(0).type(FloatTensor) 62 | img_info = torch.from_numpy(np.array([target_size[0], target_size[1], scale_factor])).unsqueeze(0).type(FloatTensor) 63 | num_gt_boxes = torch.FloatTensor([0]).unsqueeze(0).type(FloatTensor) 64 | with torch.no_grad(): 65 | output = model(x=input_img, gt_boxes=gt_boxes, img_info=img_info, num_gt_boxes=num_gt_boxes) 66 | rois = output[0].data[..., 1:5] 67 | cls_probs = output[1].data 68 | bbox_preds = output[2].data 69 | # parse the results 70 | if cfg.IS_CLASS_AGNOSTIC: 71 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 72 | box_deltas = box_deltas.view(1, -1, 4) 73 | else: 74 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 75 | box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES) 76 | boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas) 77 | boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, img_info.data) 78 | boxes_pred = boxes_pred.squeeze() 79 | scores = cls_probs.squeeze() 80 | thresh = 0.05 81 | for j in range(1, cfg.NUM_CLASSES): 82 | idxs = torch.nonzero(scores[:, j] > thresh).view(-1) 83 | if idxs.numel() > 0: 84 | cls_scores = scores[:, j][idxs] 85 | _, order = torch.sort(cls_scores, 0, True) 86 | if cfg.IS_CLASS_AGNOSTIC: 87 | cls_boxes = boxes_pred[idxs, :] 88 | else: 89 | cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4] 90 | cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) 91 | cls_dets = cls_dets[order] 92 | _, keep_idxs = nms(cls_dets, args.nmsthresh) 93 | cls_dets = cls_dets[keep_idxs.view(-1).long()] 94 | for cls_det in cls_dets: 95 | if cls_det[-1] > args.confthresh: 96 | x1, y1, x2, y2 = cls_det[:4] 97 | x1 = x1.item() / scale_factor 98 | x2 = x2.item() / scale_factor 99 | y1 = y1.item() / scale_factor 100 | y2 = y2.item() / scale_factor 101 | label = clsnames[j-1] 102 | logger_handle.info('Detect a %s in confidence %.4f...' 
% (label, cls_det[-1].item())) 103 | color = (0, 255, 0) 104 | draw = ImageDraw.Draw(img) 105 | draw.line([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)], width=2, fill=color) 106 | font = ImageFont.truetype('libs/font.TTF', 25) 107 | draw.text((x1+5, y1), label, fill=color, font=font) 108 | img.save(os.path.join(cfg.TEST_BACKUPDIR, 'demo_output.jpg')) 109 | 110 | 111 | '''run''' 112 | if __name__ == '__main__': 113 | demo() -------------------------------------------------------------------------------- /docs/Res101FPN_pytorch_epoch12.MD: -------------------------------------------------------------------------------- 1 | # Results on minival5k 2 | #### RoI512 3 | ``` 4 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.374 5 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.587 6 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.406 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.208 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.417 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.487 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.313 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.493 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.517 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.562 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.664 16 | ``` -------------------------------------------------------------------------------- /docs/Res50FPN_pytorch_epoch12.MD: -------------------------------------------------------------------------------- 1 | # Results on minival5k 2 | #### RoI512 3 | ``` 4 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.355 5 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.569 6 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.383 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.205 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.393 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.452 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.298 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.479 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.314 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.544 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641 16 | ``` -------------------------------------------------------------------------------- /libs/cocoapi/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/cocoapi/.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 2.7 6 | - 3.6 7 | install: 8 | - pip install --upgrade pip 9 | - pip install pycocotools 10 | script: 11 | - true 12 | -------------------------------------------------------------------------------- /libs/cocoapi/LuaAPI/MaskApi.lua: 
-------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for manipulating masks stored in RLE format. 4 | 5 | For an overview of RLE please see http://mscoco.org/dataset/#download. 6 | Additionally, more detailed information can be found in the Matlab MaskApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/MaskApi.m 8 | 9 | The following API functions are defined: 10 | encode - Encode binary masks using RLE. 11 | decode - Decode binary masks encoded via RLE. 12 | merge - Compute union or intersection of encoded masks. 13 | iou - Compute intersection over union between masks. 14 | nms - Compute non-maximum suppression between ordered masks. 15 | area - Compute area of encoded masks. 16 | toBbox - Get bounding boxes surrounding encoded masks. 17 | frBbox - Convert bounding boxes to encoded masks. 18 | frPoly - Convert polygon to encoded mask. 19 | drawCirc - Draw circle into image (alters input). 20 | drawLine - Draw line into image (alters input). 21 | drawMasks - Draw masks into image (alters input). 22 | 23 | Usage: 24 | Rs = MaskApi.encode( masks ) 25 | masks = MaskApi.decode( Rs ) 26 | R = MaskApi.merge( Rs, [intersect=false] ) 27 | o = MaskApi.iou( dt, gt, [iscrowd=false] ) 28 | keep = MaskApi.nms( dt, thr ) 29 | a = MaskApi.area( Rs ) 30 | bbs = MaskApi.toBbox( Rs ) 31 | Rs = MaskApi.frBbox( bbs, h, w ) 32 | R = MaskApi.frPoly( poly, h, w ) 33 | MaskApi.drawCirc( img, x, y, rad, clr ) 34 | MaskApi.drawLine( img, x0, y0, x1, y1, rad, clr ) 35 | MaskApi.drawMasks( img, masks, [maxn=n], [alpha=.4], [clrs] ) 36 | For detailed usage information please see cocoDemo.lua. 37 | 38 | In the API the following formats are used: 39 | R,Rs - [table] Run-length encoding of binary mask(s) 40 | masks - [nxhxw] Binary mask(s) 41 | bbs - [nx4] Bounding box(es) stored as [x y w h] 42 | poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 43 | dt,gt - May be either bounding boxes or encoded masks 44 | Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 45 | 46 | Common Objects in COntext (COCO) Toolbox. version 3.0 47 | Data, paper, and tutorials available at: http://mscoco.org/ 48 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 
49 | Licensed under the Simplified BSD License [see coco/license.txt] 50 | 51 | ------------------------------------------------------------------------------]] 52 | 53 | local ffi = require 'ffi' 54 | local coco = require 'coco.env' 55 | 56 | coco.MaskApi = {} 57 | local MaskApi = coco.MaskApi 58 | 59 | coco.libmaskapi = ffi.load(package.searchpath('libmaskapi',package.cpath)) 60 | local libmaskapi = coco.libmaskapi 61 | 62 | -------------------------------------------------------------------------------- 63 | 64 | MaskApi.encode = function( masks ) 65 | local n, h, w = masks:size(1), masks:size(2), masks:size(3) 66 | masks = masks:type('torch.ByteTensor'):transpose(2,3) 67 | local data = masks:contiguous():data() 68 | local Qs = MaskApi._rlesInit(n) 69 | libmaskapi.rleEncode(Qs[0],data,h,w,n) 70 | return MaskApi._rlesToLua(Qs,n) 71 | end 72 | 73 | MaskApi.decode = function( Rs ) 74 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 75 | local masks = torch.ByteTensor(n,w,h):zero():contiguous() 76 | libmaskapi.rleDecode(Qs,masks:data(),n) 77 | MaskApi._rlesFree(Qs,n) 78 | return masks:transpose(2,3) 79 | end 80 | 81 | MaskApi.merge = function( Rs, intersect ) 82 | intersect = intersect or 0 83 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 84 | local Q = MaskApi._rlesInit(1) 85 | libmaskapi.rleMerge(Qs,Q,n,intersect) 86 | MaskApi._rlesFree(Qs,n) 87 | return MaskApi._rlesToLua(Q,1)[1] 88 | end 89 | 90 | MaskApi.iou = function( dt, gt, iscrowd ) 91 | if not iscrowd then iscrowd = NULL else 92 | iscrowd = iscrowd:type('torch.ByteTensor'):contiguous():data() 93 | end 94 | if torch.isTensor(gt) and torch.isTensor(dt) then 95 | local nDt, k = dt:size(1), dt:size(2); assert(k==4) 96 | local nGt, k = gt:size(1), gt:size(2); assert(k==4) 97 | local dDt = dt:type('torch.DoubleTensor'):contiguous():data() 98 | local dGt = gt:type('torch.DoubleTensor'):contiguous():data() 99 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 100 | libmaskapi.bbIou(dDt,dGt,nDt,nGt,iscrowd,o:data()) 101 | return o:transpose(1,2) 102 | else 103 | local qDt, nDt = MaskApi._rlesFrLua(dt) 104 | local qGt, nGt = MaskApi._rlesFrLua(gt) 105 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 106 | libmaskapi.rleIou(qDt,qGt,nDt,nGt,iscrowd,o:data()) 107 | MaskApi._rlesFree(qDt,nDt); MaskApi._rlesFree(qGt,nGt) 108 | return o:transpose(1,2) 109 | end 110 | end 111 | 112 | MaskApi.nms = function( dt, thr ) 113 | if torch.isTensor(dt) then 114 | local n, k = dt:size(1), dt:size(2); assert(k==4) 115 | local Q = dt:type('torch.DoubleTensor'):contiguous():data() 116 | local kp = torch.IntTensor(n):contiguous() 117 | libmaskapi.bbNms(Q,n,kp:data(),thr) 118 | return kp 119 | else 120 | local Q, n = MaskApi._rlesFrLua(dt) 121 | local kp = torch.IntTensor(n):contiguous() 122 | libmaskapi.rleNms(Q,n,kp:data(),thr) 123 | MaskApi._rlesFree(Q,n) 124 | return kp 125 | end 126 | end 127 | 128 | MaskApi.area = function( Rs ) 129 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 130 | local a = torch.IntTensor(n):contiguous() 131 | libmaskapi.rleArea(Qs,n,a:data()) 132 | MaskApi._rlesFree(Qs,n) 133 | return a 134 | end 135 | 136 | MaskApi.toBbox = function( Rs ) 137 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 138 | local bb = torch.DoubleTensor(n,4):contiguous() 139 | libmaskapi.rleToBbox(Qs,bb:data(),n) 140 | MaskApi._rlesFree(Qs,n) 141 | return bb 142 | end 143 | 144 | MaskApi.frBbox = function( bbs, h, w ) 145 | if bbs:dim()==1 then bbs=bbs:view(1,bbs:size(1)) end 146 | local n, k = bbs:size(1), bbs:size(2); assert(k==4) 147 | local data = 
bbs:type('torch.DoubleTensor'):contiguous():data() 148 | local Qs = MaskApi._rlesInit(n) 149 | libmaskapi.rleFrBbox(Qs[0],data,h,w,n) 150 | return MaskApi._rlesToLua(Qs,n) 151 | end 152 | 153 | MaskApi.frPoly = function( poly, h, w ) 154 | local n = #poly 155 | local Qs, Q = MaskApi._rlesInit(n), MaskApi._rlesInit(1) 156 | for i,p in pairs(poly) do 157 | local xy = p:type('torch.DoubleTensor'):contiguous():data() 158 | libmaskapi.rleFrPoly(Qs[i-1],xy,p:size(1)/2,h,w) 159 | end 160 | libmaskapi.rleMerge(Qs,Q[0],n,0) 161 | MaskApi._rlesFree(Qs,n) 162 | return MaskApi._rlesToLua(Q,1)[1] 163 | end 164 | 165 | -------------------------------------------------------------------------------- 166 | 167 | MaskApi.drawCirc = function( img, x, y, rad, clr ) 168 | assert(img:isContiguous() and img:dim()==3) 169 | local k, h, w, data = img:size(1), img:size(2), img:size(3), img:data() 170 | for dx=-rad,rad do for dy=-rad,rad do 171 | local xi, yi = torch.round(x+dx), torch.round(y+dy) 172 | if dx*dx+dy*dy<=rad*rad and xi>=0 and yi>=0 and xi=0 and yi>=0 and xi= 5.1", 17 | "torch >= 7.0", 18 | "lua-cjson" 19 | } 20 | 21 | build = { 22 | type = "builtin", 23 | modules = { 24 | ["coco.env"] = "LuaAPI/env.lua", 25 | ["coco.init"] = "LuaAPI/init.lua", 26 | ["coco.MaskApi"] = "LuaAPI/MaskApi.lua", 27 | ["coco.CocoApi"] = "LuaAPI/CocoApi.lua", 28 | libmaskapi = { 29 | sources = { "common/maskApi.c" }, 30 | incdirs = { "common/" } 31 | } 32 | } 33 | } 34 | 35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec 36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec 37 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/MaskApi.m: -------------------------------------------------------------------------------- 1 | classdef MaskApi 2 | % Interface for manipulating masks stored in RLE format. 3 | % 4 | % RLE is a simple yet efficient format for storing binary masks. RLE 5 | % first divides a vector (or vectorized image) into a series of piecewise 6 | % constant regions and then for each piece simply stores the length of 7 | % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 8 | % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 9 | % (note that the odd counts are always the numbers of zeros). Instead of 10 | % storing the counts directly, additional compression is achieved with a 11 | % variable bitrate representation based on a common scheme called LEB128. 12 | % 13 | % Compression is greatest given large piecewise constant regions. 14 | % Specifically, the size of the RLE is proportional to the number of 15 | % *boundaries* in M (or for an image the number of boundaries in the y 16 | % direction). Assuming fairly simple shapes, the RLE representation is 17 | % O(sqrt(n)) where n is number of pixels in the object. Hence space usage 18 | % is substantially lower, especially for large simple objects (large n). 19 | % 20 | % Many common operations on masks can be computed directly using the RLE 21 | % (without need for decoding). This includes computations such as area, 22 | % union, intersection, etc. All of these operations are linear in the 23 | % size of the RLE, in other words they are O(sqrt(n)) where n is the area 24 | % of the object. Computing these operations on the original mask is O(n). 25 | % Thus, using the RLE can result in substantial computational savings. 26 | % 27 | % The following API functions are defined: 28 | % encode - Encode binary masks using RLE. 
29 | % decode - Decode binary masks encoded via RLE. 30 | % merge - Compute union or intersection of encoded masks. 31 | % iou - Compute intersection over union between masks. 32 | % nms - Compute non-maximum suppression between ordered masks. 33 | % area - Compute area of encoded masks. 34 | % toBbox - Get bounding boxes surrounding encoded masks. 35 | % frBbox - Convert bounding boxes to encoded masks. 36 | % frPoly - Convert polygon to encoded mask. 37 | % 38 | % Usage: 39 | % Rs = MaskApi.encode( masks ) 40 | % masks = MaskApi.decode( Rs ) 41 | % R = MaskApi.merge( Rs, [intersect=false] ) 42 | % o = MaskApi.iou( dt, gt, [iscrowd=false] ) 43 | % keep = MaskApi.nms( dt, thr ) 44 | % a = MaskApi.area( Rs ) 45 | % bbs = MaskApi.toBbox( Rs ) 46 | % Rs = MaskApi.frBbox( bbs, h, w ) 47 | % R = MaskApi.frPoly( poly, h, w ) 48 | % 49 | % In the API the following formats are used: 50 | % R,Rs - [struct] Run-length encoding of binary mask(s) 51 | % masks - [hxwxn] Binary mask(s) (must have type uint8) 52 | % bbs - [nx4] Bounding box(es) stored as [x y w h] 53 | % poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 54 | % dt,gt - May be either bounding boxes or encoded masks 55 | % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 56 | % 57 | % Finally, a note about the intersection over union (iou) computation. 58 | % The standard iou of a ground truth (gt) and detected (dt) object is 59 | % iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 60 | % For "crowd" regions, we use a modified criteria. If a gt object is 61 | % marked as "iscrowd", we allow a dt to match any subregion of the gt. 62 | % Choosing gt' in the crowd gt that best matches the dt can be done using 63 | % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 64 | % iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 65 | % For crowd gt regions we use this modified criteria above for the iou. 66 | % 67 | % To compile use the following (some precompiled binaries are included): 68 | % mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',... 69 | % 'private/maskApiMex.c','../common/maskApi.c',... 70 | % '-I../common/','-outdir','private'); 71 | % Please do not contact us for help with compiling. 72 | % 73 | % Microsoft COCO Toolbox. version 2.0 74 | % Data, paper, and tutorials available at: http://mscoco.org/ 75 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
76 | % Licensed under the Simplified BSD License [see coco/license.txt] 77 | 78 | methods( Static ) 79 | function Rs = encode( masks ) 80 | Rs = maskApiMex( 'encode', masks ); 81 | end 82 | 83 | function masks = decode( Rs ) 84 | masks = maskApiMex( 'decode', Rs ); 85 | end 86 | 87 | function R = merge( Rs, varargin ) 88 | R = maskApiMex( 'merge', Rs, varargin{:} ); 89 | end 90 | 91 | function o = iou( dt, gt, varargin ) 92 | o = maskApiMex( 'iou', dt', gt', varargin{:} ); 93 | end 94 | 95 | function keep = nms( dt, thr ) 96 | keep = maskApiMex('nms',dt',thr); 97 | end 98 | 99 | function a = area( Rs ) 100 | a = maskApiMex( 'area', Rs ); 101 | end 102 | 103 | function bbs = toBbox( Rs ) 104 | bbs = maskApiMex( 'toBbox', Rs )'; 105 | end 106 | 107 | function Rs = frBbox( bbs, h, w ) 108 | Rs = maskApiMex( 'frBbox', bbs', h, w ); 109 | end 110 | 111 | function R = frPoly( poly, h, w ) 112 | R = maskApiMex( 'frPoly', poly, h , w ); 113 | end 114 | end 115 | 116 | end 117 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/cocoDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo for the CocoApi (see CocoApi.m) 2 | 3 | %% initialize COCO api (please specify dataType/annType below) 4 | annTypes = { 'instances', 'captions', 'person_keypoints' }; 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType); 7 | coco=CocoApi(annFile); 8 | 9 | %% display COCO categories and supercategories 10 | if( ~strcmp(annType,'captions') ) 11 | cats = coco.loadCats(coco.getCatIds()); 12 | nms={cats.name}; fprintf('COCO categories: '); 13 | fprintf('%s, ',nms{:}); fprintf('\n'); 14 | nms=unique({cats.supercategory}); fprintf('COCO supercategories: '); 15 | fprintf('%s, ',nms{:}); fprintf('\n'); 16 | end 17 | 18 | %% get all images containing given categories, select one at random 19 | catIds = coco.getCatIds('catNms',{'person','dog','skateboard'}); 20 | imgIds = coco.getImgIds('catIds',catIds); 21 | imgId = imgIds(randi(length(imgIds))); 22 | 23 | %% load and display image 24 | img = coco.loadImgs(imgId); 25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name)); 26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[]) 27 | 28 | %% load and display annotations 29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]); 30 | anns = coco.loadAnns(annIds); coco.showAnns(anns); 31 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/evalDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo demonstrating the algorithm result formats for COCO 2 | 3 | %% select results type for demo (either bbox or segm) 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here 5 | fprintf('Running demo for *%s* results.\n\n',type); 6 | 7 | %% initialize COCO ground truth api 8 | dataDir='../'; prefix='instances'; dataType='val2014'; 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end 10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType); 11 | cocoGt=CocoApi(annFile); 12 | 13 | %% initialize COCO detections api 14 | resFile='%s/results/%s_%s_fake%s100_results.json'; 15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type); 16 | cocoDt=cocoGt.loadRes(resFile); 17 | 18 | %% visialuze gt and dt side by side 19 | imgIds=sort(cocoGt.getImgIds()); imgIds=imgIds(1:100); 20 | 
imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId); 21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name)); 22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off; 23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth') 24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns); 25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off; 26 | annIds = cocoDt.getAnnIds('imgIds',imgId); title('results') 27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns); 28 | 29 | %% load raw JSON and show exact format for results 30 | fprintf('results structure have the following format:\n'); 31 | res = gason(fileread(resFile)); disp(res) 32 | 33 | %% the following command can be used to save the results back to disk 34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end 35 | 36 | %% run COCO evaluation code (see CocoEval.m) 37 | cocoEval=CocoEval(cocoGt,cocoDt,type); 38 | cocoEval.params.imgIds=imgIds; 39 | cocoEval.evaluate(); 40 | cocoEval.accumulate(); 41 | cocoEval.summarize(); 42 | 43 | %% generate Derek Hoiem style analyis of false positives (slow) 44 | if(0), cocoEval.analyze(); end 45 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/gason.m: -------------------------------------------------------------------------------- 1 | function out = gason( in ) 2 | % Convert between JSON strings and corresponding JSON objects. 3 | % 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev: 5 | % https://github.com/vivkin/gason 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the 7 | % above link for license information and additional details about Gason. 8 | % 9 | % Given a JSON string, gason calls the C++ parser and converts the output 10 | % into an appropriate Matlab structure. As the parsing is performed in mex 11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take 12 | % only a few seconds to parse (compared to hours for pure Matlab parsers). 13 | % 14 | % Given a JSON object, gason calls the C++ encoder to convert the object 15 | % back into a JSON string representation. Nearly any Matlab struct, cell 16 | % array, or numeric array represent a valid JSON object. Note that gason() 17 | % can be used to go both from JSON string to JSON object and back. 18 | % 19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or 20 | % later). The following command compiles the parser (may require tweaking): 21 | % mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',... 22 | % 'private/gasonMex.cpp','../common/gason.cpp',... 23 | % '-I../common/','-outdir','private'); 24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries 25 | % are included, please do not contact us for help with compiling. If needed 26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'. 27 | % 28 | % Note that by default JSON arrays that contain only numbers are stored as 29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of 30 | % the same type are stored as Matlab struct arrays. This is much faster and 31 | % can use considerably less memory than always using Matlab cell arrays. 
32 | % 33 | % USAGE 34 | % object = gason( string ) 35 | % string = gason( object ) 36 | % 37 | % INPUTS/OUTPUTS 38 | % string - JSON string 39 | % object - JSON object 40 | % 41 | % EXAMPLE 42 | % o = struct('first',{'piotr','ty'},'last',{'dollar','lin'}) 43 | % s = gason( o ) % convert JSON object -> JSON string 44 | % p = gason( s ) % convert JSON string -> JSON object 45 | % 46 | % See also 47 | % 48 | % Microsoft COCO Toolbox. version 2.0 49 | % Data, paper, and tutorials available at: http://mscoco.org/ 50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 51 | % Licensed under the Simplified BSD License [see coco/license.txt] 52 | 53 | out = gasonMex( 'convert', in ); 54 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/private/gasonMex.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "gason.h" 8 | #include "mex.h" 9 | #include "string.h" 10 | #include "math.h" 11 | #include 12 | #include 13 | #include 14 | typedef std::ostringstream ostrm; 15 | typedef unsigned long siz; 16 | typedef unsigned short ushort; 17 | 18 | siz length( const JsonValue &a ) { 19 | // get number of elements in JSON_ARRAY or JSON_OBJECT 20 | siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k; 21 | } 22 | 23 | bool isRegularObjArray( const JsonValue &a ) { 24 | // check if all JSON_OBJECTs in JSON_ARRAY have the same fields 25 | JsonValue o=a.toNode()->value; siz k, n; const char **keys; 26 | n=length(o); keys=new const char*[n]; 27 | k=0; for(auto j:o) keys[k++]=j->key; 28 | for( auto i:a ) { 29 | if(length(i->value)!=n) return false; k=0; 30 | for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false; 31 | } 32 | delete [] keys; return true; 33 | } 34 | 35 | mxArray* json( const JsonValue &o ) { 36 | // convert JsonValue to Matlab mxArray 37 | siz k, m, n; mxArray *M; const char **keys; 38 | switch( o.getTag() ) { 39 | case JSON_NUMBER: 40 | return mxCreateDoubleScalar(o.toNumber()); 41 | case JSON_STRING: 42 | return mxCreateString(o.toString()); 43 | case JSON_ARRAY: { 44 | if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL); 45 | JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag(); 46 | n=length(o); bool isRegular=true; 47 | for(auto i:o) isRegular=isRegular && i->value.getTag()==tag; 48 | if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) { 49 | m=length(o0); keys=new const char*[m]; 50 | k=0; for(auto j:o0) keys[k++]=j->key; 51 | M = mxCreateStructMatrix(1,n,m,keys); 52 | k=0; for(auto i:o) { m=0; for(auto j:i->value) 53 | mxSetFieldByNumber(M,k,m++,json(j->value)); k++; } 54 | delete [] keys; return M; 55 | } else if( isRegular && tag==JSON_NUMBER ) { 56 | M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M); 57 | k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M; 58 | } else { 59 | M = mxCreateCellMatrix(1,n); 60 | k=0; for(auto i:o) mxSetCell(M,k++,json(i->value)); 61 | return M; 62 | } 63 | } 64 | case JSON_OBJECT: 65 | if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL); 66 | n=length(o); keys=new const char*[n]; 67 | k=0; for(auto i:o) 
keys[k++]=i->key; 68 | M = mxCreateStructMatrix(1,1,n,keys); k=0; 69 | for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value)); 70 | delete [] keys; return M; 71 | case JSON_TRUE: 72 | return mxCreateDoubleScalar(1); 73 | case JSON_FALSE: 74 | return mxCreateDoubleScalar(0); 75 | case JSON_NULL: 76 | return mxCreateDoubleMatrix(0,0,mxREAL); 77 | default: return NULL; 78 | } 79 | } 80 | 81 | template ostrm& json( ostrm &S, T *A, siz n ) { 82 | // convert numeric array to JSON string with casting 83 | if(n==0) { S<<"[]"; return S; } if(n==1) { S< ostrm& json( ostrm &S, T *A, siz n ) { 89 | // convert numeric array to JSON string without casting 90 | return json(S,A,n); 91 | } 92 | 93 | ostrm& json( ostrm &S, const char *A ) { 94 | // convert char array to JSON string (handle escape characters) 95 | #define RPL(a,b) case a: { S << b; A++; break; } 96 | S << "\""; while( *A>0 ) switch( *A ) { 97 | RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b"); 98 | RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t"); 99 | default: S << *A; A++; 100 | } 101 | S << "\""; return S; 102 | } 103 | 104 | ostrm& json( ostrm& S, const JsonValue *o ) { 105 | // convert JsonValue to JSON string 106 | switch( o->getTag() ) { 107 | case JSON_NUMBER: S << o->toNumber(); return S; 108 | case JSON_TRUE: S << "true"; return S; 109 | case JSON_FALSE: S << "false"; return S; 110 | case JSON_NULL: S << "null"; return S; 111 | case JSON_STRING: return json(S,o->toString()); 112 | case JSON_ARRAY: 113 | S << "["; for(auto i:*o) { 114 | json(S,&i->value) << (i->next ? "," : ""); } 115 | S << "]"; return S; 116 | case JSON_OBJECT: 117 | S << "{"; for(auto i:*o) { 118 | json(S,i->key) << ":"; 119 | json(S,&i->value) << (i->next ? "," : ""); } 120 | S << "}"; return S; 121 | default: return S; 122 | } 123 | } 124 | 125 | ostrm& json( ostrm& S, const mxArray *M ) { 126 | // convert Matlab mxArray to JSON string 127 | siz i, j, m, n=mxGetNumberOfElements(M); 128 | void *A=mxGetData(M); ostrm *nms; 129 | switch( mxGetClassID(M) ) { 130 | case mxDOUBLE_CLASS: return json(S,(double*) A,n); 131 | case mxSINGLE_CLASS: return json(S,(float*) A,n); 132 | case mxINT64_CLASS: return json(S,(int64_t*) A,n); 133 | case mxUINT64_CLASS: return json(S,(uint64_t*) A,n); 134 | case mxINT32_CLASS: return json(S,(int32_t*) A,n); 135 | case mxUINT32_CLASS: return json(S,(uint32_t*) A,n); 136 | case mxINT16_CLASS: return json(S,(int16_t*) A,n); 137 | case mxUINT16_CLASS: return json(S,(uint16_t*) A,n); 138 | case mxINT8_CLASS: return json(S,(int8_t*) A,n); 139 | case mxUINT8_CLASS: return json(S,(uint8_t*) A,n); 140 | case mxLOGICAL_CLASS: return json(S,(uint8_t*) A,n); 141 | case mxCHAR_CLASS: return json(S,mxArrayToString(M)); 142 | case mxCELL_CLASS: 143 | S << "["; for(i=0; i0) json(S,mxGetCell(M,n-1)); S << "]"; return S; 145 | case mxSTRUCT_CLASS: 146 | if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M); 147 | if(m==0) { S<<"["; for(i=0; i1) S<<"["; nms=new ostrm[m]; 149 | for(j=0; j1) S<<"]"; delete [] nms; return S; 156 | default: 157 | mexErrMsgTxt( "Unknown type." 
); return S; 158 | } 159 | } 160 | 161 | mxArray* mxCreateStringRobust( const char* str ) { 162 | // convert char* to Matlab string (robust version of mxCreateString) 163 | mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)}; 164 | M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M); 165 | for( siz i=0; i1 ) mexErrMsgTxt("One output expected."); 182 | 183 | if(!strcmp(action,"convert")) { 184 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 185 | if( mxGetClassID(pr[0])==mxCHAR_CLASS ) { 186 | // object = mexFunction( string ) 187 | char *str = mxArrayToStringRobust(pr[0]); 188 | int status = jsonParse(str, &endptr, &val, allocator); 189 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 190 | pl[0] = json(val); mxFree(str); 191 | } else { 192 | // string = mexFunction( object ) 193 | ostrm S; S << std::setprecision(12); json(S,pr[0]); 194 | pl[0]=mxCreateStringRobust(S.str().c_str()); 195 | } 196 | 197 | } else if(!strcmp(action,"split")) { 198 | // strings = mexFunction( string, k ) 199 | if( nr!=2 ) mexErrMsgTxt("Two input expected."); 200 | char *str = mxArrayToStringRobust(pr[0]); 201 | int status = jsonParse(str, &endptr, &val, allocator); 202 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 203 | if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected"); 204 | siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]); 205 | k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k)); 206 | pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<value); t--; if(!o->next) t=0; S << (t ? "," : "]"); 210 | if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str())); 211 | } 212 | 213 | } else if(!strcmp(action,"merge")) { 214 | // string = mexFunction( strings ) 215 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 216 | if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected."); 217 | siz n = mxGetNumberOfElements(pr[0]); 218 | ostrm S; S << std::setprecision(12); S << "["; 219 | for( siz i=0; ivalue) << (j->next ? "," : ""); 225 | mxFree(str); if(i1) 14 | % [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] ) 15 | % 16 | % INPUTS 17 | % prm - param struct or cell of form {'name1' v1 'name2' v2 ...} 18 | % dfs - cell of form {'name1' def1 'name2' def2 ...} 19 | % checkExtra - [0] if 1 throw error if prm contains params not in dfs 20 | % if -1 if prm contains params not in dfs adds them 21 | % 22 | % OUTPUTS (nargout==1) 23 | % prm - parameter struct with fields 'name1' through 'nameN' assigned 24 | % 25 | % OUTPUTS (nargout>1) 26 | % param1 - value assigned to parameter with 'name1' 27 | % ... 28 | % paramN - value assigned to parameter with 'nameN' 29 | % 30 | % EXAMPLE 31 | % dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 }; 32 | % prm = getPrmDflt( struct('x',1,'y',1), dfs ) 33 | % [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs ) 34 | % 35 | % See also INPUTPARSER 36 | % 37 | % Piotr's Computer Vision Matlab Toolbox Version 2.60 38 | % Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] 39 | % Licensed under the Simplified BSD License [see external/bsd.txt] 40 | 41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end 42 | if nargin<=2, checkExtra = 0; end 43 | 44 | % get the input parameters as two cell arrays: prmVal and prmField 45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end 46 | if iscell(prm) 47 | if(mod(length(prm),2)), error('odd number of parameters in prm'); end 48 | prmField = prm(1:2:end); prmVal = prm(2:2:end); 49 | else 50 | if(~isstruct(prm)), error('prm must be a struct or a cell'); end 51 | prmVal = struct2cell(prm); prmField = fieldnames(prm); 52 | end 53 | 54 | % get and update default values using quick for loop 55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end); 56 | if checkExtra>0 57 | for i=1:length(prmField) 58 | j = find(strcmp(prmField{i},dfsField)); 59 | if isempty(j), error('parameter %s is not valid', prmField{i}); end 60 | dfsVal(j) = prmVal(i); 61 | end 62 | elseif checkExtra<0 63 | for i=1:length(prmField) 64 | j = find(strcmp(prmField{i},dfsField)); 65 | if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end 66 | dfsVal(j) = prmVal(i); 67 | end 68 | else 69 | for i=1:length(prmField) 70 | dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i); 71 | end 72 | end 73 | 74 | % check for missing values 75 | if any(strcmp('REQ',dfsVal)) 76 | cmpArray = find(strcmp('REQ',dfsVal)); 77 | error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] ); 78 | end 79 | 80 | % set output 81 | if nargout==1 82 | varargout{1} = cell2struct( dfsVal, dfsField, 2 ); 83 | else 84 | varargout = dfsVal; 85 | end 86 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/private/maskApiMex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "mex.h" 8 | #include "maskApi.h" 9 | #include 10 | 11 | void checkType( const mxArray *M, mxClassID id ) { 12 | if(mxGetClassID(M)!=id) mexErrMsgTxt("Invalid type."); 13 | } 14 | 15 | mxArray* toMxArray( const RLE *R, siz n ) { 16 | const char *fs[] = {"size", "counts"}; 17 | mxArray *M=mxCreateStructMatrix(1,n,2,fs); 18 | for( siz i=0; i1) mexErrMsgTxt(err); 35 | for( i=0; i<*n; i++ ) { 36 | mxArray *S, *C; double *s; void *c; 37 | S=mxGetFieldByNumber(M,i,O[0]); checkType(S,mxDOUBLE_CLASS); 38 | C=mxGetFieldByNumber(M,i,O[1]); s=mxGetPr(S); c=mxGetData(C); 39 | h=(siz)s[0]; w=(siz)s[1]; m=mxGetNumberOfElements(C); 40 | if(same && i>0 && (h!=R[0].h || w!=R[0].w)) mexErrMsgTxt(err); 41 | if( mxGetClassID(C)==mxDOUBLE_CLASS ) { 42 | rleInit(R+i,h,w,m,0); 43 | for(j=0; j=2) ? (mxGetScalar(pr[1])>0) : false; 74 | rleMerge(R,&M,n,intersect); pl[0]=toMxArray(&M,1); rleFree(&M); 75 | 76 | } else if(!strcmp(action,"area")) { 77 | R=frMxArray(pr[0],&n,0); 78 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 79 | uint *a=(uint*) mxGetPr(pl[0]); rleArea(R,n,a); 80 | 81 | } else if(!strcmp(action,"iou")) { 82 | if(nr>2) checkType(pr[2],mxUINT8_CLASS); siz nDt, nGt; 83 | byte *iscrowd = nr>2 ? 
(byte*) mxGetPr(pr[2]) : NULL; 84 | if(mxIsStruct(pr[0]) || mxIsStruct(pr[1])) { 85 | RLE *dt=frMxArray(pr[0],&nDt,1), *gt=frMxArray(pr[1],&nGt,1); 86 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 87 | double *o=mxGetPr(pl[0]); rleIou(dt,gt,nDt,nGt,iscrowd,o); 88 | rlesFree(&dt,nDt); rlesFree(>,nGt); 89 | } else { 90 | checkType(pr[0],mxDOUBLE_CLASS); checkType(pr[1],mxDOUBLE_CLASS); 91 | double *dt=mxGetPr(pr[0]); nDt=mxGetN(pr[0]); 92 | double *gt=mxGetPr(pr[1]); nGt=mxGetN(pr[1]); 93 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 94 | double *o=mxGetPr(pl[0]); bbIou(dt,gt,nDt,nGt,iscrowd,o); 95 | } 96 | 97 | } else if(!strcmp(action,"nms")) { 98 | siz n; uint *keep; double thr=(double) mxGetScalar(pr[1]); 99 | if(mxIsStruct(pr[0])) { 100 | RLE *dt=frMxArray(pr[0],&n,1); 101 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 102 | keep=(uint*) mxGetPr(pl[0]); rleNms(dt,n,keep,thr); 103 | rlesFree(&dt,n); 104 | } else { 105 | checkType(pr[0],mxDOUBLE_CLASS); 106 | double *dt=mxGetPr(pr[0]); n=mxGetN(pr[0]); 107 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 108 | keep=(uint*) mxGetPr(pl[0]); bbNms(dt,n,keep,thr); 109 | } 110 | 111 | } else if(!strcmp(action,"toBbox")) { 112 | R=frMxArray(pr[0],&n,0); 113 | pl[0]=mxCreateNumericMatrix(4,n,mxDOUBLE_CLASS,mxREAL); 114 | BB bb=mxGetPr(pl[0]); rleToBbox(R,bb,n); 115 | 116 | } else if(!strcmp(action,"frBbox")) { 117 | checkType(pr[0],mxDOUBLE_CLASS); 118 | double *bb=mxGetPr(pr[0]); n=mxGetN(pr[0]); 119 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); 120 | rlesInit(&R,n); rleFrBbox(R,bb,h,w,n); pl[0]=toMxArray(R,n); 121 | 122 | } else if(!strcmp(action,"frPoly")) { 123 | checkType(pr[0],mxCELL_CLASS); n=mxGetNumberOfElements(pr[0]); 124 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); rlesInit(&R,n); 125 | for(siz i=0; i=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /libs/cocoapi/README.txt: -------------------------------------------------------------------------------- 1 | COCO API - http://cocodataset.org/ 2 | 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assists in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including for the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete, the Lua API provides only basic functionality. 4 | 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website. 6 | -Please download, unzip, and place the images in: coco/images/ 7 | -Please download and place the annotations in: coco/annotations/ 8 | For substantially more details on the API please see http://cocodataset.org/#download. 9 | 10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage. 
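For reference, a minimal Python sketch of the loading and evaluation flow that the demos walk through (the annotation and result file paths below are illustrative and depend on which split and results file you have downloaded into coco/):
```
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# load ground-truth annotations and query them
coco_gt = COCO('coco/annotations/instances_val2014.json')
cat_ids = coco_gt.getCatIds(catNms=['person'])
img_ids = coco_gt.getImgIds(catIds=cat_ids)
anns = coco_gt.loadAnns(coco_gt.getAnnIds(imgIds=img_ids[0], catIds=cat_ids, iscrowd=None))
print(len(anns), 'person annotations in image', img_ids[0])

# score a detection results file against the ground truth (bbox metric)
coco_dt = coco_gt.loadRes('results/instances_val2014_fakebbox100_results.json')
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
```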
11 | 12 | To install: 13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided) 14 | -For Python, run "make" under coco/PythonAPI 15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/ 16 | -------------------------------------------------------------------------------- /libs/cocoapi/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void *JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /libs/cocoapi/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /libs/cocoapi/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 174 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 175 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 176 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 177 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 178 | } else for( d=0; d<=dy; d++ ) { 179 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 180 | } 181 | } 182 | /* get points along y-boundary and downsample */ 183 | free(x); free(y); k=m; m=0; double xd, yd; 184 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 185 | for( j=1; jw-1 ) continue; 188 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 190 | x[m]=(int) xd; y[m]=(int) yd; m++; 191 | } 192 | /* compute rle encoding given y-boundary points */ 193 | k=m; a=malloc(sizeof(uint)*(k+1)); 194 | for( j=0; j0) b[m++]=a[j++]; else { 200 | j++; if(jm, p=0; long x; int more; 207 | char *s=malloc(sizeof(char)*m*6); 208 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 210 | while( more ) { 211 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 212 | if(more) c |= 0x20; c+=48; s[p++]=c; 213 | } 214 | } 215 | s[p]=0; return s; 216 | } 217 | 218 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 219 | siz m=0, p=0, k; long x; int more; uint *cnts; 220 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 221 | while( s[p] ) { 222 | x=0; k=0; more=1; 223 | while( more ) { 224 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 225 | more = c & 0x20; p++; k++; 226 | if(!more && (c & 0x10)) x |= -1 << 5*k; 227 | } 228 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 229 | } 230 | rleInit(R,h,w,m,cnts); free(cnts); 231 | } 232 | -------------------------------------------------------------------------------- /libs/cocoapi/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /libs/cocoapi/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /libs/cocoapi/results/val2014_fake_eval_res.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | type=segm 3 | Running per image evaluation... DONE (t=0.45s). 4 | Accumulating evaluation results... DONE (t=0.08s). 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.320 6 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.562 7 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.299 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.387 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.310 10 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.327 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.268 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.415 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.417 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.469 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.377 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.381 17 | 18 | ------------------------------------------------------------------------------ 19 | type=bbox 20 | Running per image evaluation... DONE (t=0.34s). 21 | Accumulating evaluation results... DONE (t=0.08s). 22 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505 23 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697 24 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573 25 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586 26 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519 27 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501 28 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387 29 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594 30 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595 31 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640 32 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566 33 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564 34 | 35 | ------------------------------------------------------------------------------ 36 | type=keypoints 37 | Running per image evaluation... DONE (t=0.06s). 38 | Accumulating evaluation results... DONE (t=0.00s). 
39 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.372 40 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.636 41 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.348 42 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.384 43 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.386 44 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.514 45 | Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.734 46 | Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.504 47 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.508 48 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.522 49 | -------------------------------------------------------------------------------- /libs/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /libs/dcn/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import deform_pool_cuda 8 | 9 | 10 | class DeformRoIPoolingFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, 14 | data, 15 | rois, 16 | offset, 17 | spatial_scale, 18 | out_size, 19 | out_channels, 20 | no_trans, 21 | group_size=1, 22 | part_size=None, 23 | sample_per_part=4, 24 | trans_std=.0): 25 | # TODO: support unsquare RoIs 26 | out_h, out_w = _pair(out_size) 27 | assert isinstance(out_h, int) and isinstance(out_w, int) 28 | assert out_h == out_w 29 | out_size = out_h # out_h and out_w must be equal 30 | 31 | ctx.spatial_scale = spatial_scale 32 | ctx.out_size = out_size 33 | ctx.out_channels = out_channels 34 | ctx.no_trans = no_trans 35 | ctx.group_size = group_size 36 | ctx.part_size = out_size if part_size is None else part_size 37 | ctx.sample_per_part = sample_per_part 38 | ctx.trans_std = trans_std 39 | 40 | assert 0.0 <= ctx.trans_std <= 1.0 41 | if not data.is_cuda: 42 | raise NotImplementedError 43 | 44 | n = rois.shape[0] 45 | output = data.new_empty(n, out_channels, out_size, out_size) 46 | output_count = data.new_empty(n, out_channels, out_size, out_size) 47 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 48 | data, rois, offset, output, output_count, ctx.no_trans, 49 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 50 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 51 | 52 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 53 | ctx.save_for_backward(data, rois, offset) 54 | ctx.output_count = output_count 55 | 56 | return output 57 | 58 | @staticmethod 59 | @once_differentiable 60 | def backward(ctx, grad_output): 61 | if not grad_output.is_cuda: 62 | raise NotImplementedError 63 | 64 | data, rois, offset = ctx.saved_tensors 65 | output_count = ctx.output_count 66 | grad_input = torch.zeros_like(data) 67 | grad_rois = None 68 | grad_offset = torch.zeros_like(offset) 69 | 70 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 71 | grad_output, data, rois, offset, output_count, grad_input, 72 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 73 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 74 | ctx.trans_std) 75 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 76 | None, None, None, None) 77 | 78 | 79 | deform_roi_pooling = DeformRoIPoolingFunction.apply 80 | 81 | 82 | class DeformRoIPooling(nn.Module): 83 | 84 | def __init__(self, 85 | spatial_scale, 86 | out_size, 87 | out_channels, 88 | no_trans, 89 | group_size=1, 90 | part_size=None, 91 | sample_per_part=4, 92 | trans_std=.0): 93 | super(DeformRoIPooling, self).__init__() 94 | self.spatial_scale = spatial_scale 95 | self.out_size = _pair(out_size) 96 | self.out_channels = out_channels 97 | self.no_trans = no_trans 98 | self.group_size = group_size 99 | self.part_size = out_size if part_size is None else part_size 100 | self.sample_per_part = sample_per_part 101 | self.trans_std = trans_std 102 | 103 | def forward(self, data, rois, offset): 104 | if self.no_trans: 105 | offset = data.new_empty(0) 106 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 107 | self.out_size, self.out_channels, 108 | self.no_trans, self.group_size, 109 | self.part_size, self.sample_per_part, 110 | self.trans_std) 111 | 112 | 113 | class DeformRoIPoolingPack(DeformRoIPooling): 114 | 115 | def __init__(self, 116 | spatial_scale, 117 | out_size, 118 | out_channels, 119 | no_trans, 120 | group_size=1, 121 | part_size=None, 122 | sample_per_part=4, 123 | trans_std=.0, 124 | num_offset_fcs=3, 125 | 
deform_fc_channels=1024): 126 | super(DeformRoIPoolingPack, 127 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 128 | group_size, part_size, sample_per_part, trans_std) 129 | 130 | self.num_offset_fcs = num_offset_fcs 131 | self.deform_fc_channels = deform_fc_channels 132 | 133 | if not no_trans: 134 | seq = [] 135 | ic = self.out_size[0] * self.out_size[1] * self.out_channels 136 | for i in range(self.num_offset_fcs): 137 | if i < self.num_offset_fcs - 1: 138 | oc = self.deform_fc_channels 139 | else: 140 | oc = self.out_size[0] * self.out_size[1] * 2 141 | seq.append(nn.Linear(ic, oc)) 142 | ic = oc 143 | if i < self.num_offset_fcs - 1: 144 | seq.append(nn.ReLU(inplace=True)) 145 | self.offset_fc = nn.Sequential(*seq) 146 | self.offset_fc[-1].weight.data.zero_() 147 | self.offset_fc[-1].bias.data.zero_() 148 | 149 | def forward(self, data, rois): 150 | assert data.size(1) == self.out_channels 151 | n = rois.shape[0] 152 | if n == 0: 153 | return data.new_empty(n, self.out_channels, self.out_size[0], 154 | self.out_size[1]) 155 | if self.no_trans: 156 | offset = data.new_empty(0) 157 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 158 | self.out_size, self.out_channels, 159 | self.no_trans, self.group_size, 160 | self.part_size, self.sample_per_part, 161 | self.trans_std) 162 | else: 163 | offset = data.new_empty(0) 164 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 165 | self.out_size, self.out_channels, True, 166 | self.group_size, self.part_size, 167 | self.sample_per_part, self.trans_std) 168 | offset = self.offset_fc(x.view(n, -1)) 169 | offset = offset.view(n, 2, self.out_size[0], self.out_size[1]) 170 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 171 | self.out_size, self.out_channels, 172 | self.no_trans, self.group_size, 173 | self.part_size, self.sample_per_part, 174 | self.trans_std) 175 | 176 | 177 | class ModulatedDeformRoIPoolingPack(DeformRoIPooling): 178 | 179 | def __init__(self, 180 | spatial_scale, 181 | out_size, 182 | out_channels, 183 | no_trans, 184 | group_size=1, 185 | part_size=None, 186 | sample_per_part=4, 187 | trans_std=.0, 188 | num_offset_fcs=3, 189 | num_mask_fcs=2, 190 | deform_fc_channels=1024): 191 | super(ModulatedDeformRoIPoolingPack, 192 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 193 | group_size, part_size, sample_per_part, trans_std) 194 | 195 | self.num_offset_fcs = num_offset_fcs 196 | self.num_mask_fcs = num_mask_fcs 197 | self.deform_fc_channels = deform_fc_channels 198 | 199 | if not no_trans: 200 | offset_fc_seq = [] 201 | ic = self.out_size[0] * self.out_size[1] * self.out_channels 202 | for i in range(self.num_offset_fcs): 203 | if i < self.num_offset_fcs - 1: 204 | oc = self.deform_fc_channels 205 | else: 206 | oc = self.out_size[0] * self.out_size[1] * 2 207 | offset_fc_seq.append(nn.Linear(ic, oc)) 208 | ic = oc 209 | if i < self.num_offset_fcs - 1: 210 | offset_fc_seq.append(nn.ReLU(inplace=True)) 211 | self.offset_fc = nn.Sequential(*offset_fc_seq) 212 | self.offset_fc[-1].weight.data.zero_() 213 | self.offset_fc[-1].bias.data.zero_() 214 | 215 | mask_fc_seq = [] 216 | ic = self.out_size[0] * self.out_size[1] * self.out_channels 217 | for i in range(self.num_mask_fcs): 218 | if i < self.num_mask_fcs - 1: 219 | oc = self.deform_fc_channels 220 | else: 221 | oc = self.out_size[0] * self.out_size[1] 222 | mask_fc_seq.append(nn.Linear(ic, oc)) 223 | ic = oc 224 | if i < self.num_mask_fcs - 1: 225 | mask_fc_seq.append(nn.ReLU(inplace=True)) 
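                    # note: the final mask layer is followed by a Sigmoid (else-branch below), so the
                    # predicted modulation mask lies in (0, 1); because the last Linear is
                    # zero-initialised further down (self.mask_fc[-2]), the mask starts out at 0.5.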
226 | else: 227 | mask_fc_seq.append(nn.Sigmoid()) 228 | self.mask_fc = nn.Sequential(*mask_fc_seq) 229 | self.mask_fc[-2].weight.data.zero_() 230 | self.mask_fc[-2].bias.data.zero_() 231 | 232 | def forward(self, data, rois): 233 | assert data.size(1) == self.out_channels 234 | n = rois.shape[0] 235 | if n == 0: 236 | return data.new_empty(n, self.out_channels, self.out_size[0], 237 | self.out_size[1]) 238 | if self.no_trans: 239 | offset = data.new_empty(0) 240 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 241 | self.out_size, self.out_channels, 242 | self.no_trans, self.group_size, 243 | self.part_size, self.sample_per_part, 244 | self.trans_std) 245 | else: 246 | offset = data.new_empty(0) 247 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 248 | self.out_size, self.out_channels, True, 249 | self.group_size, self.part_size, 250 | self.sample_per_part, self.trans_std) 251 | offset = self.offset_fc(x.view(n, -1)) 252 | offset = offset.view(n, 2, self.out_size[0], self.out_size[1]) 253 | mask = self.mask_fc(x.view(n, -1)) 254 | mask = mask.view(n, 1, self.out_size[0], self.out_size[1]) 255 | return deform_roi_pooling( 256 | data, rois, offset, self.spatial_scale, self.out_size, 257 | self.out_channels, self.no_trans, self.group_size, 258 | self.part_size, self.sample_per_part, self.trans_std) * mask 259 | -------------------------------------------------------------------------------- /libs/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | void DeformablePSROIPoolForward( 15 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 16 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 17 | const int height, const int width, const int num_bbox, 18 | const int channels_trans, const int no_trans, const float spatial_scale, 19 | const int output_dim, const int group_size, const int pooled_size, 20 | const int part_size, const int sample_per_part, const float trans_std); 21 | 22 | void DeformablePSROIPoolBackwardAcc( 23 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 24 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 25 | at::Tensor trans_grad, const int batch, const int channels, 26 | const int height, const int width, const int num_bbox, 27 | const int channels_trans, const int no_trans, const float spatial_scale, 28 | const int output_dim, const int group_size, const int pooled_size, 29 | const int part_size, const int sample_per_part, const float trans_std); 30 | 31 | void deform_psroi_pooling_cuda_forward( 32 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 33 | at::Tensor top_count, const int no_trans, const float spatial_scale, 34 | const int output_dim, const int group_size, const int pooled_size, 35 | const int part_size, const int sample_per_part, const float trans_std) { 36 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 37 | at::DeviceGuard guard(input.device()); 38 | 39 | const int batch = input.size(0); 40 | const int channels = input.size(1); 41 | const int height = input.size(2); 42 | const 
int width = input.size(3); 43 | const int channels_trans = no_trans ? 2 : trans.size(1); 44 | 45 | const int num_bbox = bbox.size(0); 46 | if (num_bbox != out.size(0)) 47 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 48 | out.size(0), num_bbox); 49 | 50 | DeformablePSROIPoolForward( 51 | input, bbox, trans, out, top_count, batch, channels, height, width, 52 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 53 | pooled_size, part_size, sample_per_part, trans_std); 54 | } 55 | 56 | void deform_psroi_pooling_cuda_backward( 57 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 58 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 59 | const int no_trans, const float spatial_scale, const int output_dim, 60 | const int group_size, const int pooled_size, const int part_size, 61 | const int sample_per_part, const float trans_std) { 62 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 63 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 64 | at::DeviceGuard guard(input.device()); 65 | 66 | const int batch = input.size(0); 67 | const int channels = input.size(1); 68 | const int height = input.size(2); 69 | const int width = input.size(3); 70 | const int channels_trans = no_trans ? 2 : trans.size(1); 71 | 72 | const int num_bbox = bbox.size(0); 73 | if (num_bbox != out_grad.size(0)) 74 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 75 | out_grad.size(0), num_bbox); 76 | 77 | DeformablePSROIPoolBackwardAcc( 78 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 79 | channels, height, width, num_bbox, channels_trans, no_trans, 80 | spatial_scale, output_dim, group_size, pooled_size, part_size, 81 | sample_per_part, trans_std); 82 | } 83 | 84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 85 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 86 | "deform psroi pooling forward(CUDA)"); 87 | m.def("deform_psroi_pooling_cuda_backward", 88 | &deform_psroi_pooling_cuda_backward, 89 | "deform psroi pooling backward(CUDA)"); 90 | } 91 | -------------------------------------------------------------------------------- /libs/font.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionBLWX/FPN.pytorch/baa35139912edb23e1f153b8684b498061c70e92/libs/font.TTF -------------------------------------------------------------------------------- /libs/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd cocoapi/PythonAPI 6 | make 7 | cd .. 8 | cd .. 9 | 10 | python setup.py develop 11 | rm -rf build 12 | rm -rf FPN.egg-info -------------------------------------------------------------------------------- /libs/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /libs/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | 6 | 7 | def nms(dets, iou_thr, device_id=None): 8 | """Dispatch to either CPU or GPU NMS implementations. 9 | 10 | The input can be either a torch tensor or numpy array. 
GPU NMS will be used 11 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 12 | will be used. The returned type will always be the same as inputs. 13 | 14 | Arguments: 15 | dets (torch.Tensor or np.ndarray): bboxes with scores. 16 | iou_thr (float): IoU threshold for NMS. 17 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 18 | is None, then cpu nms is used, otherwise gpu_nms will be used. 19 | 20 | Returns: 21 | tuple: kept bboxes and indice, which is always the same data type as 22 | the input. 23 | 24 | Example: 25 | >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 26 | >>> [49.3, 32.9, 51.0, 35.3, 0.9], 27 | >>> [49.2, 31.8, 51.0, 35.4, 0.5], 28 | >>> [35.1, 11.5, 39.1, 15.7, 0.5], 29 | >>> [35.6, 11.8, 39.3, 14.2, 0.5], 30 | >>> [35.3, 11.5, 39.9, 14.5, 0.4], 31 | >>> [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32) 32 | >>> iou_thr = 0.7 33 | >>> suppressed, inds = nms(dets, iou_thr) 34 | >>> assert len(inds) == len(suppressed) == 3 35 | """ 36 | # convert dets (tensor or numpy array) to tensor 37 | if isinstance(dets, torch.Tensor): 38 | is_numpy = False 39 | dets_th = dets 40 | elif isinstance(dets, np.ndarray): 41 | is_numpy = True 42 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 43 | dets_th = torch.from_numpy(dets).to(device) 44 | else: 45 | raise TypeError( 46 | 'dets must be either a Tensor or numpy array, but got {}'.format( 47 | type(dets))) 48 | 49 | # execute cpu or cuda nms 50 | if dets_th.shape[0] == 0: 51 | inds = dets_th.new_zeros(0, dtype=torch.long) 52 | else: 53 | if dets_th.is_cuda: 54 | inds = nms_cuda.nms(dets_th, iou_thr) 55 | else: 56 | inds = nms_cpu.nms(dets_th, iou_thr) 57 | 58 | if is_numpy: 59 | inds = inds.cpu().numpy() 60 | return dets[inds, :], inds 61 | 62 | 63 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 64 | """Dispatch to only CPU Soft NMS implementations. 65 | 66 | The input can be either a torch tensor or numpy array. 67 | The returned type will always be the same as inputs. 68 | 69 | Arguments: 70 | dets (torch.Tensor or np.ndarray): bboxes with scores. 71 | iou_thr (float): IoU threshold for Soft NMS. 72 | method (str): either 'linear' or 'gaussian' 73 | sigma (float): hyperparameter for gaussian method 74 | min_score (float): score filter threshold 75 | 76 | Returns: 77 | tuple: new det bboxes and indice, which is always the same 78 | data type as the input. 
79 | 80 | Example: 81 | >>> dets = np.array([[4., 3., 5., 3., 0.9], 82 | >>> [4., 3., 5., 4., 0.9], 83 | >>> [3., 1., 3., 1., 0.5], 84 | >>> [3., 1., 3., 1., 0.5], 85 | >>> [3., 1., 3., 1., 0.4], 86 | >>> [3., 1., 3., 1., 0.0]], dtype=np.float32) 87 | >>> iou_thr = 0.7 88 | >>> new_dets, inds = soft_nms(dets, iou_thr, sigma=0.5) 89 | >>> assert len(inds) == len(new_dets) == 3 90 | """ 91 | # convert dets (tensor or numpy array) to tensor 92 | if isinstance(dets, torch.Tensor): 93 | is_tensor = True 94 | dets_t = dets.detach().cpu() 95 | elif isinstance(dets, np.ndarray): 96 | is_tensor = False 97 | dets_t = torch.from_numpy(dets) 98 | else: 99 | raise TypeError( 100 | 'dets must be either a Tensor or numpy array, but got {}'.format( 101 | type(dets))) 102 | 103 | method_codes = {'linear': 1, 'gaussian': 2} 104 | if method not in method_codes: 105 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 106 | results = nms_cpu.soft_nms(dets_t, iou_thr, method_codes[method], sigma, 107 | min_score) 108 | 109 | new_dets = results[:, :5] 110 | inds = results[:, 5] 111 | 112 | if is_tensor: 113 | return new_dets.to( 114 | device=dets.device, dtype=dets.dtype), inds.to( 115 | device=dets.device, dtype=torch.long) 116 | else: 117 | return new_dets.numpy().astype(dets.dtype), inds.numpy().astype( 118 | np.int64) 119 | -------------------------------------------------------------------------------- /libs/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Modified from https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx, Soft-NMS is added 2 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | #include 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 7 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 8 | 9 | if (dets.numel() == 0) { 10 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 11 | } 12 | 13 | auto x1_t = dets.select(1, 0).contiguous(); 14 | auto y1_t = dets.select(1, 1).contiguous(); 15 | auto x2_t = dets.select(1, 2).contiguous(); 16 | auto y2_t = dets.select(1, 3).contiguous(); 17 | auto scores = dets.select(1, 4).contiguous(); 18 | 19 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 20 | 21 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 22 | 23 | auto ndets = dets.size(0); 24 | at::Tensor suppressed_t = 25 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 26 | 27 | auto suppressed = suppressed_t.data(); 28 | auto order = order_t.data(); 29 | auto x1 = x1_t.data(); 30 | auto y1 = y1_t.data(); 31 | auto x2 = x2_t.data(); 32 | auto y2 = y2_t.data(); 33 | auto areas = areas_t.data(); 34 | 35 | for (int64_t _i = 0; _i < ndets; _i++) { 36 | auto i = order[_i]; 37 | if (suppressed[i] == 1) continue; 38 | auto ix1 = x1[i]; 39 | auto iy1 = y1[i]; 40 | auto ix2 = x2[i]; 41 | auto iy2 = y2[i]; 42 | auto iarea = areas[i]; 43 | 44 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 45 | auto j = order[_j]; 46 | if (suppressed[j] == 1) continue; 47 | auto xx1 = std::max(ix1, x1[j]); 48 | auto yy1 = std::max(iy1, y1[j]); 49 | auto xx2 = std::min(ix2, x2[j]); 50 | auto yy2 = std::min(iy2, y2[j]); 51 | 52 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 53 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 54 | auto inter = w * h; 55 | auto ovr = inter / (iarea + areas[j] - inter); 56 | if (ovr >= threshold) suppressed[j] = 1; 57 | } 58 | } 59 | 
return at::nonzero(suppressed_t == 0).squeeze(1);
60 | }
61 | 
62 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
63 |   at::Tensor result;
64 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
65 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
66 |   });
67 |   return result;
68 | }
69 | 
70 | template <typename scalar_t>
71 | at::Tensor soft_nms_cpu_kernel(const at::Tensor& dets, const float threshold,
72 |                                const unsigned char method, const float sigma, const float min_score) {
73 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
74 | 
75 |   if (dets.numel() == 0) {
76 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
77 |   }
78 | 
79 |   auto x1_t = dets.select(1, 0).contiguous();
80 |   auto y1_t = dets.select(1, 1).contiguous();
81 |   auto x2_t = dets.select(1, 2).contiguous();
82 |   auto y2_t = dets.select(1, 3).contiguous();
83 |   auto scores_t = dets.select(1, 4).contiguous();
84 | 
85 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
86 | 
87 |   auto ndets = dets.size(0);
88 |   auto x1 = x1_t.data<scalar_t>();
89 |   auto y1 = y1_t.data<scalar_t>();
90 |   auto x2 = x2_t.data<scalar_t>();
91 |   auto y2 = y2_t.data<scalar_t>();
92 |   auto scores = scores_t.data<scalar_t>();
93 |   auto areas = areas_t.data<scalar_t>();
94 | 
95 |   int64_t pos = 0;
96 |   at::Tensor inds_t = at::arange(ndets, dets.options());
97 |   auto inds = inds_t.data<scalar_t>();
98 | 
99 |   for (int64_t i = 0; i < ndets; i++) {
100 |     auto max_score = scores[i];
101 |     auto max_pos = i;
102 | 
103 |     auto ix1 = x1[i];
104 |     auto iy1 = y1[i];
105 |     auto ix2 = x2[i];
106 |     auto iy2 = y2[i];
107 |     auto iscore = scores[i];
108 |     auto iarea = areas[i];
109 |     auto iind = inds[i];
110 | 
111 |     pos = i + 1;
112 |     // get max box
113 |     while (pos < ndets) {
114 |       if (max_score < scores[pos]) {
115 |         max_score = scores[pos];
116 |         max_pos = pos;
117 |       }
118 |       pos = pos + 1;
119 |     }
120 |     // add max box as a detection
121 |     x1[i] = x1[max_pos];
122 |     y1[i] = y1[max_pos];
123 |     x2[i] = x2[max_pos];
124 |     y2[i] = y2[max_pos];
125 |     scores[i] = scores[max_pos];
126 |     areas[i] = areas[max_pos];
127 |     inds[i] = inds[max_pos];
128 | 
129 |     // swap ith box with position of max box
130 |     x1[max_pos] = ix1;
131 |     y1[max_pos] = iy1;
132 |     x2[max_pos] = ix2;
133 |     y2[max_pos] = iy2;
134 |     scores[max_pos] = iscore;
135 |     areas[max_pos] = iarea;
136 |     inds[max_pos] = iind;
137 | 
138 |     ix1 = x1[i];
139 |     iy1 = y1[i];
140 |     ix2 = x2[i];
141 |     iy2 = y2[i];
142 |     iscore = scores[i];
143 |     iarea = areas[i];
144 | 
145 |     pos = i + 1;
146 |     // NMS iterations, note that N changes if detection boxes fall below threshold
147 |     while (pos < ndets) {
148 |       auto xx1 = std::max(ix1, x1[pos]);
149 |       auto yy1 = std::max(iy1, y1[pos]);
150 |       auto xx2 = std::min(ix2, x2[pos]);
151 |       auto yy2 = std::min(iy2, y2[pos]);
152 | 
153 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
154 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
155 |       auto inter = w * h;
156 |       auto ovr = inter / (iarea + areas[pos] - inter);
157 | 
158 |       scalar_t weight = 1.;
159 |       if (method == 1) {
160 |         if (ovr > threshold) weight = 1 - ovr;
161 |       }
162 |       else if (method == 2) {
163 |         weight = std::exp(-(ovr * ovr) / sigma);
164 |       }
165 |       else {
166 |         // original NMS
167 |         if (ovr > threshold) {
168 |           weight = 0;
169 |         }
170 |         else {
171 |           weight = 1;
172 |         }
173 |       }
174 |       scores[pos] = weight * scores[pos];
175 |       // if box score falls below threshold, discard the box by
176 |       // swapping with last box and update N
177 |       if (scores[pos] < min_score) {
178 |         x1[pos] = x1[ndets - 1];
179 |         y1[pos] = y1[ndets - 1];
180 |         x2[pos] = x2[ndets - 1];
181 |         y2[pos] = y2[ndets - 1];
182 |         scores[pos] = scores[ndets - 1];
183 |         areas[pos] = areas[ndets - 1];
184 |         inds[pos] = inds[ndets - 1];
185 |         ndets = ndets - 1;
186 |         pos = pos - 1;
187 |       }
188 |       pos = pos + 1;
189 |     }
190 |   }
191 |   at::Tensor result = at::zeros({6, ndets}, dets.options());
192 |   result[0] = x1_t.slice(0, 0, ndets);
193 |   result[1] = y1_t.slice(0, 0, ndets);
194 |   result[2] = x2_t.slice(0, 0, ndets);
195 |   result[3] = y2_t.slice(0, 0, ndets);
196 |   result[4] = scores_t.slice(0, 0, ndets);
197 |   result[5] = inds_t.slice(0, 0, ndets);
198 | 
199 |   result = result.t().contiguous();
200 |   return result;
201 | }
202 | 
203 | at::Tensor soft_nms(const at::Tensor& dets, const float threshold,
204 |                     const unsigned char method, const float sigma, const float min_score) {
205 |   at::Tensor result;
206 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "soft_nms", [&] {
207 |     result = soft_nms_cpu_kernel<scalar_t>(dets, threshold, method, sigma, min_score);
208 |   });
209 |   return result;
210 | }
211 | 
212 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
213 |   m.def("nms", &nms, "non-maximum suppression");
214 |   m.def("soft_nms", &soft_nms, "soft non-maximum suppression");
215 | }
216 | 
-------------------------------------------------------------------------------- /libs/nms/src/nms_cuda.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h>
3 | 
4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
5 | 
6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
7 | 
8 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
9 |   CHECK_CUDA(dets);
10 |   if (dets.numel() == 0)
11 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 |   return nms_cuda(dets, threshold);
13 | }
14 | 
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16 |   m.def("nms", &nms, "non-maximum suppression");
17 | }
18 | 
-------------------------------------------------------------------------------- /libs/nms/src/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
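// A rough reading guide for this file (summary only; the code below is authoritative):
// - threadsPerBlock is sizeof(unsigned long long) * 8 = 64, so boxes are handled in tiles
//   of 64 and one bit of an unsigned long long stands for one box;
// - nms_kernel runs on a 2D grid of (row tile, column tile) pairs: each thread loads one
//   box from its row tile, compares it against the column tile cached in shared memory,
//   and sets bit i of a 64-bit mask whenever devIoU(...) exceeds nms_overlap_thresh;
// - the per-box masks are written to dev_mask and reduced on the host in nms_cuda, which
//   greedily keeps a box only if no higher-scoring kept box has already suppressed it.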
2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 13 | 14 | __device__ inline float devIoU(float const * const a, float const * const b) { 15 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 16 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 17 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 18 | float interS = width * height; 19 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 20 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 21 | return interS / (Sa + Sb - interS); 22 | } 23 | 24 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 25 | const float *dev_boxes, unsigned long long *dev_mask) { 26 | const int row_start = blockIdx.y; 27 | const int col_start = blockIdx.x; 28 | 29 | // if (row_start > col_start) return; 30 | 31 | const int row_size = 32 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 33 | const int col_size = 34 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 35 | 36 | __shared__ float block_boxes[threadsPerBlock * 5]; 37 | if (threadIdx.x < col_size) { 38 | block_boxes[threadIdx.x * 5 + 0] = 39 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 40 | block_boxes[threadIdx.x * 5 + 1] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 42 | block_boxes[threadIdx.x * 5 + 2] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 44 | block_boxes[threadIdx.x * 5 + 3] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 46 | block_boxes[threadIdx.x * 5 + 4] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 48 | } 49 | __syncthreads(); 50 | 51 | if (threadIdx.x < row_size) { 52 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 53 | const float *cur_box = dev_boxes + cur_box_idx * 5; 54 | int i = 0; 55 | unsigned long long t = 0; 56 | int start = 0; 57 | if (row_start == col_start) { 58 | start = threadIdx.x + 1; 59 | } 60 | for (i = start; i < col_size; i++) { 61 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 62 | t |= 1ULL << i; 63 | } 64 | } 65 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 66 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 67 | } 68 | } 69 | 70 | // boxes is a N x 5 tensor 71 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 72 | 73 | // Ensure CUDA uses the input tensor device. 
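// Roughly, the host code below: (1) sorts the boxes by score and gathers them into
// boxes_sorted; (2) launches nms_kernel over a col_blocks x col_blocks grid so that every
// pair of 64-box tiles is compared and a 64-bit suppression mask is written per
// (box, column tile) pair; (3) copies the masks back to the host and sweeps the boxes in
// score order, keeping a box only if none of the already-kept boxes has marked it (the
// remv bitset accumulates the masks of kept boxes); (4) maps the kept positions back
// through order_t so the returned indices refer to the original, unsorted input.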
74 |   at::DeviceGuard guard(boxes.device());
75 | 
76 |   using scalar_t = float;
77 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
78 |   auto scores = boxes.select(1, 4);
79 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
80 |   auto boxes_sorted = boxes.index_select(0, order_t);
81 | 
82 |   int boxes_num = boxes.size(0);
83 | 
84 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
85 | 
86 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
87 | 
88 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
89 | 
90 |   unsigned long long* mask_dev = NULL;
91 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
92 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
93 | 
94 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
95 | 
96 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
97 |               THCCeilDiv(boxes_num, threadsPerBlock));
98 |   dim3 threads(threadsPerBlock);
99 |   nms_kernel<<<blocks, threads>>>(boxes_num,
100 |                                   nms_overlap_thresh,
101 |                                   boxes_dev,
102 |                                   mask_dev);
103 | 
104 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
105 |   THCudaCheck(cudaMemcpyAsync(
106 |       &mask_host[0],
107 |       mask_dev,
108 |       sizeof(unsigned long long) * boxes_num * col_blocks,
109 |       cudaMemcpyDeviceToHost,
110 |       at::cuda::getCurrentCUDAStream()
111 |   ));
112 | 
113 |   std::vector<unsigned long long> remv(col_blocks);
114 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
115 | 
116 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
117 |   int64_t* keep_out = keep.data<int64_t>();
118 | 
119 |   int num_to_keep = 0;
120 |   for (int i = 0; i < boxes_num; i++) {
121 |     int nblock = i / threadsPerBlock;
122 |     int inblock = i % threadsPerBlock;
123 | 
124 |     if (!(remv[nblock] & (1ULL << inblock))) {
125 |       keep_out[num_to_keep++] = i;
126 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
127 |       for (int j = nblock; j < col_blocks; j++) {
128 |         remv[j] |= p[j];
129 |       }
130 |     }
131 |   }
132 | 
133 |   THCudaFree(state, mask_dev);
134 |   // TODO improve this part
135 |   return std::get<0>(order_t.index({
136 |       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
137 |           order_t.device(), keep.scalar_type())
138 |   }).sort(0, false));
139 | }
140 | 
-------------------------------------------------------------------------------- /libs/roi_align/__init__.py: --------------------------------------------------------------------------------
1 | from .roi_align import RoIAlign, roi_align
2 | 
3 | __all__ = ['roi_align', 'RoIAlign']
4 | 
-------------------------------------------------------------------------------- /libs/roi_align/gradcheck.py: --------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 | 
4 | import numpy as np
5 | import torch
6 | from torch.autograd import gradcheck
7 | 
8 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
9 | from roi_align import RoIAlign  # noqa: E402, isort:skip
10 | 
11 | feat_size = 15
12 | spatial_scale = 1.0 / 8
13 | img_size = feat_size / spatial_scale
14 | num_imgs = 2
15 | num_rois = 20
16 | 
17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5
19 | rois[:, 2:] += img_size * 0.5
20 | rois = np.hstack((batch_ind, rois))
21 | 
22 | feat = torch.randn(
23 |     num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
24 | rois = torch.from_numpy(rois).float().cuda()
25 | inputs = (feat, rois)
26
| print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /libs/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = _pair(out_size) 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, self.out_size, 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /libs/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | int ROIAlignForwardLaucher(const at::Tensor 
features, const at::Tensor rois, 9 | const float spatial_scale, const int sample_num, 10 | const int channels, const int height, 11 | const int width, const int num_rois, 12 | const int pooled_height, const int pooled_width, 13 | at::Tensor output); 14 | 15 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 16 | const float spatial_scale, const int sample_num, 17 | const int channels, const int height, 18 | const int width, const int num_rois, 19 | const int pooled_height, const int pooled_width, 20 | at::Tensor bottom_grad); 21 | 22 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 23 | #define CHECK_CONTIGUOUS(x) \ 24 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 25 | #define CHECK_INPUT(x) \ 26 | CHECK_CUDA(x); \ 27 | CHECK_CONTIGUOUS(x) 28 | 29 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 30 | int pooled_height, int pooled_width, 31 | float spatial_scale, int sample_num, 32 | at::Tensor output) { 33 | CHECK_INPUT(features); 34 | CHECK_INPUT(rois); 35 | CHECK_INPUT(output); 36 | at::DeviceGuard guard(features.device()); 37 | 38 | // Number of ROIs 39 | int num_rois = rois.size(0); 40 | int size_rois = rois.size(1); 41 | 42 | if (size_rois != 5) { 43 | printf("wrong roi size\n"); 44 | return 0; 45 | } 46 | 47 | int num_channels = features.size(1); 48 | int data_height = features.size(2); 49 | int data_width = features.size(3); 50 | 51 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 52 | num_channels, data_height, data_width, num_rois, 53 | pooled_height, pooled_width, output); 54 | 55 | return 1; 56 | } 57 | 58 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 59 | int pooled_height, int pooled_width, 60 | float spatial_scale, int sample_num, 61 | at::Tensor bottom_grad) { 62 | CHECK_INPUT(top_grad); 63 | CHECK_INPUT(rois); 64 | CHECK_INPUT(bottom_grad); 65 | at::DeviceGuard guard(top_grad.device()); 66 | 67 | // Number of ROIs 68 | int num_rois = rois.size(0); 69 | int size_rois = rois.size(1); 70 | if (size_rois != 5) { 71 | printf("wrong roi size\n"); 72 | return 0; 73 | } 74 | 75 | int num_channels = bottom_grad.size(1); 76 | int data_height = bottom_grad.size(2); 77 | int data_width = bottom_grad.size(3); 78 | 79 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 80 | num_channels, data_height, data_width, num_rois, 81 | pooled_height, pooled_width, bottom_grad); 82 | 83 | return 1; 84 | } 85 | 86 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 87 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 88 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 89 | } 90 | -------------------------------------------------------------------------------- /libs/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /libs/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 
67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /libs/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /libs/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, 
const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | at::DeviceGuard guard(features.device()); 35 | 36 | // Number of ROIs 37 | int num_rois = rois.size(0); 38 | int size_rois = rois.size(1); 39 | 40 | if (size_rois != 5) { 41 | printf("wrong roi size\n"); 42 | return 0; 43 | } 44 | 45 | int channels = features.size(1); 46 | int height = features.size(2); 47 | int width = features.size(3); 48 | 49 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 50 | num_rois, pooled_height, pooled_width, output, argmax); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | at::Tensor argmax, float spatial_scale, 57 | at::Tensor bottom_grad) { 58 | CHECK_INPUT(top_grad); 59 | CHECK_INPUT(rois); 60 | CHECK_INPUT(argmax); 61 | CHECK_INPUT(bottom_grad); 62 | at::DeviceGuard guard(top_grad.device()); 63 | 64 | int pooled_height = top_grad.size(2); 65 | int pooled_width = top_grad.size(3); 66 | int num_rois = rois.size(0); 67 | int size_rois = rois.size(1); 68 | 69 | if (size_rois != 5) { 70 | printf("wrong roi size\n"); 71 | return 0; 72 | } 73 | int batch_size = bottom_grad.size(0); 74 | int channels = bottom_grad.size(1); 75 | int height = bottom_grad.size(2); 76 | int width = bottom_grad.size(3); 77 | 78 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 79 | channels, height, width, num_rois, pooled_height, 80 | pooled_width, bottom_grad); 81 | 82 | return 1; 83 | } 84 | 85 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 86 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 87 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 88 | } 89 | -------------------------------------------------------------------------------- /libs/roi_pool/src/roi_pool_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 7 | i += blockDim.x * gridDim.x) 8 | 9 | #define THREADS_PER_BLOCK 1024 10 | 11 | inline int GET_BLOCKS(const int N) { 12 | int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; 13 | int max_block_num = 65000; 14 | return min(optimal_block_num, max_block_num); 15 | } 16 | 17 | template 18 | __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, 19 | const scalar_t *rois, 20 | const scalar_t spatial_scale, const int channels, 21 | const int height, const int width, 22 | const int pooled_h, const int pooled_w, 23 | scalar_t *top_data, int *argmax_data) { 24 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 25 | // (n, c, ph, pw) is an element in the pooled output 26 | int pw = index % pooled_w; 27 | int ph = (index / pooled_w) % pooled_h; 28 | int c = (index / pooled_w / pooled_h) % channels; 29 | int n = index / pooled_w / 
pooled_h / channels;
30 | 
31 |     const scalar_t *offset_rois = rois + n * 5;
32 |     int roi_batch_ind = offset_rois[0];
33 |     // calculate the roi region on feature maps
34 |     scalar_t roi_x1 = offset_rois[1] * spatial_scale;
35 |     scalar_t roi_y1 = offset_rois[2] * spatial_scale;
36 |     scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
37 |     scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;
38 | 
39 |     // force malformed rois to be 1x1
40 |     scalar_t roi_w = roi_x2 - roi_x1;
41 |     scalar_t roi_h = roi_y2 - roi_y1;
42 |     if (roi_w <= 0 || roi_h <= 0) continue;
43 | 
44 |     scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
45 |     scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);
46 | 
47 |     // the corresponding bin region
48 |     int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
49 |     int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
50 |     int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
51 |     int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);
52 | 
53 |     // add roi offsets and clip to input boundaries
54 |     bin_x1 = min(max(bin_x1, 0), width);
55 |     bin_y1 = min(max(bin_y1, 0), height);
56 |     bin_x2 = min(max(bin_x2, 0), width);
57 |     bin_y2 = min(max(bin_y2, 0), height);
58 |     bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);
59 | 
60 |     // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
61 |     int max_idx = -1;
62 |     bottom_data += (roi_batch_ind * channels + c) * height * width;
63 | 
64 |     // Define an empty pooling region to be zero
65 |     scalar_t max_val = is_empty ? static_cast<scalar_t>(0)
66 |                                 : bottom_data[bin_y1 * width + bin_x1] - 1;
67 | 
68 |     for (int h = bin_y1; h < bin_y2; ++h) {
69 |       for (int w = bin_x1; w < bin_x2; ++w) {
70 |         int offset = h * width + w;
71 |         if (bottom_data[offset] > max_val) {
72 |           max_val = bottom_data[offset];
73 |           max_idx = offset;
74 |         }
75 |       }
76 |     }
77 |     top_data[index] = max_val;
78 |     if (argmax_data != NULL) argmax_data[index] = max_idx;
79 |   }
80 | }
81 | 
82 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
83 |                           const float spatial_scale, const int channels,
84 |                           const int height, const int width, const int num_rois,
85 |                           const int pooled_h, const int pooled_w,
86 |                           at::Tensor output, at::Tensor argmax) {
87 |   const int output_size = num_rois * channels * pooled_h * pooled_w;
88 | 
89 |   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
90 |       features.scalar_type(), "ROIPoolLaucherForward", ([&] {
91 |         const scalar_t *bottom_data = features.data<scalar_t>();
92 |         const scalar_t *rois_data = rois.data<scalar_t>();
93 |         scalar_t *top_data = output.data<scalar_t>();
94 |         int *argmax_data = argmax.data<int>();
95 | 
96 |         ROIPoolForward<scalar_t>
97 |             <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
98 |                 output_size, bottom_data, rois_data, scalar_t(spatial_scale),
99 |                 channels, height, width, pooled_h, pooled_w, top_data,
100 |                 argmax_data);
101 |       }));
102 |   THCudaCheck(cudaGetLastError());
103 |   return 1;
104 | }
105 | 
106 | template <typename scalar_t>
107 | __global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
108 |                                 const scalar_t *rois, const int *argmax_data,
109 |                                 const scalar_t spatial_scale,
110 |                                 const int channels, const int height,
111 |                                 const int width, const int pooled_h,
112 |                                 const int pooled_w, scalar_t *bottom_diff) {
113 |   CUDA_1D_KERNEL_LOOP(index, nthreads) {
114 |     int pw = index % pooled_w;
115 |     int ph = (index / pooled_w) % pooled_h;
116 |     int c = (index / pooled_w / pooled_h) % channels;
117 |     int n = index / pooled_w / pooled_h / channels;
118 | 
119 |     int roi_batch_ind = rois[n * 5];
120 |     int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
121 | ph * pooled_w + pw]; 122 | 123 | atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width + 124 | bottom_index, 125 | top_diff[index]); 126 | } 127 | } 128 | 129 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 130 | const at::Tensor argmax, const float spatial_scale, 131 | const int batch_size, const int channels, 132 | const int height, const int width, 133 | const int num_rois, const int pooled_h, 134 | const int pooled_w, at::Tensor bottom_grad) { 135 | const int output_size = num_rois * pooled_h * pooled_w * channels; 136 | 137 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 138 | top_grad.scalar_type(), "ROIPoolLaucherBackward", ([&] { 139 | const scalar_t *top_diff = top_grad.data(); 140 | const scalar_t *rois_data = rois.data(); 141 | const int *argmax_data = argmax.data(); 142 | scalar_t *bottom_diff = bottom_grad.data(); 143 | 144 | if (sizeof(scalar_t) == sizeof(double)) { 145 | fprintf(stderr, "double is not supported\n"); 146 | exit(-1); 147 | } 148 | 149 | ROIPoolBackward 150 | <<>>( 151 | output_size, top_diff, rois_data, argmax_data, 152 | scalar_t(spatial_scale), channels, height, width, pooled_h, 153 | pooled_w, bottom_diff); 154 | })); 155 | THCudaCheck(cudaGetLastError()); 156 | return 1; 157 | } 158 | -------------------------------------------------------------------------------- /libs/setup.py: -------------------------------------------------------------------------------- 1 | '''setup modified from mmdet''' 2 | import os 3 | import torch 4 | from setuptools import find_packages, setup 5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 6 | 7 | 8 | def make_cuda_ext(name, module, sources): 9 | define_macros = [] 10 | if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1': 11 | define_macros += [("WITH_CUDA", None)] 12 | else: 13 | raise EnvironmentError('CUDA is required to compile FPN!') 14 | return CUDAExtension( 15 | name='{}.{}'.format(module, name), 16 | sources=[os.path.join(*module.split('.'), p) for p in sources], 17 | define_macros=define_macros, 18 | extra_compile_args={ 19 | 'cxx': [], 20 | 'nvcc': [ 21 | '-D__CUDA_NO_HALF_OPERATORS__', 22 | '-D__CUDA_NO_HALF_CONVERSIONS__', 23 | '-D__CUDA_NO_HALF2_OPERATORS__', 24 | ] 25 | }) 26 | 27 | 28 | setup( 29 | name='FPN', 30 | version='0.1.0', 31 | description='FPN for object detection', 32 | classifiers=['License :: OSI Approved :: MIT License', 33 | 'Programming Language :: Python :: 3', 34 | 'Intended Audience :: Developers', 35 | 'Operating System :: OS Independent'], 36 | author='Charles', 37 | author_email='charlesjzc@qq.com', 38 | url='https://github.com/DetectionBLWX/FPN.pytorch', 39 | license='MIT', 40 | include_package_data=True, 41 | packages=find_packages(), 42 | ext_modules=[ 43 | make_cuda_ext( 44 | name='deform_conv_cuda', 45 | module='dcn', 46 | sources=['src/deform_conv_cuda.cpp', 'src/deform_conv_cuda_kernel.cu'] 47 | ), 48 | make_cuda_ext( 49 | name='deform_pool_cuda', 50 | module='dcn', 51 | sources=['src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu'] 52 | ), 53 | make_cuda_ext( 54 | name='nms_cpu', 55 | module='nms', 56 | sources=['src/nms_cpu.cpp'] 57 | ), 58 | make_cuda_ext( 59 | name='nms_cuda', 60 | module='nms', 61 | sources=['src/nms_cuda.cpp', 'src/nms_kernel.cu'] 62 | ), 63 | make_cuda_ext( 64 | name='roi_align_cuda', 65 | module='roi_align', 66 | sources=['src/roi_align_cuda.cpp', 'src/roi_align_kernel.cu'] 67 | ), 68 | make_cuda_ext( 69 | name='roi_pool_cuda', 70 | module='roi_pool', 71 | 
sources=['src/roi_pool_cuda.cpp', 'src/roi_pool_kernel.cu'] 72 | ), 73 | ], 74 | cmdclass={'build_ext': BuildExtension}, 75 | zip_safe=False 76 | ) -------------------------------------------------------------------------------- /modules/backbones/FPNResNets.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | Feature Pyramid Network of ResNets 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | import torchvision 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from modules.utils.initialization import * 12 | 13 | 14 | '''resnet from torchvision==0.4.0''' 15 | def ResNets(resnet_type, pretrained=False): 16 | if resnet_type == 'resnet18': 17 | model = torchvision.models.resnet18(pretrained=pretrained) 18 | elif resnet_type == 'resnet34': 19 | model = torchvision.models.resnet34(pretrained=pretrained) 20 | elif resnet_type == 'resnet50': 21 | model = torchvision.models.resnet50(pretrained=pretrained) 22 | elif resnet_type == 'resnet101': 23 | model = torchvision.models.resnet101(pretrained=pretrained) 24 | elif resnet_type == 'resnet152': 25 | model = torchvision.models.resnet152(pretrained=pretrained) 26 | else: 27 | raise ValueError('Unsupport resnet_type <%s>...' % resnet_type) 28 | return model 29 | 30 | 31 | '''FPN by using ResNets''' 32 | class FPNResNets(nn.Module): 33 | def __init__(self, mode, cfg, logger_handle, **kwargs): 34 | super(FPNResNets, self).__init__() 35 | self.logger_handle = logger_handle 36 | self.pretrained_model_path = cfg.PRETRAINED_MODEL_PATH 37 | self.backbone = ResNets(resnet_type=cfg.BACKBONE_TYPE, pretrained=False) 38 | if mode == 'TRAIN': 39 | self.initializeBackbone() 40 | self.backbone.avgpool = None 41 | self.backbone.fc = None 42 | # parse backbone 43 | self.base_layer0 = nn.Sequential(self.backbone.conv1, self.backbone.bn1, self.backbone.relu, self.backbone.maxpool) 44 | self.base_layer1 = nn.Sequential(self.backbone.layer1) 45 | self.base_layer2 = nn.Sequential(self.backbone.layer2) 46 | self.base_layer3 = nn.Sequential(self.backbone.layer3) 47 | self.base_layer4 = nn.Sequential(self.backbone.layer4) 48 | # add lateral layers 49 | in_channels = [512, 256, 128, 64] if cfg.BACKBONE_TYPE in ['resnet18', 'resnet34'] else [2048, 1024, 512, 256] 50 | self.lateral_layer0 = nn.Conv2d(in_channels=in_channels[0], out_channels=256, kernel_size=1, stride=1, padding=0) 51 | self.lateral_layer1 = nn.Conv2d(in_channels=in_channels[1], out_channels=256, kernel_size=1, stride=1, padding=0) 52 | self.lateral_layer2 = nn.Conv2d(in_channels=in_channels[2], out_channels=256, kernel_size=1, stride=1, padding=0) 53 | self.lateral_layer3 = nn.Conv2d(in_channels=in_channels[3], out_channels=256, kernel_size=1, stride=1, padding=0) 54 | # add smooth layers 55 | self.smooth_layer0 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 56 | self.smooth_layer1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 57 | self.smooth_layer2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 58 | self.smooth_layer3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 59 | # add downsample layer 60 | self.downsample_layer = nn.MaxPool2d(kernel_size=1, stride=2) 61 | '''forward''' 62 | def forward(self, x): 63 | # bottom-up 64 | c1 = self.base_layer0(x) 65 | c2 = self.base_layer1(c1) 66 | c3 = self.base_layer2(c2) 67 | c4 = self.base_layer3(c3) 68 | c5 = self.base_layer4(c4) 69 | # top-down 70 | 
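# roughly: P5 is a 1x1 lateral conv on C5; each lower level is the nearest-neighbour
# upsampling of the level above added to a 1x1 lateral conv of the corresponding C map
# (see upsampleAdd below); the merged maps are then refined by 3x3 smooth convs, and P6
# is a stride-2 subsampling of P5, giving the [P2, P3, P4, P5, P6] pyramid returned below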
p5 = self.lateral_layer0(c5) 71 | p4 = self.upsampleAdd(p5, self.lateral_layer1(c4)) 72 | p3 = self.upsampleAdd(p4, self.lateral_layer2(c3)) 73 | p2 = self.upsampleAdd(p3, self.lateral_layer3(c2)) 74 | # obtain fpn features 75 | p5 = self.smooth_layer0(p5) 76 | p4 = self.smooth_layer1(p4) 77 | p3 = self.smooth_layer2(p3) 78 | p2 = self.smooth_layer3(p2) 79 | p6 = self.downsample_layer(p5) 80 | # return all feature pyramid levels 81 | return [p2, p3, p4, p5, p6] 82 | '''upsample and add''' 83 | def upsampleAdd(self, p, c): 84 | _, _, H, W = c.size() 85 | return F.interpolate(p, size=(H, W), mode='nearest') + c 86 | '''initialize backbone''' 87 | def initializeBackbone(self): 88 | if self.pretrained_model_path: 89 | self.backbone.load_state_dict({k:v for k,v in torch.load(self.pretrained_model_path).items() if k in self.backbone.state_dict()}) 90 | self.logger_handle.info('Loading pretrained weights from %s for backbone network...' % self.pretrained_model_path) 91 | else: 92 | self.backbone = ResNets(resnet_type=self.backbone_type, pretrained=True) 93 | '''initialize added layers in fpn''' 94 | def initializeAddedLayers(self, init_method='xavier'): 95 | # normal init 96 | if init_method == 'normal': 97 | for layer in [self.lateral_layer0, self.lateral_layer1, self.lateral_layer2, self.lateral_layer3, 98 | self.smooth_layer0, self.smooth_layer1, self.smooth_layer2, self.smooth_layer3]: 99 | normalInit(layer, std=0.01) 100 | # kaiming init 101 | elif init_method == 'kaiming': 102 | for layer in [self.lateral_layer0, self.lateral_layer1, self.lateral_layer2, self.lateral_layer3, 103 | self.smooth_layer0, self.smooth_layer1, self.smooth_layer2, self.smooth_layer3]: 104 | kaimingInit(layer, nonlinearity='relu') 105 | # xavier init 106 | elif init_method == 'xavier': 107 | for layer in [self.lateral_layer0, self.lateral_layer1, self.lateral_layer2, self.lateral_layer3, 108 | self.smooth_layer0, self.smooth_layer1, self.smooth_layer2, self.smooth_layer3]: 109 | xavierInit(layer, distribution='uniform') 110 | # unsupport 111 | else: 112 | raise RuntimeError('Unsupport initializeAddedLayers.init_method <%s>...' 
% init_method) -------------------------------------------------------------------------------- /modules/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .FPNResNets import FPNResNets -------------------------------------------------------------------------------- /modules/losses/CELoss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the cross entropy loss 4 | Author: 5 | Charles 6 | ''' 7 | import torch.nn.functional as F 8 | 9 | 10 | '''cross entropy loss''' 11 | def CrossEntropyLoss(preds, targets, loss_weight=1.0, size_average=True, avg_factor=None): 12 | loss = F.cross_entropy(preds, targets, reduction='none') 13 | if avg_factor is None: 14 | loss = loss.mean() if size_average else loss.sum() 15 | else: 16 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 17 | return loss * loss_weight 18 | 19 | 20 | '''binary cross entropy loss''' 21 | def BinaryCrossEntropyLoss(preds, targets, loss_weight=1.0, size_average=True, avg_factor=None): 22 | loss = F.binary_cross_entropy_with_logits(preds, targets.float(), reduction='none') 23 | if avg_factor is None: 24 | loss = loss.mean() if size_average else loss.sum() 25 | else: 26 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 27 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/losses/IoULoss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the iou losses 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | 9 | 10 | '''giou loss, I borrow the code from mmdet''' 11 | def GIoULoss(bbox_preds, bbox_targets, eps=1e-7, size_average=True, loss_weight=1.0, avg_factor=None): 12 | # overlap 13 | lt = torch.max(bbox_preds[:, :2], bbox_targets[:, :2]) 14 | rb = torch.min(bbox_preds[:, 2:], bbox_targets[:, 2:]) 15 | wh = (rb - lt + 1).clamp(min=0) 16 | overlap = wh[:, 0] * wh[:, 1] 17 | # union 18 | ap = (bbox_preds[:, 2] - bbox_preds[:, 0] + 1) * (bbox_preds[:, 3] - bbox_preds[:, 1] + 1) 19 | ag = (bbox_targets[:, 2] - bbox_targets[:, 0] + 1) * (bbox_targets[:, 3] - bbox_targets[:, 1] + 1) 20 | union = ap + ag - overlap + eps 21 | # IoU 22 | ious = overlap / union 23 | # enclose area 24 | enclose_x1y1 = torch.min(bbox_preds[:, :2], bbox_targets[:, :2]) 25 | enclose_x2y2 = torch.max(bbox_preds[:, 2:], bbox_targets[:, 2:]) 26 | enclose_wh = (enclose_x2y2 - enclose_x1y1 + 1).clamp(min=0) 27 | enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] + eps 28 | # GIoU 29 | gious = ious - (enclose_area - union) / enclose_area 30 | loss = 1 - gious 31 | # summary and return the loss 32 | if avg_factor is None: 33 | loss = loss.mean() if size_average else loss.sum() 34 | else: 35 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 36 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .CELoss import * 3 | from .IoULoss import * 4 | from .smoothL1 import * 5 | from .focalLoss import * -------------------------------------------------------------------------------- /modules/losses/focalLoss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the focal loss 4 | Author: 5 | Charles 
6 | ''' 7 | import torch.nn.functional as F 8 | 9 | 10 | '''sigmoid focal loss''' 11 | def pySigmoidFocalLoss(preds, targets, loss_weight=1.0, gamma=2.0, alpha=0.25, size_average=True, avg_factor=None): 12 | preds_sigmoid = preds.sigmoid() 13 | targets = targets.type_as(preds) 14 | pt = (1 - preds_sigmoid) * targets + preds_sigmoid * (1 - targets) 15 | focal_weight = (alpha * targets + (1 - alpha) * (1 - targets)) * pt.pow(gamma) 16 | loss = F.binary_cross_entropy_with_logits(preds, targets, reduction='none') * focal_weight 17 | if avg_factor is None: 18 | loss = loss.mean() if size_average else loss.sum() 19 | else: 20 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 21 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/losses/smoothL1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the smooth l1 losses 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | import numpy as np 9 | 10 | 11 | '''smooth l1 loss with beta''' 12 | def betaSmoothL1Loss(bbox_preds, bbox_targets, beta=1, size_average=True, loss_weight=1.0, avg_factor=None): 13 | diff = torch.abs(bbox_preds - bbox_targets) 14 | loss = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta) 15 | if avg_factor is None: 16 | loss = loss.mean() if size_average else loss.sum() 17 | else: 18 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 19 | return loss * loss_weight 20 | 21 | 22 | '''balanced smooth l1 Loss, I borrow the code from mmdet''' 23 | def balancedSmoothL1Loss(bbox_preds, bbox_targets, beta=1.0, alpha=0.5, gamma=1.5, size_average=True, loss_weight=1.0, avg_factor=None): 24 | assert (beta > 0.) and (bbox_preds.size() == bbox_targets.size()) and (bbox_targets.numel() > 0) 25 | diff = torch.abs(bbox_preds - bbox_targets) 26 | b = np.e ** (gamma / alpha) - 1 27 | loss = torch.where(diff < beta, alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, gamma * diff + gamma / b - alpha * beta) 28 | if avg_factor is None: 29 | loss = loss.mean() if size_average else loss.sum() 30 | else: 31 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 32 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/utils/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .misc import * 3 | from .anchors import * 4 | from .datasets import * 5 | from .initialization import * -------------------------------------------------------------------------------- /modules/utils/anchors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the utils to generate anchors 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | 9 | 10 | ''' 11 | Function: 12 | anchor generator 13 | Input for __init__: 14 | --size_base(int): the base anchor size. 15 | --scales(list): scales for anchor boxes. 16 | --ratios(list): ratios for anchor boxes. 17 | Input for generate: 18 | --feature_shape(tuple): the size of feature maps in corresponding pyramid level. 19 | --feature_stride(int): the feature stride in corresponding pyramid level. 20 | --device: specify cpu or cuda. 21 | Return: 22 | --anchors(torch.FloatTensor): [nA, 4], the format is (x1, y1, x2, y2). 
23 | ''' 24 | class AnchorGenerator(object): 25 | def __init__(self, size_base, scales=[8], ratios=[0.5, 1, 2], **kwargs): 26 | self.size_base = size_base 27 | self.scales = torch.Tensor(scales) 28 | self.ratios = torch.Tensor(ratios) 29 | self.base_anchors = self.__generateBaseAnchors() 30 | '''generate anchors''' 31 | def generate(self, feature_shape=None, feature_stride=None, device='cuda'): 32 | base_anchors = self.base_anchors.to(device) 33 | feat_h, feat_w = feature_shape 34 | shift_x = torch.arange(0, feat_w, device=device) * feature_stride 35 | shift_y = torch.arange(0, feat_h, device=device) * feature_stride 36 | shift_xx, shift_yy = self.__meshgrid(shift_x, shift_y) 37 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 38 | shifts = shifts.type_as(base_anchors) 39 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :].float() 40 | all_anchors = all_anchors.view(-1, 4) 41 | return all_anchors 42 | '''meshgrid''' 43 | def __meshgrid(self, x, y): 44 | xx = x.repeat(len(y)) 45 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 46 | return xx, yy 47 | '''generate base anchors''' 48 | def __generateBaseAnchors(self): 49 | w = self.size_base 50 | h = self.size_base 51 | x_ctr = 0.5 * (w - 1) 52 | y_ctr = 0.5 * (h - 1) 53 | h_ratios = torch.sqrt(self.ratios) 54 | w_ratios = 1 / h_ratios 55 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 56 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 57 | base_anchors = torch.stack([x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)], dim=-1).round() 58 | return base_anchors -------------------------------------------------------------------------------- /modules/utils/datasets/Custom.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | some shared methods for all datasets. 
4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | 9 | 10 | '''nearest ratio random sampler, used in mode''' 11 | class NearestRatioRandomSampler(torch.utils.data.sampler.Sampler): 12 | def __init__(self, img_ratios, batch_size, **kwargs): 13 | super().__init__(data_source=None) 14 | self.img_ratios = img_ratios 15 | self.batch_size = batch_size 16 | def __iter__(self): 17 | img_ratios = torch.tensor(self.img_ratios) 18 | tall_indices = (img_ratios < 1).nonzero().view(-1) 19 | fat_indices = (img_ratios >= 1).nonzero().view(-1) 20 | tall_indices_length = len(tall_indices) 21 | fat_indices_length = len(fat_indices) 22 | tall_indices = tall_indices[torch.randperm(tall_indices_length)] 23 | fat_indices = fat_indices[torch.randperm(fat_indices_length)] 24 | num_tall_remainder = tall_indices_length % self.batch_size 25 | num_fat_remainder = fat_indices_length % self.batch_size 26 | tall_indices = tall_indices[:tall_indices_length-num_tall_remainder] 27 | fat_indices = fat_indices[:fat_indices_length-num_fat_remainder] 28 | tall_indices = tall_indices.view(-1, self.batch_size) 29 | fat_indices = fat_indices.view(-1, self.batch_size) 30 | merge_indices = torch.cat([tall_indices, fat_indices], dim=0) 31 | merge_indices = merge_indices[torch.randperm(len(merge_indices))].view(-1) 32 | return iter(merge_indices.tolist()) 33 | def __len__(self): 34 | return len(self.img_ratios) -------------------------------------------------------------------------------- /modules/utils/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .COCODataset import COCODataset 3 | from .Custom import NearestRatioRandomSampler -------------------------------------------------------------------------------- /modules/utils/initialization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | some weight initialization methods from mmcv 4 | Author: 5 | Charles 6 | ''' 7 | import numpy as np 8 | import torch.nn as nn 9 | 10 | 11 | '''constant init''' 12 | def constantInit(module, val, bias=0): 13 | if hasattr(module, 'weight') and module.weight is not None: 14 | nn.init.constant_(module.weight, val) 15 | if hasattr(module, 'bias') and module.bias is not None: 16 | nn.init.constant_(module.bias, bias) 17 | 18 | 19 | '''xavier init''' 20 | def xavierInit(module, gain=1, bias=0, distribution='normal'): 21 | assert distribution in ['uniform', 'normal'] 22 | if distribution == 'uniform': 23 | nn.init.xavier_uniform_(module.weight, gain=gain) 24 | else: 25 | nn.init.xavier_normal_(module.weight, gain=gain) 26 | if hasattr(module, 'bias') and module.bias is not None: 27 | nn.init.constant_(module.bias, bias) 28 | 29 | 30 | '''normal init''' 31 | def normalInit(module, mean=0, std=1, bias=0): 32 | nn.init.normal_(module.weight, mean, std) 33 | if hasattr(module, 'bias') and module.bias is not None: 34 | nn.init.constant_(module.bias, bias) 35 | 36 | 37 | '''uniform init''' 38 | def uniformInit(module, a=0, b=1, bias=0): 39 | nn.init.uniform_(module.weight, a, b) 40 | if hasattr(module, 'bias') and module.bias is not None: 41 | nn.init.constant_(module.bias, bias) 42 | 43 | 44 | '''kaiming init''' 45 | def kaimingInit(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal'): 46 | assert distribution in ['uniform', 'normal'] 47 | if distribution == 'uniform': 48 | nn.init.kaiming_uniform_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 49 | else: 50 | 
nn.init.kaiming_normal_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 51 | if hasattr(module, 'bias') and module.bias is not None: 52 | nn.init.constant_(module.bias, bias) 53 | 54 | 55 | '''`XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch, Acknowledgment to FAIR's internal code''' 56 | def caffe2XavierInit(module, bias=0): 57 | kaimingInit(module, a=1, mode='fan_in', nonlinearity='leaky_relu', distribution='uniform') 58 | 59 | 60 | '''initialize conv/fc bias value according to giving probablity''' 61 | def biasInitWithProb(prior_prob): 62 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 63 | return bias_init -------------------------------------------------------------------------------- /modules/utils/misc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | some util functions used for many module files. 4 | Author: 5 | Charles 6 | ''' 7 | import os 8 | import torch 9 | import logging 10 | from torch.nn.utils import clip_grad 11 | 12 | 13 | '''check the existence of dirpath''' 14 | def checkDir(dirpath): 15 | if not os.path.exists(dirpath): 16 | os.mkdir(dirpath) 17 | return False 18 | return True 19 | 20 | 21 | '''log function.''' 22 | class Logger(): 23 | def __init__(self, logfilepath, **kwargs): 24 | logging.basicConfig(level=logging.INFO, 25 | format='%(asctime)s %(levelname)-8s %(message)s', 26 | datefmt='%Y-%m-%d %H:%M:%S', 27 | handlers=[logging.FileHandler(logfilepath), 28 | logging.StreamHandler()]) 29 | @staticmethod 30 | def log(level, message): 31 | logging.log(level, message) 32 | @staticmethod 33 | def debug(message): 34 | Logger.log(logging.DEBUG, message) 35 | @staticmethod 36 | def info(message): 37 | Logger.log(logging.INFO, message) 38 | @staticmethod 39 | def warning(message): 40 | Logger.log(logging.WARNING, message) 41 | @staticmethod 42 | def error(message): 43 | Logger.log(logging.ERROR, message) 44 | 45 | 46 | '''load class labels.''' 47 | def loadclsnames(clsnamespath): 48 | names = [] 49 | for line in open(clsnamespath): 50 | if line.strip('\n'): 51 | names.append(line.strip('\n')) 52 | return names 53 | 54 | 55 | '''some functions for bboxes, the format of all the input bboxes are (x1, y1, x2, y2)''' 56 | class BBoxFunctions(object): 57 | def __init__(self): 58 | self.info = 'bbox functions' 59 | def __repr__(self): 60 | return self.info 61 | '''convert anchors to proposals, anchors size: B x N x 4''' 62 | @staticmethod 63 | def anchors2Proposals(anchors, deltas): 64 | widths = anchors[..., 2] - anchors[..., 0] + 1.0 65 | heights = anchors[..., 3] - anchors[..., 1] + 1.0 66 | cxs = anchors[..., 0] + 0.5 * widths 67 | cys = anchors[..., 1] + 0.5 * heights 68 | dx = deltas[..., 0::4] 69 | dy = deltas[..., 1::4] 70 | dw = deltas[..., 2::4] 71 | dh = deltas[..., 3::4] 72 | cxs_pred = dx * widths.unsqueeze(2) + cxs.unsqueeze(2) 73 | cys_pred = dy * heights.unsqueeze(2) + cys.unsqueeze(2) 74 | ws_pred = torch.exp(dw) * widths.unsqueeze(2) 75 | hs_pred = torch.exp(dh) * heights.unsqueeze(2) 76 | boxes_pred = deltas.clone() 77 | boxes_pred[..., 0::4] = cxs_pred - 0.5 * ws_pred 78 | boxes_pred[..., 1::4] = cys_pred - 0.5 * hs_pred 79 | boxes_pred[..., 2::4] = cxs_pred + 0.5 * ws_pred 80 | boxes_pred[..., 3::4] = cys_pred + 0.5 * hs_pred 81 | # [x1, y1, x2, y2] 82 | return boxes_pred 83 | '''clip boxes, boxes size: B x N x 4, img_info: B x 3(height, width, scale_factor)''' 84 | @staticmethod 85 | def clipBoxes(boxes, img_info): 86 | for i in range(boxes.size(0)): 87 | 
boxes[i, :, 0::4].clamp_(0, img_info[i, 1]-1) 88 | boxes[i, :, 1::4].clamp_(0, img_info[i, 0]-1) 89 | boxes[i, :, 2::4].clamp_(0, img_info[i, 1]-1) 90 | boxes[i, :, 3::4].clamp_(0, img_info[i, 0]-1) 91 | return boxes 92 | '''calculate iou, boxes1(anchors): N x 4 or B x N x 4, boxes2(gts): B x K x 5''' 93 | @staticmethod 94 | def calcIoUs(boxes1, boxes2): 95 | batch_size = boxes2.size(0) 96 | if boxes1.dim() == 2: 97 | num_boxes1 = boxes1.size(0) 98 | num_boxes2 = boxes2.size(1) 99 | boxes1 = boxes1.view(1, num_boxes1, 4).expand(batch_size, num_boxes1, 4).contiguous() 100 | boxes2 = boxes2[..., :4].contiguous() 101 | # calc boxes2(gts) areas 102 | boxes2_ws = boxes2[..., 2] - boxes2[..., 0] + 1 103 | boxes2_hs = boxes2[..., 3] - boxes2[..., 1] + 1 104 | boxes2_areas = (boxes2_ws * boxes2_hs).view(batch_size, 1, num_boxes2) 105 | # calc boxes1(anchors) areas 106 | boxes1_ws = boxes1[..., 2] - boxes1[..., 0] + 1 107 | boxes1_hs = boxes1[..., 3] - boxes1[..., 1] + 1 108 | boxes1_areas = (boxes1_ws * boxes1_hs).view(batch_size, num_boxes1, 1) 109 | # find the error boxes 110 | boxes1_error = (boxes1_ws == 1) & (boxes1_hs == 1) 111 | boxes2_error = (boxes2_ws == 1) & (boxes2_hs == 1) 112 | # re-format boxes 113 | boxes1 = boxes1.view(batch_size, num_boxes1, 1, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 114 | boxes2 = boxes2.view(batch_size, 1, num_boxes2, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 115 | # calc inter area 116 | iws = torch.min(boxes1[..., 2], boxes2[..., 2]) - torch.max(boxes1[..., 0], boxes2[..., 0]) + 1 117 | iws[iws < 0] = 0 118 | ihs = torch.min(boxes1[..., 3], boxes2[..., 3]) - torch.max(boxes1[..., 1], boxes2[..., 1]) + 1 119 | ihs[ihs < 0] = 0 120 | # union area 121 | uas = boxes1_areas + boxes2_areas - (iws * ihs) 122 | # overlaps 123 | overlaps = iws * ihs / uas 124 | overlaps.masked_fill_(boxes2_error.view(batch_size, 1, num_boxes2).expand(batch_size, num_boxes1, num_boxes2), 0) 125 | overlaps.masked_fill_(boxes1_error.view(batch_size, num_boxes1, 1).expand(batch_size, num_boxes1, num_boxes2), -1) 126 | elif boxes1.dim() == 3: 127 | num_boxes1 = boxes1.size(1) 128 | num_boxes2 = boxes2.size(1) 129 | if boxes1.size(2) == 4: 130 | boxes1 = boxes1[..., :4].contiguous() 131 | else: 132 | boxes1 = boxes1[..., 1:5].contiguous() 133 | boxes2 = boxes2[..., :4].contiguous() 134 | # calc boxes2(gts) areas 135 | boxes2_ws = boxes2[..., 2] - boxes2[..., 0] + 1 136 | boxes2_hs = boxes2[..., 3] - boxes2[..., 1] + 1 137 | boxes2_areas = (boxes2_ws * boxes2_hs).view(batch_size, 1, num_boxes2) 138 | # calc boxes1(anchors) areas 139 | boxes1_ws = boxes1[..., 2] - boxes1[..., 0] + 1 140 | boxes1_hs = boxes1[..., 3] - boxes1[..., 1] + 1 141 | boxes1_areas = (boxes1_ws * boxes1_hs).view(batch_size, num_boxes1, 1) 142 | # find the error boxes 143 | boxes1_error = (boxes1_ws == 1) & (boxes1_hs == 1) 144 | boxes2_error = (boxes2_ws == 1) & (boxes2_hs == 1) 145 | # re-format boxes 146 | boxes1 = boxes1.view(batch_size, num_boxes1, 1, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 147 | boxes2 = boxes2.view(batch_size, 1, num_boxes2, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 148 | # calc inter area 149 | iws = torch.min(boxes1[..., 2], boxes2[..., 2]) - torch.max(boxes1[..., 0], boxes2[..., 0]) + 1 150 | iws[iws < 0] = 0 151 | ihs = torch.min(boxes1[..., 3], boxes2[..., 3]) - torch.max(boxes1[..., 1], boxes2[..., 1]) + 1 152 | ihs[ihs < 0] = 0 153 | # union area 154 | uas = boxes1_areas + boxes2_areas - (iws * ihs) 155 | # overlaps 156 | overlaps = iws * ihs / uas 157 | 
overlaps.masked_fill_(boxes2_error.view(batch_size, 1, num_boxes2).expand(batch_size, num_boxes1, num_boxes2), 0) 158 | overlaps.masked_fill_(boxes1_error.view(batch_size, num_boxes1, 1).expand(batch_size, num_boxes1, num_boxes2), -1) 159 | else: 160 | raise ValueError('boxes1(anchors) dimension error in BBoxFunctions.calcIoUs') 161 | return overlaps 162 | '''encode bboxes''' 163 | @staticmethod 164 | def encodeBboxes(boxes_pred, boxes_gt): 165 | if boxes_pred.dim() == 2: 166 | # convert (x1, y1, x2, y2) to (cx, cy, w, h) 167 | widths_pred = boxes_pred[..., 2] - boxes_pred[..., 0] + 1.0 168 | heights_pred = boxes_pred[..., 3] - boxes_pred[..., 1] + 1.0 169 | centerxs_pred = boxes_pred[..., 0] + 0.5 * widths_pred 170 | centerys_pred = boxes_pred[..., 1] + 0.5 * heights_pred 171 | widths_gt = boxes_gt[..., 2] - boxes_gt[..., 0] + 1.0 172 | heights_gt = boxes_gt[..., 3] - boxes_gt[..., 1] + 1.0 173 | centerxs_gt = boxes_gt[..., 0] + 0.5 * widths_gt 174 | centerys_gt = boxes_gt[..., 1] + 0.5 * heights_gt 175 | # calculate targets 176 | dxs_target = (centerxs_gt - centerxs_pred.view(1, -1).expand_as(centerxs_gt)) / widths_pred 177 | dys_target = (centerys_gt - centerys_pred.view(1, -1).expand_as(centerys_gt)) / heights_pred 178 | dws_target = torch.log(widths_gt / widths_pred.view(1, -1).expand_as(widths_gt)) 179 | dhs_target = torch.log(heights_gt / heights_pred.view(1, -1).expand_as(heights_gt)) 180 | elif boxes_pred.dim() == 3: 181 | # convert (x1, y1, x2, y2) to (cx, cy, w, h) 182 | widths_pred = boxes_pred[..., 2] - boxes_pred[..., 0] + 1.0 183 | heights_pred = boxes_pred[..., 3] - boxes_pred[..., 1] + 1.0 184 | centerxs_pred = boxes_pred[..., 0] + 0.5 * widths_pred 185 | centerys_pred = boxes_pred[..., 1] + 0.5 * heights_pred 186 | widths_gt = boxes_gt[..., 2] - boxes_gt[..., 0] + 1.0 187 | heights_gt = boxes_gt[..., 3] - boxes_gt[..., 1] + 1.0 188 | centerxs_gt = boxes_gt[..., 0] + 0.5 * widths_gt 189 | centerys_gt = boxes_gt[..., 1] + 0.5 * heights_gt 190 | # calculate targets 191 | dxs_target = (centerxs_gt - centerxs_pred) / widths_pred 192 | dys_target = (centerys_gt - centerys_pred) / heights_pred 193 | dws_target = torch.log(widths_gt / widths_pred) 194 | dhs_target = torch.log(heights_gt / heights_pred) 195 | else: 196 | raise ValueError('boxes_pred dimension error in BBoxFunctions.encodeBboxes') 197 | return torch.stack((dxs_target, dys_target, dws_target, dhs_target), 2) 198 | '''decode bboxes''' 199 | @staticmethod 200 | def decodeBboxes(boxes, deltas): 201 | widths = boxes[..., 2] - boxes[..., 0] + 1.0 202 | heights = boxes[..., 3] - boxes[..., 1] + 1.0 203 | cxs = boxes[..., 0] + 0.5 * widths 204 | cys = boxes[..., 1] + 0.5 * heights 205 | dxs = deltas[..., 0::4] 206 | dys = deltas[..., 1::4] 207 | dws = deltas[..., 2::4] 208 | dhs = deltas[..., 3::4] 209 | cxs_pred = dxs * widths.unsqueeze(2) + cxs.unsqueeze(2) 210 | cys_pred = dys * heights.unsqueeze(2) + cys.unsqueeze(2) 211 | ws_pred = torch.exp(dws) * widths.unsqueeze(2) 212 | hs_pred = torch.exp(dhs) * heights.unsqueeze(2) 213 | boxes_pred = deltas.clone() 214 | boxes_pred[..., 0::4] = cxs_pred - ws_pred * 0.5 215 | boxes_pred[..., 1::4] = cys_pred - hs_pred * 0.5 216 | boxes_pred[..., 2::4] = cxs_pred + ws_pred * 0.5 217 | boxes_pred[..., 3::4] = cys_pred + hs_pred * 0.5 218 | # [x1, y1, x2, y2] 219 | return boxes_pred 220 | 221 | 222 | '''adjust learning rate''' 223 | def adjustLearningRate(optimizer, target_lr, logger_handle=None): 224 | if logger_handle is not None: 225 | logger_handle.info('Adjust learning rate to 
%s...' % str(target_lr)) 226 | for param_group in optimizer.param_groups: 227 | param_group['lr'] = target_lr 228 | return True 229 | 230 | 231 | '''save checkpoints''' 232 | def saveCheckpoints(state_dict, savepath, logger_handle): 233 | logger_handle.info('Saving state_dict in %s...' % savepath) 234 | torch.save(state_dict, savepath) 235 | return True 236 | 237 | 238 | '''load checkpoints''' 239 | def loadCheckpoints(checkpointspath, logger_handle): 240 | logger_handle.info('Loading checkpoints from %s...' % checkpointspath) 241 | checkpoints = torch.load(checkpointspath) 242 | return checkpoints 243 | 244 | 245 | '''clip gradient''' 246 | def clipGradients(params, max_norm=35, norm_type=2): 247 | params = list(filter(lambda p: p.requires_grad and p.grad is not None, params)) 248 | if len(params) > 0: 249 | clip_grad.clip_grad_norm_(params, max_norm=max_norm, norm_type=norm_type) -------------------------------------------------------------------------------- /names/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /names/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | test mAP 4 | Author: 5 | Charles 6 | ''' 7 | import json 8 | import torch 9 | import warnings 10 | import argparse 11 | import numpy as np 12 | from modules.utils import * 13 | from libs.nms.nms_wrapper import nms 14 | from cfgs.getcfg import getCfgByDatasetAndBackbone 15 | from modules.fasterRCNN import FasterRCNNFPNResNets 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | '''parse arguments for testing''' 20 | def parseArgs(): 21 | parser = argparse.ArgumentParser(description='Faster R-CNN with FPN') 22 | parser.add_argument('--datasetname', dest='datasetname', help='dataset for testing.', default='', type=str, required=True) 23 | parser.add_argument('--annfilepath', dest='annfilepath', help='used to specify annfilepath.', default='', type=str) 24 | parser.add_argument('--datasettype', dest='datasettype', help='used to specify 
datasettype.', default='val2017', type=str) 25 | parser.add_argument('--backbonename', dest='backbonename', help='backbone network for testing.', default='', type=str, required=True) 26 | parser.add_argument('--checkpointspath', dest='checkpointspath', help='checkpoints you want to use.', default='', type=str, required=True) 27 | parser.add_argument('--nmsthresh', dest='nmsthresh', help='thresh used in nms.', default=0.5, type=float) 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | '''test mAP''' 33 | def test(): 34 | # prepare base things 35 | args = parseArgs() 36 | cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename) 37 | checkDir(cfg.TEST_BACKUPDIR) 38 | logger_handle = Logger(cfg.TEST_LOGFILE) 39 | use_cuda = torch.cuda.is_available() 40 | clsnames = loadclsnames(cfg.CLSNAMESPATH) 41 | # prepare dataset 42 | if args.datasetname == 'coco': 43 | dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR, image_size_dict=cfg.IMAGESIZE_DICT, max_num_gt_boxes=-1, use_color_jitter=False, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL, mode='TEST', datasettype=args.datasettype, annfilepath=args.annfilepath) 44 | else: 45 | raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname) 46 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) 47 | # prepare model 48 | if args.backbonename.find('resnet') != -1: 49 | model = FasterRCNNFPNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle) 50 | else: 51 | raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename) 52 | if use_cuda: 53 | model = model.cuda() 54 | # load checkpoints 55 | checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) 56 | model.load_state_dict(checkpoints['model']) 57 | model.eval() 58 | # test mAP 59 | FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 60 | results = [] 61 | img_ids = [] 62 | for batch_idx, samples in enumerate(dataloader): 63 | logger_handle.info('detect %s/%s...'
% (batch_idx+1, len(dataloader))) 64 | # --do detect 65 | img_id, img, w_ori, h_ori, gt_boxes, img_info, num_gt_boxes = samples 66 | img_id, w_ori, h_ori, scale_factor = int(img_id.item()), w_ori.item(), h_ori.item(), img_info[0][-1].item() 67 | img_ids.append(img_id) 68 | with torch.no_grad(): 69 | output = model(x=img.type(FloatTensor), gt_boxes=gt_boxes.type(FloatTensor), img_info=img_info.type(FloatTensor), num_gt_boxes=num_gt_boxes.type(FloatTensor)) 70 | rois = output[0].data[..., 1:5] 71 | cls_probs = output[1].data 72 | bbox_preds = output[2].data 73 | # --parse the results 74 | if cfg.IS_CLASS_AGNOSTIC: 75 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 76 | box_deltas = box_deltas.view(1, -1, 4) 77 | else: 78 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 79 | box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES) 80 | boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas) 81 | boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, torch.from_numpy(np.array([h_ori*scale_factor, w_ori*scale_factor, scale_factor])).unsqueeze(0).type(FloatTensor).data) 82 | boxes_pred = boxes_pred.squeeze() 83 | scores = cls_probs.squeeze() 84 | thresh = 0.05 85 | for j in range(1, cfg.NUM_CLASSES): 86 | idxs = torch.nonzero(scores[:, j] > thresh).view(-1) 87 | if idxs.numel() > 0: 88 | cls_scores = scores[:, j][idxs] 89 | _, order = torch.sort(cls_scores, 0, True) 90 | if cfg.IS_CLASS_AGNOSTIC: 91 | cls_boxes = boxes_pred[idxs, :] 92 | else: 93 | cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4] 94 | cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) 95 | cls_dets = cls_dets[order] 96 | _, keep_idxs = nms(cls_dets, args.nmsthresh) 97 | cls_dets = cls_dets[keep_idxs.view(-1).long()] 98 | for cls_det in cls_dets: 99 | category_id = dataset.clsids2cococlsids_dict.get(j) 100 | x1, y1, x2, y2, score = cls_det 101 | x1 = x1.item() / scale_factor 102 | x2 = x2.item() / scale_factor 103 | y1 = y1.item() / scale_factor 104 | y2 = y2.item() / scale_factor 105 | bbox = [x1, y1, x2, y2] 106 | bbox[2] = bbox[2] - bbox[0] 107 | bbox[3] = bbox[3] - bbox[1] 108 | image_result = { 109 | 'image_id': img_id, 110 | 'category_id': int(category_id), 111 | 'score': float(score.item()), 112 | 'bbox': bbox 113 | } 114 | results.append(image_result) 115 | json.dump(results, open(cfg.TEST_BBOXES_SAVE_PATH, 'w'), indent=4) 116 | if args.datasettype in ['val2017']: 117 | dataset.doDetectionEval(img_ids, cfg.TEST_BBOXES_SAVE_PATH) 118 | 119 | 120 | '''run''' 121 | if __name__ == '__main__': 122 | test() -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | train the model 4 | Author: 5 | Charles 6 | ''' 7 | import os 8 | import torch 9 | import warnings 10 | import argparse 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | from modules.utils import * 14 | from cfgs.getcfg import getCfgByDatasetAndBackbone 15 | from modules.fasterRCNN import FasterRCNNFPNResNets 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | '''parse arguments for training''' 20 | def parseArgs(): 21 | parser = argparse.ArgumentParser(description='Faster R-CNN with FPN') 22 | parser.add_argument('--datasetname', dest='datasetname', 
help='dataset for training.', default='', type=str, required=True) 23 | parser.add_argument('--backbonename', dest='backbonename', help='backbone network for training.', default='', type=str, required=True) 24 | parser.add_argument('--checkpointspath', dest='checkpointspath', help='checkpoints you want to use.', default='', type=str) 25 | args = parser.parse_args() 26 | return args 27 | 28 | 29 | '''train model''' 30 | def train(): 31 | # prepare base things 32 | args = parseArgs() 33 | cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename) 34 | checkDir(cfg.TRAIN_BACKUPDIR) 35 | logger_handle = Logger(cfg.TRAIN_LOGFILE) 36 | use_cuda = torch.cuda.is_available() 37 | is_multi_gpus = cfg.IS_MULTI_GPUS 38 | if is_multi_gpus: assert use_cuda 39 | # prepare dataset 40 | if args.datasetname == 'coco': 41 | dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR, image_size_dict=cfg.IMAGESIZE_DICT, max_num_gt_boxes=cfg.MAX_NUM_GT_BOXES, use_color_jitter=cfg.USE_COLOR_JITTER, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL, mode='TRAIN', datasettype='train2017') 42 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=cfg.BATCHSIZE, sampler=NearestRatioRandomSampler(dataset.img_ratios, cfg.BATCHSIZE), num_workers=cfg.NUM_WORKERS, collate_fn=COCODataset.paddingCollateFn, pin_memory=cfg.PIN_MEMORY) 43 | else: 44 | raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname) 45 | # prepare model 46 | if args.backbonename.find('resnet') != -1: 47 | model = FasterRCNNFPNResNets(mode='TRAIN', cfg=cfg, logger_handle=logger_handle) 48 | else: 49 | raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename) 50 | start_epoch = 1 51 | end_epoch = cfg.MAX_EPOCHS 52 | if use_cuda: 53 | model = model.cuda() 54 | # prepare optimizer 55 | learning_rate_idx = 0 56 | if cfg.IS_USE_WARMUP: 57 | learning_rate = cfg.LEARNING_RATES[learning_rate_idx] / 3 58 | else: 59 | learning_rate = cfg.LEARNING_RATES[learning_rate_idx] 60 | optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate, momentum=cfg.MOMENTUM, weight_decay=cfg.WEIGHT_DECAY) 61 | # check checkpoints path 62 | if args.checkpointspath: 63 | checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) 64 | model.load_state_dict(checkpoints['model']) 65 | optimizer.load_state_dict(checkpoints['optimizer']) 66 | start_epoch = checkpoints['epoch'] + 1 67 | for epoch in range(1, start_epoch): 68 | if epoch in cfg.LR_ADJUST_EPOCHS: 69 | learning_rate_idx += 1 70 | # data parallel 71 | if is_multi_gpus: 72 | model = nn.DataParallel(model) 73 | # print config 74 | logger_handle.info('Dataset used: %s, Number of images: %s' % (args.datasetname, len(dataset))) 75 | logger_handle.info('Backbone used: %s' % args.backbonename) 76 | logger_handle.info('Checkpoints used: %s' % args.checkpointspath) 77 | logger_handle.info('Config file used: %s' % cfg_file_path) 78 | # train 79 | FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 80 | for epoch in range(start_epoch, end_epoch+1): 81 | # --set train mode 82 | if is_multi_gpus: 83 | model.module.setTrain() 84 | else: 85 | model.setTrain() 86 | # --adjust learning rate 87 | if epoch in cfg.LR_ADJUST_EPOCHS: 88 | learning_rate_idx += 1 89 | adjustLearningRate(optimizer=optimizer, target_lr=cfg.LEARNING_RATES[learning_rate_idx], logger_handle=logger_handle) 90 | # --log info 91 | logger_handle.info('Start epoch %s, learning rate is
%s...' % (epoch, cfg.LEARNING_RATES[learning_rate_idx])) 92 | # --train epoch 93 | for batch_idx, samples in enumerate(dataloader): 94 | if (epoch == 1) and (cfg.IS_USE_WARMUP) and (batch_idx <= cfg.NUM_WARMUP_STEPS): 95 | assert learning_rate_idx == 0, 'BUGS may exist...' 96 | target_lr = cfg.LEARNING_RATES[learning_rate_idx] / 3 97 | target_lr += (cfg.LEARNING_RATES[learning_rate_idx] - cfg.LEARNING_RATES[learning_rate_idx] / 3) * batch_idx / cfg.NUM_WARMUP_STEPS 98 | adjustLearningRate(optimizer=optimizer, target_lr=target_lr) 99 | optimizer.zero_grad() 100 | img_ids, imgs, gt_boxes, img_info, num_gt_boxes = samples 101 | output = model(x=imgs.type(FloatTensor), gt_boxes=gt_boxes.type(FloatTensor), img_info=img_info.type(FloatTensor), num_gt_boxes=num_gt_boxes.type(FloatTensor)) 102 | rois, cls_probs, bbox_preds, rpn_cls_loss, rpn_reg_loss, loss_cls, loss_reg = output 103 | loss = rpn_cls_loss.mean() + rpn_reg_loss.mean() + loss_cls.mean() + loss_reg.mean() 104 | logger_handle.info('[EPOCH]: %s/%s, [BATCH]: %s/%s, [LEARNING_RATE]: %s, [DATASET]: %s \n\t [LOSS]: rpn_cls_loss %.4f, rpn_reg_loss %.4f, loss_cls %.4f, loss_reg %.4f, total %.4f' % \ 105 | (epoch, end_epoch, (batch_idx+1), len(dataloader), cfg.LEARNING_RATES[learning_rate_idx], args.datasetname, rpn_cls_loss.mean().item(), rpn_reg_loss.mean().item(), loss_cls.mean().item(), loss_reg.mean().item(), loss.item())) 106 | loss.backward() 107 | clipGradients(model.parameters(), max_norm=cfg.GRAD_CLIP_MAX_NORM, norm_type=cfg.GRAD_CLIP_NORM_TYPE) 108 | optimizer.step() 109 | # --save model 110 | if (epoch % cfg.SAVE_INTERVAL == 0) or (epoch == end_epoch): 111 | state_dict = {'epoch': epoch, 112 | 'model': model.module.state_dict() if is_multi_gpus else model.state_dict(), 113 | 'optimizer': optimizer.state_dict()} 114 | savepath = os.path.join(cfg.TRAIN_BACKUPDIR, 'epoch_%s.pth' % epoch) 115 | saveCheckpoints(state_dict, savepath, logger_handle) 116 | 117 | 118 | '''run''' 119 | if __name__ == '__main__': 120 | train() --------------------------------------------------------------------------------
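A minimal sketch of how the `BBoxFunctions.encodeBboxes`/`decodeBboxes` helpers shown earlier pair up, assuming `BBoxFunctions` is importable from `modules.utils` (as test.py's `from modules.utils import *` implies); the tensor values below are made-up illustrations, not repository data:

```python
import torch
from modules.utils import BBoxFunctions  # assumed import path; adjust if the package exports it elsewhere

# one image, two anchors and their matched ground-truth boxes, all as (x1, y1, x2, y2)
anchors = torch.tensor([[[10., 10., 50., 60.],
                         [20., 30., 80., 90.]]])   # shape (batch=1, num_boxes=2, 4)
gt_boxes = torch.tensor([[[12., 14., 48., 66.],
                          [18., 28., 84., 88.]]])

# regression targets (dx, dy, dw, dh) of the ground truth relative to the anchors
deltas = BBoxFunctions.encodeBboxes(anchors, gt_boxes)   # shape (1, 2, 4)

# applying the targets back to the anchors approximately recovers the ground truth;
# x2/y2 come back one pixel larger because widths/heights use the legacy "+1" convention
decoded = BBoxFunctions.decodeBboxes(anchors, deltas)
print(decoded)
```

This is the standard Faster R-CNN box parameterization; note that test.py additionally un-normalizes the network's `bbox_preds` with `TEST_BBOX_NORMALIZE_STDS`/`TEST_BBOX_NORMALIZE_MEANS` before calling `decodeBboxes`, and then clips the decoded boxes to the scaled image size.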