├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── cfgs ├── cfg_coco_resnet101.py ├── cfg_coco_resnet18.py ├── cfg_coco_resnet34.py ├── cfg_coco_resnet50.py └── getcfg.py ├── demo.py ├── docs ├── Res101FPN_pytorch_epoch12.MD └── Res50FPN_pytorch_epoch12.MD ├── libs ├── cocoapi │ ├── .gitignore │ ├── .travis.yml │ ├── LuaAPI │ │ ├── CocoApi.lua │ │ ├── MaskApi.lua │ │ ├── cocoDemo.lua │ │ ├── env.lua │ │ ├── init.lua │ │ └── rocks │ │ │ └── coco-scm-1.rockspec │ ├── MatlabAPI │ │ ├── CocoApi.m │ │ ├── CocoEval.m │ │ ├── CocoUtils.m │ │ ├── MaskApi.m │ │ ├── cocoDemo.m │ │ ├── evalDemo.m │ │ ├── gason.m │ │ └── private │ │ │ ├── gasonMex.cpp │ │ │ ├── gasonMex.mexa64 │ │ │ ├── gasonMex.mexmaci64 │ │ │ ├── getPrmDflt.m │ │ │ └── maskApiMex.c │ ├── PythonAPI │ │ ├── Makefile │ │ ├── pycocoDemo.ipynb │ │ ├── pycocoEvalDemo.ipynb │ │ ├── pycocotools │ │ │ ├── __init__.py │ │ │ ├── _mask.pyx │ │ │ ├── coco.py │ │ │ ├── cocoeval.py │ │ │ └── mask.py │ │ └── setup.py │ ├── README.txt │ ├── common │ │ ├── gason.cpp │ │ ├── gason.h │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── license.txt │ └── results │ │ ├── captions_val2014_fakecap_results.json │ │ ├── instances_val2014_fakebbox100_results.json │ │ ├── instances_val2014_fakesegm100_results.json │ │ ├── person_keypoints_val2014_fakekeypoints100_results.json │ │ └── val2014_fake_eval_res.txt ├── dcn │ ├── __init__.py │ ├── deform_conv.py │ ├── deform_pool.py │ └── src │ │ ├── deform_conv_cuda.cpp │ │ ├── deform_conv_cuda_kernel.cu │ │ ├── deform_pool_cuda.cpp │ │ └── deform_pool_cuda_kernel.cu ├── font.TTF ├── make.sh ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ └── nms_kernel.cu ├── roi_align │ ├── __init__.py │ ├── gradcheck.py │ ├── roi_align.py │ └── src │ │ ├── roi_align_cuda.cpp │ │ └── roi_align_kernel.cu ├── roi_pool │ ├── __init__.py │ ├── gradcheck.py │ ├── roi_pool.py │ └── src │ │ ├── roi_pool_cuda.cpp │ │ └── roi_pool_kernel.cu └── setup.py ├── modules ├── RPN.py ├── backbones │ ├── FPNResNets.py │ └── __init__.py ├── fasterRCNN.py ├── losses │ ├── CELoss.py │ ├── IoULoss.py │ ├── __init__.py │ ├── focalLoss.py │ └── smoothL1.py └── utils │ ├── __init__.py │ ├── anchors.py │ ├── datasets │ ├── COCODataset.py │ ├── Custom.py │ └── __init__.py │ ├── initialization.py │ └── misc.py ├── names ├── coco.names └── voc.names ├── test.py └── train.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * linguist-language=python 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 DetectionBLWX 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FPN 2 | ``` 3 | Pytorch Implementation of "Feature Pyramid Networks for Object Detection" 4 | You can star this repository to keep track of the project if it's helpful for you, thank you for your support. 
5 | ``` 6 | 7 | 8 | # Environment 9 | ``` 10 | OS: Ubuntu 16.04 11 | Python: python3.x with torch==1.2.0, torchvision==0.4.0 12 | ``` 13 | 14 | 15 | # Performance 16 | | Backbone | Train | Test | Pretrained Model | Epochs | Learning Rate | RoI per image | AP | 17 | | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | 18 | | Res50-FPN | trainval35k | minival5k | Pytorch | 12 | 2e-2/2e-3/2e-4 | 512 | [35.5](docs/Res50FPN_pytorch_epoch12.MD) | 19 | | Res101-FPN | trainval35k | minival5k | Pytorch | 12 | 2e-2/2e-3/2e-4 | 512 | [37.4](docs/Res101FPN_pytorch_epoch12.MD) | 20 | 21 | 22 | # Trained models 23 | ``` 24 | You could get the trained models reported above at 25 | https://drive.google.com/open?id=1xm8z-EMbNG17sQzd-2FRRLVk_N7UIOhE 26 | ``` 27 | 28 | 29 | # Usage 30 | #### Setup 31 | ``` 32 | cd libs 33 | sh make.sh 34 | ``` 35 | #### Train 36 | ``` 37 | usage: train.py [-h] --datasetname DATASETNAME --backbonename BACKBONENAME 38 | [--checkpointspath CHECKPOINTSPATH] 39 | optional arguments: 40 | -h, --help show this help message and exit 41 | --datasetname DATASETNAME 42 | dataset for training. 43 | --backbonename BACKBONENAME 44 | backbone network for training. 45 | --checkpointspath CHECKPOINTSPATH 46 | checkpoints you want to use. 47 | cmd example: 48 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py --datasetname coco --backbonename resnet50 49 | ``` 50 | #### Test 51 | ``` 52 | usage: test.py [-h] --datasetname DATASETNAME [--annfilepath ANNFILEPATH] 53 | [--datasettype DATASETTYPE] --backbonename BACKBONENAME 54 | --checkpointspath CHECKPOINTSPATH [--nmsthresh NMSTHRESH] 55 | optional arguments: 56 | -h, --help show this help message and exit 57 | --datasetname DATASETNAME 58 | dataset for testing. 59 | --annfilepath ANNFILEPATH 60 | used to specify annfilepath. 61 | --datasettype DATASETTYPE 62 | used to specify datasettype. 63 | --backbonename BACKBONENAME 64 | backbone network for testing. 65 | --checkpointspath CHECKPOINTSPATH 66 | checkpoints you want to use. 67 | --nmsthresh NMSTHRESH 68 | thresh used in nms. 69 | cmd example: 70 | CUDA_VISIBLE_DEVICES=0 python test.py --checkpointspath fpn_res50_trainbackup_coco/epoch_12.pth --datasetname coco --backbonename resnet50 71 | ``` 72 | #### Demo 73 | ``` 74 | usage: demo.py [-h] --imagepath IMAGEPATH --backbonename BACKBONENAME 75 | --datasetname DATASETNAME --checkpointspath CHECKPOINTSPATH 76 | [--nmsthresh NMSTHRESH] [--confthresh CONFTHRESH] 77 | optional arguments: 78 | -h, --help show this help message and exit 79 | --imagepath IMAGEPATH 80 | image you want to detect. 81 | --backbonename BACKBONENAME 82 | backbone network for demo. 83 | --datasetname DATASETNAME 84 | dataset used to train. 85 | --checkpointspath CHECKPOINTSPATH 86 | checkpoints you want to use. 87 | --nmsthresh NMSTHRESH 88 | thresh used in nms. 89 | --confthresh CONFTHRESH 90 | thresh used in showing bounding box. 91 | cmd example: 92 | CUDA_VISIBLE_DEVICES=0 python demo.py --checkpointspath fpn_res50_trainbackup_coco/epoch_12.pth --datasetname coco --backbonename resnet50 --imagepath 000001.jpg 93 | ``` 94 | 95 | 96 | # Reference 97 | ``` 98 | [1]. https://github.com/jwyang/fpn.pytorch 99 | [2]. 
https://github.com/open-mmlab/mmdetection 100 | ``` -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet101.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet101' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res101_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res101_trainbackup_coco/train.log' 
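# Note on the record settings around this point: the TEST_* entries below mirror the TRAIN_* backup/log paths above.
# demo.py (included further down) creates TEST_BACKUPDIR and writes its log through TEST_LOGFILE; TEST_BBOXES_SAVE_PATH is,
# judging by its name, where detection results are dumped as a COCO-style results JSON for evaluation, and SAVE_INTERVAL is
# presumably the checkpoint-saving interval in epochs (train.py is not shown here, so treat these last two readings as assumptions).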
83 | TEST_BACKUPDIR = 'fpn_res101_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res101_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res101_testbackup_coco/fpn_res101_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet18.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet18' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | 
IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res18_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res18_trainbackup_coco/train.log' 83 | TEST_BACKUPDIR = 'fpn_res18_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res18_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res18_testbackup_coco/fpn_res18_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet34.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet34' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | 
MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res34_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res34_trainbackup_coco/train.log' 83 | TEST_BACKUPDIR = 'fpn_res34_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res34_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res34_testbackup_coco/fpn_res34_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/cfg_coco_resnet50.py: -------------------------------------------------------------------------------- 1 | '''cfg file for coco2017 dataset''' 2 | 3 | 4 | # anchors 5 | ANCHOR_SCALES = [8] 6 | ANCHOR_RATIOS = [0.5, 1, 2] 7 | ANCHOR_SIZE_BASES = [4, 8, 16, 32, 64] 8 | # RPN, RoI settings 9 | TRAIN_RPN_PRE_NMS_TOP_N = 2000 10 | TRAIN_RPN_POST_NMS_TOP_N = 2000 11 | TRAIN_RPN_NMS_THRESH = 0.7 12 | TRAIN_RPN_NEGATIVE_OVERLAP = 0.3 13 | TRAIN_RPN_POSITIVE_OVERLAP = 0.7 14 | TRAIN_RPN_FG_FRACTION = 0.5 15 | TRAIN_RPN_BATCHSIZE = 256 16 | TRAIN_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 17 | TRAIN_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 18 | TRAIN_POOLING_METHOD = ['align', 'pool'][0] 19 | TRAIN_POOLING_SIZE = 7 20 | TRAIN_POOLING_SAMPLE_NUM = 2 21 | TRAIN_ROI_MAP_LEVEL_SCALE = 56 22 | TRAIN_ROI_BATCHSIZE = 512 23 | TRAIN_ROI_FG_FRACTION = 0.25 24 | TRAIN_ROI_FG_THRESH = 0.5 25 | TRAIN_ROI_BG_THRESH_HI = 0.5 26 | TRAIN_ROI_BG_THRESH_LO = 0.0 27 | TEST_RPN_PRE_NMS_TOP_N = 1000 28 | TEST_RPN_POST_NMS_TOP_N = 1000 29 | TEST_RPN_NMS_THRESH = 0.7 30 | TEST_RPN_NEGATIVE_OVERLAP = 0.3 31 | TEST_RPN_POSITIVE_OVERLAP = 0.7 32 | TEST_RPN_FG_FRACTION = 0.5 33 | TEST_RPN_BATCHSIZE = 256 34 | TEST_BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 35 | TEST_BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 36 | TEST_POOLING_METHOD = ['align', 'pool'][0] 37 | TEST_POOLING_SIZE = 7 38 | TEST_POOLING_SAMPLE_NUM = 2 39 | TEST_ROI_MAP_LEVEL_SCALE = 56 40 | TEST_ROI_BATCHSIZE = 512 41 | TEST_ROI_FG_FRACTION = 0.25 42 | TEST_ROI_FG_THRESH = 0.5 43 | TEST_ROI_BG_THRESH_HI = 0.5 44 | TEST_ROI_BG_THRESH_LO = 0.0 45 | # backbone 46 | BACKBONE_TYPE = 'resnet50' 47 | PRETRAINED_MODEL_PATH = '' 48 | USE_CAFFE_PRETRAINED_MODEL = False 49 | FIXED_FRONT_BLOCKS = True 50 | ADDED_MODULES_WEIGHT_INIT_METHOD = {'fpn': 'xavier', 'rpn': 'normal', 'rcnn': 'normal'} 51 | IS_MULTI_GPUS = True 52 | IS_CLASS_AGNOSTIC = False 53 | # dataset 54 | DATASET_ROOT_DIR = '' 55 | MAX_NUM_GT_BOXES = 50 56 | NUM_CLASSES = 81 57 | NUM_WORKERS = 8 58 | PIN_MEMORY = True 59 | BATCHSIZE = 16 60 | CLSNAMESPATH = 'names/coco.names' 61 | USE_COLOR_JITTER = False 62 | IMAGE_NORMALIZE_INFO = {'caffe': {'mean_rgb': (0.4814576470588235, 0.4546921568627451, 0.40384352941176466), 'std_rgb': (1., 1., 1.)}, 'pytorch': {'mean_rgb': (0.485, 0.456, 0.406), 'std_rgb': (0.229, 0.224, 0.225)}} 63 | # loss function 64 | RPN_CLS_LOSS_SET = {'type': ['binary_cross_entropy'][0], 'binary_cross_entropy': {'size_average': True, 'weight': 1.}} 65 | RCNN_CLS_LOSS_SET = {'type': ['cross_entropy'][0], 'cross_entropy': {'size_average': True, 'weight': 1.}} 66 | RPN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1./9., 'size_average': True, 'weight': 1.}} 67 | RCNN_REG_LOSS_SET = {'type': ['betaSmoothL1Loss'][0], 'betaSmoothL1Loss': {'beta': 1., 'size_average': True, 'weight': 1.}} 68 | # optimizer 69 | 
MOMENTUM = 0.9 70 | WEIGHT_DECAY = 0.0001 71 | LEARNING_RATES = [[2e-2, 2e-3, 2e-4], [1e-2, 1e-3, 1e-4]][int(USE_CAFFE_PRETRAINED_MODEL)] 72 | LR_ADJUST_EPOCHS = [9, 12] 73 | MAX_EPOCHS = 12 74 | IS_USE_WARMUP = True 75 | NUM_WARMUP_STEPS = 500 76 | GRAD_CLIP_MAX_NORM = 35 77 | GRAD_CLIP_NORM_TYPE = 2 78 | # image size (max_len, min_len) 79 | IMAGESIZE_DICT = {'LONG_SIDE': 1333, 'SHORT_SIDE': 800} 80 | # record 81 | TRAIN_BACKUPDIR = 'fpn_res50_trainbackup_coco' 82 | TRAIN_LOGFILE = 'fpn_res50_trainbackup_coco/train.log' 83 | TEST_BACKUPDIR = 'fpn_res50_testbackup_coco' 84 | TEST_LOGFILE = 'fpn_res50_testbackup_coco/test.log' 85 | TEST_BBOXES_SAVE_PATH = 'fpn_res50_testbackup_coco/fpn_res50_detection_results_coco.json' 86 | SAVE_INTERVAL = 1 -------------------------------------------------------------------------------- /cfgs/getcfg.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | used to get config file for specified dataset and backbone. 4 | Author: 5 | Charles 6 | ''' 7 | def getCfgByDatasetAndBackbone(datasetname, backbonename): 8 | if [datasetname, backbonename] == ['coco', 'resnet101']: 9 | import cfgs.cfg_coco_resnet101 as cfg 10 | cfg_file_path = 'cfgs/cfg_coco_resnet101' 11 | elif [datasetname, backbonename] == ['coco', 'resnet50']: 12 | import cfgs.cfg_coco_resnet50 as cfg 13 | cfg_file_path = 'cfgs/cfg_coco_resnet50' 14 | elif [datasetname, backbonename] == ['coco', 'resnet34']: 15 | import cfgs.cfg_coco_resnet34 as cfg 16 | cfg_file_path = 'cfgs/cfg_coco_resnet34' 17 | elif [datasetname, backbonename] == ['coco', 'resnet18']: 18 | import cfgs.cfg_coco_resnet18 as cfg 19 | cfg_file_path = 'cfgs/cfg_coco_resnet18' 20 | else: 21 | raise ValueError('Can not find cfg file for dataset <%s> and backbone <%s>...' 
% (datasetname, backbonename)) 22 | return cfg, cfg_file_path -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | detect objects in one image 4 | Author: 5 | Charles 6 | ''' 7 | import os 8 | import torch 9 | import warnings 10 | import argparse 11 | import numpy as np 12 | from modules.utils import * 13 | from libs.nms.nms_wrapper import nms 14 | from PIL import Image, ImageDraw, ImageFont 15 | from cfgs.getcfg import getCfgByDatasetAndBackbone 16 | from modules.fasterRCNN import FasterRCNNFPNResNets 17 | warnings.filterwarnings("ignore") 18 | 19 | 20 | '''parse arguments for demo''' 21 | def parseArgs(): 22 | parser = argparse.ArgumentParser(description='Faster R-CNN with FPN') 23 | parser.add_argument('--imagepath', dest='imagepath', help='image you want to detect.', default='', type=str, required=True) 24 | parser.add_argument('--backbonename', dest='backbonename', help='backbone network for demo.', default='', type=str, required=True) 25 | parser.add_argument('--datasetname', dest='datasetname', help='dataset used to train.', default='', type=str, required=True) 26 | parser.add_argument('--checkpointspath', dest='checkpointspath', help='checkpoints you want to use.', default='', type=str, required=True) 27 | parser.add_argument('--nmsthresh', dest='nmsthresh', help='thresh used in nms.', default=0.5, type=float) 28 | parser.add_argument('--confthresh', dest='confthresh', help='thresh used in showing bounding box.', default=0.5, type=float) 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | '''detect objects in one image''' 34 | def demo(): 35 | # prepare base things 36 | args = parseArgs() 37 | cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename) 38 | checkDir(cfg.TEST_BACKUPDIR) 39 | logger_handle = Logger(cfg.TEST_LOGFILE) 40 | use_cuda = torch.cuda.is_available() 41 | clsnames = loadclsnames(cfg.CLSNAMESPATH) 42 | # prepare model 43 | if args.backbonename.find('resnet') != -1: 44 | model = FasterRCNNFPNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle) 45 | else: 46 | raise ValueError('Unsupport backbonename <%s> now...' % args.backbonename) 47 | if use_cuda: 48 | model = model.cuda() 49 | # load checkpoints 50 | checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) 51 | model.load_state_dict(checkpoints['model']) 52 | model.eval() 53 | # do detect 54 | FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 55 | img = Image.open(args.imagepath) 56 | if args.datasetname == 'coco': 57 | input_img, scale_factor, target_size = COCODataset.preprocessImage(img, use_color_jitter=False, image_size_dict=cfg.IMAGESIZE_DICT, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL) 58 | else: 59 | raise ValueError('Unsupport datasetname <%s> now...' 
% args.datasetname) 60 | input_img = input_img.unsqueeze(0).type(FloatTensor) 61 | gt_boxes = torch.FloatTensor([1, 1, 1, 1, 0]).unsqueeze(0).type(FloatTensor) 62 | img_info = torch.from_numpy(np.array([target_size[0], target_size[1], scale_factor])).unsqueeze(0).type(FloatTensor) 63 | num_gt_boxes = torch.FloatTensor([0]).unsqueeze(0).type(FloatTensor) 64 | with torch.no_grad(): 65 | output = model(x=input_img, gt_boxes=gt_boxes, img_info=img_info, num_gt_boxes=num_gt_boxes) 66 | rois = output[0].data[..., 1:5] 67 | cls_probs = output[1].data 68 | bbox_preds = output[2].data 69 | # parse the results 70 | if cfg.IS_CLASS_AGNOSTIC: 71 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 72 | box_deltas = box_deltas.view(1, -1, 4) 73 | else: 74 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 75 | box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES) 76 | boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas) 77 | boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, img_info.data) 78 | boxes_pred = boxes_pred.squeeze() 79 | scores = cls_probs.squeeze() 80 | thresh = 0.05 81 | for j in range(1, cfg.NUM_CLASSES): 82 | idxs = torch.nonzero(scores[:, j] > thresh).view(-1) 83 | if idxs.numel() > 0: 84 | cls_scores = scores[:, j][idxs] 85 | _, order = torch.sort(cls_scores, 0, True) 86 | if cfg.IS_CLASS_AGNOSTIC: 87 | cls_boxes = boxes_pred[idxs, :] 88 | else: 89 | cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4] 90 | cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) 91 | cls_dets = cls_dets[order] 92 | _, keep_idxs = nms(cls_dets, args.nmsthresh) 93 | cls_dets = cls_dets[keep_idxs.view(-1).long()] 94 | for cls_det in cls_dets: 95 | if cls_det[-1] > args.confthresh: 96 | x1, y1, x2, y2 = cls_det[:4] 97 | x1 = x1.item() / scale_factor 98 | x2 = x2.item() / scale_factor 99 | y1 = y1.item() / scale_factor 100 | y2 = y2.item() / scale_factor 101 | label = clsnames[j-1] 102 | logger_handle.info('Detect a %s in confidence %.4f...' 
% (label, cls_det[-1].item())) 103 | color = (0, 255, 0) 104 | draw = ImageDraw.Draw(img) 105 | draw.line([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)], width=2, fill=color) 106 | font = ImageFont.truetype('libs/font.TTF', 25) 107 | draw.text((x1+5, y1), label, fill=color, font=font) 108 | img.save(os.path.join(cfg.TEST_BACKUPDIR, 'demo_output.jpg')) 109 | 110 | 111 | '''run''' 112 | if __name__ == '__main__': 113 | demo() -------------------------------------------------------------------------------- /docs/Res101FPN_pytorch_epoch12.MD: -------------------------------------------------------------------------------- 1 | # Results on minival5k 2 | #### RoI512 3 | ``` 4 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.374 5 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.587 6 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.406 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.208 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.417 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.487 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.313 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.493 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.517 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.562 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.664 16 | ``` -------------------------------------------------------------------------------- /docs/Res50FPN_pytorch_epoch12.MD: -------------------------------------------------------------------------------- 1 | # Results on minival5k 2 | #### RoI512 3 | ``` 4 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.355 5 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.569 6 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.383 7 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.205 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.393 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.452 10 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.298 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.479 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.314 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.544 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641 16 | ``` -------------------------------------------------------------------------------- /libs/cocoapi/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/cocoapi/.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 2.7 6 | - 3.6 7 | install: 8 | - pip install --upgrade pip 9 | - pip install pycocotools 10 | script: 11 | - true 12 | -------------------------------------------------------------------------------- /libs/cocoapi/LuaAPI/MaskApi.lua: 
-------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for manipulating masks stored in RLE format. 4 | 5 | For an overview of RLE please see http://mscoco.org/dataset/#download. 6 | Additionally, more detailed information can be found in the Matlab MaskApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/MaskApi.m 8 | 9 | The following API functions are defined: 10 | encode - Encode binary masks using RLE. 11 | decode - Decode binary masks encoded via RLE. 12 | merge - Compute union or intersection of encoded masks. 13 | iou - Compute intersection over union between masks. 14 | nms - Compute non-maximum suppression between ordered masks. 15 | area - Compute area of encoded masks. 16 | toBbox - Get bounding boxes surrounding encoded masks. 17 | frBbox - Convert bounding boxes to encoded masks. 18 | frPoly - Convert polygon to encoded mask. 19 | drawCirc - Draw circle into image (alters input). 20 | drawLine - Draw line into image (alters input). 21 | drawMasks - Draw masks into image (alters input). 22 | 23 | Usage: 24 | Rs = MaskApi.encode( masks ) 25 | masks = MaskApi.decode( Rs ) 26 | R = MaskApi.merge( Rs, [intersect=false] ) 27 | o = MaskApi.iou( dt, gt, [iscrowd=false] ) 28 | keep = MaskApi.nms( dt, thr ) 29 | a = MaskApi.area( Rs ) 30 | bbs = MaskApi.toBbox( Rs ) 31 | Rs = MaskApi.frBbox( bbs, h, w ) 32 | R = MaskApi.frPoly( poly, h, w ) 33 | MaskApi.drawCirc( img, x, y, rad, clr ) 34 | MaskApi.drawLine( img, x0, y0, x1, y1, rad, clr ) 35 | MaskApi.drawMasks( img, masks, [maxn=n], [alpha=.4], [clrs] ) 36 | For detailed usage information please see cocoDemo.lua. 37 | 38 | In the API the following formats are used: 39 | R,Rs - [table] Run-length encoding of binary mask(s) 40 | masks - [nxhxw] Binary mask(s) 41 | bbs - [nx4] Bounding box(es) stored as [x y w h] 42 | poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 43 | dt,gt - May be either bounding boxes or encoded masks 44 | Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 45 | 46 | Common Objects in COntext (COCO) Toolbox. version 3.0 47 | Data, paper, and tutorials available at: http://mscoco.org/ 48 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 
49 | Licensed under the Simplified BSD License [see coco/license.txt] 50 | 51 | ------------------------------------------------------------------------------]] 52 | 53 | local ffi = require 'ffi' 54 | local coco = require 'coco.env' 55 | 56 | coco.MaskApi = {} 57 | local MaskApi = coco.MaskApi 58 | 59 | coco.libmaskapi = ffi.load(package.searchpath('libmaskapi',package.cpath)) 60 | local libmaskapi = coco.libmaskapi 61 | 62 | -------------------------------------------------------------------------------- 63 | 64 | MaskApi.encode = function( masks ) 65 | local n, h, w = masks:size(1), masks:size(2), masks:size(3) 66 | masks = masks:type('torch.ByteTensor'):transpose(2,3) 67 | local data = masks:contiguous():data() 68 | local Qs = MaskApi._rlesInit(n) 69 | libmaskapi.rleEncode(Qs[0],data,h,w,n) 70 | return MaskApi._rlesToLua(Qs,n) 71 | end 72 | 73 | MaskApi.decode = function( Rs ) 74 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 75 | local masks = torch.ByteTensor(n,w,h):zero():contiguous() 76 | libmaskapi.rleDecode(Qs,masks:data(),n) 77 | MaskApi._rlesFree(Qs,n) 78 | return masks:transpose(2,3) 79 | end 80 | 81 | MaskApi.merge = function( Rs, intersect ) 82 | intersect = intersect or 0 83 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 84 | local Q = MaskApi._rlesInit(1) 85 | libmaskapi.rleMerge(Qs,Q,n,intersect) 86 | MaskApi._rlesFree(Qs,n) 87 | return MaskApi._rlesToLua(Q,1)[1] 88 | end 89 | 90 | MaskApi.iou = function( dt, gt, iscrowd ) 91 | if not iscrowd then iscrowd = NULL else 92 | iscrowd = iscrowd:type('torch.ByteTensor'):contiguous():data() 93 | end 94 | if torch.isTensor(gt) and torch.isTensor(dt) then 95 | local nDt, k = dt:size(1), dt:size(2); assert(k==4) 96 | local nGt, k = gt:size(1), gt:size(2); assert(k==4) 97 | local dDt = dt:type('torch.DoubleTensor'):contiguous():data() 98 | local dGt = gt:type('torch.DoubleTensor'):contiguous():data() 99 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 100 | libmaskapi.bbIou(dDt,dGt,nDt,nGt,iscrowd,o:data()) 101 | return o:transpose(1,2) 102 | else 103 | local qDt, nDt = MaskApi._rlesFrLua(dt) 104 | local qGt, nGt = MaskApi._rlesFrLua(gt) 105 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 106 | libmaskapi.rleIou(qDt,qGt,nDt,nGt,iscrowd,o:data()) 107 | MaskApi._rlesFree(qDt,nDt); MaskApi._rlesFree(qGt,nGt) 108 | return o:transpose(1,2) 109 | end 110 | end 111 | 112 | MaskApi.nms = function( dt, thr ) 113 | if torch.isTensor(dt) then 114 | local n, k = dt:size(1), dt:size(2); assert(k==4) 115 | local Q = dt:type('torch.DoubleTensor'):contiguous():data() 116 | local kp = torch.IntTensor(n):contiguous() 117 | libmaskapi.bbNms(Q,n,kp:data(),thr) 118 | return kp 119 | else 120 | local Q, n = MaskApi._rlesFrLua(dt) 121 | local kp = torch.IntTensor(n):contiguous() 122 | libmaskapi.rleNms(Q,n,kp:data(),thr) 123 | MaskApi._rlesFree(Q,n) 124 | return kp 125 | end 126 | end 127 | 128 | MaskApi.area = function( Rs ) 129 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 130 | local a = torch.IntTensor(n):contiguous() 131 | libmaskapi.rleArea(Qs,n,a:data()) 132 | MaskApi._rlesFree(Qs,n) 133 | return a 134 | end 135 | 136 | MaskApi.toBbox = function( Rs ) 137 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 138 | local bb = torch.DoubleTensor(n,4):contiguous() 139 | libmaskapi.rleToBbox(Qs,bb:data(),n) 140 | MaskApi._rlesFree(Qs,n) 141 | return bb 142 | end 143 | 144 | MaskApi.frBbox = function( bbs, h, w ) 145 | if bbs:dim()==1 then bbs=bbs:view(1,bbs:size(1)) end 146 | local n, k = bbs:size(1), bbs:size(2); assert(k==4) 147 | local data = 
bbs:type('torch.DoubleTensor'):contiguous():data() 148 | local Qs = MaskApi._rlesInit(n) 149 | libmaskapi.rleFrBbox(Qs[0],data,h,w,n) 150 | return MaskApi._rlesToLua(Qs,n) 151 | end 152 | 153 | MaskApi.frPoly = function( poly, h, w ) 154 | local n = #poly 155 | local Qs, Q = MaskApi._rlesInit(n), MaskApi._rlesInit(1) 156 | for i,p in pairs(poly) do 157 | local xy = p:type('torch.DoubleTensor'):contiguous():data() 158 | libmaskapi.rleFrPoly(Qs[i-1],xy,p:size(1)/2,h,w) 159 | end 160 | libmaskapi.rleMerge(Qs,Q[0],n,0) 161 | MaskApi._rlesFree(Qs,n) 162 | return MaskApi._rlesToLua(Q,1)[1] 163 | end 164 | 165 | -------------------------------------------------------------------------------- 166 | 167 | MaskApi.drawCirc = function( img, x, y, rad, clr ) 168 | assert(img:isContiguous() and img:dim()==3) 169 | local k, h, w, data = img:size(1), img:size(2), img:size(3), img:data() 170 | for dx=-rad,rad do for dy=-rad,rad do 171 | local xi, yi = torch.round(x+dx), torch.round(y+dy) 172 | if dx*dx+dy*dy<=rad*rad and xi>=0 and yi>=0 and xi=0 and yi>=0 and xi= 5.1", 17 | "torch >= 7.0", 18 | "lua-cjson" 19 | } 20 | 21 | build = { 22 | type = "builtin", 23 | modules = { 24 | ["coco.env"] = "LuaAPI/env.lua", 25 | ["coco.init"] = "LuaAPI/init.lua", 26 | ["coco.MaskApi"] = "LuaAPI/MaskApi.lua", 27 | ["coco.CocoApi"] = "LuaAPI/CocoApi.lua", 28 | libmaskapi = { 29 | sources = { "common/maskApi.c" }, 30 | incdirs = { "common/" } 31 | } 32 | } 33 | } 34 | 35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec 36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec 37 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/MaskApi.m: -------------------------------------------------------------------------------- 1 | classdef MaskApi 2 | % Interface for manipulating masks stored in RLE format. 3 | % 4 | % RLE is a simple yet efficient format for storing binary masks. RLE 5 | % first divides a vector (or vectorized image) into a series of piecewise 6 | % constant regions and then for each piece simply stores the length of 7 | % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 8 | % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 9 | % (note that the odd counts are always the numbers of zeros). Instead of 10 | % storing the counts directly, additional compression is achieved with a 11 | % variable bitrate representation based on a common scheme called LEB128. 12 | % 13 | % Compression is greatest given large piecewise constant regions. 14 | % Specifically, the size of the RLE is proportional to the number of 15 | % *boundaries* in M (or for an image the number of boundaries in the y 16 | % direction). Assuming fairly simple shapes, the RLE representation is 17 | % O(sqrt(n)) where n is number of pixels in the object. Hence space usage 18 | % is substantially lower, especially for large simple objects (large n). 19 | % 20 | % Many common operations on masks can be computed directly using the RLE 21 | % (without need for decoding). This includes computations such as area, 22 | % union, intersection, etc. All of these operations are linear in the 23 | % size of the RLE, in other words they are O(sqrt(n)) where n is the area 24 | % of the object. Computing these operations on the original mask is O(n). 25 | % Thus, using the RLE can result in substantial computational savings. 26 | % 27 | % The following API functions are defined: 28 | % encode - Encode binary masks using RLE. 
29 | % decode - Decode binary masks encoded via RLE. 30 | % merge - Compute union or intersection of encoded masks. 31 | % iou - Compute intersection over union between masks. 32 | % nms - Compute non-maximum suppression between ordered masks. 33 | % area - Compute area of encoded masks. 34 | % toBbox - Get bounding boxes surrounding encoded masks. 35 | % frBbox - Convert bounding boxes to encoded masks. 36 | % frPoly - Convert polygon to encoded mask. 37 | % 38 | % Usage: 39 | % Rs = MaskApi.encode( masks ) 40 | % masks = MaskApi.decode( Rs ) 41 | % R = MaskApi.merge( Rs, [intersect=false] ) 42 | % o = MaskApi.iou( dt, gt, [iscrowd=false] ) 43 | % keep = MaskApi.nms( dt, thr ) 44 | % a = MaskApi.area( Rs ) 45 | % bbs = MaskApi.toBbox( Rs ) 46 | % Rs = MaskApi.frBbox( bbs, h, w ) 47 | % R = MaskApi.frPoly( poly, h, w ) 48 | % 49 | % In the API the following formats are used: 50 | % R,Rs - [struct] Run-length encoding of binary mask(s) 51 | % masks - [hxwxn] Binary mask(s) (must have type uint8) 52 | % bbs - [nx4] Bounding box(es) stored as [x y w h] 53 | % poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 54 | % dt,gt - May be either bounding boxes or encoded masks 55 | % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 56 | % 57 | % Finally, a note about the intersection over union (iou) computation. 58 | % The standard iou of a ground truth (gt) and detected (dt) object is 59 | % iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 60 | % For "crowd" regions, we use a modified criteria. If a gt object is 61 | % marked as "iscrowd", we allow a dt to match any subregion of the gt. 62 | % Choosing gt' in the crowd gt that best matches the dt can be done using 63 | % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 64 | % iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 65 | % For crowd gt regions we use this modified criteria above for the iou. 66 | % 67 | % To compile use the following (some precompiled binaries are included): 68 | % mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',... 69 | % 'private/maskApiMex.c','../common/maskApi.c',... 70 | % '-I../common/','-outdir','private'); 71 | % Please do not contact us for help with compiling. 72 | % 73 | % Microsoft COCO Toolbox. version 2.0 74 | % Data, paper, and tutorials available at: http://mscoco.org/ 75 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
76 | % Licensed under the Simplified BSD License [see coco/license.txt] 77 | 78 | methods( Static ) 79 | function Rs = encode( masks ) 80 | Rs = maskApiMex( 'encode', masks ); 81 | end 82 | 83 | function masks = decode( Rs ) 84 | masks = maskApiMex( 'decode', Rs ); 85 | end 86 | 87 | function R = merge( Rs, varargin ) 88 | R = maskApiMex( 'merge', Rs, varargin{:} ); 89 | end 90 | 91 | function o = iou( dt, gt, varargin ) 92 | o = maskApiMex( 'iou', dt', gt', varargin{:} ); 93 | end 94 | 95 | function keep = nms( dt, thr ) 96 | keep = maskApiMex('nms',dt',thr); 97 | end 98 | 99 | function a = area( Rs ) 100 | a = maskApiMex( 'area', Rs ); 101 | end 102 | 103 | function bbs = toBbox( Rs ) 104 | bbs = maskApiMex( 'toBbox', Rs )'; 105 | end 106 | 107 | function Rs = frBbox( bbs, h, w ) 108 | Rs = maskApiMex( 'frBbox', bbs', h, w ); 109 | end 110 | 111 | function R = frPoly( poly, h, w ) 112 | R = maskApiMex( 'frPoly', poly, h , w ); 113 | end 114 | end 115 | 116 | end 117 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/cocoDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo for the CocoApi (see CocoApi.m) 2 | 3 | %% initialize COCO api (please specify dataType/annType below) 4 | annTypes = { 'instances', 'captions', 'person_keypoints' }; 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType); 7 | coco=CocoApi(annFile); 8 | 9 | %% display COCO categories and supercategories 10 | if( ~strcmp(annType,'captions') ) 11 | cats = coco.loadCats(coco.getCatIds()); 12 | nms={cats.name}; fprintf('COCO categories: '); 13 | fprintf('%s, ',nms{:}); fprintf('\n'); 14 | nms=unique({cats.supercategory}); fprintf('COCO supercategories: '); 15 | fprintf('%s, ',nms{:}); fprintf('\n'); 16 | end 17 | 18 | %% get all images containing given categories, select one at random 19 | catIds = coco.getCatIds('catNms',{'person','dog','skateboard'}); 20 | imgIds = coco.getImgIds('catIds',catIds); 21 | imgId = imgIds(randi(length(imgIds))); 22 | 23 | %% load and display image 24 | img = coco.loadImgs(imgId); 25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name)); 26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[]) 27 | 28 | %% load and display annotations 29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]); 30 | anns = coco.loadAnns(annIds); coco.showAnns(anns); 31 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/evalDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo demonstrating the algorithm result formats for COCO 2 | 3 | %% select results type for demo (either bbox or segm) 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here 5 | fprintf('Running demo for *%s* results.\n\n',type); 6 | 7 | %% initialize COCO ground truth api 8 | dataDir='../'; prefix='instances'; dataType='val2014'; 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end 10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType); 11 | cocoGt=CocoApi(annFile); 12 | 13 | %% initialize COCO detections api 14 | resFile='%s/results/%s_%s_fake%s100_results.json'; 15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type); 16 | cocoDt=cocoGt.loadRes(resFile); 17 | 18 | %% visialuze gt and dt side by side 19 | imgIds=sort(cocoGt.getImgIds()); imgIds=imgIds(1:100); 20 | 
imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId); 21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name)); 22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off; 23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth') 24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns); 25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off; 26 | annIds = cocoDt.getAnnIds('imgIds',imgId); title('results') 27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns); 28 | 29 | %% load raw JSON and show exact format for results 30 | fprintf('results structure have the following format:\n'); 31 | res = gason(fileread(resFile)); disp(res) 32 | 33 | %% the following command can be used to save the results back to disk 34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end 35 | 36 | %% run COCO evaluation code (see CocoEval.m) 37 | cocoEval=CocoEval(cocoGt,cocoDt,type); 38 | cocoEval.params.imgIds=imgIds; 39 | cocoEval.evaluate(); 40 | cocoEval.accumulate(); 41 | cocoEval.summarize(); 42 | 43 | %% generate Derek Hoiem style analyis of false positives (slow) 44 | if(0), cocoEval.analyze(); end 45 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/gason.m: -------------------------------------------------------------------------------- 1 | function out = gason( in ) 2 | % Convert between JSON strings and corresponding JSON objects. 3 | % 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev: 5 | % https://github.com/vivkin/gason 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the 7 | % above link for license information and additional details about Gason. 8 | % 9 | % Given a JSON string, gason calls the C++ parser and converts the output 10 | % into an appropriate Matlab structure. As the parsing is performed in mex 11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take 12 | % only a few seconds to parse (compared to hours for pure Matlab parsers). 13 | % 14 | % Given a JSON object, gason calls the C++ encoder to convert the object 15 | % back into a JSON string representation. Nearly any Matlab struct, cell 16 | % array, or numeric array represent a valid JSON object. Note that gason() 17 | % can be used to go both from JSON string to JSON object and back. 18 | % 19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or 20 | % later). The following command compiles the parser (may require tweaking): 21 | % mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',... 22 | % 'private/gasonMex.cpp','../common/gason.cpp',... 23 | % '-I../common/','-outdir','private'); 24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries 25 | % are included, please do not contact us for help with compiling. If needed 26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'. 27 | % 28 | % Note that by default JSON arrays that contain only numbers are stored as 29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of 30 | % the same type are stored as Matlab struct arrays. This is much faster and 31 | % can use considerably less memory than always using Matlab cell arrays. 
32 | % 33 | % USAGE 34 | % object = gason( string ) 35 | % string = gason( object ) 36 | % 37 | % INPUTS/OUTPUTS 38 | % string - JSON string 39 | % object - JSON object 40 | % 41 | % EXAMPLE 42 | % o = struct('first',{'piotr','ty'},'last',{'dollar','lin'}) 43 | % s = gason( o ) % convert JSON object -> JSON string 44 | % p = gason( s ) % convert JSON string -> JSON object 45 | % 46 | % See also 47 | % 48 | % Microsoft COCO Toolbox. version 2.0 49 | % Data, paper, and tutorials available at: http://mscoco.org/ 50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 51 | % Licensed under the Simplified BSD License [see coco/license.txt] 52 | 53 | out = gasonMex( 'convert', in ); 54 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/private/gasonMex.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "gason.h" 8 | #include "mex.h" 9 | #include "string.h" 10 | #include "math.h" 11 | #include 12 | #include 13 | #include 14 | typedef std::ostringstream ostrm; 15 | typedef unsigned long siz; 16 | typedef unsigned short ushort; 17 | 18 | siz length( const JsonValue &a ) { 19 | // get number of elements in JSON_ARRAY or JSON_OBJECT 20 | siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k; 21 | } 22 | 23 | bool isRegularObjArray( const JsonValue &a ) { 24 | // check if all JSON_OBJECTs in JSON_ARRAY have the same fields 25 | JsonValue o=a.toNode()->value; siz k, n; const char **keys; 26 | n=length(o); keys=new const char*[n]; 27 | k=0; for(auto j:o) keys[k++]=j->key; 28 | for( auto i:a ) { 29 | if(length(i->value)!=n) return false; k=0; 30 | for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false; 31 | } 32 | delete [] keys; return true; 33 | } 34 | 35 | mxArray* json( const JsonValue &o ) { 36 | // convert JsonValue to Matlab mxArray 37 | siz k, m, n; mxArray *M; const char **keys; 38 | switch( o.getTag() ) { 39 | case JSON_NUMBER: 40 | return mxCreateDoubleScalar(o.toNumber()); 41 | case JSON_STRING: 42 | return mxCreateString(o.toString()); 43 | case JSON_ARRAY: { 44 | if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL); 45 | JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag(); 46 | n=length(o); bool isRegular=true; 47 | for(auto i:o) isRegular=isRegular && i->value.getTag()==tag; 48 | if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) { 49 | m=length(o0); keys=new const char*[m]; 50 | k=0; for(auto j:o0) keys[k++]=j->key; 51 | M = mxCreateStructMatrix(1,n,m,keys); 52 | k=0; for(auto i:o) { m=0; for(auto j:i->value) 53 | mxSetFieldByNumber(M,k,m++,json(j->value)); k++; } 54 | delete [] keys; return M; 55 | } else if( isRegular && tag==JSON_NUMBER ) { 56 | M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M); 57 | k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M; 58 | } else { 59 | M = mxCreateCellMatrix(1,n); 60 | k=0; for(auto i:o) mxSetCell(M,k++,json(i->value)); 61 | return M; 62 | } 63 | } 64 | case JSON_OBJECT: 65 | if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL); 66 | n=length(o); keys=new const char*[n]; 67 | k=0; for(auto i:o) 
keys[k++]=i->key; 68 | M = mxCreateStructMatrix(1,1,n,keys); k=0; 69 | for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value)); 70 | delete [] keys; return M; 71 | case JSON_TRUE: 72 | return mxCreateDoubleScalar(1); 73 | case JSON_FALSE: 74 | return mxCreateDoubleScalar(0); 75 | case JSON_NULL: 76 | return mxCreateDoubleMatrix(0,0,mxREAL); 77 | default: return NULL; 78 | } 79 | } 80 | 81 | template ostrm& json( ostrm &S, T *A, siz n ) { 82 | // convert numeric array to JSON string with casting 83 | if(n==0) { S<<"[]"; return S; } if(n==1) { S< ostrm& json( ostrm &S, T *A, siz n ) { 89 | // convert numeric array to JSON string without casting 90 | return json(S,A,n); 91 | } 92 | 93 | ostrm& json( ostrm &S, const char *A ) { 94 | // convert char array to JSON string (handle escape characters) 95 | #define RPL(a,b) case a: { S << b; A++; break; } 96 | S << "\""; while( *A>0 ) switch( *A ) { 97 | RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b"); 98 | RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t"); 99 | default: S << *A; A++; 100 | } 101 | S << "\""; return S; 102 | } 103 | 104 | ostrm& json( ostrm& S, const JsonValue *o ) { 105 | // convert JsonValue to JSON string 106 | switch( o->getTag() ) { 107 | case JSON_NUMBER: S << o->toNumber(); return S; 108 | case JSON_TRUE: S << "true"; return S; 109 | case JSON_FALSE: S << "false"; return S; 110 | case JSON_NULL: S << "null"; return S; 111 | case JSON_STRING: return json(S,o->toString()); 112 | case JSON_ARRAY: 113 | S << "["; for(auto i:*o) { 114 | json(S,&i->value) << (i->next ? "," : ""); } 115 | S << "]"; return S; 116 | case JSON_OBJECT: 117 | S << "{"; for(auto i:*o) { 118 | json(S,i->key) << ":"; 119 | json(S,&i->value) << (i->next ? "," : ""); } 120 | S << "}"; return S; 121 | default: return S; 122 | } 123 | } 124 | 125 | ostrm& json( ostrm& S, const mxArray *M ) { 126 | // convert Matlab mxArray to JSON string 127 | siz i, j, m, n=mxGetNumberOfElements(M); 128 | void *A=mxGetData(M); ostrm *nms; 129 | switch( mxGetClassID(M) ) { 130 | case mxDOUBLE_CLASS: return json(S,(double*) A,n); 131 | case mxSINGLE_CLASS: return json(S,(float*) A,n); 132 | case mxINT64_CLASS: return json(S,(int64_t*) A,n); 133 | case mxUINT64_CLASS: return json(S,(uint64_t*) A,n); 134 | case mxINT32_CLASS: return json(S,(int32_t*) A,n); 135 | case mxUINT32_CLASS: return json(S,(uint32_t*) A,n); 136 | case mxINT16_CLASS: return json(S,(int16_t*) A,n); 137 | case mxUINT16_CLASS: return json(S,(uint16_t*) A,n); 138 | case mxINT8_CLASS: return json(S,(int8_t*) A,n); 139 | case mxUINT8_CLASS: return json(S,(uint8_t*) A,n); 140 | case mxLOGICAL_CLASS: return json(S,(uint8_t*) A,n); 141 | case mxCHAR_CLASS: return json(S,mxArrayToString(M)); 142 | case mxCELL_CLASS: 143 | S << "["; for(i=0; i0) json(S,mxGetCell(M,n-1)); S << "]"; return S; 145 | case mxSTRUCT_CLASS: 146 | if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M); 147 | if(m==0) { S<<"["; for(i=0; i1) S<<"["; nms=new ostrm[m]; 149 | for(j=0; j1) S<<"]"; delete [] nms; return S; 156 | default: 157 | mexErrMsgTxt( "Unknown type." 
); return S; 158 | } 159 | } 160 | 161 | mxArray* mxCreateStringRobust( const char* str ) { 162 | // convert char* to Matlab string (robust version of mxCreateString) 163 | mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)}; 164 | M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M); 165 | for( siz i=0; i1 ) mexErrMsgTxt("One output expected."); 182 | 183 | if(!strcmp(action,"convert")) { 184 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 185 | if( mxGetClassID(pr[0])==mxCHAR_CLASS ) { 186 | // object = mexFunction( string ) 187 | char *str = mxArrayToStringRobust(pr[0]); 188 | int status = jsonParse(str, &endptr, &val, allocator); 189 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 190 | pl[0] = json(val); mxFree(str); 191 | } else { 192 | // string = mexFunction( object ) 193 | ostrm S; S << std::setprecision(12); json(S,pr[0]); 194 | pl[0]=mxCreateStringRobust(S.str().c_str()); 195 | } 196 | 197 | } else if(!strcmp(action,"split")) { 198 | // strings = mexFunction( string, k ) 199 | if( nr!=2 ) mexErrMsgTxt("Two input expected."); 200 | char *str = mxArrayToStringRobust(pr[0]); 201 | int status = jsonParse(str, &endptr, &val, allocator); 202 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 203 | if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected"); 204 | siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]); 205 | k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k)); 206 | pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<value); t--; if(!o->next) t=0; S << (t ? "," : "]"); 210 | if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str())); 211 | } 212 | 213 | } else if(!strcmp(action,"merge")) { 214 | // string = mexFunction( strings ) 215 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 216 | if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected."); 217 | siz n = mxGetNumberOfElements(pr[0]); 218 | ostrm S; S << std::setprecision(12); S << "["; 219 | for( siz i=0; ivalue) << (j->next ? "," : ""); 225 | mxFree(str); if(i1) 14 | % [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] ) 15 | % 16 | % INPUTS 17 | % prm - param struct or cell of form {'name1' v1 'name2' v2 ...} 18 | % dfs - cell of form {'name1' def1 'name2' def2 ...} 19 | % checkExtra - [0] if 1 throw error if prm contains params not in dfs 20 | % if -1 if prm contains params not in dfs adds them 21 | % 22 | % OUTPUTS (nargout==1) 23 | % prm - parameter struct with fields 'name1' through 'nameN' assigned 24 | % 25 | % OUTPUTS (nargout>1) 26 | % param1 - value assigned to parameter with 'name1' 27 | % ... 28 | % paramN - value assigned to parameter with 'nameN' 29 | % 30 | % EXAMPLE 31 | % dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 }; 32 | % prm = getPrmDflt( struct('x',1,'y',1), dfs ) 33 | % [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs ) 34 | % 35 | % See also INPUTPARSER 36 | % 37 | % Piotr's Computer Vision Matlab Toolbox Version 2.60 38 | % Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] 39 | % Licensed under the Simplified BSD License [see external/bsd.txt] 40 | 41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end 42 | if nargin<=2, checkExtra = 0; end 43 | 44 | % get the input parameters as two cell arrays: prmVal and prmField 45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end 46 | if iscell(prm) 47 | if(mod(length(prm),2)), error('odd number of parameters in prm'); end 48 | prmField = prm(1:2:end); prmVal = prm(2:2:end); 49 | else 50 | if(~isstruct(prm)), error('prm must be a struct or a cell'); end 51 | prmVal = struct2cell(prm); prmField = fieldnames(prm); 52 | end 53 | 54 | % get and update default values using quick for loop 55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end); 56 | if checkExtra>0 57 | for i=1:length(prmField) 58 | j = find(strcmp(prmField{i},dfsField)); 59 | if isempty(j), error('parameter %s is not valid', prmField{i}); end 60 | dfsVal(j) = prmVal(i); 61 | end 62 | elseif checkExtra<0 63 | for i=1:length(prmField) 64 | j = find(strcmp(prmField{i},dfsField)); 65 | if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end 66 | dfsVal(j) = prmVal(i); 67 | end 68 | else 69 | for i=1:length(prmField) 70 | dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i); 71 | end 72 | end 73 | 74 | % check for missing values 75 | if any(strcmp('REQ',dfsVal)) 76 | cmpArray = find(strcmp('REQ',dfsVal)); 77 | error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] ); 78 | end 79 | 80 | % set output 81 | if nargout==1 82 | varargout{1} = cell2struct( dfsVal, dfsField, 2 ); 83 | else 84 | varargout = dfsVal; 85 | end 86 | -------------------------------------------------------------------------------- /libs/cocoapi/MatlabAPI/private/maskApiMex.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "mex.h" 8 | #include "maskApi.h" 9 | #include 10 | 11 | void checkType( const mxArray *M, mxClassID id ) { 12 | if(mxGetClassID(M)!=id) mexErrMsgTxt("Invalid type."); 13 | } 14 | 15 | mxArray* toMxArray( const RLE *R, siz n ) { 16 | const char *fs[] = {"size", "counts"}; 17 | mxArray *M=mxCreateStructMatrix(1,n,2,fs); 18 | for( siz i=0; i1) mexErrMsgTxt(err); 35 | for( i=0; i<*n; i++ ) { 36 | mxArray *S, *C; double *s; void *c; 37 | S=mxGetFieldByNumber(M,i,O[0]); checkType(S,mxDOUBLE_CLASS); 38 | C=mxGetFieldByNumber(M,i,O[1]); s=mxGetPr(S); c=mxGetData(C); 39 | h=(siz)s[0]; w=(siz)s[1]; m=mxGetNumberOfElements(C); 40 | if(same && i>0 && (h!=R[0].h || w!=R[0].w)) mexErrMsgTxt(err); 41 | if( mxGetClassID(C)==mxDOUBLE_CLASS ) { 42 | rleInit(R+i,h,w,m,0); 43 | for(j=0; j=2) ? (mxGetScalar(pr[1])>0) : false; 74 | rleMerge(R,&M,n,intersect); pl[0]=toMxArray(&M,1); rleFree(&M); 75 | 76 | } else if(!strcmp(action,"area")) { 77 | R=frMxArray(pr[0],&n,0); 78 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 79 | uint *a=(uint*) mxGetPr(pl[0]); rleArea(R,n,a); 80 | 81 | } else if(!strcmp(action,"iou")) { 82 | if(nr>2) checkType(pr[2],mxUINT8_CLASS); siz nDt, nGt; 83 | byte *iscrowd = nr>2 ? 
(byte*) mxGetPr(pr[2]) : NULL; 84 | if(mxIsStruct(pr[0]) || mxIsStruct(pr[1])) { 85 | RLE *dt=frMxArray(pr[0],&nDt,1), *gt=frMxArray(pr[1],&nGt,1); 86 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 87 | double *o=mxGetPr(pl[0]); rleIou(dt,gt,nDt,nGt,iscrowd,o); 88 | rlesFree(&dt,nDt); rlesFree(>,nGt); 89 | } else { 90 | checkType(pr[0],mxDOUBLE_CLASS); checkType(pr[1],mxDOUBLE_CLASS); 91 | double *dt=mxGetPr(pr[0]); nDt=mxGetN(pr[0]); 92 | double *gt=mxGetPr(pr[1]); nGt=mxGetN(pr[1]); 93 | pl[0]=mxCreateNumericMatrix(nDt,nGt,mxDOUBLE_CLASS,mxREAL); 94 | double *o=mxGetPr(pl[0]); bbIou(dt,gt,nDt,nGt,iscrowd,o); 95 | } 96 | 97 | } else if(!strcmp(action,"nms")) { 98 | siz n; uint *keep; double thr=(double) mxGetScalar(pr[1]); 99 | if(mxIsStruct(pr[0])) { 100 | RLE *dt=frMxArray(pr[0],&n,1); 101 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 102 | keep=(uint*) mxGetPr(pl[0]); rleNms(dt,n,keep,thr); 103 | rlesFree(&dt,n); 104 | } else { 105 | checkType(pr[0],mxDOUBLE_CLASS); 106 | double *dt=mxGetPr(pr[0]); n=mxGetN(pr[0]); 107 | pl[0]=mxCreateNumericMatrix(1,n,mxUINT32_CLASS,mxREAL); 108 | keep=(uint*) mxGetPr(pl[0]); bbNms(dt,n,keep,thr); 109 | } 110 | 111 | } else if(!strcmp(action,"toBbox")) { 112 | R=frMxArray(pr[0],&n,0); 113 | pl[0]=mxCreateNumericMatrix(4,n,mxDOUBLE_CLASS,mxREAL); 114 | BB bb=mxGetPr(pl[0]); rleToBbox(R,bb,n); 115 | 116 | } else if(!strcmp(action,"frBbox")) { 117 | checkType(pr[0],mxDOUBLE_CLASS); 118 | double *bb=mxGetPr(pr[0]); n=mxGetN(pr[0]); 119 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); 120 | rlesInit(&R,n); rleFrBbox(R,bb,h,w,n); pl[0]=toMxArray(R,n); 121 | 122 | } else if(!strcmp(action,"frPoly")) { 123 | checkType(pr[0],mxCELL_CLASS); n=mxGetNumberOfElements(pr[0]); 124 | h=(siz)mxGetScalar(pr[1]); w=(siz)mxGetScalar(pr[2]); rlesInit(&R,n); 125 | for(siz i=0; i=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='2.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /libs/cocoapi/README.txt: -------------------------------------------------------------------------------- 1 | COCO API - http://cocodataset.org/ 2 | 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assists in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including for the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete, the Lua API provides only basic functionality. 4 | 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website. 6 | -Please download, unzip, and place the images in: coco/images/ 7 | -Please download and place the annotations in: coco/annotations/ 8 | For substantially more details on the API please see http://cocodataset.org/#download. 9 | 10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage. 
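For reference, a minimal Python sketch of the loading and evaluation flow that the demos walk through (the annotation and result file paths below are illustrative and depend on which split and results file you have downloaded into coco/):
```
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# load ground-truth annotations and query them
coco_gt = COCO('coco/annotations/instances_val2014.json')
cat_ids = coco_gt.getCatIds(catNms=['person'])
img_ids = coco_gt.getImgIds(catIds=cat_ids)
anns = coco_gt.loadAnns(coco_gt.getAnnIds(imgIds=img_ids[0], catIds=cat_ids, iscrowd=None))
print(len(anns), 'person annotations in image', img_ids[0])

# score a detection results file against the ground truth (bbox metric)
coco_dt = coco_gt.loadRes('results/instances_val2014_fakebbox100_results.json')
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
```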
11 | 12 | To install: 13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided) 14 | -For Python, run "make" under coco/PythonAPI 15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/ 16 | -------------------------------------------------------------------------------- /libs/cocoapi/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void *JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /libs/cocoapi/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /libs/cocoapi/common/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 174 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 175 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 176 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 177 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 178 | } else for( d=0; d<=dy; d++ ) { 179 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 180 | } 181 | } 182 | /* get points along y-boundary and downsample */ 183 | free(x); free(y); k=m; m=0; double xd, yd; 184 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 185 | for( j=1; jw-1 ) continue; 188 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 190 | x[m]=(int) xd; y[m]=(int) yd; m++; 191 | } 192 | /* compute rle encoding given y-boundary points */ 193 | k=m; a=malloc(sizeof(uint)*(k+1)); 194 | for( j=0; j0) b[m++]=a[j++]; else { 200 | j++; if(jm, p=0; long x; int more; 207 | char *s=malloc(sizeof(char)*m*6); 208 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 210 | while( more ) { 211 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 212 | if(more) c |= 0x20; c+=48; s[p++]=c; 213 | } 214 | } 215 | s[p]=0; return s; 216 | } 217 | 218 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 219 | siz m=0, p=0, k; long x; int more; uint *cnts; 220 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 221 | while( s[p] ) { 222 | x=0; k=0; more=1; 223 | while( more ) { 224 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 225 | more = c & 0x20; p++; k++; 226 | if(!more && (c & 0x10)) x |= -1 << 5*k; 227 | } 228 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 229 | } 230 | rleInit(R,h,w,m,cnts); free(cnts); 231 | } 232 | -------------------------------------------------------------------------------- /libs/cocoapi/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /libs/cocoapi/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /libs/cocoapi/results/val2014_fake_eval_res.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | type=segm 3 | Running per image evaluation... DONE (t=0.45s). 4 | Accumulating evaluation results... DONE (t=0.08s). 5 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.320 6 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.562 7 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.299 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.387 9 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.310 10 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.327 11 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.268 12 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.415 13 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.417 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.469 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.377 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.381 17 | 18 | ------------------------------------------------------------------------------ 19 | type=bbox 20 | Running per image evaluation... DONE (t=0.34s). 21 | Accumulating evaluation results... DONE (t=0.08s). 22 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505 23 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697 24 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573 25 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586 26 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519 27 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501 28 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387 29 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594 30 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595 31 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640 32 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566 33 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564 34 | 35 | ------------------------------------------------------------------------------ 36 | type=keypoints 37 | Running per image evaluation... DONE (t=0.06s). 38 | Accumulating evaluation results... DONE (t=0.00s). 
39 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.372 40 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.636 41 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.348 42 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.384 43 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.386 44 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.514 45 | Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.734 46 | Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.504 47 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.508 48 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.522 49 | -------------------------------------------------------------------------------- /libs/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /libs/dcn/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import deform_pool_cuda 8 | 9 | 10 | class DeformRoIPoolingFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, 14 | data, 15 | rois, 16 | offset, 17 | spatial_scale, 18 | out_size, 19 | out_channels, 20 | no_trans, 21 | group_size=1, 22 | part_size=None, 23 | sample_per_part=4, 24 | trans_std=.0): 25 | # TODO: support unsquare RoIs 26 | out_h, out_w = _pair(out_size) 27 | assert isinstance(out_h, int) and isinstance(out_w, int) 28 | assert out_h == out_w 29 | out_size = out_h # out_h and out_w must be equal 30 | 31 | ctx.spatial_scale = spatial_scale 32 | ctx.out_size = out_size 33 | ctx.out_channels = out_channels 34 | ctx.no_trans = no_trans 35 | ctx.group_size = group_size 36 | ctx.part_size = out_size if part_size is None else part_size 37 | ctx.sample_per_part = sample_per_part 38 | ctx.trans_std = trans_std 39 | 40 | assert 0.0 <= ctx.trans_std <= 1.0 41 | if not data.is_cuda: 42 | raise NotImplementedError 43 | 44 | n = rois.shape[0] 45 | output = data.new_empty(n, out_channels, out_size, out_size) 46 | output_count = data.new_empty(n, out_channels, out_size, out_size) 47 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 48 | data, rois, offset, output, output_count, ctx.no_trans, 49 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 50 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 51 | 52 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 53 | ctx.save_for_backward(data, rois, offset) 54 | ctx.output_count = output_count 55 | 56 | return output 57 | 58 | @staticmethod 59 | @once_differentiable 60 | def backward(ctx, grad_output): 61 | if not grad_output.is_cuda: 62 | raise NotImplementedError 63 | 64 | data, rois, offset = ctx.saved_tensors 65 | output_count = ctx.output_count 66 | grad_input = torch.zeros_like(data) 67 | grad_rois = None 68 | grad_offset = torch.zeros_like(offset) 69 | 70 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 71 | grad_output, data, rois, offset, output_count, grad_input, 72 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 73 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 74 | ctx.trans_std) 75 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 76 | None, None, None, None) 77 | 78 | 79 | deform_roi_pooling = DeformRoIPoolingFunction.apply 80 | 81 | 82 | class DeformRoIPooling(nn.Module): 83 | 84 | def __init__(self, 85 | spatial_scale, 86 | out_size, 87 | out_channels, 88 | no_trans, 89 | group_size=1, 90 | part_size=None, 91 | sample_per_part=4, 92 | trans_std=.0): 93 | super(DeformRoIPooling, self).__init__() 94 | self.spatial_scale = spatial_scale 95 | self.out_size = _pair(out_size) 96 | self.out_channels = out_channels 97 | self.no_trans = no_trans 98 | self.group_size = group_size 99 | self.part_size = out_size if part_size is None else part_size 100 | self.sample_per_part = sample_per_part 101 | self.trans_std = trans_std 102 | 103 | def forward(self, data, rois, offset): 104 | if self.no_trans: 105 | offset = data.new_empty(0) 106 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 107 | self.out_size, self.out_channels, 108 | self.no_trans, self.group_size, 109 | self.part_size, self.sample_per_part, 110 | self.trans_std) 111 | 112 | 113 | class DeformRoIPoolingPack(DeformRoIPooling): 114 | 115 | def __init__(self, 116 | spatial_scale, 117 | out_size, 118 | out_channels, 119 | no_trans, 120 | group_size=1, 121 | part_size=None, 122 | sample_per_part=4, 123 | trans_std=.0, 124 | num_offset_fcs=3, 125 | 
deform_fc_channels=1024): 126 | super(DeformRoIPoolingPack, 127 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 128 | group_size, part_size, sample_per_part, trans_std) 129 | 130 | self.num_offset_fcs = num_offset_fcs 131 | self.deform_fc_channels = deform_fc_channels 132 | 133 | if not no_trans: 134 | seq = [] 135 | ic = self.out_size[0] * self.out_size[1] * self.out_channels 136 | for i in range(self.num_offset_fcs): 137 | if i < self.num_offset_fcs - 1: 138 | oc = self.deform_fc_channels 139 | else: 140 | oc = self.out_size[0] * self.out_size[1] * 2 141 | seq.append(nn.Linear(ic, oc)) 142 | ic = oc 143 | if i < self.num_offset_fcs - 1: 144 | seq.append(nn.ReLU(inplace=True)) 145 | self.offset_fc = nn.Sequential(*seq) 146 | self.offset_fc[-1].weight.data.zero_() 147 | self.offset_fc[-1].bias.data.zero_() 148 | 149 | def forward(self, data, rois): 150 | assert data.size(1) == self.out_channels 151 | n = rois.shape[0] 152 | if n == 0: 153 | return data.new_empty(n, self.out_channels, self.out_size[0], 154 | self.out_size[1]) 155 | if self.no_trans: 156 | offset = data.new_empty(0) 157 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 158 | self.out_size, self.out_channels, 159 | self.no_trans, self.group_size, 160 | self.part_size, self.sample_per_part, 161 | self.trans_std) 162 | else: 163 | offset = data.new_empty(0) 164 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 165 | self.out_size, self.out_channels, True, 166 | self.group_size, self.part_size, 167 | self.sample_per_part, self.trans_std) 168 | offset = self.offset_fc(x.view(n, -1)) 169 | offset = offset.view(n, 2, self.out_size[0], self.out_size[1]) 170 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 171 | self.out_size, self.out_channels, 172 | self.no_trans, self.group_size, 173 | self.part_size, self.sample_per_part, 174 | self.trans_std) 175 | 176 | 177 | class ModulatedDeformRoIPoolingPack(DeformRoIPooling): 178 | 179 | def __init__(self, 180 | spatial_scale, 181 | out_size, 182 | out_channels, 183 | no_trans, 184 | group_size=1, 185 | part_size=None, 186 | sample_per_part=4, 187 | trans_std=.0, 188 | num_offset_fcs=3, 189 | num_mask_fcs=2, 190 | deform_fc_channels=1024): 191 | super(ModulatedDeformRoIPoolingPack, 192 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 193 | group_size, part_size, sample_per_part, trans_std) 194 | 195 | self.num_offset_fcs = num_offset_fcs 196 | self.num_mask_fcs = num_mask_fcs 197 | self.deform_fc_channels = deform_fc_channels 198 | 199 | if not no_trans: 200 | offset_fc_seq = [] 201 | ic = self.out_size[0] * self.out_size[1] * self.out_channels 202 | for i in range(self.num_offset_fcs): 203 | if i < self.num_offset_fcs - 1: 204 | oc = self.deform_fc_channels 205 | else: 206 | oc = self.out_size[0] * self.out_size[1] * 2 207 | offset_fc_seq.append(nn.Linear(ic, oc)) 208 | ic = oc 209 | if i < self.num_offset_fcs - 1: 210 | offset_fc_seq.append(nn.ReLU(inplace=True)) 211 | self.offset_fc = nn.Sequential(*offset_fc_seq) 212 | self.offset_fc[-1].weight.data.zero_() 213 | self.offset_fc[-1].bias.data.zero_() 214 | 215 | mask_fc_seq = [] 216 | ic = self.out_size[0] * self.out_size[1] * self.out_channels 217 | for i in range(self.num_mask_fcs): 218 | if i < self.num_mask_fcs - 1: 219 | oc = self.deform_fc_channels 220 | else: 221 | oc = self.out_size[0] * self.out_size[1] 222 | mask_fc_seq.append(nn.Linear(ic, oc)) 223 | ic = oc 224 | if i < self.num_mask_fcs - 1: 225 | mask_fc_seq.append(nn.ReLU(inplace=True)) 
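                    # note: the final mask layer is followed by a Sigmoid (else-branch below), so the
                    # predicted modulation mask lies in (0, 1); because the last Linear is
                    # zero-initialised further down (self.mask_fc[-2]), the mask starts out at 0.5.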
226 | else: 227 | mask_fc_seq.append(nn.Sigmoid()) 228 | self.mask_fc = nn.Sequential(*mask_fc_seq) 229 | self.mask_fc[-2].weight.data.zero_() 230 | self.mask_fc[-2].bias.data.zero_() 231 | 232 | def forward(self, data, rois): 233 | assert data.size(1) == self.out_channels 234 | n = rois.shape[0] 235 | if n == 0: 236 | return data.new_empty(n, self.out_channels, self.out_size[0], 237 | self.out_size[1]) 238 | if self.no_trans: 239 | offset = data.new_empty(0) 240 | return deform_roi_pooling(data, rois, offset, self.spatial_scale, 241 | self.out_size, self.out_channels, 242 | self.no_trans, self.group_size, 243 | self.part_size, self.sample_per_part, 244 | self.trans_std) 245 | else: 246 | offset = data.new_empty(0) 247 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 248 | self.out_size, self.out_channels, True, 249 | self.group_size, self.part_size, 250 | self.sample_per_part, self.trans_std) 251 | offset = self.offset_fc(x.view(n, -1)) 252 | offset = offset.view(n, 2, self.out_size[0], self.out_size[1]) 253 | mask = self.mask_fc(x.view(n, -1)) 254 | mask = mask.view(n, 1, self.out_size[0], self.out_size[1]) 255 | return deform_roi_pooling( 256 | data, rois, offset, self.spatial_scale, self.out_size, 257 | self.out_channels, self.no_trans, self.group_size, 258 | self.part_size, self.sample_per_part, self.trans_std) * mask 259 | -------------------------------------------------------------------------------- /libs/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | void DeformablePSROIPoolForward( 15 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 16 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 17 | const int height, const int width, const int num_bbox, 18 | const int channels_trans, const int no_trans, const float spatial_scale, 19 | const int output_dim, const int group_size, const int pooled_size, 20 | const int part_size, const int sample_per_part, const float trans_std); 21 | 22 | void DeformablePSROIPoolBackwardAcc( 23 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 24 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 25 | at::Tensor trans_grad, const int batch, const int channels, 26 | const int height, const int width, const int num_bbox, 27 | const int channels_trans, const int no_trans, const float spatial_scale, 28 | const int output_dim, const int group_size, const int pooled_size, 29 | const int part_size, const int sample_per_part, const float trans_std); 30 | 31 | void deform_psroi_pooling_cuda_forward( 32 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 33 | at::Tensor top_count, const int no_trans, const float spatial_scale, 34 | const int output_dim, const int group_size, const int pooled_size, 35 | const int part_size, const int sample_per_part, const float trans_std) { 36 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 37 | at::DeviceGuard guard(input.device()); 38 | 39 | const int batch = input.size(0); 40 | const int channels = input.size(1); 41 | const int height = input.size(2); 42 | const 
int width = input.size(3); 43 | const int channels_trans = no_trans ? 2 : trans.size(1); 44 | 45 | const int num_bbox = bbox.size(0); 46 | if (num_bbox != out.size(0)) 47 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 48 | out.size(0), num_bbox); 49 | 50 | DeformablePSROIPoolForward( 51 | input, bbox, trans, out, top_count, batch, channels, height, width, 52 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 53 | pooled_size, part_size, sample_per_part, trans_std); 54 | } 55 | 56 | void deform_psroi_pooling_cuda_backward( 57 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 58 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 59 | const int no_trans, const float spatial_scale, const int output_dim, 60 | const int group_size, const int pooled_size, const int part_size, 61 | const int sample_per_part, const float trans_std) { 62 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 63 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 64 | at::DeviceGuard guard(input.device()); 65 | 66 | const int batch = input.size(0); 67 | const int channels = input.size(1); 68 | const int height = input.size(2); 69 | const int width = input.size(3); 70 | const int channels_trans = no_trans ? 2 : trans.size(1); 71 | 72 | const int num_bbox = bbox.size(0); 73 | if (num_bbox != out_grad.size(0)) 74 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 75 | out_grad.size(0), num_bbox); 76 | 77 | DeformablePSROIPoolBackwardAcc( 78 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 79 | channels, height, width, num_bbox, channels_trans, no_trans, 80 | spatial_scale, output_dim, group_size, pooled_size, part_size, 81 | sample_per_part, trans_std); 82 | } 83 | 84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 85 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 86 | "deform psroi pooling forward(CUDA)"); 87 | m.def("deform_psroi_pooling_cuda_backward", 88 | &deform_psroi_pooling_cuda_backward, 89 | "deform psroi pooling backward(CUDA)"); 90 | } 91 | -------------------------------------------------------------------------------- /libs/font.TTF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DetectionBLWX/FPN.pytorch/baa35139912edb23e1f153b8684b498061c70e92/libs/font.TTF -------------------------------------------------------------------------------- /libs/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd cocoapi/PythonAPI 6 | make 7 | cd .. 8 | cd .. 9 | 10 | python setup.py develop 11 | rm -rf build 12 | rm -rf FPN.egg-info -------------------------------------------------------------------------------- /libs/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /libs/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | 6 | 7 | def nms(dets, iou_thr, device_id=None): 8 | """Dispatch to either CPU or GPU NMS implementations. 9 | 10 | The input can be either a torch tensor or numpy array. 
GPU NMS will be used 11 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 12 | will be used. The returned type will always be the same as inputs. 13 | 14 | Arguments: 15 | dets (torch.Tensor or np.ndarray): bboxes with scores. 16 | iou_thr (float): IoU threshold for NMS. 17 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 18 | is None, then cpu nms is used, otherwise gpu_nms will be used. 19 | 20 | Returns: 21 | tuple: kept bboxes and indice, which is always the same data type as 22 | the input. 23 | 24 | Example: 25 | >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 26 | >>> [49.3, 32.9, 51.0, 35.3, 0.9], 27 | >>> [49.2, 31.8, 51.0, 35.4, 0.5], 28 | >>> [35.1, 11.5, 39.1, 15.7, 0.5], 29 | >>> [35.6, 11.8, 39.3, 14.2, 0.5], 30 | >>> [35.3, 11.5, 39.9, 14.5, 0.4], 31 | >>> [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32) 32 | >>> iou_thr = 0.7 33 | >>> suppressed, inds = nms(dets, iou_thr) 34 | >>> assert len(inds) == len(suppressed) == 3 35 | """ 36 | # convert dets (tensor or numpy array) to tensor 37 | if isinstance(dets, torch.Tensor): 38 | is_numpy = False 39 | dets_th = dets 40 | elif isinstance(dets, np.ndarray): 41 | is_numpy = True 42 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 43 | dets_th = torch.from_numpy(dets).to(device) 44 | else: 45 | raise TypeError( 46 | 'dets must be either a Tensor or numpy array, but got {}'.format( 47 | type(dets))) 48 | 49 | # execute cpu or cuda nms 50 | if dets_th.shape[0] == 0: 51 | inds = dets_th.new_zeros(0, dtype=torch.long) 52 | else: 53 | if dets_th.is_cuda: 54 | inds = nms_cuda.nms(dets_th, iou_thr) 55 | else: 56 | inds = nms_cpu.nms(dets_th, iou_thr) 57 | 58 | if is_numpy: 59 | inds = inds.cpu().numpy() 60 | return dets[inds, :], inds 61 | 62 | 63 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 64 | """Dispatch to only CPU Soft NMS implementations. 65 | 66 | The input can be either a torch tensor or numpy array. 67 | The returned type will always be the same as inputs. 68 | 69 | Arguments: 70 | dets (torch.Tensor or np.ndarray): bboxes with scores. 71 | iou_thr (float): IoU threshold for Soft NMS. 72 | method (str): either 'linear' or 'gaussian' 73 | sigma (float): hyperparameter for gaussian method 74 | min_score (float): score filter threshold 75 | 76 | Returns: 77 | tuple: new det bboxes and indice, which is always the same 78 | data type as the input. 
79 | 80 | Example: 81 | >>> dets = np.array([[4., 3., 5., 3., 0.9], 82 | >>> [4., 3., 5., 4., 0.9], 83 | >>> [3., 1., 3., 1., 0.5], 84 | >>> [3., 1., 3., 1., 0.5], 85 | >>> [3., 1., 3., 1., 0.4], 86 | >>> [3., 1., 3., 1., 0.0]], dtype=np.float32) 87 | >>> iou_thr = 0.7 88 | >>> new_dets, inds = soft_nms(dets, iou_thr, sigma=0.5) 89 | >>> assert len(inds) == len(new_dets) == 3 90 | """ 91 | # convert dets (tensor or numpy array) to tensor 92 | if isinstance(dets, torch.Tensor): 93 | is_tensor = True 94 | dets_t = dets.detach().cpu() 95 | elif isinstance(dets, np.ndarray): 96 | is_tensor = False 97 | dets_t = torch.from_numpy(dets) 98 | else: 99 | raise TypeError( 100 | 'dets must be either a Tensor or numpy array, but got {}'.format( 101 | type(dets))) 102 | 103 | method_codes = {'linear': 1, 'gaussian': 2} 104 | if method not in method_codes: 105 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 106 | results = nms_cpu.soft_nms(dets_t, iou_thr, method_codes[method], sigma, 107 | min_score) 108 | 109 | new_dets = results[:, :5] 110 | inds = results[:, 5] 111 | 112 | if is_tensor: 113 | return new_dets.to( 114 | device=dets.device, dtype=dets.dtype), inds.to( 115 | device=dets.device, dtype=torch.long) 116 | else: 117 | return new_dets.numpy().astype(dets.dtype), inds.numpy().astype( 118 | np.int64) 119 | -------------------------------------------------------------------------------- /libs/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Modified from https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx, Soft-NMS is added 2 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | #include 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 7 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 8 | 9 | if (dets.numel() == 0) { 10 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 11 | } 12 | 13 | auto x1_t = dets.select(1, 0).contiguous(); 14 | auto y1_t = dets.select(1, 1).contiguous(); 15 | auto x2_t = dets.select(1, 2).contiguous(); 16 | auto y2_t = dets.select(1, 3).contiguous(); 17 | auto scores = dets.select(1, 4).contiguous(); 18 | 19 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 20 | 21 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 22 | 23 | auto ndets = dets.size(0); 24 | at::Tensor suppressed_t = 25 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 26 | 27 | auto suppressed = suppressed_t.data(); 28 | auto order = order_t.data(); 29 | auto x1 = x1_t.data(); 30 | auto y1 = y1_t.data(); 31 | auto x2 = x2_t.data(); 32 | auto y2 = y2_t.data(); 33 | auto areas = areas_t.data(); 34 | 35 | for (int64_t _i = 0; _i < ndets; _i++) { 36 | auto i = order[_i]; 37 | if (suppressed[i] == 1) continue; 38 | auto ix1 = x1[i]; 39 | auto iy1 = y1[i]; 40 | auto ix2 = x2[i]; 41 | auto iy2 = y2[i]; 42 | auto iarea = areas[i]; 43 | 44 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 45 | auto j = order[_j]; 46 | if (suppressed[j] == 1) continue; 47 | auto xx1 = std::max(ix1, x1[j]); 48 | auto yy1 = std::max(iy1, y1[j]); 49 | auto xx2 = std::min(ix2, x2[j]); 50 | auto yy2 = std::min(iy2, y2[j]); 51 | 52 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 53 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 54 | auto inter = w * h; 55 | auto ovr = inter / (iarea + areas[j] - inter); 56 | if (ovr >= threshold) suppressed[j] = 1; 57 | } 58 | } 59 | 
return at::nonzero(suppressed_t == 0).squeeze(1);
60 | }
61 | 
62 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
63 |   at::Tensor result;
64 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
65 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
66 |   });
67 |   return result;
68 | }
69 | 
70 | template <typename scalar_t>
71 | at::Tensor soft_nms_cpu_kernel(const at::Tensor& dets, const float threshold,
72 |                                const unsigned char method, const float sigma, const float min_score) {
73 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
74 | 
75 |   if (dets.numel() == 0) {
76 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
77 |   }
78 | 
79 |   auto x1_t = dets.select(1, 0).contiguous();
80 |   auto y1_t = dets.select(1, 1).contiguous();
81 |   auto x2_t = dets.select(1, 2).contiguous();
82 |   auto y2_t = dets.select(1, 3).contiguous();
83 |   auto scores_t = dets.select(1, 4).contiguous();
84 | 
85 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
86 | 
87 |   auto ndets = dets.size(0);
88 |   auto x1 = x1_t.data<scalar_t>();
89 |   auto y1 = y1_t.data<scalar_t>();
90 |   auto x2 = x2_t.data<scalar_t>();
91 |   auto y2 = y2_t.data<scalar_t>();
92 |   auto scores = scores_t.data<scalar_t>();
93 |   auto areas = areas_t.data<scalar_t>();
94 | 
95 |   int64_t pos = 0;
96 |   at::Tensor inds_t = at::arange(ndets, dets.options());
97 |   auto inds = inds_t.data<scalar_t>();
98 | 
99 |   for (int64_t i = 0; i < ndets; i++) {
100 |     auto max_score = scores[i];
101 |     auto max_pos = i;
102 | 
103 |     auto ix1 = x1[i];
104 |     auto iy1 = y1[i];
105 |     auto ix2 = x2[i];
106 |     auto iy2 = y2[i];
107 |     auto iscore = scores[i];
108 |     auto iarea = areas[i];
109 |     auto iind = inds[i];
110 | 
111 |     pos = i + 1;
112 |     // get max box
113 |     while (pos < ndets) {
114 |       if (max_score < scores[pos]) {
115 |         max_score = scores[pos];
116 |         max_pos = pos;
117 |       }
118 |       pos = pos + 1;
119 |     }
120 |     // add max box as a detection
121 |     x1[i] = x1[max_pos];
122 |     y1[i] = y1[max_pos];
123 |     x2[i] = x2[max_pos];
124 |     y2[i] = y2[max_pos];
125 |     scores[i] = scores[max_pos];
126 |     areas[i] = areas[max_pos];
127 |     inds[i] = inds[max_pos];
128 | 
129 |     // swap ith box with position of max box
130 |     x1[max_pos] = ix1;
131 |     y1[max_pos] = iy1;
132 |     x2[max_pos] = ix2;
133 |     y2[max_pos] = iy2;
134 |     scores[max_pos] = iscore;
135 |     areas[max_pos] = iarea;
136 |     inds[max_pos] = iind;
137 | 
138 |     ix1 = x1[i];
139 |     iy1 = y1[i];
140 |     ix2 = x2[i];
141 |     iy2 = y2[i];
142 |     iscore = scores[i];
143 |     iarea = areas[i];
144 | 
145 |     pos = i + 1;
146 |     // NMS iterations, note that N changes if detection boxes fall below threshold
147 |     while (pos < ndets) {
148 |       auto xx1 = std::max(ix1, x1[pos]);
149 |       auto yy1 = std::max(iy1, y1[pos]);
150 |       auto xx2 = std::min(ix2, x2[pos]);
151 |       auto yy2 = std::min(iy2, y2[pos]);
152 | 
153 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
154 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
155 |       auto inter = w * h;
156 |       auto ovr = inter / (iarea + areas[pos] - inter);
157 | 
158 |       scalar_t weight = 1.;
159 |       if (method == 1) {
160 |         if (ovr > threshold) weight = 1 - ovr;
161 |       }
162 |       else if (method == 2) {
163 |         weight = std::exp(-(ovr * ovr) / sigma);
164 |       }
165 |       else {
166 |         // original NMS
167 |         if (ovr > threshold) {
168 |           weight = 0;
169 |         }
170 |         else {
171 |           weight = 1;
172 |         }
173 |       }
174 |       scores[pos] = weight * scores[pos];
175 |       // if box score falls below threshold, discard the box by
176 |       // swapping with last box and update N
177 |       if (scores[pos] < min_score) {
178 |         x1[pos] = x1[ndets - 1];
179 |         y1[pos] = y1[ndets - 1];
180 |         x2[pos] = x2[ndets - 1];
181 |         y2[pos] = y2[ndets - 1];
182 |         scores[pos] = scores[ndets - 1];
183 |         areas[pos] = areas[ndets - 1];
184 |         inds[pos] = inds[ndets - 1];
185 |         ndets = ndets - 1;
186 |         pos = pos - 1;
187 |       }
188 |       pos = pos + 1;
189 |     }
190 |   }
191 |   at::Tensor result = at::zeros({6, ndets}, dets.options());
192 |   result[0] = x1_t.slice(0, 0, ndets);
193 |   result[1] = y1_t.slice(0, 0, ndets);
194 |   result[2] = x2_t.slice(0, 0, ndets);
195 |   result[3] = y2_t.slice(0, 0, ndets);
196 |   result[4] = scores_t.slice(0, 0, ndets);
197 |   result[5] = inds_t.slice(0, 0, ndets);
198 | 
199 |   result = result.t().contiguous();
200 |   return result;
201 | }
202 | 
203 | at::Tensor soft_nms(const at::Tensor& dets, const float threshold,
204 |                     const unsigned char method, const float sigma, const float min_score) {
205 |   at::Tensor result;
206 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "soft_nms", [&] {
207 |     result = soft_nms_cpu_kernel<scalar_t>(dets, threshold, method, sigma, min_score);
208 |   });
209 |   return result;
210 | }
211 | 
212 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
213 |   m.def("nms", &nms, "non-maximum suppression");
214 |   m.def("soft_nms", &soft_nms, "soft non-maximum suppression");
215 | }
216 | 
-------------------------------------------------------------------------------- /libs/nms/src/nms_cuda.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h>
3 | 
4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
5 | 
6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
7 | 
8 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
9 |   CHECK_CUDA(dets);
10 |   if (dets.numel() == 0)
11 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 |   return nms_cuda(dets, threshold);
13 | }
14 | 
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16 |   m.def("nms", &nms, "non-maximum suppression");
17 | }
18 | 
-------------------------------------------------------------------------------- /libs/nms/src/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
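// A rough reading guide for this file (summary only; the code below is authoritative):
// - threadsPerBlock is sizeof(unsigned long long) * 8 = 64, so boxes are handled in tiles
//   of 64 and one bit of an unsigned long long stands for one box;
// - nms_kernel runs on a 2D grid of (row tile, column tile) pairs: each thread loads one
//   box from its row tile, compares it against the column tile cached in shared memory,
//   and sets bit i of a 64-bit mask whenever devIoU(...) exceeds nms_overlap_thresh;
// - the per-box masks are written to dev_mask and reduced on the host in nms_cuda, which
//   greedily keeps a box only if no higher-scoring kept box has already suppressed it.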
2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 13 | 14 | __device__ inline float devIoU(float const * const a, float const * const b) { 15 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 16 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 17 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 18 | float interS = width * height; 19 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 20 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 21 | return interS / (Sa + Sb - interS); 22 | } 23 | 24 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 25 | const float *dev_boxes, unsigned long long *dev_mask) { 26 | const int row_start = blockIdx.y; 27 | const int col_start = blockIdx.x; 28 | 29 | // if (row_start > col_start) return; 30 | 31 | const int row_size = 32 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 33 | const int col_size = 34 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 35 | 36 | __shared__ float block_boxes[threadsPerBlock * 5]; 37 | if (threadIdx.x < col_size) { 38 | block_boxes[threadIdx.x * 5 + 0] = 39 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 40 | block_boxes[threadIdx.x * 5 + 1] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 42 | block_boxes[threadIdx.x * 5 + 2] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 44 | block_boxes[threadIdx.x * 5 + 3] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 46 | block_boxes[threadIdx.x * 5 + 4] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 48 | } 49 | __syncthreads(); 50 | 51 | if (threadIdx.x < row_size) { 52 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 53 | const float *cur_box = dev_boxes + cur_box_idx * 5; 54 | int i = 0; 55 | unsigned long long t = 0; 56 | int start = 0; 57 | if (row_start == col_start) { 58 | start = threadIdx.x + 1; 59 | } 60 | for (i = start; i < col_size; i++) { 61 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 62 | t |= 1ULL << i; 63 | } 64 | } 65 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 66 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 67 | } 68 | } 69 | 70 | // boxes is a N x 5 tensor 71 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 72 | 73 | // Ensure CUDA uses the input tensor device. 
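// Roughly, the host code below: (1) sorts the boxes by score and gathers them into
// boxes_sorted; (2) launches nms_kernel over a col_blocks x col_blocks grid so that every
// pair of 64-box tiles is compared and a 64-bit suppression mask is written per
// (box, column tile) pair; (3) copies the masks back to the host and sweeps the boxes in
// score order, keeping a box only if none of the already-kept boxes has marked it (the
// remv bitset accumulates the masks of kept boxes); (4) maps the kept positions back
// through order_t so the returned indices refer to the original, unsorted input.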
74 |   at::DeviceGuard guard(boxes.device());
75 | 
76 |   using scalar_t = float;
77 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
78 |   auto scores = boxes.select(1, 4);
79 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
80 |   auto boxes_sorted = boxes.index_select(0, order_t);
81 | 
82 |   int boxes_num = boxes.size(0);
83 | 
84 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
85 | 
86 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
87 | 
88 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
89 | 
90 |   unsigned long long* mask_dev = NULL;
91 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
92 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
93 | 
94 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
95 | 
96 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
97 |               THCCeilDiv(boxes_num, threadsPerBlock));
98 |   dim3 threads(threadsPerBlock);
99 |   nms_kernel<<<blocks, threads>>>(boxes_num,
100 |                                   nms_overlap_thresh,
101 |                                   boxes_dev,
102 |                                   mask_dev);
103 | 
104 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
105 |   THCudaCheck(cudaMemcpyAsync(
106 |       &mask_host[0],
107 |       mask_dev,
108 |       sizeof(unsigned long long) * boxes_num * col_blocks,
109 |       cudaMemcpyDeviceToHost,
110 |       at::cuda::getCurrentCUDAStream()
111 |   ));
112 | 
113 |   std::vector<unsigned long long> remv(col_blocks);
114 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
115 | 
116 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
117 |   int64_t* keep_out = keep.data<int64_t>();
118 | 
119 |   int num_to_keep = 0;
120 |   for (int i = 0; i < boxes_num; i++) {
121 |     int nblock = i / threadsPerBlock;
122 |     int inblock = i % threadsPerBlock;
123 | 
124 |     if (!(remv[nblock] & (1ULL << inblock))) {
125 |       keep_out[num_to_keep++] = i;
126 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
127 |       for (int j = nblock; j < col_blocks; j++) {
128 |         remv[j] |= p[j];
129 |       }
130 |     }
131 |   }
132 | 
133 |   THCudaFree(state, mask_dev);
134 |   // TODO improve this part
135 |   return std::get<0>(order_t.index({
136 |       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
137 |           order_t.device(), keep.scalar_type())
138 |   }).sort(0, false));
139 | }
140 | 
-------------------------------------------------------------------------------- /libs/roi_align/__init__.py: --------------------------------------------------------------------------------
1 | from .roi_align import RoIAlign, roi_align
2 | 
3 | __all__ = ['roi_align', 'RoIAlign']
4 | 
-------------------------------------------------------------------------------- /libs/roi_align/gradcheck.py: --------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 | 
4 | import numpy as np
5 | import torch
6 | from torch.autograd import gradcheck
7 | 
8 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
9 | from roi_align import RoIAlign  # noqa: E402, isort:skip
10 | 
11 | feat_size = 15
12 | spatial_scale = 1.0 / 8
13 | img_size = feat_size / spatial_scale
14 | num_imgs = 2
15 | num_rois = 20
16 | 
17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5
19 | rois[:, 2:] += img_size * 0.5
20 | rois = np.hstack((batch_ind, rois))
21 | 
22 | feat = torch.randn(
23 |     num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
24 | rois = torch.from_numpy(rois).float().cuda()
25 | inputs = (feat, rois)
26
| print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /libs/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = _pair(out_size) 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, self.out_size, 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /libs/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | int ROIAlignForwardLaucher(const at::Tensor 
features, const at::Tensor rois, 9 | const float spatial_scale, const int sample_num, 10 | const int channels, const int height, 11 | const int width, const int num_rois, 12 | const int pooled_height, const int pooled_width, 13 | at::Tensor output); 14 | 15 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 16 | const float spatial_scale, const int sample_num, 17 | const int channels, const int height, 18 | const int width, const int num_rois, 19 | const int pooled_height, const int pooled_width, 20 | at::Tensor bottom_grad); 21 | 22 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 23 | #define CHECK_CONTIGUOUS(x) \ 24 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 25 | #define CHECK_INPUT(x) \ 26 | CHECK_CUDA(x); \ 27 | CHECK_CONTIGUOUS(x) 28 | 29 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 30 | int pooled_height, int pooled_width, 31 | float spatial_scale, int sample_num, 32 | at::Tensor output) { 33 | CHECK_INPUT(features); 34 | CHECK_INPUT(rois); 35 | CHECK_INPUT(output); 36 | at::DeviceGuard guard(features.device()); 37 | 38 | // Number of ROIs 39 | int num_rois = rois.size(0); 40 | int size_rois = rois.size(1); 41 | 42 | if (size_rois != 5) { 43 | printf("wrong roi size\n"); 44 | return 0; 45 | } 46 | 47 | int num_channels = features.size(1); 48 | int data_height = features.size(2); 49 | int data_width = features.size(3); 50 | 51 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 52 | num_channels, data_height, data_width, num_rois, 53 | pooled_height, pooled_width, output); 54 | 55 | return 1; 56 | } 57 | 58 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 59 | int pooled_height, int pooled_width, 60 | float spatial_scale, int sample_num, 61 | at::Tensor bottom_grad) { 62 | CHECK_INPUT(top_grad); 63 | CHECK_INPUT(rois); 64 | CHECK_INPUT(bottom_grad); 65 | at::DeviceGuard guard(top_grad.device()); 66 | 67 | // Number of ROIs 68 | int num_rois = rois.size(0); 69 | int size_rois = rois.size(1); 70 | if (size_rois != 5) { 71 | printf("wrong roi size\n"); 72 | return 0; 73 | } 74 | 75 | int num_channels = bottom_grad.size(1); 76 | int data_height = bottom_grad.size(2); 77 | int data_width = bottom_grad.size(3); 78 | 79 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 80 | num_channels, data_height, data_width, num_rois, 81 | pooled_height, pooled_width, bottom_grad); 82 | 83 | return 1; 84 | } 85 | 86 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 87 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 88 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 89 | } 90 | -------------------------------------------------------------------------------- /libs/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /libs/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 
67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /libs/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /libs/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, 
const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | at::DeviceGuard guard(features.device()); 35 | 36 | // Number of ROIs 37 | int num_rois = rois.size(0); 38 | int size_rois = rois.size(1); 39 | 40 | if (size_rois != 5) { 41 | printf("wrong roi size\n"); 42 | return 0; 43 | } 44 | 45 | int channels = features.size(1); 46 | int height = features.size(2); 47 | int width = features.size(3); 48 | 49 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 50 | num_rois, pooled_height, pooled_width, output, argmax); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | at::Tensor argmax, float spatial_scale, 57 | at::Tensor bottom_grad) { 58 | CHECK_INPUT(top_grad); 59 | CHECK_INPUT(rois); 60 | CHECK_INPUT(argmax); 61 | CHECK_INPUT(bottom_grad); 62 | at::DeviceGuard guard(top_grad.device()); 63 | 64 | int pooled_height = top_grad.size(2); 65 | int pooled_width = top_grad.size(3); 66 | int num_rois = rois.size(0); 67 | int size_rois = rois.size(1); 68 | 69 | if (size_rois != 5) { 70 | printf("wrong roi size\n"); 71 | return 0; 72 | } 73 | int batch_size = bottom_grad.size(0); 74 | int channels = bottom_grad.size(1); 75 | int height = bottom_grad.size(2); 76 | int width = bottom_grad.size(3); 77 | 78 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 79 | channels, height, width, num_rois, pooled_height, 80 | pooled_width, bottom_grad); 81 | 82 | return 1; 83 | } 84 | 85 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 86 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 87 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 88 | } 89 | -------------------------------------------------------------------------------- /libs/roi_pool/src/roi_pool_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 7 | i += blockDim.x * gridDim.x) 8 | 9 | #define THREADS_PER_BLOCK 1024 10 | 11 | inline int GET_BLOCKS(const int N) { 12 | int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; 13 | int max_block_num = 65000; 14 | return min(optimal_block_num, max_block_num); 15 | } 16 | 17 | template 18 | __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data, 19 | const scalar_t *rois, 20 | const scalar_t spatial_scale, const int channels, 21 | const int height, const int width, 22 | const int pooled_h, const int pooled_w, 23 | scalar_t *top_data, int *argmax_data) { 24 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 25 | // (n, c, ph, pw) is an element in the pooled output 26 | int pw = index % pooled_w; 27 | int ph = (index / pooled_w) % pooled_h; 28 | int c = (index / pooled_w / pooled_h) % channels; 29 | int n = index / pooled_w / 
pooled_h / channels;
30 | 
31 |     const scalar_t *offset_rois = rois + n * 5;
32 |     int roi_batch_ind = offset_rois[0];
33 |     // calculate the roi region on feature maps
34 |     scalar_t roi_x1 = offset_rois[1] * spatial_scale;
35 |     scalar_t roi_y1 = offset_rois[2] * spatial_scale;
36 |     scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
37 |     scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;
38 | 
39 |     // force malformed rois to be 1x1
40 |     scalar_t roi_w = roi_x2 - roi_x1;
41 |     scalar_t roi_h = roi_y2 - roi_y1;
42 |     if (roi_w <= 0 || roi_h <= 0) continue;
43 | 
44 |     scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
45 |     scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);
46 | 
47 |     // the corresponding bin region
48 |     int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
49 |     int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
50 |     int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
51 |     int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);
52 | 
53 |     // add roi offsets and clip to input boundaries
54 |     bin_x1 = min(max(bin_x1, 0), width);
55 |     bin_y1 = min(max(bin_y1, 0), height);
56 |     bin_x2 = min(max(bin_x2, 0), width);
57 |     bin_y2 = min(max(bin_y2, 0), height);
58 |     bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);
59 | 
60 |     // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
61 |     int max_idx = -1;
62 |     bottom_data += (roi_batch_ind * channels + c) * height * width;
63 | 
64 |     // Define an empty pooling region to be zero
65 |     scalar_t max_val = is_empty ? static_cast<scalar_t>(0)
66 |                                 : bottom_data[bin_y1 * width + bin_x1] - 1;
67 | 
68 |     for (int h = bin_y1; h < bin_y2; ++h) {
69 |       for (int w = bin_x1; w < bin_x2; ++w) {
70 |         int offset = h * width + w;
71 |         if (bottom_data[offset] > max_val) {
72 |           max_val = bottom_data[offset];
73 |           max_idx = offset;
74 |         }
75 |       }
76 |     }
77 |     top_data[index] = max_val;
78 |     if (argmax_data != NULL) argmax_data[index] = max_idx;
79 |   }
80 | }
81 | 
82 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
83 |                           const float spatial_scale, const int channels,
84 |                           const int height, const int width, const int num_rois,
85 |                           const int pooled_h, const int pooled_w,
86 |                           at::Tensor output, at::Tensor argmax) {
87 |   const int output_size = num_rois * channels * pooled_h * pooled_w;
88 | 
89 |   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
90 |       features.scalar_type(), "ROIPoolLaucherForward", ([&] {
91 |         const scalar_t *bottom_data = features.data<scalar_t>();
92 |         const scalar_t *rois_data = rois.data<scalar_t>();
93 |         scalar_t *top_data = output.data<scalar_t>();
94 |         int *argmax_data = argmax.data<int>();
95 | 
96 |         ROIPoolForward<scalar_t>
97 |             <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
98 |                 output_size, bottom_data, rois_data, scalar_t(spatial_scale),
99 |                 channels, height, width, pooled_h, pooled_w, top_data,
100 |                 argmax_data);
101 |       }));
102 |   THCudaCheck(cudaGetLastError());
103 |   return 1;
104 | }
105 | 
106 | template <typename scalar_t>
107 | __global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
108 |                                 const scalar_t *rois, const int *argmax_data,
109 |                                 const scalar_t spatial_scale,
110 |                                 const int channels, const int height,
111 |                                 const int width, const int pooled_h,
112 |                                 const int pooled_w, scalar_t *bottom_diff) {
113 |   CUDA_1D_KERNEL_LOOP(index, nthreads) {
114 |     int pw = index % pooled_w;
115 |     int ph = (index / pooled_w) % pooled_h;
116 |     int c = (index / pooled_w / pooled_h) % channels;
117 |     int n = index / pooled_w / pooled_h / channels;
118 | 
119 |     int roi_batch_ind = rois[n * 5];
120 |     int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
121 | ph * pooled_w + pw]; 122 | 123 | atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width + 124 | bottom_index, 125 | top_diff[index]); 126 | } 127 | } 128 | 129 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 130 | const at::Tensor argmax, const float spatial_scale, 131 | const int batch_size, const int channels, 132 | const int height, const int width, 133 | const int num_rois, const int pooled_h, 134 | const int pooled_w, at::Tensor bottom_grad) { 135 | const int output_size = num_rois * pooled_h * pooled_w * channels; 136 | 137 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 138 | top_grad.scalar_type(), "ROIPoolLaucherBackward", ([&] { 139 | const scalar_t *top_diff = top_grad.data(); 140 | const scalar_t *rois_data = rois.data(); 141 | const int *argmax_data = argmax.data(); 142 | scalar_t *bottom_diff = bottom_grad.data(); 143 | 144 | if (sizeof(scalar_t) == sizeof(double)) { 145 | fprintf(stderr, "double is not supported\n"); 146 | exit(-1); 147 | } 148 | 149 | ROIPoolBackward 150 | <<>>( 151 | output_size, top_diff, rois_data, argmax_data, 152 | scalar_t(spatial_scale), channels, height, width, pooled_h, 153 | pooled_w, bottom_diff); 154 | })); 155 | THCudaCheck(cudaGetLastError()); 156 | return 1; 157 | } 158 | -------------------------------------------------------------------------------- /libs/setup.py: -------------------------------------------------------------------------------- 1 | '''setup modified from mmdet''' 2 | import os 3 | import torch 4 | from setuptools import find_packages, setup 5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 6 | 7 | 8 | def make_cuda_ext(name, module, sources): 9 | define_macros = [] 10 | if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1': 11 | define_macros += [("WITH_CUDA", None)] 12 | else: 13 | raise EnvironmentError('CUDA is required to compile FPN!') 14 | return CUDAExtension( 15 | name='{}.{}'.format(module, name), 16 | sources=[os.path.join(*module.split('.'), p) for p in sources], 17 | define_macros=define_macros, 18 | extra_compile_args={ 19 | 'cxx': [], 20 | 'nvcc': [ 21 | '-D__CUDA_NO_HALF_OPERATORS__', 22 | '-D__CUDA_NO_HALF_CONVERSIONS__', 23 | '-D__CUDA_NO_HALF2_OPERATORS__', 24 | ] 25 | }) 26 | 27 | 28 | setup( 29 | name='FPN', 30 | version='0.1.0', 31 | description='FPN for object detection', 32 | classifiers=['License :: OSI Approved :: MIT License', 33 | 'Programming Language :: Python :: 3', 34 | 'Intended Audience :: Developers', 35 | 'Operating System :: OS Independent'], 36 | author='Charles', 37 | author_email='charlesjzc@qq.com', 38 | url='https://github.com/DetectionBLWX/FPN.pytorch', 39 | license='MIT', 40 | include_package_data=True, 41 | packages=find_packages(), 42 | ext_modules=[ 43 | make_cuda_ext( 44 | name='deform_conv_cuda', 45 | module='dcn', 46 | sources=['src/deform_conv_cuda.cpp', 'src/deform_conv_cuda_kernel.cu'] 47 | ), 48 | make_cuda_ext( 49 | name='deform_pool_cuda', 50 | module='dcn', 51 | sources=['src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu'] 52 | ), 53 | make_cuda_ext( 54 | name='nms_cpu', 55 | module='nms', 56 | sources=['src/nms_cpu.cpp'] 57 | ), 58 | make_cuda_ext( 59 | name='nms_cuda', 60 | module='nms', 61 | sources=['src/nms_cuda.cpp', 'src/nms_kernel.cu'] 62 | ), 63 | make_cuda_ext( 64 | name='roi_align_cuda', 65 | module='roi_align', 66 | sources=['src/roi_align_cuda.cpp', 'src/roi_align_kernel.cu'] 67 | ), 68 | make_cuda_ext( 69 | name='roi_pool_cuda', 70 | module='roi_pool', 71 | 
sources=['src/roi_pool_cuda.cpp', 'src/roi_pool_kernel.cu'] 72 | ), 73 | ], 74 | cmdclass={'build_ext': BuildExtension}, 75 | zip_safe=False 76 | ) -------------------------------------------------------------------------------- /modules/backbones/FPNResNets.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | Feature Pyramid Network of ResNets 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | import torchvision 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from modules.utils.initialization import * 12 | 13 | 14 | '''resnet from torchvision==0.4.0''' 15 | def ResNets(resnet_type, pretrained=False): 16 | if resnet_type == 'resnet18': 17 | model = torchvision.models.resnet18(pretrained=pretrained) 18 | elif resnet_type == 'resnet34': 19 | model = torchvision.models.resnet34(pretrained=pretrained) 20 | elif resnet_type == 'resnet50': 21 | model = torchvision.models.resnet50(pretrained=pretrained) 22 | elif resnet_type == 'resnet101': 23 | model = torchvision.models.resnet101(pretrained=pretrained) 24 | elif resnet_type == 'resnet152': 25 | model = torchvision.models.resnet152(pretrained=pretrained) 26 | else: 27 | raise ValueError('Unsupport resnet_type <%s>...' % resnet_type) 28 | return model 29 | 30 | 31 | '''FPN by using ResNets''' 32 | class FPNResNets(nn.Module): 33 | def __init__(self, mode, cfg, logger_handle, **kwargs): 34 | super(FPNResNets, self).__init__() 35 | self.logger_handle = logger_handle 36 | self.pretrained_model_path = cfg.PRETRAINED_MODEL_PATH 37 | self.backbone = ResNets(resnet_type=cfg.BACKBONE_TYPE, pretrained=False) 38 | if mode == 'TRAIN': 39 | self.initializeBackbone() 40 | self.backbone.avgpool = None 41 | self.backbone.fc = None 42 | # parse backbone 43 | self.base_layer0 = nn.Sequential(self.backbone.conv1, self.backbone.bn1, self.backbone.relu, self.backbone.maxpool) 44 | self.base_layer1 = nn.Sequential(self.backbone.layer1) 45 | self.base_layer2 = nn.Sequential(self.backbone.layer2) 46 | self.base_layer3 = nn.Sequential(self.backbone.layer3) 47 | self.base_layer4 = nn.Sequential(self.backbone.layer4) 48 | # add lateral layers 49 | in_channels = [512, 256, 128, 64] if cfg.BACKBONE_TYPE in ['resnet18', 'resnet34'] else [2048, 1024, 512, 256] 50 | self.lateral_layer0 = nn.Conv2d(in_channels=in_channels[0], out_channels=256, kernel_size=1, stride=1, padding=0) 51 | self.lateral_layer1 = nn.Conv2d(in_channels=in_channels[1], out_channels=256, kernel_size=1, stride=1, padding=0) 52 | self.lateral_layer2 = nn.Conv2d(in_channels=in_channels[2], out_channels=256, kernel_size=1, stride=1, padding=0) 53 | self.lateral_layer3 = nn.Conv2d(in_channels=in_channels[3], out_channels=256, kernel_size=1, stride=1, padding=0) 54 | # add smooth layers 55 | self.smooth_layer0 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 56 | self.smooth_layer1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 57 | self.smooth_layer2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 58 | self.smooth_layer3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 59 | # add downsample layer 60 | self.downsample_layer = nn.MaxPool2d(kernel_size=1, stride=2) 61 | '''forward''' 62 | def forward(self, x): 63 | # bottom-up 64 | c1 = self.base_layer0(x) 65 | c2 = self.base_layer1(c1) 66 | c3 = self.base_layer2(c2) 67 | c4 = self.base_layer3(c3) 68 | c5 = self.base_layer4(c4) 69 | # top-down 70 | 
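# roughly: P5 is a 1x1 lateral conv on C5; each lower level is the nearest-neighbour
# upsampling of the level above added to a 1x1 lateral conv of the corresponding C map
# (see upsampleAdd below); the merged maps are then refined by 3x3 smooth convs, and P6
# is a stride-2 subsampling of P5, giving the [P2, P3, P4, P5, P6] pyramid returned below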
p5 = self.lateral_layer0(c5) 71 | p4 = self.upsampleAdd(p5, self.lateral_layer1(c4)) 72 | p3 = self.upsampleAdd(p4, self.lateral_layer2(c3)) 73 | p2 = self.upsampleAdd(p3, self.lateral_layer3(c2)) 74 | # obtain fpn features 75 | p5 = self.smooth_layer0(p5) 76 | p4 = self.smooth_layer1(p4) 77 | p3 = self.smooth_layer2(p3) 78 | p2 = self.smooth_layer3(p2) 79 | p6 = self.downsample_layer(p5) 80 | # return all feature pyramid levels 81 | return [p2, p3, p4, p5, p6] 82 | '''upsample and add''' 83 | def upsampleAdd(self, p, c): 84 | _, _, H, W = c.size() 85 | return F.interpolate(p, size=(H, W), mode='nearest') + c 86 | '''initialize backbone''' 87 | def initializeBackbone(self): 88 | if self.pretrained_model_path: 89 | self.backbone.load_state_dict({k:v for k,v in torch.load(self.pretrained_model_path).items() if k in self.backbone.state_dict()}) 90 | self.logger_handle.info('Loading pretrained weights from %s for backbone network...' % self.pretrained_model_path) 91 | else: 92 | self.backbone = ResNets(resnet_type=self.backbone_type, pretrained=True) 93 | '''initialize added layers in fpn''' 94 | def initializeAddedLayers(self, init_method='xavier'): 95 | # normal init 96 | if init_method == 'normal': 97 | for layer in [self.lateral_layer0, self.lateral_layer1, self.lateral_layer2, self.lateral_layer3, 98 | self.smooth_layer0, self.smooth_layer1, self.smooth_layer2, self.smooth_layer3]: 99 | normalInit(layer, std=0.01) 100 | # kaiming init 101 | elif init_method == 'kaiming': 102 | for layer in [self.lateral_layer0, self.lateral_layer1, self.lateral_layer2, self.lateral_layer3, 103 | self.smooth_layer0, self.smooth_layer1, self.smooth_layer2, self.smooth_layer3]: 104 | kaimingInit(layer, nonlinearity='relu') 105 | # xavier init 106 | elif init_method == 'xavier': 107 | for layer in [self.lateral_layer0, self.lateral_layer1, self.lateral_layer2, self.lateral_layer3, 108 | self.smooth_layer0, self.smooth_layer1, self.smooth_layer2, self.smooth_layer3]: 109 | xavierInit(layer, distribution='uniform') 110 | # unsupport 111 | else: 112 | raise RuntimeError('Unsupport initializeAddedLayers.init_method <%s>...' 
% init_method) -------------------------------------------------------------------------------- /modules/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .FPNResNets import FPNResNets -------------------------------------------------------------------------------- /modules/losses/CELoss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the cross entropy loss 4 | Author: 5 | Charles 6 | ''' 7 | import torch.nn.functional as F 8 | 9 | 10 | '''cross entropy loss''' 11 | def CrossEntropyLoss(preds, targets, loss_weight=1.0, size_average=True, avg_factor=None): 12 | loss = F.cross_entropy(preds, targets, reduction='none') 13 | if avg_factor is None: 14 | loss = loss.mean() if size_average else loss.sum() 15 | else: 16 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 17 | return loss * loss_weight 18 | 19 | 20 | '''binary cross entropy loss''' 21 | def BinaryCrossEntropyLoss(preds, targets, loss_weight=1.0, size_average=True, avg_factor=None): 22 | loss = F.binary_cross_entropy_with_logits(preds, targets.float(), reduction='none') 23 | if avg_factor is None: 24 | loss = loss.mean() if size_average else loss.sum() 25 | else: 26 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 27 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/losses/IoULoss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the iou losses 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | 9 | 10 | '''giou loss, I borrow the code from mmdet''' 11 | def GIoULoss(bbox_preds, bbox_targets, eps=1e-7, size_average=True, loss_weight=1.0, avg_factor=None): 12 | # overlap 13 | lt = torch.max(bbox_preds[:, :2], bbox_targets[:, :2]) 14 | rb = torch.min(bbox_preds[:, 2:], bbox_targets[:, 2:]) 15 | wh = (rb - lt + 1).clamp(min=0) 16 | overlap = wh[:, 0] * wh[:, 1] 17 | # union 18 | ap = (bbox_preds[:, 2] - bbox_preds[:, 0] + 1) * (bbox_preds[:, 3] - bbox_preds[:, 1] + 1) 19 | ag = (bbox_targets[:, 2] - bbox_targets[:, 0] + 1) * (bbox_targets[:, 3] - bbox_targets[:, 1] + 1) 20 | union = ap + ag - overlap + eps 21 | # IoU 22 | ious = overlap / union 23 | # enclose area 24 | enclose_x1y1 = torch.min(bbox_preds[:, :2], bbox_targets[:, :2]) 25 | enclose_x2y2 = torch.max(bbox_preds[:, 2:], bbox_targets[:, 2:]) 26 | enclose_wh = (enclose_x2y2 - enclose_x1y1 + 1).clamp(min=0) 27 | enclose_area = enclose_wh[:, 0] * enclose_wh[:, 1] + eps 28 | # GIoU 29 | gious = ious - (enclose_area - union) / enclose_area 30 | loss = 1 - gious 31 | # summary and return the loss 32 | if avg_factor is None: 33 | loss = loss.mean() if size_average else loss.sum() 34 | else: 35 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 36 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .CELoss import * 3 | from .IoULoss import * 4 | from .smoothL1 import * 5 | from .focalLoss import * -------------------------------------------------------------------------------- /modules/losses/focalLoss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the focal loss 4 | Author: 5 | Charles 
6 | ''' 7 | import torch.nn.functional as F 8 | 9 | 10 | '''sigmoid focal loss''' 11 | def pySigmoidFocalLoss(preds, targets, loss_weight=1.0, gamma=2.0, alpha=0.25, size_average=True, avg_factor=None): 12 | preds_sigmoid = preds.sigmoid() 13 | targets = targets.type_as(preds) 14 | pt = (1 - preds_sigmoid) * targets + preds_sigmoid * (1 - targets) 15 | focal_weight = (alpha * targets + (1 - alpha) * (1 - targets)) * pt.pow(gamma) 16 | loss = F.binary_cross_entropy_with_logits(preds, targets, reduction='none') * focal_weight 17 | if avg_factor is None: 18 | loss = loss.mean() if size_average else loss.sum() 19 | else: 20 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 21 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/losses/smoothL1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the smooth l1 losses 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | import numpy as np 9 | 10 | 11 | '''smooth l1 loss with beta''' 12 | def betaSmoothL1Loss(bbox_preds, bbox_targets, beta=1, size_average=True, loss_weight=1.0, avg_factor=None): 13 | diff = torch.abs(bbox_preds - bbox_targets) 14 | loss = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta) 15 | if avg_factor is None: 16 | loss = loss.mean() if size_average else loss.sum() 17 | else: 18 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 19 | return loss * loss_weight 20 | 21 | 22 | '''balanced smooth l1 Loss, I borrow the code from mmdet''' 23 | def balancedSmoothL1Loss(bbox_preds, bbox_targets, beta=1.0, alpha=0.5, gamma=1.5, size_average=True, loss_weight=1.0, avg_factor=None): 24 | assert (beta > 0.) and (bbox_preds.size() == bbox_targets.size()) and (bbox_targets.numel() > 0) 25 | diff = torch.abs(bbox_preds - bbox_targets) 26 | b = np.e ** (gamma / alpha) - 1 27 | loss = torch.where(diff < beta, alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, gamma * diff + gamma / b - alpha * beta) 28 | if avg_factor is None: 29 | loss = loss.mean() if size_average else loss.sum() 30 | else: 31 | loss = (loss.sum() / avg_factor) if size_average else loss.sum() 32 | return loss * loss_weight -------------------------------------------------------------------------------- /modules/utils/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .misc import * 3 | from .anchors import * 4 | from .datasets import * 5 | from .initialization import * -------------------------------------------------------------------------------- /modules/utils/anchors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | define the utils to generate anchors 4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | 9 | 10 | ''' 11 | Function: 12 | anchor generator 13 | Input for __init__: 14 | --size_base(int): the base anchor size. 15 | --scales(list): scales for anchor boxes. 16 | --ratios(list): ratios for anchor boxes. 17 | Input for generate: 18 | --feature_shape(tuple): the size of feature maps in corresponding pyramid level. 19 | --feature_stride(int): the feature stride in corresponding pyramid level. 20 | --device: specify cpu or cuda. 21 | Return: 22 | --anchors(torch.FloatTensor): [nA, 4], the format is (x1, y1, x2, y2). 
23 | ''' 24 | class AnchorGenerator(object): 25 | def __init__(self, size_base, scales=[8], ratios=[0.5, 1, 2], **kwargs): 26 | self.size_base = size_base 27 | self.scales = torch.Tensor(scales) 28 | self.ratios = torch.Tensor(ratios) 29 | self.base_anchors = self.__generateBaseAnchors() 30 | '''generate anchors''' 31 | def generate(self, feature_shape=None, feature_stride=None, device='cuda'): 32 | base_anchors = self.base_anchors.to(device) 33 | feat_h, feat_w = feature_shape 34 | shift_x = torch.arange(0, feat_w, device=device) * feature_stride 35 | shift_y = torch.arange(0, feat_h, device=device) * feature_stride 36 | shift_xx, shift_yy = self.__meshgrid(shift_x, shift_y) 37 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 38 | shifts = shifts.type_as(base_anchors) 39 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :].float() 40 | all_anchors = all_anchors.view(-1, 4) 41 | return all_anchors 42 | '''meshgrid''' 43 | def __meshgrid(self, x, y): 44 | xx = x.repeat(len(y)) 45 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 46 | return xx, yy 47 | '''generate base anchors''' 48 | def __generateBaseAnchors(self): 49 | w = self.size_base 50 | h = self.size_base 51 | x_ctr = 0.5 * (w - 1) 52 | y_ctr = 0.5 * (h - 1) 53 | h_ratios = torch.sqrt(self.ratios) 54 | w_ratios = 1 / h_ratios 55 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 56 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 57 | base_anchors = torch.stack([x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)], dim=-1).round() 58 | return base_anchors -------------------------------------------------------------------------------- /modules/utils/datasets/Custom.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | some shared methods for all datasets. 
4 | Author: 5 | Charles 6 | ''' 7 | import torch 8 | 9 | 10 | '''nearest ratio random sampler, used in mode''' 11 | class NearestRatioRandomSampler(torch.utils.data.sampler.Sampler): 12 | def __init__(self, img_ratios, batch_size, **kwargs): 13 | super().__init__(data_source=None) 14 | self.img_ratios = img_ratios 15 | self.batch_size = batch_size 16 | def __iter__(self): 17 | img_ratios = torch.tensor(self.img_ratios) 18 | tall_indices = (img_ratios < 1).nonzero().view(-1) 19 | fat_indices = (img_ratios >= 1).nonzero().view(-1) 20 | tall_indices_length = len(tall_indices) 21 | fat_indices_length = len(fat_indices) 22 | tall_indices = tall_indices[torch.randperm(tall_indices_length)] 23 | fat_indices = fat_indices[torch.randperm(fat_indices_length)] 24 | num_tall_remainder = tall_indices_length % self.batch_size 25 | num_fat_remainder = fat_indices_length % self.batch_size 26 | tall_indices = tall_indices[:tall_indices_length-num_tall_remainder] 27 | fat_indices = fat_indices[:fat_indices_length-num_fat_remainder] 28 | tall_indices = tall_indices.view(-1, self.batch_size) 29 | fat_indices = fat_indices.view(-1, self.batch_size) 30 | merge_indices = torch.cat([tall_indices, fat_indices], dim=0) 31 | merge_indices = merge_indices[torch.randperm(len(merge_indices))].view(-1) 32 | return iter(merge_indices.tolist()) 33 | def __len__(self): 34 | return len(self.img_ratios) -------------------------------------------------------------------------------- /modules/utils/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | '''import all''' 2 | from .COCODataset import COCODataset 3 | from .Custom import NearestRatioRandomSampler -------------------------------------------------------------------------------- /modules/utils/initialization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | some weight initialization methods from mmcv 4 | Author: 5 | Charles 6 | ''' 7 | import numpy as np 8 | import torch.nn as nn 9 | 10 | 11 | '''constant init''' 12 | def constantInit(module, val, bias=0): 13 | if hasattr(module, 'weight') and module.weight is not None: 14 | nn.init.constant_(module.weight, val) 15 | if hasattr(module, 'bias') and module.bias is not None: 16 | nn.init.constant_(module.bias, bias) 17 | 18 | 19 | '''xavier init''' 20 | def xavierInit(module, gain=1, bias=0, distribution='normal'): 21 | assert distribution in ['uniform', 'normal'] 22 | if distribution == 'uniform': 23 | nn.init.xavier_uniform_(module.weight, gain=gain) 24 | else: 25 | nn.init.xavier_normal_(module.weight, gain=gain) 26 | if hasattr(module, 'bias') and module.bias is not None: 27 | nn.init.constant_(module.bias, bias) 28 | 29 | 30 | '''normal init''' 31 | def normalInit(module, mean=0, std=1, bias=0): 32 | nn.init.normal_(module.weight, mean, std) 33 | if hasattr(module, 'bias') and module.bias is not None: 34 | nn.init.constant_(module.bias, bias) 35 | 36 | 37 | '''uniform init''' 38 | def uniformInit(module, a=0, b=1, bias=0): 39 | nn.init.uniform_(module.weight, a, b) 40 | if hasattr(module, 'bias') and module.bias is not None: 41 | nn.init.constant_(module.bias, bias) 42 | 43 | 44 | '''kaiming init''' 45 | def kaimingInit(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal'): 46 | assert distribution in ['uniform', 'normal'] 47 | if distribution == 'uniform': 48 | nn.init.kaiming_uniform_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 49 | else: 50 | 
nn.init.kaiming_normal_(module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 51 | if hasattr(module, 'bias') and module.bias is not None: 52 | nn.init.constant_(module.bias, bias) 53 | 54 | 55 | '''`XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch, Acknowledgment to FAIR's internal code''' 56 | def caffe2XavierInit(module, bias=0): 57 | kaimingInit(module, a=1, mode='fan_in', nonlinearity='leaky_relu', distribution='uniform') 58 | 59 | 60 | '''initialize conv/fc bias value according to giving probablity''' 61 | def biasInitWithProb(prior_prob): 62 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 63 | return bias_init -------------------------------------------------------------------------------- /modules/utils/misc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | some util functions used for many module files. 4 | Author: 5 | Charles 6 | ''' 7 | import os 8 | import torch 9 | import logging 10 | from torch.nn.utils import clip_grad 11 | 12 | 13 | '''check the existence of dirpath''' 14 | def checkDir(dirpath): 15 | if not os.path.exists(dirpath): 16 | os.mkdir(dirpath) 17 | return False 18 | return True 19 | 20 | 21 | '''log function.''' 22 | class Logger(): 23 | def __init__(self, logfilepath, **kwargs): 24 | logging.basicConfig(level=logging.INFO, 25 | format='%(asctime)s %(levelname)-8s %(message)s', 26 | datefmt='%Y-%m-%d %H:%M:%S', 27 | handlers=[logging.FileHandler(logfilepath), 28 | logging.StreamHandler()]) 29 | @staticmethod 30 | def log(level, message): 31 | logging.log(level, message) 32 | @staticmethod 33 | def debug(message): 34 | Logger.log(logging.DEBUG, message) 35 | @staticmethod 36 | def info(message): 37 | Logger.log(logging.INFO, message) 38 | @staticmethod 39 | def warning(message): 40 | Logger.log(logging.WARNING, message) 41 | @staticmethod 42 | def error(message): 43 | Logger.log(logging.ERROR, message) 44 | 45 | 46 | '''load class labels.''' 47 | def loadclsnames(clsnamespath): 48 | names = [] 49 | for line in open(clsnamespath): 50 | if line.strip('\n'): 51 | names.append(line.strip('\n')) 52 | return names 53 | 54 | 55 | '''some functions for bboxes, the format of all the input bboxes are (x1, y1, x2, y2)''' 56 | class BBoxFunctions(object): 57 | def __init__(self): 58 | self.info = 'bbox functions' 59 | def __repr__(self): 60 | return self.info 61 | '''convert anchors to proposals, anchors size: B x N x 4''' 62 | @staticmethod 63 | def anchors2Proposals(anchors, deltas): 64 | widths = anchors[..., 2] - anchors[..., 0] + 1.0 65 | heights = anchors[..., 3] - anchors[..., 1] + 1.0 66 | cxs = anchors[..., 0] + 0.5 * widths 67 | cys = anchors[..., 1] + 0.5 * heights 68 | dx = deltas[..., 0::4] 69 | dy = deltas[..., 1::4] 70 | dw = deltas[..., 2::4] 71 | dh = deltas[..., 3::4] 72 | cxs_pred = dx * widths.unsqueeze(2) + cxs.unsqueeze(2) 73 | cys_pred = dy * heights.unsqueeze(2) + cys.unsqueeze(2) 74 | ws_pred = torch.exp(dw) * widths.unsqueeze(2) 75 | hs_pred = torch.exp(dh) * heights.unsqueeze(2) 76 | boxes_pred = deltas.clone() 77 | boxes_pred[..., 0::4] = cxs_pred - 0.5 * ws_pred 78 | boxes_pred[..., 1::4] = cys_pred - 0.5 * hs_pred 79 | boxes_pred[..., 2::4] = cxs_pred + 0.5 * ws_pred 80 | boxes_pred[..., 3::4] = cys_pred + 0.5 * hs_pred 81 | # [x1, y1, x2, y2] 82 | return boxes_pred 83 | '''clip boxes, boxes size: B x N x 4, img_info: B x 3(height, width, scale_factor)''' 84 | @staticmethod 85 | def clipBoxes(boxes, img_info): 86 | for i in range(boxes.size(0)): 87 | 
boxes[i, :, 0::4].clamp_(0, img_info[i, 1]-1) 88 | boxes[i, :, 1::4].clamp_(0, img_info[i, 0]-1) 89 | boxes[i, :, 2::4].clamp_(0, img_info[i, 1]-1) 90 | boxes[i, :, 3::4].clamp_(0, img_info[i, 0]-1) 91 | return boxes 92 | '''calculate iou, boxes1(anchors): N x 4 or B x N x 4, boxes2(gts): B x K x 5''' 93 | @staticmethod 94 | def calcIoUs(boxes1, boxes2): 95 | batch_size = boxes2.size(0) 96 | if boxes1.dim() == 2: 97 | num_boxes1 = boxes1.size(0) 98 | num_boxes2 = boxes2.size(1) 99 | boxes1 = boxes1.view(1, num_boxes1, 4).expand(batch_size, num_boxes1, 4).contiguous() 100 | boxes2 = boxes2[..., :4].contiguous() 101 | # calc boxes2(gts) areas 102 | boxes2_ws = boxes2[..., 2] - boxes2[..., 0] + 1 103 | boxes2_hs = boxes2[..., 3] - boxes2[..., 1] + 1 104 | boxes2_areas = (boxes2_ws * boxes2_hs).view(batch_size, 1, num_boxes2) 105 | # calc boxes1(anchors) areas 106 | boxes1_ws = boxes1[..., 2] - boxes1[..., 0] + 1 107 | boxes1_hs = boxes1[..., 3] - boxes1[..., 1] + 1 108 | boxes1_areas = (boxes1_ws * boxes1_hs).view(batch_size, num_boxes1, 1) 109 | # find the error boxes 110 | boxes1_error = (boxes1_ws == 1) & (boxes1_hs == 1) 111 | boxes2_error = (boxes2_ws == 1) & (boxes2_hs == 1) 112 | # re-format boxes 113 | boxes1 = boxes1.view(batch_size, num_boxes1, 1, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 114 | boxes2 = boxes2.view(batch_size, 1, num_boxes2, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 115 | # calc inter area 116 | iws = torch.min(boxes1[..., 2], boxes2[..., 2]) - torch.max(boxes1[..., 0], boxes2[..., 0]) + 1 117 | iws[iws < 0] = 0 118 | ihs = torch.min(boxes1[..., 3], boxes2[..., 3]) - torch.max(boxes1[..., 1], boxes2[..., 1]) + 1 119 | ihs[ihs < 0] = 0 120 | # union area 121 | uas = boxes1_areas + boxes2_areas - (iws * ihs) 122 | # overlaps 123 | overlaps = iws * ihs / uas 124 | overlaps.masked_fill_(boxes2_error.view(batch_size, 1, num_boxes2).expand(batch_size, num_boxes1, num_boxes2), 0) 125 | overlaps.masked_fill_(boxes1_error.view(batch_size, num_boxes1, 1).expand(batch_size, num_boxes1, num_boxes2), -1) 126 | elif boxes1.dim() == 3: 127 | num_boxes1 = boxes1.size(1) 128 | num_boxes2 = boxes2.size(1) 129 | if boxes1.size(2) == 4: 130 | boxes1 = boxes1[..., :4].contiguous() 131 | else: 132 | boxes1 = boxes1[..., 1:5].contiguous() 133 | boxes2 = boxes2[..., :4].contiguous() 134 | # calc boxes2(gts) areas 135 | boxes2_ws = boxes2[..., 2] - boxes2[..., 0] + 1 136 | boxes2_hs = boxes2[..., 3] - boxes2[..., 1] + 1 137 | boxes2_areas = (boxes2_ws * boxes2_hs).view(batch_size, 1, num_boxes2) 138 | # calc boxes1(anchors) areas 139 | boxes1_ws = boxes1[..., 2] - boxes1[..., 0] + 1 140 | boxes1_hs = boxes1[..., 3] - boxes1[..., 1] + 1 141 | boxes1_areas = (boxes1_ws * boxes1_hs).view(batch_size, num_boxes1, 1) 142 | # find the error boxes 143 | boxes1_error = (boxes1_ws == 1) & (boxes1_hs == 1) 144 | boxes2_error = (boxes2_ws == 1) & (boxes2_hs == 1) 145 | # re-format boxes 146 | boxes1 = boxes1.view(batch_size, num_boxes1, 1, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 147 | boxes2 = boxes2.view(batch_size, 1, num_boxes2, 4).expand(batch_size, num_boxes1, num_boxes2, 4) 148 | # calc inter area 149 | iws = torch.min(boxes1[..., 2], boxes2[..., 2]) - torch.max(boxes1[..., 0], boxes2[..., 0]) + 1 150 | iws[iws < 0] = 0 151 | ihs = torch.min(boxes1[..., 3], boxes2[..., 3]) - torch.max(boxes1[..., 1], boxes2[..., 1]) + 1 152 | ihs[ihs < 0] = 0 153 | # union area 154 | uas = boxes1_areas + boxes2_areas - (iws * ihs) 155 | # overlaps 156 | overlaps = iws * ihs / uas 157 | 
overlaps.masked_fill_(boxes2_error.view(batch_size, 1, num_boxes2).expand(batch_size, num_boxes1, num_boxes2), 0) 158 | overlaps.masked_fill_(boxes1_error.view(batch_size, num_boxes1, 1).expand(batch_size, num_boxes1, num_boxes2), -1) 159 | else: 160 | raise ValueError('boxes1(anchors) dimension error in BBoxFunctions.calcIoUs') 161 | return overlaps 162 | '''encode bboxes''' 163 | @staticmethod 164 | def encodeBboxes(boxes_pred, boxes_gt): 165 | if boxes_pred.dim() == 2: 166 | # convert (x1, y1, x2, y2) to (cx, cy, w, h) 167 | widths_pred = boxes_pred[..., 2] - boxes_pred[..., 0] + 1.0 168 | heights_pred = boxes_pred[..., 3] - boxes_pred[..., 1] + 1.0 169 | centerxs_pred = boxes_pred[..., 0] + 0.5 * widths_pred 170 | centerys_pred = boxes_pred[..., 1] + 0.5 * heights_pred 171 | widths_gt = boxes_gt[..., 2] - boxes_gt[..., 0] + 1.0 172 | heights_gt = boxes_gt[..., 3] - boxes_gt[..., 1] + 1.0 173 | centerxs_gt = boxes_gt[..., 0] + 0.5 * widths_gt 174 | centerys_gt = boxes_gt[..., 1] + 0.5 * heights_gt 175 | # calculate targets 176 | dxs_target = (centerxs_gt - centerxs_pred.view(1, -1).expand_as(centerxs_gt)) / widths_pred 177 | dys_target = (centerys_gt - centerys_pred.view(1, -1).expand_as(centerys_gt)) / heights_pred 178 | dws_target = torch.log(widths_gt / widths_pred.view(1, -1).expand_as(widths_gt)) 179 | dhs_target = torch.log(heights_gt / heights_pred.view(1, -1).expand_as(heights_gt)) 180 | elif boxes_pred.dim() == 3: 181 | # convert (x1, y1, x2, y2) to (cx, cy, w, h) 182 | widths_pred = boxes_pred[..., 2] - boxes_pred[..., 0] + 1.0 183 | heights_pred = boxes_pred[..., 3] - boxes_pred[..., 1] + 1.0 184 | centerxs_pred = boxes_pred[..., 0] + 0.5 * widths_pred 185 | centerys_pred = boxes_pred[..., 1] + 0.5 * heights_pred 186 | widths_gt = boxes_gt[..., 2] - boxes_gt[..., 0] + 1.0 187 | heights_gt = boxes_gt[..., 3] - boxes_gt[..., 1] + 1.0 188 | centerxs_gt = boxes_gt[..., 0] + 0.5 * widths_gt 189 | centerys_gt = boxes_gt[..., 1] + 0.5 * heights_gt 190 | # calculate targets 191 | dxs_target = (centerxs_gt - centerxs_pred) / widths_pred 192 | dys_target = (centerys_gt - centerys_pred) / heights_pred 193 | dws_target = torch.log(widths_gt / widths_pred) 194 | dhs_target = torch.log(heights_gt / heights_pred) 195 | else: 196 | raise ValueError('boxes_pred dimension error in BBoxFunctions.encodeBboxes') 197 | return torch.stack((dxs_target, dys_target, dws_target, dhs_target), 2) 198 | '''decode bboxes''' 199 | @staticmethod 200 | def decodeBboxes(boxes, deltas): 201 | widths = boxes[..., 2] - boxes[..., 0] + 1.0 202 | heights = boxes[..., 3] - boxes[..., 1] + 1.0 203 | cxs = boxes[..., 0] + 0.5 * widths 204 | cys = boxes[..., 1] + 0.5 * heights 205 | dxs = deltas[..., 0::4] 206 | dys = deltas[..., 1::4] 207 | dws = deltas[..., 2::4] 208 | dhs = deltas[..., 3::4] 209 | cxs_pred = dxs * widths.unsqueeze(2) + cxs.unsqueeze(2) 210 | cys_pred = dys * heights.unsqueeze(2) + cys.unsqueeze(2) 211 | ws_pred = torch.exp(dws) * widths.unsqueeze(2) 212 | hs_pred = torch.exp(dhs) * heights.unsqueeze(2) 213 | boxes_pred = deltas.clone() 214 | boxes_pred[..., 0::4] = cxs_pred - ws_pred * 0.5 215 | boxes_pred[..., 1::4] = cys_pred - hs_pred * 0.5 216 | boxes_pred[..., 2::4] = cxs_pred + ws_pred * 0.5 217 | boxes_pred[..., 3::4] = cys_pred + hs_pred * 0.5 218 | # [x1, y1, x2, y2] 219 | return boxes_pred 220 | 221 | 222 | '''adjust learning rate''' 223 | def adjustLearningRate(optimizer, target_lr, logger_handle=None): 224 | if logger_handle is not None: 225 | logger_handle.info('Adjust learning rate to 
%s...' % str(target_lr)) 226 | for param_group in optimizer.param_groups: 227 | param_group['lr'] = target_lr 228 | return True 229 | 230 | 231 | '''save checkpoints''' 232 | def saveCheckpoints(state_dict, savepath, logger_handle): 233 | logger_handle.info('Saving state_dict in %s...' % savepath) 234 | torch.save(state_dict, savepath) 235 | return True 236 | 237 | 238 | '''load checkpoints''' 239 | def loadCheckpoints(checkpointspath, logger_handle): 240 | logger_handle.info('Loading checkpoints from %s...' % checkpointspath) 241 | checkpoints = torch.load(checkpointspath) 242 | return checkpoints 243 | 244 | 245 | '''clip gradient''' 246 | def clipGradients(params, max_norm=35, norm_type=2): 247 | params = list(filter(lambda p: p.requires_grad and p.grad is not None, params)) 248 | if len(params) > 0: 249 | clip_grad.clip_grad_norm_(params, max_norm=max_norm, norm_type=norm_type) -------------------------------------------------------------------------------- /names/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /names/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | test mAP 4 | Author: 5 | Charles 6 | ''' 7 | import json 8 | import torch 9 | import warnings 10 | import argparse 11 | import numpy as np 12 | from modules.utils import * 13 | from libs.nms.nms_wrapper import nms 14 | from cfgs.getcfg import getCfgByDatasetAndBackbone 15 | from modules.fasterRCNN import FasterRCNNFPNResNets 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | '''parse arguments for testing''' 20 | def parseArgs(): 21 | parser = argparse.ArgumentParser(description='Faster R-CNN with FPN') 22 | parser.add_argument('--datasetname', dest='datasetname', help='dataset for testing.', default='', type=str, required=True) 23 | parser.add_argument('--annfilepath', dest='annfilepath', help='used to specify annfilepath.', default='', type=str) 24 | parser.add_argument('--datasettype', dest='datasettype', help='used to specify 
datasettype.', default='val2017', type=str) 25 | parser.add_argument('--backbonename', dest='backbonename', help='backbone network for testing.', default='', type=str, required=True) 26 | parser.add_argument('--checkpointspath', dest='checkpointspath', help='checkpoints you want to use.', default='', type=str, required=True) 27 | parser.add_argument('--nmsthresh', dest='nmsthresh', help='thresh used in nms.', default=0.5, type=float) 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | '''test mAP''' 33 | def test(): 34 | # prepare base things 35 | args = parseArgs() 36 | cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename) 37 | checkDir(cfg.TEST_BACKUPDIR) 38 | logger_handle = Logger(cfg.TEST_LOGFILE) 39 | use_cuda = torch.cuda.is_available() 40 | clsnames = loadclsnames(cfg.CLSNAMESPATH) 41 | # prepare dataset 42 | if args.datasetname == 'coco': 43 | dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR, image_size_dict=cfg.IMAGESIZE_DICT, max_num_gt_boxes=-1, use_color_jitter=False, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL, mode='TEST', datasettype=args.datasettype, annfilepath=args.annfilepath) 44 | else: 45 | raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname) 46 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) 47 | # prepare model 48 | if args.backbonename.find('resnet') != -1: 49 | model = FasterRCNNFPNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle) 50 | else: 51 | raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename) 52 | if use_cuda: 53 | model = model.cuda() 54 | # load checkpoints 55 | checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) 56 | model.load_state_dict(checkpoints['model']) 57 | model.eval() 58 | # test mAP 59 | FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 60 | results = [] 61 | img_ids = [] 62 | for batch_idx, samples in enumerate(dataloader): 63 | logger_handle.info('detect %s/%s...'
% (batch_idx+1, len(dataloader))) 64 | # --do detect 65 | img_id, img, w_ori, h_ori, gt_boxes, img_info, num_gt_boxes = samples 66 | img_id, w_ori, h_ori, scale_factor = int(img_id.item()), w_ori.item(), h_ori.item(), img_info[0][-1].item() 67 | img_ids.append(img_id) 68 | with torch.no_grad(): 69 | output = model(x=img.type(FloatTensor), gt_boxes=gt_boxes.type(FloatTensor), img_info=img_info.type(FloatTensor), num_gt_boxes=num_gt_boxes.type(FloatTensor)) 70 | rois = output[0].data[..., 1:5] 71 | cls_probs = output[1].data 72 | bbox_preds = output[2].data 73 | # --parse the results 74 | if cfg.IS_CLASS_AGNOSTIC: 75 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 76 | box_deltas = box_deltas.view(1, -1, 4) 77 | else: 78 | box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor) 79 | box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES) 80 | boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas) 81 | boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, torch.from_numpy(np.array([h_ori*scale_factor, w_ori*scale_factor, scale_factor])).unsqueeze(0).type(FloatTensor).data) 82 | boxes_pred = boxes_pred.squeeze() 83 | scores = cls_probs.squeeze() 84 | thresh = 0.05 85 | for j in range(1, cfg.NUM_CLASSES): 86 | idxs = torch.nonzero(scores[:, j] > thresh).view(-1) 87 | if idxs.numel() > 0: 88 | cls_scores = scores[:, j][idxs] 89 | _, order = torch.sort(cls_scores, 0, True) 90 | if cfg.IS_CLASS_AGNOSTIC: 91 | cls_boxes = boxes_pred[idxs, :] 92 | else: 93 | cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4] 94 | cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) 95 | cls_dets = cls_dets[order] 96 | _, keep_idxs = nms(cls_dets, args.nmsthresh) 97 | cls_dets = cls_dets[keep_idxs.view(-1).long()] 98 | for cls_det in cls_dets: 99 | category_id = dataset.clsids2cococlsids_dict.get(j) 100 | x1, y1, x2, y2, score = cls_det 101 | x1 = x1.item() / scale_factor 102 | x2 = x2.item() / scale_factor 103 | y1 = y1.item() / scale_factor 104 | y2 = y2.item() / scale_factor 105 | bbox = [x1, y1, x2, y2] 106 | bbox[2] = bbox[2] - bbox[0] 107 | bbox[3] = bbox[3] - bbox[1] 108 | image_result = { 109 | 'image_id': img_id, 110 | 'category_id': int(category_id), 111 | 'score': float(score.item()), 112 | 'bbox': bbox 113 | } 114 | results.append(image_result) 115 | json.dump(results, open(cfg.TEST_BBOXES_SAVE_PATH, 'w'), indent=4) 116 | if args.datasettype in ['val2017']: 117 | dataset.doDetectionEval(img_ids, cfg.TEST_BBOXES_SAVE_PATH) 118 | 119 | 120 | '''run''' 121 | if __name__ == '__main__': 122 | test() -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Function: 3 | train the model 4 | Author: 5 | Charles 6 | ''' 7 | import os 8 | import torch 9 | import warnings 10 | import argparse 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | from modules.utils import * 14 | from cfgs.getcfg import getCfgByDatasetAndBackbone 15 | from modules.fasterRCNN import FasterRCNNFPNResNets 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | '''parse arguments for training''' 20 | def parseArgs(): 21 | parser = argparse.ArgumentParser(description='Faster R-CNN with FPN') 22 | parser.add_argument('--datasetname', dest='datasetname', 
help='dataset for training.', default='', type=str, required=True) 23 | parser.add_argument('--backbonename', dest='backbonename', help='backbone network for training.', default='', type=str, required=True) 24 | parser.add_argument('--checkpointspath', dest='checkpointspath', help='checkpoints you want to use.', default='', type=str) 25 | args = parser.parse_args() 26 | return args 27 | 28 | 29 | '''train model''' 30 | def train(): 31 | # prepare base things 32 | args = parseArgs() 33 | cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename) 34 | checkDir(cfg.TRAIN_BACKUPDIR) 35 | logger_handle = Logger(cfg.TRAIN_LOGFILE) 36 | use_cuda = torch.cuda.is_available() 37 | is_multi_gpus = cfg.IS_MULTI_GPUS 38 | if is_multi_gpus: assert use_cuda 39 | # prepare dataset 40 | if args.datasetname == 'coco': 41 | dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR, image_size_dict=cfg.IMAGESIZE_DICT, max_num_gt_boxes=cfg.MAX_NUM_GT_BOXES, use_color_jitter=cfg.USE_COLOR_JITTER, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL, mode='TRAIN', datasettype='train2017') 42 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=cfg.BATCHSIZE, sampler=NearestRatioRandomSampler(dataset.img_ratios, cfg.BATCHSIZE), num_workers=cfg.NUM_WORKERS, collate_fn=COCODataset.paddingCollateFn, pin_memory=cfg.PIN_MEMORY) 43 | else: 44 | raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname) 45 | # prepare model 46 | if args.backbonename.find('resnet') != -1: 47 | model = FasterRCNNFPNResNets(mode='TRAIN', cfg=cfg, logger_handle=logger_handle) 48 | else: 49 | raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename) 50 | start_epoch = 1 51 | end_epoch = cfg.MAX_EPOCHS 52 | if use_cuda: 53 | model = model.cuda() 54 | # prepare optimizer 55 | learning_rate_idx = 0 56 | if cfg.IS_USE_WARMUP: 57 | learning_rate = cfg.LEARNING_RATES[learning_rate_idx] / 3 58 | else: 59 | learning_rate = cfg.LEARNING_RATES[learning_rate_idx] 60 | optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate, momentum=cfg.MOMENTUM, weight_decay=cfg.WEIGHT_DECAY) 61 | # check checkpoints path 62 | if args.checkpointspath: 63 | checkpoints = loadCheckpoints(args.checkpointspath, logger_handle) 64 | model.load_state_dict(checkpoints['model']) 65 | optimizer.load_state_dict(checkpoints['optimizer']) 66 | start_epoch = checkpoints['epoch'] + 1 67 | for epoch in range(1, start_epoch): 68 | if epoch in cfg.LR_ADJUST_EPOCHS: 69 | learning_rate_idx += 1 70 | # data parallel 71 | if is_multi_gpus: 72 | model = nn.DataParallel(model) 73 | # print config 74 | logger_handle.info('Dataset used: %s, Number of images: %s' % (args.datasetname, len(dataset))) 75 | logger_handle.info('Backbone used: %s' % args.backbonename) 76 | logger_handle.info('Checkpoints used: %s' % args.checkpointspath) 77 | logger_handle.info('Config file used: %s' % cfg_file_path) 78 | # train 79 | FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 80 | for epoch in range(start_epoch, end_epoch+1): 81 | # --set train mode 82 | if is_multi_gpus: 83 | model.module.setTrain() 84 | else: 85 | model.setTrain() 86 | # --adjust learning rate 87 | if epoch in cfg.LR_ADJUST_EPOCHS: 88 | learning_rate_idx += 1 89 | adjustLearningRate(optimizer=optimizer, target_lr=cfg.LEARNING_RATES[learning_rate_idx], logger_handle=logger_handle) 90 | # --log info 91 | logger_handle.info('Start epoch %s, learning rate is
%s...' % (epoch, cfg.LEARNING_RATES[learning_rate_idx])) 92 | # --train epoch 93 | for batch_idx, samples in enumerate(dataloader): 94 | if (epoch == 1) and (cfg.IS_USE_WARMUP) and (batch_idx <= cfg.NUM_WARMUP_STEPS): 95 | assert learning_rate_idx == 0, 'BUGS may exist...' 96 | target_lr = cfg.LEARNING_RATES[learning_rate_idx] / 3 97 | target_lr += (cfg.LEARNING_RATES[learning_rate_idx] - cfg.LEARNING_RATES[learning_rate_idx] / 3) * batch_idx / cfg.NUM_WARMUP_STEPS 98 | adjustLearningRate(optimizer=optimizer, target_lr=target_lr) 99 | optimizer.zero_grad() 100 | img_ids, imgs, gt_boxes, img_info, num_gt_boxes = samples 101 | output = model(x=imgs.type(FloatTensor), gt_boxes=gt_boxes.type(FloatTensor), img_info=img_info.type(FloatTensor), num_gt_boxes=num_gt_boxes.type(FloatTensor)) 102 | rois, cls_probs, bbox_preds, rpn_cls_loss, rpn_reg_loss, loss_cls, loss_reg = output 103 | loss = rpn_cls_loss.mean() + rpn_reg_loss.mean() + loss_cls.mean() + loss_reg.mean() 104 | logger_handle.info('[EPOCH]: %s/%s, [BATCH]: %s/%s, [LEARNING_RATE]: %s, [DATASET]: %s \n\t [LOSS]: rpn_cls_loss %.4f, rpn_reg_loss %.4f, loss_cls %.4f, loss_reg %.4f, total %.4f' % \ 105 | (epoch, end_epoch, (batch_idx+1), len(dataloader), cfg.LEARNING_RATES[learning_rate_idx], args.datasetname, rpn_cls_loss.mean().item(), rpn_reg_loss.mean().item(), loss_cls.mean().item(), loss_reg.mean().item(), loss.item())) 106 | loss.backward() 107 | clipGradients(model.parameters(), max_norm=cfg.GRAD_CLIP_MAX_NORM, norm_type=cfg.GRAD_CLIP_NORM_TYPE) 108 | optimizer.step() 109 | # --save model 110 | if (epoch % cfg.SAVE_INTERVAL == 0) or (epoch == end_epoch): 111 | state_dict = {'epoch': epoch, 112 | 'model': model.module.state_dict() if is_multi_gpus else model.state_dict(), 113 | 'optimizer': optimizer.state_dict()} 114 | savepath = os.path.join(cfg.TRAIN_BACKUPDIR, 'epoch_%s.pth' % epoch) 115 | saveCheckpoints(state_dict, savepath, logger_handle) 116 | 117 | 118 | '''run''' 119 | if __name__ == '__main__': 120 | train() --------------------------------------------------------------------------------
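A minimal sketch of how the `BBoxFunctions.encodeBboxes`/`decodeBboxes` helpers shown earlier pair up, assuming `BBoxFunctions` is importable from `modules.utils` (as test.py's `from modules.utils import *` implies); the tensor values below are made-up illustrations, not repository data:

```python
import torch
from modules.utils import BBoxFunctions  # assumed import path; adjust if the package exports it elsewhere

# one image, two anchors and their matched ground-truth boxes, all as (x1, y1, x2, y2)
anchors = torch.tensor([[[10., 10., 50., 60.],
                         [20., 30., 80., 90.]]])   # shape (batch=1, num_boxes=2, 4)
gt_boxes = torch.tensor([[[12., 14., 48., 66.],
                          [18., 28., 84., 88.]]])

# regression targets (dx, dy, dw, dh) of the ground truth relative to the anchors
deltas = BBoxFunctions.encodeBboxes(anchors, gt_boxes)   # shape (1, 2, 4)

# applying the targets back to the anchors approximately recovers the ground truth;
# x2/y2 come back one pixel larger because widths/heights use the legacy "+1" convention
decoded = BBoxFunctions.decodeBboxes(anchors, deltas)
print(decoded)
```

This is the standard Faster R-CNN box parameterization; note that test.py additionally un-normalizes the network's `bbox_preds` with `TEST_BBOX_NORMALIZE_STDS`/`TEST_BBOX_NORMALIZE_MEANS` before calling `decodeBboxes`, and then clips the decoded boxes to the scaled image size.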