├── .gitignore ├── LICENSE ├── README.md ├── data └── demo │ ├── 000022.png │ ├── 000188.png │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ └── 004545.jpg ├── demo_notebook.ipynb ├── experiments ├── cfgs │ ├── faster_rcnn_end2end.yml │ └── faster_rcnn_end2end_resnet.yml ├── eval │ ├── voc2007_test_res.sh │ └── voc2007_test_vgg.sh ├── profiling │ ├── gprof2dot.py │ ├── profile.png │ └── run_profiling.sh └── scripts │ ├── faster_rcnn_end2end.sh │ ├── faster_rcnn_vggnet.sh │ ├── faster_rcnn_vggnet_restore.sh │ ├── faster_rcnn_voc.sh │ ├── faster_rcnn_voc_resnet_101.sh │ ├── faster_rcnn_voc_resnet_50.sh │ ├── faster_rcnn_voc_resnet_restore.sh │ └── kitti2pascalvoc.py ├── faster_rcnn ├── __init__.py ├── demo.py ├── test_net.py └── train_net.py ├── lib ├── Makefile ├── __init__.py ├── cuda_config.h ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imagenet3d.py │ ├── imdb.py │ ├── imdb2.py │ ├── kitti.py │ ├── kitti_tracking.py │ ├── kittivoc.py │ ├── nissan.py │ ├── nthu.py │ ├── pascal3d.py │ ├── pascal_voc.py │ ├── pascal_voc2.py │ └── voc_eval.py ├── deform_conv_layer │ ├── __init__.py │ ├── deform_conv.cc │ ├── deform_conv.cu.cc │ ├── deform_conv.h │ ├── deform_conv_grad.py │ ├── deform_conv_op.py │ ├── deform_conv_test_mx.py │ ├── deform_conv_util.h │ └── test_deform_conv.py ├── deform_psroi_pooling_layer │ ├── __init__.py │ ├── deform_psroi_pooling_op.cc │ ├── deform_psroi_pooling_op.py │ ├── deform_psroi_pooling_op_gpu.cu.cc │ ├── deform_psroi_pooling_op_gpu.h │ ├── deform_psroi_pooling_op_grad.py │ ├── deform_psroi_pooling_op_test.py │ └── deform_psroi_pooling_op_test_mx.py ├── fast_rcnn │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── config2.py │ ├── nms_wrapper.py │ ├── test.py │ └── train.py ├── gt_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ └── roidb.py ├── make.sh ├── networks │ ├── .VGGnet.py.swo │ ├── Resnet101_test.py │ ├── Resnet101_train.py │ ├── Resnet50_test.py │ ├── Resnet50_train.py │ ├── VGGnet_test.py │ ├── VGGnet_train.py │ ├── __init__.py │ ├── caffenet.py │ ├── factory.py │ └── network.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms_kernel.cu │ └── py_cpu_nms.py ├── psroi_pooling_layer │ ├── __init__.py │ ├── cuda_kernel_helper.h │ ├── psroi_pooling_op.cc │ ├── psroi_pooling_op.py │ ├── psroi_pooling_op_gpu.cu.cc │ ├── psroi_pooling_op_gpu.h │ ├── psroi_pooling_op_grad.py │ └── psroi_pooling_op_test.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── layer.py │ ├── minibatch.py │ ├── minibatch2.py │ ├── roidb.py │ └── roidb2.py ├── roi_pooling_layer │ ├── __init__.py │ ├── roi_pooling_op.cc │ ├── roi_pooling_op.py │ ├── roi_pooling_op_gpu.cu.cc │ ├── roi_pooling_op_gpu.h │ ├── roi_pooling_op_grad.py │ └── roi_pooling_op_test.py ├── rpn_msr │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── anchor_target_layer_tf.py │ ├── generate.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_layer_tf.py │ └── proposal_target_layer_tf.py ├── setup.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ ├── boxes_grid.py │ ├── nms.py │ ├── nms.pyx │ └── timer.py └── test_notebook.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.o 3 | *.pyc 4 | *.npy 
5 | *.ckpt 6 | *.tar 7 | *.out 8 | *.tar.gz 9 | *.so 10 | *.zip 11 | *.bak 12 | .DS_Store 13 | experiments/logs/* 14 | # experiments/eval/* 15 | data/cache/* 16 | data/pretrain_model 17 | data/VOC* 18 | data/KITTI* 19 | output 20 | logs 21 | utest 22 | .vscode 23 | *-checkpoint.ipynb 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Charles Shang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /data/demo/000022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/000022.png -------------------------------------------------------------------------------- /data/demo/000188.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/000188.png -------------------------------------------------------------------------------- /data/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/000456.jpg -------------------------------------------------------------------------------- /data/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/000542.jpg -------------------------------------------------------------------------------- /data/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/001150.jpg -------------------------------------------------------------------------------- /data/demo/001763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/001763.jpg 
-------------------------------------------------------------------------------- /data/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/data/demo/004545.jpg -------------------------------------------------------------------------------- /demo_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "%matplotlib inline\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import numpy as np\n", 15 | "import os, sys, cv2\n", 16 | "import argparse\n", 17 | "import os.path as osp\n", 18 | "import glob\n", 19 | "from easydict import EasyDict as edict\n", 20 | "\n", 21 | "\n", 22 | "from lib.networks.factory import get_network\n", 23 | "from lib.fast_rcnn.config import cfg\n", 24 | "from lib.fast_rcnn.test import im_detect\n", 25 | "from lib.fast_rcnn.nms_wrapper import nms_wrapper\n", 26 | "from lib.utils.timer import Timer\n", 27 | "\n", 28 | "CLASSES = ('__background__',\n", 29 | " 'aeroplane', 'bicycle', 'bird', 'boat',\n", 30 | " 'bottle', 'bus', 'car', 'cat', 'chair',\n", 31 | " 'cow', 'diningtable', 'dog', 'horse',\n", 32 | " 'motorbike', 'person', 'pottedplant',\n", 33 | " 'sheep', 'sofa', 'train', 'tvmonitor')\n", 34 | "\n", 35 | "\n", 36 | "# CLASSES = ('__background__','person','bike','motorbike','car','bus')\n", 37 | "\n", 38 | "def vis_detections(im, class_name, dets, ax, thresh=0.5):\n", 39 | " \"\"\"Draw detected bounding boxes.\"\"\"\n", 40 | " print(dets)\n", 41 | " inds = np.where(dets[:, -1] >= thresh)[0]\n", 42 | " if len(inds) == 0:\n", 43 | " return\n", 44 | "\n", 45 | " for i in inds:\n", 46 | " bbox = dets[i, :4]\n", 47 | " score = dets[i, -1]\n", 48 | "\n", 49 | " ax.add_patch(\n", 50 | " plt.Rectangle((bbox[0], bbox[1]),\n", 51 | " bbox[2] - bbox[0],\n", 52 | " bbox[3] - bbox[1], fill=False,\n", 53 | " edgecolor='red', linewidth=3.5)\n", 54 | " )\n", 55 | " ax.text(bbox[0], bbox[1] - 2,\n", 56 | " '{:s} {:.3f}'.format(class_name, score),\n", 57 | " bbox=dict(facecolor='blue', alpha=0.5),\n", 58 | " fontsize=14, color='white')\n", 59 | "\n", 60 | " ax.set_title(('{} detections with '\n", 61 | " 'p({} | box) >= {:.1f}').format(class_name, class_name,\n", 62 | " thresh),\n", 63 | " fontsize=14)\n", 64 | " plt.axis('off')\n", 65 | " plt.tight_layout()\n", 66 | " plt.draw()\n", 67 | "\n", 68 | "\n", 69 | "def demo(sess, net, image_name):\n", 70 | " \"\"\"Detect object classes in an image using pre-computed object proposals.\"\"\"\n", 71 | "\n", 72 | " # Load the demo image\n", 73 | " im = cv2.imread(image_name)\n", 74 | "\n", 75 | " # Detect all object classes and regress object bounds\n", 76 | " timer = Timer()\n", 77 | " timer.tic()\n", 78 | " scores, boxes = im_detect(sess, net, im)\n", 79 | "\n", 80 | " timer.toc()\n", 81 | " print(('Detection took {:.3f}s for '\n", 82 | " '{:d} object proposals').format(timer.total_time, boxes.shape[0]))\n", 83 | "\n", 84 | " # Visualize detections for each class\n", 85 | " im = im[:, :, (2, 1, 0)]\n", 86 | " fig, ax = plt.subplots(figsize=(12, 12))\n", 87 | " ax.imshow(im, aspect='equal')\n", 88 | "\n", 89 | " CONF_THRESH = 0.7\n", 90 | " NMS_THRESH = 0.3\n", 91 | " res = nms_wrapper(scores, boxes, threshold=0.7)\n", 92 | "# print(res)\n", 93 | 
"# for cls_ind, cls in enumerate(CLASSES[1:]):\n", 94 | "# cls_ind += 1 # because we skipped background\n", 95 | "# cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]\n", 96 | "# cls_scores = scores[:, cls_ind]\n", 97 | "# dets = np.hstack((cls_boxes,\n", 98 | "# cls_scores[:, np.newaxis])).astype(np.float32)\n", 99 | "# keep = nms(dets, NMS_THRESH)\n", 100 | "# dets = dets[keep, :]\n", 101 | "# vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)\n", 102 | " for ind, r in enumerate(res):\n", 103 | " if r['dets'] is None: continue\n", 104 | " dets = r['dets']\n", 105 | " for i in range(0, dets.shape[0]):\n", 106 | " vis_detections(im, r['class'], np.expand_dims(dets[i, :], 0), ax, thresh=CONF_THRESH)\n", 107 | "\n", 108 | "\n", 109 | "if __name__ == '__main__':\n", 110 | " cfg.TEST.HAS_RPN = True # Use RPN for proposals\n", 111 | " args = edict()\n", 112 | " args.gpu_id=0\n", 113 | " args.demo_net=\"Resnet50_test\"\n", 114 | " args.model=\"./output/faster_rcnn_end2end_resnet_voc/voc_2007_trainval\"\n", 115 | "# args = parse_args()\n", 116 | "\n", 117 | " if args.model == ' ' or not os.path.exists(args.model):\n", 118 | " print(('current path is ' + os.path.abspath(__file__)))\n", 119 | " raise IOError(('Error: Model not found.\\n'))\n", 120 | " # load network\n", 121 | " device_name = '/gpu:{:d}'.format(args.gpu_id)\n", 122 | " print(device_name)\n", 123 | " with tf.device(device_name):\n", 124 | " net = get_network(args.demo_net)\n", 125 | " saver = tf.train.Saver()\n", 126 | " # init session\n", 127 | " c = tf.ConfigProto(allow_soft_placement=True)\n", 128 | " c.gpu_options.visible_device_list=str(args.gpu_id)\n", 129 | " sess = tf.Session(config=c)\n", 130 | "\n", 131 | " # load model\n", 132 | " print(('Loading network {:s}... '.format(args.demo_net)), end=' ')\n", 133 | " ckpt = tf.train.latest_checkpoint(args.model)\n", 134 | " if ckpt:\n", 135 | " # the global_step will restore sa well\n", 136 | " saver.restore(sess,ckpt)\n", 137 | " print('restore from the checkpoint{0}'.format(ckpt))\n", 138 | " #saver.restore(sess, args.model)\n", 139 | " print (' done.')\n", 140 | "\n", 141 | " # Warmup on a dummy image\n", 142 | " im = 128 * np.ones((300, 300, 3), dtype=np.uint8)\n", 143 | " for i in range(2):\n", 144 | " _, _ = im_detect(sess, net, im)\n", 145 | "\n", 146 | " im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \\\n", 147 | " glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg'))\n", 148 | "# im_names = [\"/home/antonio/tf_deformable_net/data/VOCdevkit2007/VOC2007/JPEGImages/00\"+name for name \\\n", 149 | "# in [\"7097.jpg\", \"8997.jpg\"]]\n", 150 | " for im_name in im_names:\n", 151 | " print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')\n", 152 | " print('Demo for {:s}'.format(im_name))\n", 153 | " demo(sess, net, im_name)\n", 154 | "\n", 155 | " plt.show()\n", 156 | "\n" 157 | ] 158 | } 159 | ], 160 | "metadata": { 161 | "anaconda-cloud": {}, 162 | "kernelspec": { 163 | "display_name": "Python [default]", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | "nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.5.2" 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } 183 | -------------------------------------------------------------------------------- /experiments/cfgs/faster_rcnn_end2end.yml: 
-------------------------------------------------------------------------------- 1 | EXP_DIR: faster_rcnn_voc_vgg 2 | LOG_DIR: faster_rcnn_voc_vgg 3 | IS_MULTISCALE: False 4 | NET_NAME: VGGnet 5 | ANCHOR_SCALES: [8, 16, 32] 6 | NCLASSES: 21 7 | TRAIN: 8 | OHEM: True 9 | # RPN_BATCHSIZE: 2000 # duplicate key removed -- the RPN_BATCHSIZE: 256 below takes effect 10 | BATCH_SIZE: 300 11 | LOG_IMAGE_ITERS: 100 12 | DISPLAY: 10 13 | SNAPSHOT_ITERS: 5000 14 | HAS_RPN: True 15 | LEARNING_RATE: 0.001 16 | MOMENTUM: 0.9 17 | GAMMA: 0.1 18 | STEPSIZE: 60000 19 | IMS_PER_BATCH: 1 20 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 21 | RPN_POSITIVE_OVERLAP: 0.7 22 | RPN_BATCHSIZE: 256 23 | PROPOSAL_METHOD: gt 24 | BG_THRESH_LO: 0.0 25 | PRECLUDE_HARD_SAMPLES: True 26 | BBOX_INSIDE_WEIGHTS: [1, 1, 1, 1] 27 | RPN_BBOX_INSIDE_WEIGHTS: [1, 1, 1, 1] 28 | RPN_POSITIVE_WEIGHT: -1.0 29 | FG_FRACTION: 0.3 30 | WEIGHT_DECAY: 0.0005 31 | RPN_PRE_NMS_TOP_N: 6000 32 | RPN_POST_NMS_TOP_N: 300 33 | RPN_MIN_SIZE: 0 34 | TEST: 35 | HAS_RPN: True 36 | # RPN_MIN_SIZE: 0 # duplicate key removed -- kept once below 37 | # RPN generate proposal 38 | RPN_NMS_THRESH: 0.7 39 | RPN_PRE_NMS_TOP_N: 6000 40 | RPN_POST_NMS_TOP_N: 300 41 | RPN_MIN_SIZE: 0 42 | # RCNN nms 43 | NMS: 0.7 44 | # test_epoch: 7 45 | -------------------------------------------------------------------------------- /experiments/cfgs/faster_rcnn_end2end_resnet.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: faster_rcnn_end2end_resnet_voc 2 | LOG_DIR: faster_rcnn_end2end_resnet_voc 3 | IS_MULTISCALE: False 4 | NET_NAME: Resnet50 5 | ANCHOR_SCALES: [8, 16, 32] 6 | NCLASSES: 21 7 | TRAIN: 8 | OHEM: False 9 | RANDOM_DOWNSAMPLE: False 10 | RPN_BATCHSIZE: 256 11 | BATCH_SIZE: 300 12 | SNAPSHOT_PREFIX: Resnet50 13 | LOG_IMAGE_ITERS: 100 14 | DISPLAY: 10 15 | SNAPSHOT_ITERS: 5000 16 | HAS_RPN: True 17 | LEARNING_RATE: 0.0005 18 | MOMENTUM: 0.9 19 | GAMMA: 0.1 20 | STEPSIZE: 70000 21 | IMS_PER_BATCH: 1 22 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 23 | RPN_POSITIVE_OVERLAP: 0.7 24 | PROPOSAL_METHOD: gt 25 | BG_THRESH_LO: 0.1 26 | PRECLUDE_HARD_SAMPLES: False 27 | BBOX_INSIDE_WEIGHTS: [1, 1, 1, 1] 28 | RPN_BBOX_INSIDE_WEIGHTS: [1, 1, 1, 1] 29 | RPN_POSITIVE_WEIGHT: -1.0 30 | FG_FRACTION: 0.25 31 | WEIGHT_DECAY: 0.0005 32 | RPN_PRE_NMS_TOP_N: 6000 33 | RPN_POST_NMS_TOP_N: 300 34 | RPN_MIN_SIZE: 0 35 | 36 | TEST: 37 | HAS_RPN: True 38 | RPN_MIN_SIZE: 0 39 | # RPN generate proposal 40 | RPN_NMS_THRESH: 0.7 41 | RPN_PRE_NMS_TOP_N: 6000 42 | RPN_POST_NMS_TOP_N: 300 43 | NMS: 0.7 44 | # PROPOSAL_METHOD : rpn 45 | # RCNN nms 46 | # pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 47 | # post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 48 | # nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 49 | # min_size = cfg[cfg_key].RPN_MIN_SIZE -------------------------------------------------------------------------------- /experiments/eval/voc2007_test_res.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | python ./faster_rcnn/test_net.py \ 4 | --gpu 0 \ 5 | --weights ./output/faster_rcnn_end2end_resnet_voc/voc_2007_trainval \ 6 | --imdb voc_2007_test \ 7 | --cfg ./experiments/cfgs/faster_rcnn_end2end_resnet.yml \ 8 | --network Resnet50_test 9 | -------------------------------------------------------------------------------- /experiments/eval/voc2007_test_vgg.sh: -------------------------------------------------------------------------------- 1 | python ./faster_rcnn/test_net.py \ 2 | --gpu 0 \ 3 | --weights ./output/faster_rcnn_voc_vgg/voc_2007_trainval \ 4 | --imdb voc_2007_test \ 5 | --cfg
./experiments/cfgs/faster_rcnn_end2end.yml \ 6 | --network VGGnet_test 7 | -------------------------------------------------------------------------------- /experiments/profiling/profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/experiments/profiling/profile.png -------------------------------------------------------------------------------- /experiments/profiling/run_profiling.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m cProfile -o experiments/profiling/profile.out ./faster_rcnn/train_net.py\ 4 | --gpu 0 --weights data/pretrain_model/VGG_imagenet.npy --imdb voc_2007_trainval \ 5 | --iters 1000 --cfg experiments/cfgs/faster_rcnn_end2end.yml --network VGGnet_train 6 | 7 | # generate an image 8 | if [ ! -f experiments/profiling/gprof2dot.py ]; then 9 | echo "Downloading ... " 10 | wget https://raw.githubusercontent.com/jrfonseca/gprof2dot/master/gprof2dot.py -O experiments/profiling/gprof2dot.py 11 | fi 12 | python experiments/profiling/gprof2dot.py -f pstats experiments/profiling/profile.out | dot -Tpng -o experiments/profiling/profile.png -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_end2end.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Usage: 3 | # ./experiments/scripts/faster_rcnn_end2end.sh GPU NET DATASET [options args to {train,test}_net.py] 4 | # DATASET is pascal_voc, kittivoc, or coco. 5 | # 6 | # Example: 7 | # ./experiments/scripts/faster_rcnn_end2end.sh 0 VGG_CNN_M_1024 pascal_voc \ 8 | # --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]" 9 | 10 | set -x 11 | set -e 12 | 13 | export PYTHONUNBUFFERED="True" 14 | 15 | GPU_ID=$1 16 | NET=$2 17 | NET_lc=${NET,,} 18 | DATASET=$3 19 | 20 | array=( $@ ) 21 | len=${#array[@]} 22 | EXTRA_ARGS=${array[@]:3:$len} 23 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 24 | 25 | case $DATASET in 26 | pascal_voc) 27 | TRAIN_IMDB="voc_2007_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | PT_DIR="pascal_voc" 30 | ITERS=70000 31 | CFG="experiments/cfgs/faster_rcnn_end2end.yml" 32 | ;; 33 | kittivoc) 34 | TRAIN_IMDB="kittivoc_train" 35 | ITERS=100000 36 | CFG="experiments/cfgs/faster_rcnn_kitti.yml" 37 | ;; 38 | coco) 39 | # This is a very long and slow training schedule 40 | # You can probably use fewer iterations and reduce the 41 | # time to the LR drop (set in the solver to 350,000 iterations).
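# NOTE: unlike the pascal_voc and kittivoc branches above, this coco branch
# never assigns CFG, so the train command below would be run with an empty
# --cfg. A plausible fix (an assumption; no coco cfg ships in
# experiments/cfgs/) would be, e.g.:
# CFG="experiments/cfgs/faster_rcnn_end2end.yml"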
42 | TRAIN_IMDB="coco_2014_train" 43 | TEST_IMDB="coco_2014_minival" 44 | PT_DIR="coco" 45 | ITERS=490000 46 | ;; 47 | *) 48 | echo "No dataset given" 49 | exit 50 | ;; 51 | esac 52 | LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y_%m_%d_%H_%M_%S'`" 53 | exec &> >(tee -a "$LOG") 54 | echo Logging output to "$LOG" 55 | 56 | time python ./faster_rcnn/train_net.py --gpu ${GPU_ID} \ 57 | --weights data/pretrain_model/VGG_imagenet.npy \ 58 | --imdb ${TRAIN_IMDB} \ 59 | --iters ${ITERS} \ 60 | --cfg ${CFG} \ 61 | --network VGGnet_train \ 62 | ${EXTRA_ARGS} 63 | 64 | set +x 65 | NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'` 66 | set -x 67 | 68 | time python ./faster_rcnn/test_net.py --gpu ${GPU_ID} \ 69 | --weights ${NET_FINAL} \ 70 | --imdb ${TEST_IMDB} \ 71 | --cfg experiments/cfgs/faster_rcnn_end2end.yml \ 72 | --network VGGnet_test \ 73 | ${EXTRA_ARGS} 74 | -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_vggnet.sh: -------------------------------------------------------------------------------- 1 | python ./faster_rcnn/train_net.py \ 2 | --gpu 0 \ 3 | --weights ./data/pretrain_model/VGG_imagenet.npy \ 4 | --imdb voc_2007_trainval \ 5 | --iters 100000 \ 6 | --cfg ./experiments/cfgs/faster_rcnn_end2end.yml \ 7 | --network VGGnet_train \ 8 | --restore 0 -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_vggnet_restore.sh: -------------------------------------------------------------------------------- 1 | python ./faster_rcnn/train_net.py \ 2 | --gpu 0 \ 3 | --weights ./data/pretrain_model/VGG_imagenet.npy \ 4 | --imdb voc_2007_trainval \ 5 | --iters 100000 \ 6 | --cfg ./experiments/cfgs/faster_rcnn_vggnet.yml \ 7 | --network VGGnet_train \ 8 | --restore 1 -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_voc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #python ./faster_rcnn/train_net.py \ 3 | #--gpu 0 \ 4 | #--weights ./data/pretrain_model/VGG_imagenet.npy \ 5 | #--imdb voc_2007_trainval \ 6 | #--iters 100000 \ 7 | #--cfg ./experiments/cfgs/faster_rcnn_end2end.yml \ 8 | #--network VGGnet_train \ 9 | #--restore 0 10 | 11 | python ./faster_rcnn/train_net.py \ 12 | --gpu 0 \ 13 | --weights ./data/pretrain_model/VGG_imagenet.npy \ 14 | --imdb voc_0712_trainval \ 15 | --iters 100000 \ 16 | --cfg ./experiments/cfgs/faster_rcnn_end2end.yml \ 17 | --network VGGnet_train \ 18 | --restore 1 -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_voc_resnet_101.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python ./faster_rcnn/train_net.py \ 3 | --gpu 0 \ 4 | --weights ./data/pretrain_model/Resnet101.npy \ 5 | --imdb voc_2007_trainval \ 6 | --iters 160000 \ 7 | --cfg ./experiments/cfgs/faster_rcnn_end2end_resnet.yml \ 8 | --network Resnet101_train \ 9 | --restore 0 10 | -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_voc_resnet_50.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python ./faster_rcnn/train_net.py \ 3 | --gpu 1 \ 4 | --weights ./data/pretrain_model/Resnet50.npy \ 5 | --imdb voc_2007_trainval \ 6 | --iters 
160000 \ 7 | --cfg ./experiments/cfgs/faster_rcnn_end2end_resnet.yml \ 8 | --network Resnet50_train \ 9 | --restore 0 10 | -------------------------------------------------------------------------------- /experiments/scripts/faster_rcnn_voc_resnet_restore.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python ./faster_rcnn/train_net.py \ 3 | --gpu 1 \ 4 | --imdb voc_2007_trainval \ 5 | --iters 240000 \ 6 | --cfg ./experiments/cfgs/faster_rcnn_end2end_resnet.yml \ 7 | --network Resnet50_train \ 8 | --restore 1 \ 9 | # --set EXP_DIR exp_dir 10 | # --weights ./data/pretrain_model/Resnet50.npy \ 11 | -------------------------------------------------------------------------------- /faster_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') -------------------------------------------------------------------------------- /faster_rcnn/demo.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import os, sys, cv2 5 | import argparse 6 | import os.path as osp 7 | import glob 8 | 9 | 10 | # this_dir = osp.dirname(__file__) 11 | # print(this_dir) 12 | #with open('../environ.txt') as f: 13 | # l = f.readline() 14 | # proPath = re.match(r'.*\'(.*)\'',l).group(1) 15 | # sys.path.append(proPath) 16 | PACKAGE_PARENT = '..' 17 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 18 | sys.path.insert(0, os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 19 | 20 | 21 | from lib.networks.factory import get_network 22 | from lib.fast_rcnn.config import cfg 23 | from lib.fast_rcnn.test import im_detect 24 | from lib.fast_rcnn.nms_wrapper import nms 25 | from lib.utils.timer import Timer 26 | 27 | CLASSES = ('__background__', 28 | 'aeroplane', 'bicycle', 'bird', 'boat', 29 | 'bottle', 'bus', 'car', 'cat', 'chair', 30 | 'cow', 'diningtable', 'dog', 'horse', 31 | 'motorbike', 'person', 'pottedplant', 32 | 'sheep', 'sofa', 'train', 'tvmonitor') 33 | 34 | 35 | # CLASSES = ('__background__','person','bike','motorbike','car','bus') 36 | 37 | def vis_detections(im, class_name, dets, ax, thresh=0.5): 38 | """Draw detected bounding boxes.""" 39 | inds = np.where(dets[:, -1] >= thresh)[0] 40 | if len(inds) == 0: 41 | return 42 | 43 | for i in inds: 44 | bbox = dets[i, :4] 45 | score = dets[i, -1] 46 | 47 | ax.add_patch( 48 | plt.Rectangle((bbox[0], bbox[1]), 49 | bbox[2] - bbox[0], 50 | bbox[3] - bbox[1], fill=False, 51 | edgecolor='red', linewidth=3.5) 52 | ) 53 | ax.text(bbox[0], bbox[1] - 2, 54 | '{:s} {:.3f}'.format(class_name, score), 55 | bbox=dict(facecolor='blue', alpha=0.5), 56 | fontsize=14, color='white') 57 | 58 | ax.set_title(('{} detections with ' 59 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 60 | thresh), 61 | fontsize=14) 62 | plt.axis('off') 63 | plt.tight_layout() 64 | plt.draw() 65 | 66 | 67 | def demo(sess, net, image_name): 68 | """Detect object classes in an image using pre-computed object proposals.""" 69 | 70 | # Load the demo image 71 | im = cv2.imread(image_name) 72 | 73 | # Detect all object classes and regress object bounds 74 | timer = Timer() 75 | timer.tic() 76 | scores, boxes = im_detect(sess, net, im) 77 | timer.toc() 78 | print(('Detection took {:.3f}s for ' 79 | '{:d} object proposals').format(timer.total_time, boxes.shape[0])) 80 | 81 | # 
Visualize detections for each class 82 | im = im[:, :, (2, 1, 0)] 83 | fig, ax = plt.subplots(figsize=(12, 12)) 84 | ax.imshow(im, aspect='equal') 85 | 86 | CONF_THRESH = 0.8 87 | NMS_THRESH = 0.3 88 | for cls_ind, cls in enumerate(CLASSES[1:]): 89 | cls_ind += 1 # because we skipped background 90 | cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] 91 | cls_scores = scores[:, cls_ind] 92 | dets = np.hstack((cls_boxes, 93 | cls_scores[:, np.newaxis])).astype(np.float32) 94 | keep = nms(dets, NMS_THRESH) 95 | dets = dets[keep, :] 96 | vis_detections(im, cls, dets, ax, thresh=CONF_THRESH) 97 | 98 | 99 | def parse_args(): 100 | """Parse input arguments.""" 101 | parser = argparse.ArgumentParser(description='Deform Faster R-CNN demo') 102 | parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', 103 | default=0, type=int) 104 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', 105 | default='Resnet50_test') 106 | parser.add_argument('--model', dest='model', help='Model path', 107 | default=' ') 108 | 109 | args = parser.parse_args() 110 | 111 | return args 112 | 113 | 114 | if __name__ == '__main__': 115 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 116 | 117 | args = parse_args() 118 | 119 | if args.model == ' ' or not os.path.exists(args.model+".meta"): 120 | print(('current path is ' + os.path.abspath(__file__))) 121 | raise IOError(('Error: Model not found.\n')) 122 | 123 | # init session 124 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 125 | # load network 126 | net = get_network(args.demo_net) 127 | # load model 128 | print(('Loading network {:s}... '.format(args.demo_net)), end=' ') 129 | saver = tf.train.Saver() 130 | # print(args.model) 131 | # ckpt = tf.train.latest_checkpoint(args.model) 132 | # print(ckpt) 133 | ckpt = args.model 134 | if ckpt: 135 | # the global_step will restore as well 136 | saver.restore(sess,ckpt) 137 | print('restored from checkpoint {0}'.format(ckpt)) 138 | #saver.restore(sess, args.model) 139 | print (' done.') 140 | 141 | # Warmup on a dummy image 142 | im = 128 * np.ones((300, 300, 3), dtype=np.uint8) 143 | for i in range(2): 144 | _, _ = im_detect(sess, net, im) 145 | 146 | im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \ 147 | glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg')) 148 | 149 | for im_name in im_names: 150 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 151 | print('Demo for {:s}'.format(im_name)) 152 | demo(sess, net, im_name) 153 | 154 | plt.show() 155 | 156 | -------------------------------------------------------------------------------- /faster_rcnn/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """Test a Fast R-CNN network on an image database.""" 11 | import sys,os 12 | this_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, this_dir + '/..') 14 | # import _init_paths 15 | from lib.fast_rcnn.test import test_net, load_test_net 16 | from lib.fast_rcnn.config import cfg, cfg_from_file 17 | from lib.datasets.factory import get_imdb 18 | from lib.networks.factory import get_network 19 | import argparse 20 | import pprint 21 | import time 22 | import tensorflow as tf 23 | 24 | def
parse_args(): 25 | """ 26 | Parse input arguments 27 | """ 28 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 29 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 30 | default=0, type=int) 31 | parser.add_argument('--def', dest='prototxt', 32 | help='prototxt file defining the network', 33 | default=None, type=str) 34 | parser.add_argument('--weights', dest='model', 35 | help='model to test', 36 | default=None, type=str) 37 | parser.add_argument('--cfg', dest='cfg_file', 38 | help='optional config file', default=None, type=str) 39 | parser.add_argument('--wait', dest='wait', 40 | help='wait until net file exists', 41 | default=True, type=bool) 42 | parser.add_argument('--imdb', dest='imdb_name', 43 | help='dataset to test', 44 | default='voc_2007_test', type=str) 45 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 46 | action='store_true') 47 | parser.add_argument('--network', dest='network_name', 48 | help='name of the network', 49 | default=None, type=str) 50 | 51 | if len(sys.argv) == 1: 52 | parser.print_help() 53 | sys.exit(1) 54 | 55 | args = parser.parse_args() 56 | return args 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | 61 | print('Called with args:') 62 | print(args) 63 | 64 | if args.cfg_file is not None: 65 | cfg_from_file(args.cfg_file) 66 | 67 | print('Using config:') 68 | pprint.pprint(cfg) 69 | 70 | while not os.path.exists(args.model) and args.wait: 71 | print(('Waiting for {} to exist...'.format(args.model))) 72 | time.sleep(1000) 73 | 74 | weights_filename = os.path.splitext(os.path.basename(args.model))[0] 75 | 76 | imdb = get_imdb(args.imdb_name) 77 | imdb.competition_mode(args.comp_mode) 78 | 79 | device_name = '/gpu:{:d}'.format(args.gpu_id) 80 | print(device_name) 81 | with tf.device(device_name): 82 | network = get_network(args.network_name) 83 | print(('Use network `{:s}` in training'.format(args.network_name))) 84 | 85 | cfg.GPU_ID = args.gpu_id 86 | # import os 87 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152 88 | # os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu_id) 89 | # start a session 90 | saver = tf.train.Saver() 91 | c = tf.ConfigProto(allow_soft_placement=True) 92 | c.gpu_options.visible_device_list=str(args.gpu_id) 93 | sess = tf.Session(config=c) 94 | saver.restore(sess, tf.train.latest_checkpoint(args.model)) 95 | print((('Loading model weights from {:s}').format(args.model))) 96 | 97 | test_net(sess, network, imdb, weights_filename, thresh=0.7) 98 | # load_test_net(sess, network, imdb, weights_filename) 99 | -------------------------------------------------------------------------------- /faster_rcnn/train_net.py: -------------------------------------------------------------------------------- 1 | 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """Train a Fast R-CNN network on a region of interest database.""" 10 | 11 | import argparse 12 | import pprint 13 | import numpy as np 14 | import pdb 15 | import sys 16 | import os.path 17 | import tensorflow as tf 18 | # this_dir = os.path.dirname(__file__) 19 | # sys.path.insert(0, this_dir + '/..') 20 | PACKAGE_PARENT = '..' 
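# (PACKAGE_PARENT above and SCRIPT_DIR below put the repository root on
# sys.path, so the `lib.*` imports resolve regardless of the directory the
# script is launched from.)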
21 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 22 | sys.path.insert(0, os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 23 | # print(sys.path) 24 | # for p in sys.path: print p 25 | # print (sys.path) 26 | 27 | from lib.fast_rcnn.train import get_training_roidb, train_net 28 | from lib.fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_log_dir 29 | from lib.datasets.factory import get_imdb 30 | from lib.networks.factory import get_network 31 | from lib.fast_rcnn.config import cfg 32 | 33 | def parse_args(): 34 | """ 35 | Parse input arguments 36 | """ 37 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN network') 38 | parser.add_argument('--gpu', dest='gpu_id', 39 | help='GPU device id to use [0]', 40 | default=0, type=int) 41 | parser.add_argument('--solver', dest='solver', 42 | help='solver prototxt', 43 | default=None, type=str) 44 | parser.add_argument('--iters', dest='max_iters', 45 | help='number of iterations to train', 46 | default=70000, type=int) 47 | parser.add_argument('--weights', dest='pretrained_model', 48 | help='initialize with pretrained model weights', 49 | default=None, type=str) 50 | parser.add_argument('--cfg', dest='cfg_file', 51 | help='optional config file', 52 | default=None, type=str) 53 | parser.add_argument('--imdb', dest='imdb_name', 54 | help='dataset to train on', 55 | default='kitti_train', type=str) 56 | parser.add_argument('--rand', dest='randomize', 57 | help='randomize (do not use a fixed seed)', 58 | action='store_true') 59 | parser.add_argument('--network', dest='network_name', 60 | help='name of the network', 61 | default=None, type=str) 62 | parser.add_argument('--set', dest='set_cfgs', 63 | help='set config keys', default=None, 64 | nargs=argparse.REMAINDER) 65 | parser.add_argument('--restore', dest='restore', 66 | help='restore or not', 67 | default=0, type=int) 68 | 69 | if len(sys.argv) == 1: 70 | parser.print_help() 71 | # sys.exit(1) 72 | 73 | args = parser.parse_args() 74 | return args 75 | 76 | if __name__ == '__main__': 77 | args = parse_args() 78 | 79 | print('Called with args:') 80 | print(args) 81 | 82 | if args.cfg_file is not None: 83 | cfg_from_file(args.cfg_file) 84 | if args.set_cfgs is not None: 85 | cfg_from_list(args.set_cfgs) 86 | 87 | print('Using config:') 88 | pprint.pprint(cfg) 89 | 90 | if not args.randomize: 91 | # fix the random seeds (numpy and caffe) for reproducibility 92 | np.random.seed(cfg.RNG_SEED) 93 | imdb = get_imdb(args.imdb_name) 94 | print(('Loaded dataset `{:s}` for training'.format(imdb.name))) 95 | roidb = get_training_roidb(imdb) 96 | 97 | output_dir = get_output_dir(imdb, None) 98 | log_dir = get_log_dir(imdb) 99 | print(('Output will be saved to `{:s}`'.format(output_dir))) 100 | print(('Logs will be saved to `{:s}`'.format(log_dir))) 101 | 102 | device_name = '/gpu:{:d}'.format(args.gpu_id) 103 | print(device_name) 104 | with tf.device(device_name): 105 | network = get_network(args.network_name) 106 | print(('Use network `{:s}` in training'.format(args.network_name))) 107 | # import os 108 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152 109 | # os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu_id) 110 | train_net(network, imdb, roidb, 111 | output_dir=output_dir, 112 | log_dir=log_dir, 113 | pretrained_model=args.pretrained_model, 114 | max_iters=args.max_iters, 115 | restore=bool(int(args.restore))) 116 | 
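# Example invocation (mirrors experiments/scripts/faster_rcnn_voc_resnet_50.sh;
# assumes the ImageNet-pretrained weights have been placed under
# data/pretrain_model/):
#
#   python ./faster_rcnn/train_net.py --gpu 0 \
#       --weights ./data/pretrain_model/Resnet50.npy \
#       --imdb voc_2007_trainval --iters 160000 \
#       --cfg ./experiments/cfgs/faster_rcnn_end2end_resnet.yml \
#       --network Resnet50_train --restore 0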
-------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | sh make.sh 5 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | #import fast_rcnn 2 | -------------------------------------------------------------------------------- /lib/cuda_config.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | /* 17 | # If coming across: cudaCheckError() failed : invalid device function. change -arch=sm_xx accordingly. 18 | 19 | # Which CUDA capabilities do we want to pre-build for? 20 | # https://developer.nvidia.com/cuda-gpus 21 | # Compute/shader model Cards 22 | # 6.1 P4, P40, Titan X so CUDA_MODEL = 61 23 | # 6.0 P100 so CUDA_MODEL = 60 24 | # 5.2 M40 25 | # 3.7 K80 26 | # 3.5 K40, K20 27 | # 3.0 K10, Grid K520 (AWS G2) 28 | # Other Nvidia shader models should work, but they will require extra startup 29 | # time as the code is pre-optimized for them. 30 | # CUDA_MODELS=30 35 37 52 60 61 31 | */ 32 | 33 | // DO NOT EDIT: automatically generated file 34 | #ifndef CUDA_CUDA_CONFIG_H_ 35 | #define CUDA_CUDA_CONFIG_H_ 36 | // please modify the TF_CUDA_CAPABILITIES according to the above list and 37 | // your gpu model. 38 | 39 | #define TF_CUDA_CAPABILITIES CudaVersion("3.7") 40 | 41 | #define TF_CUDA_VERSION "8.0" 42 | #define TF_CUDNN_VERSION "6" 43 | 44 | #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-8.0" 45 | 46 | #endif // CUDA_CUDA_CONFIG_H_ -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # TODO: make this fold self-contained, only depends on utils package 9 | 10 | from .imdb import imdb 11 | from .pascal_voc import pascal_voc 12 | from .pascal3d import pascal3d 13 | from .imagenet3d import imagenet3d 14 | from .kitti import kitti 15 | from .kitti_tracking import kitti_tracking 16 | from .nissan import nissan 17 | from .nthu import nthu 18 | from . 
import factory 19 | 20 | ## NOTE: obsolete 21 | import os.path as osp 22 | from .imdb import ROOT_DIR 23 | from .imdb import MATLAB 24 | 25 | # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python 26 | def _which(program): 27 | import os 28 | def is_exe(fpath): 29 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 30 | 31 | fpath, fname = os.path.split(program) 32 | if fpath: 33 | if is_exe(program): 34 | return program 35 | else: 36 | for path in os.environ["PATH"].split(os.pathsep): 37 | path = path.strip('"') 38 | exe_file = os.path.join(path, program) 39 | if is_exe(exe_file): 40 | return exe_file 41 | 42 | return None 43 | """ 44 | if _which(MATLAB) is None: 45 | msg = ("MATLAB command '{}' not found. " 46 | "Please add '{}' to your PATH.").format(MATLAB, MATLAB) 47 | raise EnvironmentError(msg) 48 | """ 49 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | 9 | def unique_boxes(boxes, scale=1.0): 10 | """Return indices of unique boxes.""" 11 | v = np.array([1, 1e3, 1e6, 1e9]) 12 | hashes = np.round(boxes * scale).dot(v) 13 | _, index = np.unique(hashes, return_index=True) 14 | return np.sort(index) 15 | 16 | def xywh_to_xyxy(boxes): 17 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 18 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 19 | 20 | def xyxy_to_xywh(boxes): 21 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 22 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 23 | 24 | def validate_boxes(boxes, width=0, height=0): 25 | """Check that a set of boxes are valid.""" 26 | x1 = boxes[:, 0] 27 | y1 = boxes[:, 1] 28 | x2 = boxes[:, 2] 29 | y2 = boxes[:, 3] 30 | assert (x1 >= 0).all() 31 | assert (y1 >= 0).all() 32 | assert (x2 >= x1).all() 33 | assert (y2 >= y1).all() 34 | assert (x2 < width).all() 35 | assert (y2 < height).all() 36 | 37 | def filter_small_boxes(boxes, min_size): 38 | w = boxes[:, 2] - boxes[:, 0] 39 | h = boxes[:, 3] - boxes[:, 1] 40 | keep = np.where((w >= min_size) & (h > min_size))[0] 41 | return keep 42 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | 10 | __sets = {} 11 | 12 | import numpy as np 13 | 14 | from .pascal_voc import pascal_voc 15 | from .imagenet3d import imagenet3d 16 | from .kitti import kitti 17 | from .kitti_tracking import kitti_tracking 18 | from .nthu import nthu 19 | from .coco import coco 20 | from .kittivoc import kittivoc 21 | 22 | def _selective_search_IJCV_top_k(split, year, top_k): 23 | """Return an imdb that uses the top k proposals from the selective search 24 | IJCV code. 
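    (Note: nothing in this factory file registers the IJCV variant in
    __sets; it appears to be kept only for the legacy selective-search
    proposal path.)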
25 | """ 26 | imdb = pascal_voc(split, year) 27 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb 28 | imdb.config['top_k'] = top_k 29 | return imdb 30 | 31 | # Set up voc__ using selective search "fast" mode 32 | for year in ['2007', '2012', '0712']: 33 | for split in ['train', 'val', 'trainval', 'test']: 34 | name = 'voc_{}_{}'.format(year, split) 35 | __sets[name] = (lambda split=split, year=year: 36 | pascal_voc(split, year)) 37 | 38 | 39 | # Set up kittivoc 40 | for split in ['train', 'val', 'trainval', 'test']: 41 | name = 'kittivoc_{}'.format(split) 42 | print(name) 43 | __sets[name] = (lambda split=split: kittivoc(split)) 44 | 45 | # # KITTI dataset 46 | # for split in ['train', 'val', 'trainval', 'test']: 47 | # name = 'kitti_{}'.format(split) 48 | # print name 49 | # __sets[name] = (lambda split=split: kitti(split)) 50 | 51 | # Set up coco_2014_ 52 | for year in ['2014']: 53 | for split in ['train', 'val', 'minival', 'valminusminival']: 54 | name = 'coco_{}_{}'.format(year, split) 55 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 56 | 57 | # Set up coco_2015_ 58 | for year in ['2015']: 59 | for split in ['test', 'test-dev']: 60 | name = 'coco_{}_{}'.format(year, split) 61 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 62 | 63 | # NTHU dataset 64 | for split in ['71', '370']: 65 | name = 'nthu_{}'.format(split) 66 | print(name) 67 | __sets[name] = (lambda split=split: nthu(split)) 68 | 69 | 70 | def get_imdb(name): 71 | """Get an imdb (image database) by name.""" 72 | if name not in __sets: 73 | print((list_imdbs())) 74 | raise KeyError('Unknown dataset: {}'.format(name)) 75 | return __sets[name]() 76 | 77 | def list_imdbs(): 78 | """List all registered imdbs.""" 79 | return list(__sets.keys()) 80 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | import xml.etree.ElementTree as ET 8 | import os 9 | import pickle 10 | import numpy as np 11 | import pdb 12 | def parse_rec(filename): 13 | """ Parse a PASCAL VOC xml file """ 14 | tree = ET.parse(filename) 15 | objects = [] 16 | for obj in tree.findall('object'): 17 | obj_struct = {} 18 | obj_struct['name'] = obj.find('name').text 19 | obj_struct['pose'] = obj.find('pose').text 20 | obj_struct['truncated'] = int(obj.find('truncated').text) 21 | obj_struct['difficult'] = int(obj.find('difficult').text) 22 | bbox = obj.find('bndbox') 23 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 24 | int(bbox.find('ymin').text), 25 | int(bbox.find('xmax').text), 26 | int(bbox.find('ymax').text)] 27 | objects.append(obj_struct) 28 | 29 | return objects 30 | 31 | def voc_ap(rec, prec, use_07_metric=False): 32 | """ ap = voc_ap(rec, prec, [use_07_metric]) 33 | Compute VOC AP given precision and recall. 34 | If use_07_metric is true, uses the 35 | VOC 07 11 point method (default:False). 36 | """ 37 | if use_07_metric: 38 | # 11 point metric 39 | ap = 0. 40 | for t in np.arange(0., 1.1, 0.1): 41 | if np.sum(rec >= t) == 0: 42 | p = 0 43 | else: 44 | p = np.max(prec[rec >= t]) 45 | ap = ap + p / 11. 
46 | else: 47 | # correct AP calculation 48 | # first append sentinel values at the end 49 | mrec = np.concatenate(([0.], rec, [1.])) 50 | mpre = np.concatenate(([0.], prec, [0.])) 51 | 52 | # compute the precision envelope 53 | for i in range(mpre.size - 1, 0, -1): 54 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 55 | 56 | # to calculate area under PR curve, look for points 57 | # where X axis (recall) changes value 58 | i = np.where(mrec[1:] != mrec[:-1])[0] 59 | 60 | # and sum (\Delta recall) * prec 61 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 62 | return ap 63 | 64 | def voc_eval(detpath, 65 | annopath, 66 | imagesetfile, 67 | classname, 68 | cachedir, 69 | ovthresh=0.5, 70 | use_07_metric=False): 71 | """rec, prec, ap = voc_eval(detpath, 72 | annopath, 73 | imagesetfile, 74 | classname, 75 | [ovthresh], 76 | [use_07_metric]) 77 | 78 | Top level function that does the PASCAL VOC evaluation. 79 | 80 | detpath: Path to detections 81 | detpath.format(classname) should produce the detection results file. 82 | annopath: Path to annotations 83 | annopath.format(imagename) should be the xml annotations file. 84 | imagesetfile: Text file containing the list of images, one image per line. 85 | classname: Category name (duh) 86 | cachedir: Directory for caching the annotations 87 | [ovthresh]: Overlap threshold (default = 0.5) 88 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 89 | (default False) 90 | """ 91 | # assumes detections are in detpath.format(classname) 92 | # assumes annotations are in annopath.format(imagename) 93 | # assumes imagesetfile is a text file with each line an image name 94 | # cachedir caches the annotations in a pickle file 95 | 96 | # first load gt 97 | if not os.path.isdir(cachedir): 98 | os.mkdir(cachedir) 99 | cachefile = os.path.join(cachedir, 'annots.pkl') 100 | # read list of images 101 | with open(imagesetfile, 'r') as f: 102 | lines = f.readlines() 103 | imagenames = [x.strip() for x in lines] 104 | 105 | if not os.path.isfile(cachefile): 106 | # load annots 107 | recs = {} 108 | for i, imagename in enumerate(imagenames): 109 | recs[imagename] = parse_rec(annopath.format(imagename)) 110 | if i % 100 == 0: 111 | print('Reading annotation for {:d}/{:d}'.format( 112 | i + 1, len(imagenames))) 113 | # print(recs) 114 | # save 115 | print('Saving cached annotations to {:s}'.format(cachefile)) 116 | with open(cachefile, 'wb') as f: 117 | pickle.dump(recs, f) 118 | else: 119 | # load 120 | print(cachefile) 121 | with open(cachefile, 'rb') as f: 122 | recs = pickle.load(f) 123 | 124 | # extract gt objects for this class 125 | class_recs = {} 126 | npos = 0 127 | for imagename in imagenames: 128 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 129 | bbox = np.array([x['bbox'] for x in R]) 130 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 131 | det = [False] * len(R) 132 | npos = npos + sum(~difficult) 133 | class_recs[imagename] = {'bbox': bbox, 134 | 'difficult': difficult, 135 | 'det': det} 136 | 137 | # read dets 138 | detfile = detpath.format(classname) 139 | with open(detfile, 'r') as f: 140 | lines = f.readlines() 141 | if any(lines) == 1: 142 | 143 | splitlines = [x.strip().split(' ') for x in lines] 144 | image_ids = [x[0] for x in splitlines] 145 | confidence = np.array([float(x[1]) for x in splitlines]) 146 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 147 | 148 | # sort by confidence 149 | sorted_ind = np.argsort(-confidence) 150 | sorted_scores = np.sort(-confidence) 
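# (sorted_scores above is computed but never read below; the ordering is
# applied through sorted_ind, so detections are processed in
# descending-confidence order)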
151 | BB = BB[sorted_ind, :] 152 | image_ids = [image_ids[x] for x in sorted_ind] 153 | 154 | # go down dets and mark TPs and FPs 155 | nd = len(image_ids) 156 | tp = np.zeros(nd) 157 | fp = np.zeros(nd) 158 | for d in range(nd): 159 | R = class_recs[image_ids[d]] 160 | bb = BB[d, :].astype(float) 161 | ovmax = -np.inf 162 | BBGT = R['bbox'].astype(float) 163 | 164 | if BBGT.size > 0: 165 | # compute overlaps 166 | # intersection 167 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 168 | iymin = np.maximum(BBGT[:, 1], bb[1]) 169 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 170 | iymax = np.minimum(BBGT[:, 3], bb[3]) 171 | iw = np.maximum(ixmax - ixmin + 1., 0.) 172 | ih = np.maximum(iymax - iymin + 1., 0.) 173 | inters = iw * ih 174 | 175 | # union 176 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 177 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 178 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 179 | 180 | overlaps = inters / uni 181 | ovmax = np.max(overlaps) 182 | jmax = np.argmax(overlaps) 183 | 184 | if ovmax > ovthresh: 185 | if not R['difficult'][jmax]: 186 | if not R['det'][jmax]: 187 | tp[d] = 1. 188 | R['det'][jmax] = 1 189 | else: 190 | fp[d] = 1. 191 | else: 192 | fp[d] = 1. 193 | 194 | # compute precision recall 195 | fp = np.cumsum(fp) 196 | tp = np.cumsum(tp) 197 | rec = tp / float(npos) 198 | # avoid divide by zero in case the first detection matches a difficult 199 | # ground truth 200 | prec = tp / np.maximum(tp + fp, np.finfo(np.float32).eps) 201 | ap = voc_ap(rec, prec, use_07_metric) 202 | else: 203 | rec = -1 204 | prec = -1 205 | ap = -1 206 | 207 | return rec, prec, ap 208 | -------------------------------------------------------------------------------- /lib/deform_conv_layer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/lib/deform_conv_layer/__init__.py -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 
31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2017 by Contributors 52 | * \file deformable_im2col.h 53 | * \brief Function definitions of converting an image to 54 | * column matrix based on kernel, padding, and dilation. 55 | * These functions are mainly used in convolution operators. 56 | * The implementation of the im2col and col2im algorithms 57 | * are copied from Caffe with minor interface modifications 58 | * adapting to MXNet data structures. 59 | */ 60 | 61 | #ifndef TENSORFLOW_KERNELS_CONV_OPS_im2col_H_ 62 | #define TENSORFLOW_KERNELS_CONV_OPS_im2col_H_ 63 | 64 | // #define EIGEN_USE_GPU 65 | 66 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 67 | #include "tensorflow/core/framework/register_types.h" 68 | #include "tensorflow/core/framework/tensor_types.h" 69 | #include "tensorflow/core/framework/tensor_shape.h" 70 | #include 71 | #include 72 | 73 | namespace tensorflow { 74 | // typedef Eigen::ThreadPoolDevice CPUDevice; 75 | typedef std::vector TShape; 76 | // typedef Eigen::GpuDevice GPUDevice; 77 | 78 | namespace functor { 79 | 80 | /*! 81 | * \brief cpu function of im2col algorithm 82 | * \param data_im pointer of a image (C, H, W,...) 
83 |  * \param im_shape input image shape in dimensions (N, C, H, W)
84 |  * \param col_shape column buffer shape
85 |  * \param kernel_shape kernel filter shape
86 |  * \param pad pad shape
87 |  * \param stride stride shape
88 |  * \param dilation dilation shape
89 |  * \param data_col start pointer of the column buffer to be filled
90 |  */
91 | template <typename Device, typename DType>
92 | struct deformable_im2col {
93 |   void operator()(const Device& d,
94 |                   const DType* data_im, const DType* data_offset,
95 |                   const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape,
96 |                   const TShape& pad, const TShape& stride, const TShape& dilation,
97 |                   const int deformable_group, DType* data_col);
98 | };
99 |
100 | /*!\brief
101 |  * cpu function of col2im algorithm
102 |  * \param s device stream
103 |  * \param data_col start pointer of the column buffer to be filled
104 |  * \param data_im start pointer of the image data
105 |  * \param data_offset start pointer of the offset data
106 |  * \param im_shape input image shape in dimensions (N, C, H, W)
107 |  * \param col_shape column buffer shape
108 |  * \param kernel_shape kernel filter shape
109 |  * \param pad pad shape
110 |  * \param stride stride shape
111 |  * \param dilation dilation shape
112 |  * \param grad_im pointer of an image (C, H, W, ...) in the image batch
113 |  */
114 | template <typename Device, typename DType>
115 | struct deformable_col2im {
116 |   void operator()(const Device& d,
117 |                   const DType* data_col, const DType* data_offset,
118 |                   const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape,
119 |                   const TShape& pad, const TShape& stride,
120 |                   const TShape& dilation, const int deformable_group,
121 |                   DType* grad_im);
122 | };
123 |
124 |
125 |
126 | template <typename Device, typename DType>
127 | struct deformable_col2im_coord {
128 |   void operator()(const Device& d,
129 |                   const DType* data_col, const DType* data_im, const DType* data_offset, const TShape& im_shape,
130 |                   const TShape& col_shape, const TShape& kernel_shape,
131 |                   const TShape& pad, const TShape& stride,
132 |                   const TShape& dilation, const int deformable_group, DType* grad_offset);
133 | };
134 |
135 | template <typename Device, typename DType>
136 | struct im2col {
137 |   void operator() (const Device& d,
138 |                    const DType* data_im, const TShape& im_shape,
139 |                    const TShape& col_shape, const TShape& kernel_shape,
140 |                    const TShape& pad, const TShape& stride,
141 |                    const TShape& dilation, DType* data_col);
142 | };
143 |
144 | template <typename Device, typename DType>
145 | struct pureAddTo {
146 |   void operator() (const Device& d, const int n, DType* result_data, const DType* right_data);
147 | };
148 |
149 | template <typename Device, typename DType>
150 | struct pureSubTo {
151 |   void operator() (const Device& d, const int n, DType* result_data, const DType* right_data);
152 | };
153 |
154 | template <typename Device, typename DType>
155 | struct setZero {
156 |   void operator() (const Device& d, const int n, DType* result_data);
157 | };
158 |
159 |
160 | } // namespace functor
161 | } // namespace tensorflow
162 | #endif // TENSORFLOW_KERNELS_CONV_OPS_im2col_H_
163 |
-------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_grad.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import tensorflow as tf
3 | from tensorflow.python.framework import ops
4 | from . import deform_conv_op
5 |
6 | @ops.RegisterGradient("DeformConvOp")
7 | def _deform_conv_grad(op, grad):
8 |     """The gradients for `deform_conv`.
9 |     Args:
10 |         op: The `deform_conv` `Operation` that we are differentiating, which we can use
11 |             to find the inputs and outputs of the original op.
12 |         grad: Gradient with respect to the output of the `deform_conv` op.
13 |     Returns:
14 |         Gradients with respect to the inputs of `deform_conv`.
15 |     """
16 |     data = op.inputs[0]
17 |     filter = op.inputs[1]
18 |     offset = op.inputs[2]
19 |
20 |     strides = op.get_attr('strides')
21 |     rates = op.get_attr('rates')
22 |     num_groups = op.get_attr('num_groups')
23 |     padding = op.get_attr('padding')
24 |     data_format = op.get_attr('data_format')
25 |     deformable_group = op.get_attr('deformable_group')
26 |
27 |     # compute gradient
28 |     data_grad = deform_conv_op.deform_conv_grad_op(data, filter, offset, grad, strides, rates, num_groups, deformable_group, padding, data_format)
29 |
30 |     return data_grad
-------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_op.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import tensorflow as tf
3 | import os.path as osp
4 | from tensorflow.python.framework import ops
5 |
6 |
7 | filename = osp.join(osp.dirname(__file__), 'deform_conv.so')
8 | _deform_conv_module = tf.load_op_library(filename)
9 | deform_conv_op = _deform_conv_module.deform_conv_op
10 | deform_conv_grad_op = _deform_conv_module.deform_conv_backprop_op
11 |
12 |
13 | @ops.RegisterGradient("DeformConvOp")
14 | def _deform_conv_grad(op, grad):
15 |     """The gradients for `deform_conv`.
16 |     Args:
17 |         op: The `deform_conv` `Operation` that we are differentiating, which we can use
18 |             to find the inputs and outputs of the original op.
19 |         grad: Gradient with respect to the output of the `deform_conv` op.
20 |     Returns:
21 |         Gradients with respect to the inputs of `deform_conv`.
22 | """ 23 | data = op.inputs[0] 24 | filter = op.inputs[1] 25 | offset = op.inputs[2] 26 | 27 | strides = op.get_attr('strides') 28 | rates = op.get_attr('rates') 29 | num_groups = op.get_attr('num_groups') 30 | padding = op.get_attr('padding') 31 | data_format = op.get_attr('data_format') 32 | deformable_group = op.get_attr('deformable_group') 33 | 34 | # compute gradient 35 | data_grad = deform_conv_grad_op(data, filter, offset, grad, strides, rates, num_groups, deformable_group, padding, data_format) 36 | 37 | return data_grad # List of one Tensor, since we have one input -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_test_mx.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import os 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | gpu_device=mx.gpu() 7 | cpu_device=mx.cpu() 8 | 9 | # trans = np.random.rand(1,2,2,2) 10 | 11 | if not os.path.isfile('test.npz'): 12 | with open("test.npz", 'wb') as f: 13 | arr=np.random.random((8, 6, 4, 5)) 14 | np.save(f, arr) 15 | else: 16 | with open("test.npz", 'rb') as f: 17 | arr = np.load(f) 18 | kernel = mx.nd.array(np.ones((21,2,2,2)), ctx=gpu_device) 19 | trans = mx.nd.array(np.ones((8,8,2,2)), ctx=gpu_device) 20 | arr = mx.nd.array(arr, ctx=gpu_device) 21 | data_grad = mx.nd.zeros_like(arr) 22 | kernel_grad = mx.nd.zeros_like(kernel) 23 | trans_grad = mx.nd.zeros_like(trans) 24 | 25 | def main(): 26 | data_var = mx.symbol.Variable('data') 27 | ker_var = mx.symbol.Variable('kernel') 28 | trans_var = mx.symbol.Variable('trans') 29 | res = mx.contrib.sym.DeformableConvolution(data=data_var, offset=trans_var, weight=ker_var, 30 | num_group=3, no_bias=True, kernel=[2,2], num_filter=21, stride=[2, 2]) 31 | rua = res.bind(ctx=gpu_device, args={'data':arr, 'kernel':kernel, 'trans':trans}, args_grad={'data':data_grad, 'kernel':kernel_grad, 'trans':trans_grad}) 32 | rua.forward(is_train=True) 33 | rua.backward(out_grads=mx.nd.ones((8,21,2,2))) 34 | # print(trans.asnumpy()) 35 | # res_arr = rua.outputs[0].asnumpy() 36 | # print(res_arr) 37 | # print([a.asnumpy() for a in rua.grad_arrays]) 38 | print(data_grad.asnumpy()) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() -------------------------------------------------------------------------------- /lib/deform_conv_layer/deform_conv_util.h: -------------------------------------------------------------------------------- 1 | #include "tensorflow/core/util/tensor_format.h" 2 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 3 | #include "tensorflow/core/framework/tensor.h" 4 | 5 | namespace tensorflow { 6 | typedef std::vector TShape; 7 | 8 | inline int ProdShape(const TensorShape &shape, int start) { 9 | int64 res = 1; 10 | for(int i=start; i ToVector(const TensorShape &shape) { 17 | // int64 res = 1; 18 | std::vector res; 19 | for(int i=0; i 20 | struct DeformPSROIPoolForwardLauncher { 21 | bool operator()( 22 | const DType* bottom_data, const float spatial_scale, const int num_rois, const int channels, const int height, 23 | const int width, const int pooled_height, const int pooled_width, const DType* bottom_rois, const DType* bottom_trans, 24 | const bool no_trans, const float trans_std, const int sample_per_part, const int output_dim, const int num_classes, 25 | const int group_size, const int part_size, DType* top_data, DType* mapping_channel, const Eigen::GpuDevice& d); 26 | }; 27 | 28 | template 29 | struct 
19 |   bool operator() (const DType* top_diff, const DType* mapping_channel, const int num_rois, const float spatial_scale,
20 |     const int channels, const int height, const int width, const int pooled_height, const int pooled_width,
21 |     const int output_dim, DType* bottom_data_diff, DType* bottom_trans_diff, const DType* bottom_data,
22 |     const DType* bottom_rois, const DType* bottom_trans, const bool no_trans, const float trans_std,
23 |     const int sample_per_part, const int group_size, const int part_size,
24 |     const int num_classes, const int channels_each_class, const Eigen::GpuDevice& d);
25 | };
26 |
27 | template <typename Device, typename DType>
28 | struct setZero {
29 |   void operator() (const Device& d, const int n, DType* result_data);
30 | };
31 |
32 | } // namespace tensorflow
33 |
34 | #endif // TENSORFLOW_USER_OPS_DEFORMPSROIPOOLING_OP_GPU_H_
-------------------------------------------------------------------------------- /lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_grad.py: --------------------------------------------------------------------------------
1 | # from __future__ import absolute_import
2 | # import tensorflow as tf
3 | # from tensorflow.python.framework import ops
4 | # import deform_psroi_pooling_op
5 | # import pdb
6 |
7 |
8 | # # @tf.RegisterShape("DeformPSROIPool")
9 | # # def _deform_psroi_pool_shape(op):
10 | # #     """Shape function for the DeformPSROIPool op.
11 |
12 | # #     """
13 | # #     dims_data = op.inputs[0].get_shape().as_list()
14 | # #     channels = dims_data[3]
15 | # #     dims_rois = op.inputs[1].get_shape().as_list()
16 | # #     num_rois = dims_rois[0]
17 | # #     output_dim = op.get_attr('output_dim')
18 | # #     group_size = op.get_attr('group_size')
19 | # #     pooled_height = group_size
20 | # #     pooled_width = group_size
21 |
22 | # #     output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim])
23 | # #     return [output_shape, output_shape]
24 |
25 | # @ops.RegisterGradient("DeformPSROIPool")
26 | # def _deform_psroi_pool_grad(op, grad, _):
27 | #     """The gradients for `Deform_PSROI_pool`.
28 | #     Args:
29 | #         op: The `DeformPSROIPool` `Operation` that we are differentiating, which we can use
30 | #             to find the inputs and outputs of the original op.
31 | #         grad: Gradient with respect to the output of the `DeformPSROIPool` op.
32 | #     Returns:
33 | #         Gradients with respect to the inputs of `DeformPSROIPool`.
34 | # """ 35 | 36 | 37 | # data = op.inputs[0] 38 | # rois = op.inputs[1] 39 | # trans = op.inputs[2] 40 | # mapping_channel = op.outputs[1] 41 | # spatial_scale = op.get_attr('spatial_scale') 42 | # output_dim = op.get_attr('output_dim') 43 | # group_size = op.get_attr('group_size') 44 | # pooled_size = op.get_attr('pooled_size') 45 | # part_size = op.get_attr('part_size') 46 | # sample_per_part = op.get_attr('sample_per_part') 47 | # trans_std = op.get_attr('trans_std') 48 | # no_trans = op.get_attr('no_trans') 49 | 50 | 51 | 52 | # # compute gradient 53 | # #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 54 | # data_grad, trans_grad = deform_psroi_pooling_op.deform_psroi_pool_grad(data, rois, trans, mapping_channel, grad, spatial_scale, 55 | # output_dim, group_size, pooled_size, part_size, sample_per_part, 56 | # trans_std, no_trans) 57 | # # rois_grad = tf.zeros(rois.shape) 58 | # return [data_grad, None, trans_grad] # List of one Tensor, since we have one input 59 | 60 | -------------------------------------------------------------------------------- /lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tensorflow as tf 3 | import numpy as np 4 | import deform_psroi_pooling_op 5 | import deform_psroi_pooling_op_grad 6 | import pdb 7 | 8 | # pdb.set_trace() 9 | data_arr = np.random.rand(1,25,5,5) 10 | # roi = np.array([[0, 0, 0, 4, 4]],dtype=np.float32) 11 | trans_arr = np.random.rand(1,2,2,2) 12 | 13 | # with open("data.npz", 'rb') as f: 14 | # data_arr = np.load(f) 15 | # with open("trans.npz", 'rb') as f: 16 | # trans_arr = np.load(f) 17 | 18 | 19 | rois = tf.convert_to_tensor([ [0, 0, 0, 4, 4]], dtype=tf.float32) 20 | trans = tf.convert_to_tensor(trans_arr, dtype=tf.float32) 21 | hh=tf.convert_to_tensor(data_arr,dtype=tf.float32) 22 | [y2, channels] = deform_psroi_pooling_op.deform_psroi_pool(hh, rois, trans=trans, pooled_size=2, output_dim=1, group_size=1, spatial_scale=1.0, 23 | trans_std=1e-1, sample_per_part=1, part_size=2, no_trans=False) 24 | s = tf.gradients(y2, [hh, trans]) 25 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 26 | # sess.run(s[0]) 27 | # print( sess.run(trans)) 28 | # print( sess.run(y2)) 29 | print( sess.run(s[1])) 30 | # print( sess.run(s[1])) 31 | # pdb.set_trace() 32 | -------------------------------------------------------------------------------- /lib/deform_psroi_pooling_layer/deform_psroi_pooling_op_test_mx.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import mxnet as mx 3 | import numpy as np 4 | 5 | gpu_device=mx.gpu() 6 | # data = np.random.rand(1,25,5,5) 7 | roi = mx.nd.array(np.array([[0, 0, 0, 4, 4]],dtype=np.float32), ctx=gpu_device) 8 | # trans = np.random.rand(1,2,2,2) 9 | 10 | with open("data.npz", 'rb') as f: 11 | data = mx.nd.array(np.load(f), ctx=gpu_device) 12 | with open("trans.npz", 'rb') as f: 13 | trans = mx.nd.array(np.load(f), ctx=gpu_device) 14 | 15 | data_grad = mx.nd.zeros_like(data) 16 | roi_grad = mx.nd.zeros_like(roi) 17 | trans_grad = mx.nd.zeros_like(trans) 18 | 19 | def main(): 20 | data_var = mx.symbol.Variable('data') 21 | roi_var = mx.symbol.Variable('roi') 22 | trans_var = mx.symbol.Variable('trans') 23 | res = mx.contrib.sym.DeformablePSROIPooling(data=data_var, rois=roi_var, trans=trans_var, 
group_size=1, pooled_size=2,
24 |         output_dim=1, no_trans=False, part_size=2, sample_per_part=1, spatial_scale=1., trans_std=0.1)
25 |     rua = res.bind(ctx=gpu_device, args={'data':data, 'roi':roi, 'trans':trans}, args_grad={'data':data_grad, 'roi':roi_grad, 'trans':trans_grad})
26 |     rua.forward(is_train=True)
27 |     rua.backward(out_grads=mx.nd.ones((1, 1, 2, 2)))
28 |     # print(trans.asnumpy())
29 |     # res_arr = rua.outputs[0].asnumpy()
30 |     # print(res_arr)
31 |     # print([a.asnumpy() for a in rua.grad_arrays])
32 |     print(trans_grad.asnumpy())
33 |
34 |
35 | if __name__ == '__main__':
36 |     main()
-------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from . import config
9 | from . import train
10 | from . import test
11 | from . import nms_wrapper
12 | # from nms_wrapper import nms
-------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import warnings
10 |
11 | def bbox_transform(ex_rois, gt_rois):
12 |     """
13 |     computes the distance from ground-truth boxes to the given boxes, normed by their size
14 |     :param ex_rois: n * 4 numpy array, given boxes
15 |     :param gt_rois: n * 4 numpy array, ground-truth boxes
16 |     :return: deltas: n * 4 numpy array, regression targets (dx, dy, dw, dh)
17 |     """
18 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
19 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
20 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
21 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
22 |
23 |     assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
24 |         'Invalid boxes found: {} {}'. \
25 |         format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :])
26 |
27 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
28 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
29 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
30 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
31 |
32 |     # warnings.catch_warnings()
33 |     # warnings.filterwarnings('error')
34 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
35 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
36 |     targets_dw = np.log(gt_widths / ex_widths)
37 |     targets_dh = np.log(gt_heights / ex_heights)
38 |
39 |     targets = np.vstack(
40 |         (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
41 |     return targets
42 |
43 | def bbox_transform_inv(boxes, deltas):
44 |     if boxes.shape[0] == 0:
45 |         return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
46 |
47 |     boxes = boxes.astype(deltas.dtype, copy=False)
48 |
49 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
50 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
51 |     ctr_x = boxes[:, 0] + 0.5 * widths
52 |     ctr_y = boxes[:, 1] + 0.5 * heights
53 |
54 |     dx = deltas[:, 0::4]
55 |     dy = deltas[:, 1::4]
56 |     dw = deltas[:, 2::4]
57 |     dh = deltas[:, 3::4]
58 |
59 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
60 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
61 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
62 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
63 |
64 |     pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
65 |     # x1
66 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
67 |     # y1
68 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
69 |     # x2
70 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
71 |     # y2
72 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
73 |
74 |     return pred_boxes
75 |
76 | def clip_boxes(boxes, im_shape):
77 |     """
78 |     Clip boxes to image boundaries.
79 |     """
80 |
81 |     # x1 >= 0
82 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
83 |     # y1 >= 0
84 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
85 |     # x2 < im_shape[1]
86 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
87 |     # y2 < im_shape[0]
88 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
89 |     return boxes
90 |
-------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | from .config import cfg
10 | from ..nms.gpu_nms import gpu_nms
11 | from ..nms.cpu_nms import cpu_nms
12 |
13 | def nms(dets, thresh, force_cpu=False):
14 |     """Dispatch to either CPU or GPU NMS implementations."""
15 |
16 |     if dets.shape[0] == 0:
17 |         return []
18 |     if cfg.USE_GPU_NMS and not force_cpu:
19 |         return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
20 |     else:
21 |         return cpu_nms(dets, thresh)
22 |
23 | def nms_wrapper(scores, boxes, threshold = 0.7, class_sets = None):
24 |     """
25 |     post-process the results of im_detect
26 |     :param scores: N * K numpy array, detection scores
27 |     :param boxes: N * (K * 4) numpy array, detection boxes
28 |     :param class_sets: e.g.
CLASSES = ('__background__','person','bike','motorbike','car','bus')
29 |     :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]}
30 |     """
31 |     num_class = scores.shape[1] if class_sets is None else len(class_sets)
32 |     assert num_class * 4 == boxes.shape[1],\
33 |         'Detection scores and boxes do not match'
34 |     class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets
35 |
36 |     res = []
37 |     for ind, cls in enumerate(class_sets[1:]):
38 |         ind += 1 # skip background
39 |         cls_boxes = boxes[:, 4*ind : 4*(ind+1)]
40 |         cls_scores = scores[:, ind]
41 |         # if ind == 1:
42 |         print(np.max(cls_scores))
43 |         dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
44 |         keep = nms(dets, thresh=0.3)
45 |         dets = dets[keep, :]
46 |         # if ind == 1:
47 |         print(np.max(dets[:, 4]))
48 |         dets = dets[np.where(dets[:, 4] > threshold)]
49 |         r = {}
50 |         if dets.shape[0] > 0:
51 |             r['class'], r['dets'] = cls, dets
52 |         else:
53 |             r['class'], r['dets'] = cls, None
54 |         res.append(r)
55 |     return res
-------------------------------------------------------------------------------- /lib/gt_data_layer/__init__.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import roidb
8 | # from layer import GtDataLayer
-------------------------------------------------------------------------------- /lib/gt_data_layer/layer.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """The data layer used during training to train a Fast R-CNN network.
9 |
10 | GtDataLayer implements a Caffe Python layer.
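
A minimal, hypothetical prototxt hook-up (sketch only, added for
illustration; it simply matches the parameters read in setup() below,
and the layer name here is made up):

    layer {
      name: "gt_data"
      type: "Python"
      top: "data"
      top: "info_boxes"
      top: "parameters"
      python_param {
        module: "gt_data_layer.layer"
        layer: "GtDataLayer"
        param_str: "{'num_classes': 21}"
      }
    }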
11 | """ 12 | # TODO: make caffe irrelevant, or remove caffe backend from this projcet 13 | import caffe 14 | 15 | import numpy as np 16 | import yaml 17 | from multiprocessing import Process, Queue 18 | 19 | from .minibatch import get_minibatch 20 | 21 | # TODO: make fast_rcnn irrelevant 22 | # >>>> obsolete, because it depends on sth outside of this project 23 | from ..fast_rcnn.config import cfg 24 | # <<<< obsolete 25 | 26 | class GtDataLayer(caffe.Layer): 27 | """Fast R-CNN data layer used for training.""" 28 | 29 | def _shuffle_roidb_inds(self): 30 | """Randomly permute the training roidb.""" 31 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 32 | self._cur = 0 33 | 34 | def _get_next_minibatch_inds(self): 35 | """Return the roidb indices for the next minibatch.""" 36 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 37 | self._shuffle_roidb_inds() 38 | 39 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 40 | self._cur += cfg.TRAIN.IMS_PER_BATCH 41 | 42 | """ 43 | # sample images with gt objects 44 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 45 | i = 0 46 | while (i < cfg.TRAIN.IMS_PER_BATCH): 47 | ind = self._perm[self._cur] 48 | num_objs = self._roidb[ind]['boxes'].shape[0] 49 | if num_objs != 0: 50 | db_inds[i] = ind 51 | i += 1 52 | 53 | self._cur += 1 54 | if self._cur >= len(self._roidb): 55 | self._shuffle_roidb_inds() 56 | """ 57 | 58 | return db_inds 59 | 60 | def _get_next_minibatch(self): 61 | """Return the blobs to be used for the next minibatch.""" 62 | db_inds = self._get_next_minibatch_inds() 63 | minibatch_db = [self._roidb[i] for i in db_inds] 64 | return get_minibatch(minibatch_db, self._num_classes) 65 | 66 | # this function is called in training the net 67 | def set_roidb(self, roidb): 68 | """Set the roidb to be used by this layer during training.""" 69 | self._roidb = roidb 70 | self._shuffle_roidb_inds() 71 | 72 | def setup(self, bottom, top): 73 | """Setup the GtDataLayer.""" 74 | 75 | # parse the layer parameter string, which must be valid YAML 76 | layer_params = yaml.load(self.param_str_) 77 | 78 | self._num_classes = layer_params['num_classes'] 79 | 80 | self._name_to_top_map = { 81 | 'data': 0, 82 | 'info_boxes': 1, 83 | 'parameters': 2} 84 | 85 | # data blob: holds a batch of N images, each with 3 channels 86 | # The height and width (100 x 100) are dummy values 87 | num_scale_base = len(cfg.TRAIN.SCALES_BASE) 88 | top[0].reshape(num_scale_base, 3, 100, 100) 89 | 90 | # info boxes blob 91 | top[1].reshape(1, 18) 92 | 93 | # parameters blob 94 | num_scale = len(cfg.TRAIN.SCALES) 95 | num_aspect = len(cfg.TRAIN.ASPECTS) 96 | top[2].reshape(2 + 2*num_scale + 2*num_aspect) 97 | 98 | def forward(self, bottom, top): 99 | """Get blobs and copy them into this layer's top blob vector.""" 100 | blobs = self._get_next_minibatch() 101 | 102 | for blob_name, blob in blobs.items(): 103 | top_ind = self._name_to_top_map[blob_name] 104 | # Reshape net's input blobs 105 | top[top_ind].reshape(*(blob.shape)) 106 | # Copy data into net's input blobs 107 | top[top_ind].data[...] 
= blob.astype(np.float32, copy=False) 108 | 109 | def backward(self, top, propagate_down, bottom): 110 | """This layer does not propagate gradients.""" 111 | pass 112 | 113 | def reshape(self, bottom, top): 114 | """Reshaping happens during the call to forward.""" 115 | pass 116 | -------------------------------------------------------------------------------- /lib/gt_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | 10 | import numpy as np 11 | import numpy.random as npr 12 | import cv2 13 | 14 | from ..utils.blob import prep_im_for_blob, im_list_to_blob 15 | 16 | # TODO: make fast_rcnn irrelevant 17 | # >>>> obsolete, because it depends on sth outside of this project 18 | from ..fast_rcnn.config import cfg 19 | # <<<< obsolete 20 | 21 | def get_minibatch(roidb, num_classes): 22 | """Given a roidb, construct a minibatch sampled from it.""" 23 | num_images = len(roidb) 24 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 25 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 26 | format(num_images, cfg.TRAIN.BATCH_SIZE) 27 | 28 | # Get the input image blob, formatted for caffe 29 | im_blob = _get_image_blob(roidb) 30 | 31 | # build the box information blob 32 | info_boxes_blob = np.zeros((0, 18), dtype=np.float32) 33 | num_scale = len(cfg.TRAIN.SCALES) 34 | for i in range(num_images): 35 | info_boxes = roidb[i]['info_boxes'] 36 | 37 | # change the batch index 38 | info_boxes[:,2] += i * num_scale 39 | info_boxes[:,7] += i * num_scale 40 | 41 | info_boxes_blob = np.vstack((info_boxes_blob, info_boxes)) 42 | 43 | # build the parameter blob 44 | num_aspect = len(cfg.TRAIN.ASPECTS) 45 | num = 2 + 2 * num_scale + 2 * num_aspect 46 | parameters_blob = np.zeros((num), dtype=np.float32) 47 | parameters_blob[0] = num_scale 48 | parameters_blob[1] = num_aspect 49 | parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES 50 | parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING 51 | parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS 52 | parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS 53 | 54 | # For debug visualizations 55 | # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob) 56 | 57 | blobs = {'data': im_blob, 58 | 'info_boxes': info_boxes_blob, 59 | 'parameters': parameters_blob} 60 | 61 | return blobs 62 | 63 | def _get_image_blob(roidb): 64 | """Builds an input blob from the images in the roidb at the different scales. 
65 | """ 66 | num_images = len(roidb) 67 | processed_ims = [] 68 | 69 | for i in range(num_images): 70 | # read image 71 | im = cv2.imread(roidb[i]['image']) 72 | if roidb[i]['flipped']: 73 | im = im[:, ::-1, :] 74 | 75 | im_orig = im.astype(np.float32, copy=True) 76 | im_orig -= cfg.PIXEL_MEANS 77 | 78 | # build image pyramid 79 | for im_scale in cfg.TRAIN.SCALES_BASE: 80 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 81 | interpolation=cv2.INTER_LINEAR) 82 | 83 | processed_ims.append(im) 84 | 85 | # Create a blob to hold the input images 86 | blob = im_list_to_blob(processed_ims) 87 | 88 | return blob 89 | 90 | def _project_im_rois(im_rois, im_scale_factor): 91 | """Project image RoIs into the rescaled training image.""" 92 | rois = im_rois * im_scale_factor 93 | return rois 94 | 95 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 96 | """Bounding-box regression targets are stored in a compact form in the 97 | roidb. 98 | 99 | This function expands those targets into the 4-of-4*K representation used 100 | by the network (i.e. only one class has non-zero targets). The loss weights 101 | are similarly expanded. 102 | 103 | Returns: 104 | bbox_target_data (ndarray): N x 4K blob of regression targets 105 | bbox_loss_weights (ndarray): N x 4K blob of loss weights 106 | """ 107 | clss = bbox_target_data[:, 0] 108 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 109 | bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 110 | inds = np.where(clss > 0)[0] 111 | for ind in inds: 112 | cls = clss[ind] 113 | start = 4 * cls 114 | end = start + 4 115 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 116 | bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.] 117 | return bbox_targets, bbox_loss_weights 118 | 119 | 120 | def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob): 121 | """Visualize a mini-batch for debugging.""" 122 | import matplotlib.pyplot as plt 123 | for i in range(rois_blob.shape[0]): 124 | rois = rois_blob[i, :] 125 | im_ind = rois[0] 126 | roi = rois[2:] 127 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 128 | im += cfg.PIXEL_MEANS 129 | im = im[:, :, (2, 1, 0)] 130 | im = im.astype(np.uint8) 131 | cls = labels_blob[i] 132 | subcls = sublabels_blob[i] 133 | plt.imshow(im) 134 | print('class: ', cls, ' subclass: ', subcls) 135 | plt.gca().add_patch( 136 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 137 | roi[3] - roi[1], fill=False, 138 | edgecolor='r', linewidth=3) 139 | ) 140 | plt.show() 141 | -------------------------------------------------------------------------------- /lib/gt_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | import scipy.sparse 12 | import PIL 13 | import math 14 | import os 15 | import pickle 16 | import pdb 17 | 18 | from ..utils.cython_bbox import bbox_overlaps 19 | from ..utils.boxes_grid import get_boxes_grid 20 | # TODO: make fast_rcnn irrelevant 21 | # >>>> obsolete, because it depends on sth outside of this project 22 | from ..fast_rcnn.config import cfg 23 | # <<<< obsolete 24 | 25 | 26 | def 
prepare_roidb(imdb): 27 | """Enrich the imdb's roidb by adding some derived quantities that 28 | are useful for training. This function precomputes the maximum 29 | overlap, taken over ground-truth boxes, between each ROI and 30 | each ground-truth box. The class with maximum overlap is also 31 | recorded. 32 | """ 33 | cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl') 34 | if os.path.exists(cache_file): 35 | with open(cache_file, 'rb') as fid: 36 | imdb._roidb = pickle.load(fid) 37 | print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)) 38 | return 39 | 40 | roidb = imdb.roidb 41 | for i in range(len(imdb.image_index)): 42 | roidb[i]['image'] = imdb.image_path_at(i) 43 | boxes = roidb[i]['boxes'] 44 | labels = roidb[i]['gt_classes'] 45 | info_boxes = np.zeros((0, 18), dtype=np.float32) 46 | 47 | if boxes.shape[0] == 0: 48 | roidb[i]['info_boxes'] = info_boxes 49 | continue 50 | 51 | # compute grid boxes 52 | s = PIL.Image.open(imdb.image_path_at(i)).size 53 | image_height = s[1] 54 | image_width = s[0] 55 | boxes_grid, cx, cy = get_boxes_grid(image_height, image_width) 56 | 57 | # for each scale 58 | for scale_ind, scale in enumerate(cfg.TRAIN.SCALES): 59 | boxes_rescaled = boxes * scale 60 | 61 | # compute overlap 62 | overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float)) 63 | max_overlaps = overlaps.max(axis = 1) 64 | argmax_overlaps = overlaps.argmax(axis = 1) 65 | max_classes = labels[argmax_overlaps] 66 | 67 | # select positive boxes 68 | fg_inds = [] 69 | for k in range(1, imdb.num_classes): 70 | fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0]) 71 | 72 | if len(fg_inds) > 0: 73 | gt_inds = argmax_overlaps[fg_inds] 74 | # bounding box regression targets 75 | gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:]) 76 | # scale mapping for RoI pooling 77 | scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind] 78 | scale_map = cfg.TRAIN.SCALES[scale_ind_map] 79 | # contruct the list of positive boxes 80 | # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target) 81 | info_box = np.zeros((len(fg_inds), 18), dtype=np.float32) 82 | info_box[:, 0] = cx[fg_inds] 83 | info_box[:, 1] = cy[fg_inds] 84 | info_box[:, 2] = scale_ind 85 | info_box[:, 3:7] = boxes_grid[fg_inds,:] 86 | info_box[:, 7] = scale_ind_map 87 | info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale 88 | info_box[:, 12] = labels[gt_inds] 89 | info_box[:, 14:] = gt_targets 90 | info_boxes = np.vstack((info_boxes, info_box)) 91 | 92 | roidb[i]['info_boxes'] = info_boxes 93 | 94 | with open(cache_file, 'wb') as fid: 95 | pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) 96 | print('wrote gt roidb prepared to {}'.format(cache_file)) 97 | 98 | def add_bbox_regression_targets(roidb): 99 | """Add information needed to train bounding-box regressors.""" 100 | assert len(roidb) > 0 101 | assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?' 
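    # Added note (not in the original source): the loop below accumulates
    # per-class sums and squared sums so that, per class,
    #   mean = sums / class_counts
    #   std  = sqrt(squared_sums / class_counts - mean**2)
    # i.e. the identity var(x) = E(x^2) - E(x)^2. For example, targets
    # [1., 3.] for one class give mean = 2. and std = sqrt(5. - 4.) = 1.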
102 | 103 | num_images = len(roidb) 104 | # Infer number of classes from the number of columns in gt_overlaps 105 | num_classes = roidb[0]['gt_overlaps'].shape[1] 106 | 107 | # Compute values needed for means and stds 108 | # var(x) = E(x^2) - E(x)^2 109 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 110 | sums = np.zeros((num_classes, 4)) 111 | squared_sums = np.zeros((num_classes, 4)) 112 | for im_i in range(num_images): 113 | targets = roidb[im_i]['info_boxes'] 114 | for cls in range(1, num_classes): 115 | cls_inds = np.where(targets[:, 12] == cls)[0] 116 | if cls_inds.size > 0: 117 | class_counts[cls] += cls_inds.size 118 | sums[cls, :] += targets[cls_inds, 14:].sum(axis=0) 119 | squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0) 120 | 121 | means = sums / class_counts 122 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 123 | 124 | # Normalize targets 125 | for im_i in range(num_images): 126 | targets = roidb[im_i]['info_boxes'] 127 | for cls in range(1, num_classes): 128 | cls_inds = np.where(targets[:, 12] == cls)[0] 129 | roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :] 130 | if stds[cls, 0] != 0: 131 | roidb[im_i]['info_boxes'][cls_inds, 14:] /= stds[cls, :] 132 | 133 | # These values will be needed for making predictions 134 | # (the predicts will need to be unnormalized and uncentered) 135 | return means.ravel(), stds.ravel() 136 | 137 | def _compute_targets(ex_rois, gt_rois): 138 | """Compute bounding-box regression targets for an image. The targets are scale invariance""" 139 | 140 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS 141 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS 142 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 143 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 144 | 145 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS 146 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS 147 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 148 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 149 | 150 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 151 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 152 | targets_dw = np.log(gt_widths / ex_widths) 153 | targets_dh = np.log(gt_heights / ex_heights) 154 | 155 | targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32) 156 | targets[:, 0] = targets_dx 157 | targets[:, 1] = targets_dy 158 | targets[:, 2] = targets_dw 159 | targets[:, 3] = targets_dh 160 | return targets 161 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 3 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 4 | NSYNC_INC=$TF_INC"/external/nsync/public" 5 | # please modify $ARCH according to the following list and your gpu model. 6 | ARCH=sm_37 7 | echo $TF_INC 8 | 9 | 10 | # If coming across: cudaCheckError() failed : invalid device function. change -arch=sm_xx accordingly. 11 | 12 | # Which CUDA capabilities do we want to pre-build for? 13 | # https://developer.nvidia.com/cuda-gpus 14 | # Compute/shader model Cards 15 | # 6.1 P4, P40, Titan X so CUDA_MODEL = 61 16 | # 6.0 P100 so CUDA_MODEL = 60 17 | # 5.2 M40 18 | # 3.7 K80 19 | # 3.5 K40, K20 20 | # 3.0 K10, Grid K520 (AWS G2) 21 | # Other Nvidia shader models should work, but they will require extra startup 22 | # time as the code is pre-optimized for them. 
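# Example (added note, values are illustrative -- check your own card against
# the page above): a GTX 1080 has compute capability 6.1, so you would set
# ARCH=sm_61; a K80 (compute capability 3.7) matches the default ARCH=sm_37.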
23 | # CUDA_MODELS=30 35 37 52 60 61 24 | 25 | 26 | 27 | CUDA_HOME=/usr/local/cuda/ 28 | 29 | if [ ! -f $TF_INC/tensorflow/stream_executor/cuda/cuda_config.h ]; then 30 | cp ./cuda_config.h $TF_INC/tensorflow/stream_executor/cuda/ 31 | fi 32 | 33 | cd roi_pooling_layer 34 | 35 | nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \ 36 | -I $TF_INC -I $NSYNC_INC -D GOOGLE_CUDA=1 -L $CUDA_HOME/lib64 -x cu -Xcompiler -fPIC -D GOOGLE_CUDA -arch=$ARCH 37 | 38 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below 39 | g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ 40 | roi_pooling_op.cu.o -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0 41 | 42 | # for gcc5-built tf 43 | # g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \ 44 | # roi_pooling_op.cu.o -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0 45 | cd .. 46 | 47 | 48 | # add building psroi_pooling layer 49 | cd psroi_pooling_layer 50 | nvcc -std=c++11 -c -o psroi_pooling_op.cu.o psroi_pooling_op_gpu.cu.cc \ 51 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -D GOOGLE_CUDA -arch=$ARCH 52 | 53 | 54 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below 55 | g++ -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc \ 56 | psroi_pooling_op.cu.o -I $TF_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 57 | # for gcc5-built tf 58 | # g++ -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc \ 59 | # psroi_pooling_op.cu.o -I $TF_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 60 | 61 | cd .. 62 | 63 | cd deform_psroi_pooling_layer 64 | nvcc -std=c++11 -c -o deform_psroi_pooling_op.cu.o deform_psroi_pooling_op_gpu.cu.cc \ 65 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -D GOOGLE_CUDA -arch=$ARCH 66 | 67 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the three lines below 68 | g++ -std=c++11 -shared -o deform_psroi_pooling.so deform_psroi_pooling_op.cc deform_psroi_pooling_op.cu.o -I \ 69 | $TF_INC -fPIC -lcudart -L $CUDA_HOME/lib64 -D GOOGLE_CUDA=1 -Wfatal-errors -I \ 70 | $CUDA_HOME/include 71 | # for gcc5-built tf 72 | # g++ -std=c++11 -shared -o deform_psroi_pooling.so deform_psroi_pooling_op.cc deform_psroi_pooling_op.cu.o \ 73 | # -I $TF_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -D_GLIBCXX_USE_CXX11_ABI=0 74 | cd .. 75 | 76 | cd deform_conv_layer 77 | nvcc -std=c++11 -ccbin=/usr/bin/g++-4.9 -c -o deform_conv.cu.o deform_conv.cu.cc -I $TF_INC -I $NSYNC_INC -D\ 78 | GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -L /usr/local/cuda-8.0/lib64/ --expt-relaxed-constexpr -arch=$ARCH 79 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the three lines below 80 | g++ -std=c++11 -shared -o deform_conv.so deform_conv.cc deform_conv.cu.o -I\ 81 | $TF_INC -I $NSYNC_INC -fPIC -lcudart -L $CUDA_HOME/lib64 -D GOOGLE_CUDA=1 -Wfatal-errors \ 82 | -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0 83 | # for gcc5-built tf 84 | # g++ -std=c++11 -shared -o deform_conv.so deform_conv.cc deform_conv.cu.o \ 85 | # -I $TF_INC -I $NSYNC_INC -fPIC -D GOOGLE_CUDA -lcudart -L $CUDA_HOME/lib64 -L $TF_LIB -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=0 86 | 87 | cd .. 
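
# Optional smoke test (added note, not part of the original script): check
# that the freshly built ops load into TensorFlow before training. Run from
# this lib/ directory; the paths assume the default output names above.
# python -c "import tensorflow as tf; tf.load_op_library('./roi_pooling_layer/roi_pooling.so'); print('roi_pooling OK')"
# python -c "import tensorflow as tf; tf.load_op_library('./deform_conv_layer/deform_conv.so'); print('deform_conv OK')"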
88 | -------------------------------------------------------------------------------- /lib/networks/.VGGnet.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/lib/networks/.VGGnet.py.swo -------------------------------------------------------------------------------- /lib/networks/VGGnet_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .network import Network 3 | from ..fast_rcnn.config import cfg 4 | 5 | 6 | class VGGnet_test(Network): 7 | def __init__(self, trainable=True): 8 | self.inputs = [] 9 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 10 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 11 | self.keep_prob = tf.placeholder(tf.float32) 12 | self.layers = dict({'data': self.data, 'im_info': self.im_info}) 13 | self.trainable = trainable 14 | self.setup() 15 | 16 | def setup(self): 17 | # n_classes = 21 18 | n_classes = cfg.NCLASSES 19 | # anchor_scales = [8, 16, 32] 20 | anchor_scales = cfg.ANCHOR_SCALES 21 | _feat_stride = [16, ] 22 | 23 | (self.feed('data') 24 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 25 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 26 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 27 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 28 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 29 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 30 | .conv(3, 3, 256, 1, 1, name='conv3_1') 31 | .conv(3, 3, 256, 1, 1, name='conv3_2') 32 | .conv(3, 3, 256, 1, 1, name='conv3_3') 33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 34 | .conv(3, 3, 512, 1, 1, name='conv4_1') 35 | .conv(3, 3, 512, 1, 1, name='conv4_2') 36 | .conv(3, 3, 512, 1, 1, name='conv4_3') 37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 38 | .conv(3, 3, 512, 1, 1, name='conv5_1') 39 | .conv(3, 3, 512, 1, 1, name='conv5_2') 40 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 41 | 42 | (self.feed('conv5_3') 43 | .conv(3, 3, 512, 1, 1, name='rpn_conv/3x3') 44 | .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score')) 45 | 46 | (self.feed('rpn_conv/3x3') 47 | .conv(1, 1, len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu=False, name='rpn_bbox_pred')) 48 | 49 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) 50 | (self.feed('rpn_cls_score') 51 | .spatial_reshape_layer(2, name='rpn_cls_score_reshape') 52 | .spatial_softmax(name='rpn_cls_prob')) 53 | 54 | # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) 55 | (self.feed('rpn_cls_prob') 56 | .spatial_reshape_layer(len(anchor_scales) * 3 * 2, name='rpn_cls_prob_reshape')) 57 | 58 | (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info') 59 | .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois')) 60 | 61 | (self.feed('conv5_3') 62 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_1_offset', padding='SAME', initializer='zeros')) 63 | (self.feed('conv5_3', 'conv6_1_offset') 64 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_1')) 65 | (self.feed('conv6_1') 66 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_2_offset', padding='SAME', initializer='zeros')) 67 | (self.feed('conv6_1', 'conv6_2_offset') 68 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_2')) 69 | 
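        # Added note (not in the original network definition): each deformable
        # conv above pairs with an offset head whose 72 output channels are
        # 2 (x, y) * 9 (3x3 kernel taps) * 4 (num_deform_group); the 'zeros'
        # initializer makes training start from a plain dilated 3x3 conv.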
(self.feed('conv6_2', 'rois') 70 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=True, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='offset_t') 71 | .fc(num_out=7 * 7 * 2, name='offset', relu=False) 72 | .reshape(shape=(-1,2,7,7), name='offset_reshape')) 73 | (self.feed('conv6_2', 'rois', 'offset_reshape') 74 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='pool_6') 75 | .fc(4096, name='fc6') 76 | .dropout(0.5, name='drop6') 77 | .fc(4096, name='fc7') 78 | .dropout(0.5, name='drop7') 79 | .fc(n_classes, relu=False, name='cls_score') 80 | .softmax(name='cls_prob')) 81 | 82 | (self.feed('drop7') 83 | .fc(n_classes*4, relu=False, name='bbox_pred')) 84 | 85 | -------------------------------------------------------------------------------- /lib/networks/VGGnet_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .network import Network 3 | from ..fast_rcnn.config import cfg 4 | 5 | class VGGnet_train(Network): 6 | def __init__(self, trainable=True): 7 | self.inputs = [] 8 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='data') 9 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3], name='im_info') 10 | self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='gt_boxes') 11 | self.gt_ishard = tf.placeholder(tf.int32, shape=[None], name='gt_ishard') 12 | self.dontcare_areas = tf.placeholder(tf.float32, shape=[None, 4], name='dontcare_areas') 13 | self.keep_prob = tf.placeholder(tf.float32) 14 | self.layers = dict({'data':self.data, 'im_info':self.im_info, 'gt_boxes':self.gt_boxes,\ 15 | 'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas}) 16 | self.trainable = trainable 17 | self.setup() 18 | 19 | def setup(self): 20 | 21 | # n_classes = 21 22 | n_classes = cfg.NCLASSES 23 | # anchor_scales = [8, 16, 32] 24 | anchor_scales = cfg.ANCHOR_SCALES 25 | _feat_stride = [16, ] 26 | 27 | (self.feed('data') 28 | .conv(3, 3, 64, 1, 1, name='conv1_1', trainable=False) 29 | .conv(3, 3, 64, 1, 1, name='conv1_2', trainable=False) 30 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 31 | .conv(3, 3, 128, 1, 1, name='conv2_1', trainable=False) 32 | .conv(3, 3, 128, 1, 1, name='conv2_2', trainable=False) 33 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 34 | .conv(3, 3, 256, 1, 1, name='conv3_1') 35 | .conv(3, 3, 256, 1, 1, name='conv3_2') 36 | .conv(3, 3, 256, 1, 1, name='conv3_3') 37 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 38 | .conv(3, 3, 512, 1, 1, name='conv4_1') 39 | .conv(3, 3, 512, 1, 1, name='conv4_2') 40 | .conv(3, 3, 512, 1, 1, name='conv4_3') 41 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 42 | .conv(3, 3, 512, 1, 1, name='conv5_1') 43 | .conv(3, 3, 512, 1, 1, name='conv5_2') 44 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 45 | #========= RPN ============ 46 | (self.feed('conv5_3') 47 | .conv(3,3,512,1,1,name='rpn_conv/3x3')) 48 | 49 | # Loss of rpn_cls & rpn_boxes 50 | # shape is (1, H, W, A x 4) and (1, H, W, A x 2) 51 | (self.feed('rpn_conv/3x3') 52 | .conv(1,1,len(anchor_scales) * 3 * 4, 1, 1, padding='VALID', relu = False, name='rpn_bbox_pred')) 53 | (self.feed('rpn_conv/3x3') 54 | .conv(1, 1, len(anchor_scales) * 3 * 2, 1, 1, padding='VALID', relu=False, name='rpn_cls_score')) 55 | 56 | # generating training labels on the fly 57 | # output: rpn_labels(HxWxA, 2) 
rpn_bbox_targets(HxWxA, 4) rpn_bbox_inside_weights rpn_bbox_outside_weights 58 | (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info') 59 | .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' )) 60 | 61 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) 62 | (self.feed('rpn_cls_score') 63 | .spatial_reshape_layer(2, name = 'rpn_cls_score_reshape') 64 | .spatial_softmax(name='rpn_cls_prob')) 65 | 66 | # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) 67 | (self.feed('rpn_cls_prob') 68 | .spatial_reshape_layer(len(anchor_scales)*3*2, name = 'rpn_cls_prob_reshape')) 69 | 70 | # ========= RoI Proposal ============ 71 | # add the delta(output) to anchors then 72 | # choose some reasonabel boxes, considering scores, ratios, size and iou 73 | # rpn_rois <- (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2] 74 | (self.feed('rpn_cls_prob_reshape','rpn_bbox_pred','im_info') 75 | .proposal_layer(_feat_stride, anchor_scales, 'TRAIN', name = 'rpn_rois')) 76 | 77 | # matching boxes and groundtruth, 78 | # and randomly sample some rois and labels for RCNN 79 | (self.feed('rpn_rois','gt_boxes', 'gt_ishard', 'dontcare_areas') 80 | .proposal_target_layer(n_classes,name = 'roi-data')) 81 | 82 | #========= RCNN ============ 83 | (self.feed('conv5_3') 84 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_1_offset', padding='SAME', initializer='zeros')) 85 | (self.feed('conv5_3', 'conv6_1_offset') 86 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_1')) 87 | (self.feed('conv6_1') 88 | .conv(3, 3, 72, 1, 1, biased=True, rate=2, relu=False, name='conv6_2_offset', padding='SAME', initializer='zeros')) 89 | (self.feed('conv6_1', 'conv6_2_offset') 90 | .deform_conv(3, 3, 512, 1, 1, biased=False, rate=2, relu=True, num_deform_group=4, name='conv6_2')) 91 | (self.feed('conv6_2', 'rois') 92 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=True, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='offset_t') 93 | .fc(num_out=7 * 7 * 2, name='offset', relu=False) 94 | .reshape(shape=(-1,2,7,7), name='offset_reshape')) 95 | (self.feed('conv6_2', 'rois', 'offset_reshape') 96 | .deform_psroi_pool(group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, output_dim=256, trans_std=1e-1, spatial_scale=0.0625, name='pool_6') 97 | .fc(4096, name='fc6') 98 | .dropout(0.5, name='drop6') 99 | .fc(4096, name='fc7') 100 | .dropout(0.5, name='drop7') 101 | .fc(n_classes, relu=False, name='cls_score') 102 | .softmax(name='cls_prob')) 103 | 104 | (self.feed('drop7') 105 | .fc(n_classes*4, relu=False, name='bbox_pred')) 106 | -------------------------------------------------------------------------------- /lib/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from .VGGnet_train import VGGnet_train 9 | from .VGGnet_test import VGGnet_test 10 | from .Resnet50_train import Resnet50_train 11 | from .Resnet50_test import Resnet50_test 12 | from .Resnet101_train import Resnet101_train 13 | from .Resnet101_test import Resnet101_test 14 | from . 
import factory
15 |
-------------------------------------------------------------------------------- /lib/networks/caffenet.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .network import Network
3 |
4 | class caffenet(Network):
5 |     def __init__(self, trainable=True):
6 |         self.inputs = []
7 |         self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
8 |         self.rois = tf.placeholder(tf.float32, shape=[None, 5])
9 |         self.keep_prob = tf.placeholder(tf.float32)
10 |         self.layers = dict({'data':self.data, 'rois':self.rois})
11 |         self.trainable = trainable
12 |         self.setup()
13 |
14 |     def setup(self):
15 |         (self.feed('data')
16 |              .conv(11, 11, 96, 4, 4, padding='VALID', name='conv1', trainable=False)
17 |              .max_pool(3, 3, 2, 2, padding='VALID', name='pool1')
18 |              .lrn(2, 2e-05, 0.75, name='norm1')
19 |              .conv(5, 5, 256, 1, 1, group=2, name='conv2')
20 |              .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
21 |              .lrn(2, 2e-05, 0.75, name='norm2')
22 |              .conv(3, 3, 384, 1, 1, name='conv3')
23 |              .conv(3, 3, 384, 1, 1, group=2, name='conv4')
24 |              .conv(3, 3, 256, 1, 1, group=2, name='conv5')
25 |              .feature_extrapolating([1.0, 2.0, 3.0, 4.0], 4, 4, name='conv5_feature'))
26 |
27 |         (self.feed('conv5_feature', 'rois')
28 |              .roi_pool(6, 6, 1.0/16, name='pool5')
29 |              .fc(4096, name='fc6')
30 |              .dropout(self.keep_prob, name='drop6')
31 |              .fc(4096, name='fc7')
32 |              .dropout(self.keep_prob, name='drop7')
33 |              .fc(174, relu=False, name='subcls_score')
34 |              .softmax(name='subcls_prob'))
35 |
36 |         (self.feed('subcls_score')
37 |              .fc(4, relu=False, name='cls_score')
38 |              .softmax(name='cls_prob'))
39 |
40 |         (self.feed('subcls_score')
41 |              .fc(16, relu=False, name='bbox_pred'))
-------------------------------------------------------------------------------- /lib/networks/factory.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SubCNN_TF
3 | # Copyright (c) 2016 CVGL Stanford
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Yu Xiang
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting networks by name."""
9 |
10 | __sets = {}
11 |
12 | from .VGGnet_test import VGGnet_test
13 | from .VGGnet_testold import VGGnet_testold
14 | from .VGGnet_train import VGGnet_train
15 | from .Resnet50_test import Resnet50_test
16 | from .Resnet50_train import Resnet50_train
17 | from .Resnet101_test import Resnet101_test
18 | from .Resnet101_train import Resnet101_train
19 | from .PVAnet_train import PVAnet_train
20 | from .PVAnet_test import PVAnet_test
21 |
22 |
23 | def get_network(name):
24 |     """Get a network by name."""
25 |     if name.split('_')[0] == 'VGGnet':
26 |         if name.split('_')[1] == 'test':
27 |             return VGGnet_test()
28 |         elif name.split('_')[1] == 'train':
29 |             return VGGnet_train()
30 |         elif name.split('_')[1] == 'testold':
31 |             return VGGnet_testold()
32 |         else:
33 |             raise KeyError('Unknown network: {}'.format(name))
34 |     elif name.split('_')[0] == 'Resnet50':
35 |         if name.split('_')[1] == 'test':
36 |             return Resnet50_test()
37 |         elif name.split('_')[1] == 'train':
38 |             return Resnet50_train()
39 |         else:
40 |             raise KeyError('Unknown network: {}'.format(name))
41 |     elif name.split('_')[0] == 'Resnet101':
42 |         if name.split('_')[1] == 'test':
43 |             return Resnet101_test()
44 |         elif
name.split('_')[1] == 'train':
45 |             return Resnet101_train()
46 |         else:
47 |             raise KeyError('Unknown network: {}'.format(name))
48 |     elif name.split('_')[0] == 'PVAnet':
49 |         if name.split('_')[1] == 'test':
50 |             return PVAnet_test()
51 |         elif name.split('_')[1] == 'train':
52 |             return PVAnet_train()
53 |         else:
54 |             raise KeyError('Unknown network: {}'.format(name))
55 |     else:
56 |         raise KeyError('Unknown network: {}'.format(name))
57 |
58 | def list_networks():
59 |     """List all registered networks."""
60 |     return list(__sets.keys())
61 |
-------------------------------------------------------------------------------- /lib/nms/.gitignore: --------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
-------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zardinality/TF_Deformable_Net/00c86380fd2725ebe7ae22f41d460ffc0bca378d/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 |     return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 |     return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 |     cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 |     cdef int ndets = dets.shape[0]
28 |     cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 |         np.zeros((ndets), dtype=np.int)
30 |
31 |     # nominal indices
32 |     cdef int _i, _j
33 |     # sorted indices
34 |     cdef int i, j
35 |     # temp variables for box i's (the box currently under consideration)
36 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 |     # variables for computing overlap with box j (lower scoring box)
38 |     cdef np.float32_t xx1, yy1, xx2, yy2
39 |     cdef np.float32_t w, h
40 |     cdef np.float32_t inter, ovr
41 |
42 |     keep = []
43 |     for _i in range(ndets):
44 |         i = order[_i]
45 |         if suppressed[i] == 1:
46 |             continue
47 |         keep.append(i)
48 |         ix1 = x1[i]
49 |         iy1 = y1[i]
50 |         ix2 = x2[i]
51 |         iy2 = y2[i]
52 |         iarea = areas[i]
53 |         for _j in range(_i + 1, ndets):
54 |             j = order[_j]
55 |             if suppressed[j] == 1:
56 |                 continue
57 |             xx1 = max(ix1, x1[j])
58 |             yy1 = max(iy1, y1[j])
59 |             xx2 = min(ix2, x2[j])
60 |             yy2 = min(iy2, y2[j])
61 |             w = max(0.0, xx2 - xx1 + 1)
62 |             h = max(0.0, yy2 - yy1 + 1)
63 |             inter = w * h
64 |             ovr = inter / (iarea + areas[j] - inter)
65 |             if ovr >= thresh:
66 |                 suppressed[j] = 1
67 |
68 |     return keep
69 |
--------------------------------------------------------------------------------
/lib/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 |           int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/lib/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 |     void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 |             np.int32_t device_id=0):
18 |     cdef int boxes_num = dets.shape[0]
19 |     cdef int boxes_dim = dets.shape[1]
20 |     cdef int num_out
21 |     cdef np.ndarray[np.int32_t, ndim=1] \
22 |         keep = np.zeros(boxes_num, dtype=np.int32)
23 |     cdef np.ndarray[np.float32_t, ndim=1] \
24 |         scores = dets[:, 4]
25 |     cdef np.ndarray[np.int_t, ndim=1] \
26 |         order = scores.argsort()[::-1]
27 |     cdef np.ndarray[np.float32_t, ndim=2] \
28 |         sorted_dets = dets[order, :]
29 |     _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 |     keep = keep[:num_out]
31 |     return list(order[keep])
32 |
--------------------------------------------------------------------------------
/lib/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 |   /* Code block avoids redefinition of cudaError_t error */ \
14 |   do { \
15 |     cudaError_t error = condition; \
16 |     if (error != cudaSuccess) { \
17 |       std::cout << cudaGetErrorString(error) << std::endl; \
18 |     } \
19 |   } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 |   float interS = width * height;
29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 |   return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
36 |   const int row_start = blockIdx.y;
37 |   const int col_start = blockIdx.x;
38 |
39 |   // if (row_start > col_start) return;
40 |
41 |   const int row_size =
42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 |   const int col_size =
44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 |   __shared__ float block_boxes[threadsPerBlock * 5];
47 |   if (threadIdx.x < col_size) {
48 |     block_boxes[threadIdx.x * 5 + 0] =
49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 |     block_boxes[threadIdx.x * 5 + 1] =
51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 |     block_boxes[threadIdx.x * 5 + 2] =
53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 |     block_boxes[threadIdx.x * 5 + 3] =
55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 |     block_boxes[threadIdx.x * 5 + 4] =
57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 |   }
59 |   __syncthreads();
60 |
61 |   if (threadIdx.x < row_size) {
62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
64 |     int i = 0;
65 |     unsigned long long t = 0;
66 |     int start = 0;
67 |     if (row_start == col_start) {
68 |       start = threadIdx.x + 1;
69 |     }
70 |     for (i = start; i < col_size; i++) {
71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 |         t |= 1ULL << i;
73 |       }
74 |     }
75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 |   }
78 | }
79 |
80 | void _set_device(int device_id) {
81 |   int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 |   if (current_device == device_id) {
84 |     return;
85 |   }
86 |   // The call to cudaSetDevice must come before any calls to Get, which
87 |   // may perform initialization using the GPU.
88 |   CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
93 |   _set_device(device_id);
94 |
95 |   float* boxes_dev = NULL;
96 |   unsigned long long* mask_dev = NULL;
97 |
98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 |
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 |
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 |
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/lib/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 |     """Pure Python NMS baseline."""
12 |     x1 = dets[:, 0]
13 |     y1 = dets[:, 1]
14 |     x2 = dets[:, 2]
15 |     y2 = dets[:, 3]
16 |     scores = dets[:, 4]
17 |
18 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 |     order = scores.argsort()[::-1]
20 |
21 |     keep = []
22 |     while order.size > 0:
23 |         i = order[0]
24 |         keep.append(i)
25 |         xx1 = np.maximum(x1[i], x1[order[1:]])
26 |         yy1 = np.maximum(y1[i], y1[order[1:]])
27 |         xx2 = np.minimum(x2[i], x2[order[1:]])
28 |         yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 |         w = np.maximum(0.0, xx2 - xx1 + 1)
31 |         h = np.maximum(0.0, yy2 - yy1 + 1)
32 |         inter = w * h
33 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 |         inds = np.where(ovr <= thresh)[0]
36 |         order = order[inds + 1]
37 |
38 |     return keep
39 |
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # R-FCN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Revised by Minyue Jiang
6 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os.path as osp
3 |
4 | filename = osp.join(osp.dirname(__file__), 'psroi_pooling.so')
5 | _psroi_pooling_module = tf.load_op_library(filename)
6 | psroi_pool = _psroi_pooling_module.psroi_pool
7 | psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op_gpu.h:
--------------------------------------------------------------------------------
1 | #if !GOOGLE_CUDA
2 | #error This file must only be included when building with Cuda support
3 | #endif
4 |
5 | #ifndef TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_
6 | #define TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_
7 |
8 | #define EIGEN_USE_GPU
9 |
10 | #include "tensorflow/core/framework/tensor_types.h"
11 | #include "tensorflow/core/platform/types.h"
12 |
13 | namespace tensorflow {
14 |
15 | // Run the forward pass of position-sensitive ROI (PSROI) pooling, writing the
16 | // pooled values to top_data and recording in mapping_channel which input
17 | // channel each output element was pooled from.
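// (The backward launcher below relies on mapping_channel to route each
// top_diff value back to the input channel it was pooled from when
// accumulating bottom_diff.)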
18 | bool PSROIPoolForwardLauncher(
19 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int channels, const int height,
20 |     const int width, const int pooled_height, const int pooled_width, const float* bottom_rois,
21 |     const int output_dim, const int group_size, float* top_data, int* mapping_channel, const Eigen::GpuDevice& d);
22 |
23 | bool PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int num_rois, const float spatial_scale,
24 |                                const int channels, const int height, const int width, const int pooled_height, const int pooled_width,
25 |                                const int output_dim, float* bottom_diff, const float* bottom_rois, const Eigen::GpuDevice& d);
26 |
27 | }  // namespace tensorflow
28 |
29 | #endif  // TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from . import psroi_pooling_op
4 | import pdb
5 |
6 |
7 | @tf.RegisterShape("PSROIPool")
8 | def _psroi_pool_shape(op):
9 |     """Shape function for the PSROIPool op.
10 |
11 |     """
12 |     dims_data = op.inputs[0].get_shape().as_list()
13 |     channels = dims_data[3]
14 |     dims_rois = op.inputs[1].get_shape().as_list()
15 |     num_rois = dims_rois[0]
16 |     output_dim = op.get_attr('output_dim')
17 |     group_size = op.get_attr('group_size')
18 |     pooled_height = group_size
19 |     pooled_width = group_size
20 |
21 |     output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim])
22 |     return [output_shape, output_shape]
23 |
24 | @ops.RegisterGradient("PSROIPool")
25 | def _psroi_pool_grad(op, grad, _):
26 |     """The gradients for `PSROI_pool`.
27 |     Args:
28 |         op: The `psroi_pool` `Operation` that we are differentiating, which we can use
29 |             to find the inputs and outputs of the original op.
30 |         grad: Gradient with respect to the output of the `psroi_pool` op.
31 |     Returns:
32 |         Gradients with respect to the input of `psroi_pool`.
33 |     """
34 |
35 |     data = op.inputs[0]
36 |     rois = op.inputs[1]
37 |     mapping_channel = op.outputs[1]
38 |     spatial_scale = op.get_attr('spatial_scale')
39 |
40 |     # compute gradient
41 |     #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale)
42 |     data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, mapping_channel, grad, spatial_scale)
43 |
44 |     return [data_grad, None]  # List of one Tensor, since we have one input
45 |
--------------------------------------------------------------------------------
/lib/psroi_pooling_layer/psroi_pooling_op_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from . import psroi_pooling_op
4 | from .
import psroi_pooling_op_grad 5 | import pdb 6 | 7 | pdb.set_trace() 8 | 9 | rois = tf.convert_to_tensor([ [0, 0, 0, 4, 4]], dtype=tf.float32) 10 | hh=tf.convert_to_tensor(np.random.rand(1,5,5,25),dtype=tf.float32) 11 | [y2, channels] = psroi_pooling_op.psroi_pool(hh, rois, output_dim=1, group_size=5, spatial_scale=1.0) 12 | 13 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 14 | print(( sess.run(hh))) 15 | print(( sess.run(y2))) 16 | pdb.set_trace() 17 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. 
For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. 
version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | if(cnts) for(siz j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(siz i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && ad?1:c=dy && xs>xe) || (dxye); 151 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 152 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 153 | if(dx>=dy) for( int d=0; d<=dx; d++ ) { 154 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 155 | } else for( int d=0; d<=dy; d++ ) { 156 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 157 | } 158 | } 159 | // get points along y-boundary and downsample 160 | free(x); free(y); k=m; m=0; double xd, yd; 161 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 162 | for( j=1; jw-1 ) continue; 165 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 167 | x[m]=(int) xd; y[m]=(int) yd; m++; 168 | } 169 | // compute rle encoding given y-boundary points 170 | k=m; a=malloc(sizeof(uint)*(k+1)); 171 | for( j=0; j0) b[m++]=a[j++]; else { 177 | j++; if(jm, p=0; long x; bool more; 184 | char *s=malloc(sizeof(char)*m*6); 185 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 187 | while( more ) { 188 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? 
x!=-1 : x!=0; 189 | if(more) c |= 0x20; c+=48; s[p++]=c; 190 | } 191 | } 192 | s[p]=0; return s; 193 | } 194 | 195 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 196 | siz m=0, p=0, k; long x; bool more; uint *cnts; 197 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 198 | while( s[p] ) { 199 | x=0; k=0; more=1; 200 | while( more ) { 201 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 202 | more = c & 0x20; p++; k++; 203 | if(!more && (c & 0x10)) x |= -1 << 5*k; 204 | } 205 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 206 | } 207 | rleInit(R,h,w,m,cnts); free(cnts); 208 | } 209 | -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . 
import roidb -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 11 | """ 12 | 13 | import numpy as np 14 | 15 | # TODO: make fast_rcnn irrelevant 16 | # >>>> obsolete, because it depends on sth outside of this project 17 | from ..fast_rcnn.config import cfg 18 | # <<<< obsolete 19 | from ..roi_data_layer.minibatch import get_minibatch 20 | 21 | class RoIDataLayer(object): 22 | """Fast R-CNN data layer used for training.""" 23 | 24 | def __init__(self, roidb, num_classes): 25 | """Set the roidb to be used by this layer during training.""" 26 | self._roidb = roidb 27 | self._num_classes = num_classes 28 | self._shuffle_roidb_inds() 29 | 30 | def _shuffle_roidb_inds(self): 31 | """Randomly permute the training roidb.""" 32 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 33 | self._cur = 0 34 | 35 | def _get_next_minibatch_inds(self): 36 | """Return the roidb indices for the next minibatch.""" 37 | 38 | if cfg.TRAIN.HAS_RPN: 39 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 40 | self._shuffle_roidb_inds() 41 | 42 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 43 | self._cur += cfg.TRAIN.IMS_PER_BATCH 44 | else: 45 | # sample images 46 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 47 | i = 0 48 | while (i < cfg.TRAIN.IMS_PER_BATCH): 49 | ind = self._perm[self._cur] 50 | num_objs = self._roidb[ind]['boxes'].shape[0] 51 | if num_objs != 0: 52 | db_inds[i] = ind 53 | i += 1 54 | 55 | self._cur += 1 56 | if self._cur >= len(self._roidb): 57 | self._shuffle_roidb_inds() 58 | 59 | return db_inds 60 | 61 | def _get_next_minibatch(self): 62 | """Return the blobs to be used for the next minibatch. 63 | 64 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 65 | separate process and made available through self._blob_queue. 
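        (Prefetching is not wired up in this port: no blob queue is created
        anywhere in this class, so minibatches are always computed synchronously.)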
66 |         """
67 |         db_inds = self._get_next_minibatch_inds()
68 |         minibatch_db = [self._roidb[i] for i in db_inds]
69 |         return get_minibatch(minibatch_db, self._num_classes)
70 |
71 |     def forward(self):
72 |         """Get blobs and copy them into this layer's top blob vector."""
73 |         blobs = self._get_next_minibatch()
74 |         return blobs
75 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
9 |
10 | import numpy as np
11 |
12 | import PIL
13 |
14 | # TODO: make fast_rcnn irrelevant
15 | # >>>> obsolete, because it depends on sth outside of this project
16 | from ..fast_rcnn.config import cfg
17 | from ..fast_rcnn.bbox_transform import bbox_transform
18 | # <<<< obsolete
19 | from ..utils.cython_bbox import bbox_overlaps
20 |
21 | def prepare_roidb(imdb):
22 |     """Enrich the imdb's roidb by adding some derived quantities that
23 |     are useful for training. This function precomputes the maximum
24 |     overlap, taken over ground-truth boxes, between each ROI and
25 |     each ground-truth box. The class with maximum overlap is also
26 |     recorded.
27 |     """
28 |     sizes = [PIL.Image.open(imdb.image_path_at(i)).size
29 |              for i in range(imdb.num_images)]
30 |     roidb = imdb.roidb
31 |     for i in range(len(imdb.image_index)):
32 |         roidb[i]['image'] = imdb.image_path_at(i)
33 |         roidb[i]['width'] = sizes[i][0]
34 |         roidb[i]['height'] = sizes[i][1]
35 |         # need gt_overlaps as a dense array for argmax
36 |         gt_overlaps = roidb[i]['gt_overlaps'].toarray()
37 |         # max overlap with gt over classes (columns)
38 |         max_overlaps = gt_overlaps.max(axis=1)
39 |         # gt class that had the max overlap
40 |         max_classes = gt_overlaps.argmax(axis=1)
41 |         roidb[i]['max_classes'] = max_classes
42 |         roidb[i]['max_overlaps'] = max_overlaps
43 |         # sanity checks
44 |         # max overlap of 0 => class should be zero (background)
45 |         zero_inds = np.where(max_overlaps == 0)[0]
46 |         assert all(max_classes[zero_inds] == 0)
47 |         # max overlap > 0 => class should not be zero (must be a fg class)
48 |         nonzero_inds = np.where(max_overlaps > 0)[0]
49 |         assert all(max_classes[nonzero_inds] != 0)
50 |
51 | def add_bbox_regression_targets(roidb):
52 |     """
53 |     Add information needed to train bounding-box regressors.
54 |     For each roi, find the corresponding gt box and compute the regression targets,
55 |     then normalize them by subtracting the mean and dividing by the std.
56 |     """
57 |     assert len(roidb) > 0
58 |     assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
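    # bbox_targets layout (filled in below by _compute_targets): column 0 holds
    # the ROI's class label; columns 1:5 hold the (dx, dy, dw, dh) regression
    # targets, which are normalized in place further down.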
59 |
60 |     num_images = len(roidb)
61 |     # Infer number of classes from the number of columns in gt_overlaps
62 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
63 |     for im_i in range(num_images):
64 |         rois = roidb[im_i]['boxes']
65 |         max_overlaps = roidb[im_i]['max_overlaps']
66 |         max_classes = roidb[im_i]['max_classes']
67 |         roidb[im_i]['bbox_targets'] = \
68 |             _compute_targets(rois, max_overlaps, max_classes)
69 |
70 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
71 |         # Use fixed / precomputed "means" and "stds" instead of empirical values
72 |         means = np.tile(
73 |             np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
74 |         stds = np.tile(
75 |             np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
76 |     else:
77 |         # Compute values needed for means and stds
78 |         # var(x) = E(x^2) - E(x)^2
79 |         class_counts = np.zeros((num_classes, 1)) + cfg.EPS
80 |         sums = np.zeros((num_classes, 4))
81 |         squared_sums = np.zeros((num_classes, 4))
82 |         for im_i in range(num_images):
83 |             targets = roidb[im_i]['bbox_targets']
84 |             for cls in range(1, num_classes):
85 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
86 |                 if cls_inds.size > 0:
87 |                     class_counts[cls] += cls_inds.size
88 |                     sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
89 |                     squared_sums[cls, :] += \
90 |                         (targets[cls_inds, 1:] ** 2).sum(axis=0)
91 |
92 |         means = sums / class_counts
93 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
94 |         # a too-small std would blow up the normalized targets (nan/inf)
95 |         assert np.min(stds) >= 0.01, \
96 |             'Boxes std is too small, std:{}'.format(stds)
97 |
98 |     print('bbox target means:')
99 |     print(means)
100 |     print(means[1:, :].mean(axis=0))  # ignore bg class
101 |     print('bbox target stdevs:')
102 |     print(stds)
103 |     print(stds[1:, :].mean(axis=0))  # ignore bg class
104 |
105 |     # Normalize targets
106 |     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
107 |         print("Normalizing targets")
108 |         for im_i in range(num_images):
109 |             targets = roidb[im_i]['bbox_targets']
110 |             for cls in range(1, num_classes):
111 |                 cls_inds = np.where(targets[:, 0] == cls)[0]
112 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
113 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
114 |     else:
115 |         print("NOT normalizing targets")
116 |
117 |     # These values will be needed for making predictions
118 |     # (the predicts will need to be unnormalized and uncentered)
119 |     return means.ravel(), stds.ravel()
120 |
121 | def _compute_targets(rois, overlaps, labels):
122 |     """
123 |     Compute bounding-box regression targets for an image.
124 |     For each roi, find the corresponding gt box and compute the regression targets.
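    Returns an (num_rois, 5) array; rows whose max overlap falls below
    cfg.TRAIN.BBOX_THRESH are left as zeros.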
125 | """ 126 | # Indices of ground-truth ROIs 127 | gt_inds = np.where(overlaps == 1)[0] 128 | if len(gt_inds) == 0: 129 | # Bail if the image has no ground-truth ROIs 130 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 131 | # Indices of examples for which we try to make predictions 132 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 133 | 134 | # Get IoU overlap between each ex ROI and gt ROI 135 | ex_gt_overlaps = bbox_overlaps( 136 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 137 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 138 | 139 | # Find which gt ROI each ex ROI has max overlap with: 140 | # this will be the ex ROI's gt target 141 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 142 | gt_rois = rois[gt_inds[gt_assignment], :] 143 | ex_rois = rois[ex_inds, :] 144 | 145 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 146 | targets[ex_inds, 0] = labels[ex_inds] 147 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 148 | return targets 149 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | 10 | import numpy as np 11 | 12 | # TODO: make fast_rcnn irrelevant 13 | # >>>> obsolete, because it depends on sth outside of this project 14 | from ..fast_rcnn.config import cfg 15 | from ..fast_rcnn.bbox_transform import bbox_transform 16 | # <<<< obsolete 17 | from ..utils.cython_bbox import bbox_overlaps 18 | 19 | def prepare_roidb(imdb): 20 | """Enrich the imdb's roidb by adding some derived quantities that 21 | are useful for training. This function precomputes the maximum 22 | overlap, taken over ground-truth boxes, between each ROI and 23 | each ground-truth box. The class with maximum overlap is also 24 | recorded. 25 | """ 26 | roidb = imdb.roidb 27 | for i in range(len(imdb.image_index)): 28 | roidb[i]['image'] = imdb.image_path_at(i) 29 | # need gt_overlaps as a dense array for argmax 30 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 31 | # max overlap with gt over classes (columns) 32 | max_overlaps = gt_overlaps.max(axis=1) 33 | # gt class that had the max overlap 34 | max_classes = gt_overlaps.argmax(axis=1) 35 | 36 | roidb[i]['max_classes'] = max_classes 37 | roidb[i]['max_overlaps'] = max_overlaps 38 | 39 | # sanity checks 40 | # max overlap of 0 => class should be zero (background) 41 | zero_inds = np.where(max_overlaps == 0)[0] 42 | assert all(max_classes[zero_inds] == 0) 43 | # max overlap > 0 => class should not be zero (must be a fg class) 44 | nonzero_inds = np.where(max_overlaps > 0)[0] 45 | assert all(max_classes[nonzero_inds] != 0) 46 | 47 | def add_bbox_regression_targets(roidb): 48 | """Add information needed to train bounding-box regressors.""" 49 | assert len(roidb) > 0 50 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
51 |
52 |     num_images = len(roidb)
53 |     # Infer number of classes from the number of columns in gt_overlaps
54 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
55 |     for im_i in range(num_images):
56 |         rois = roidb[im_i]['boxes']
57 |         max_overlaps = roidb[im_i]['max_overlaps']
58 |         max_classes = roidb[im_i]['max_classes']
59 |         roidb[im_i]['bbox_targets'] = \
60 |             _compute_targets(rois, max_overlaps, max_classes, num_classes)
61 |
62 |     # Compute values needed for means and stds
63 |     # var(x) = E(x^2) - E(x)^2
64 |     class_counts = np.zeros((num_classes, 1)) + cfg.EPS
65 |     sums = np.zeros((num_classes, 4))
66 |     squared_sums = np.zeros((num_classes, 4))
67 |     for im_i in range(num_images):
68 |         targets = roidb[im_i]['bbox_targets']
69 |         for cls in range(1, num_classes):
70 |             cls_inds = np.where(targets[:, 0] == cls)[0]
71 |             if cls_inds.size > 0:
72 |                 class_counts[cls] += cls_inds.size
73 |                 sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
74 |                 squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)
75 |
76 |     means = sums / class_counts
77 |     stds = np.sqrt(squared_sums / class_counts - means ** 2)
78 |
79 |     # Normalize targets
80 |     for im_i in range(num_images):
81 |         targets = roidb[im_i]['bbox_targets']
82 |         for cls in range(1, num_classes):
83 |             cls_inds = np.where(targets[:, 0] == cls)[0]
84 |             roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
85 |             if stds[cls, 0] != 0:
86 |                 roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
87 |
88 |     # These values will be needed for making predictions
89 |     # (the predicts will need to be unnormalized and uncentered)
90 |     return means.ravel(), stds.ravel()
91 |
92 | def _compute_targets(rois, overlaps, labels, num_classes):
93 |     """Compute bounding-box regression targets for an image."""
94 |     # Ensure ROIs are floats
95 |     rois = rois.astype(np.float, copy=False)
96 |
97 |     # Indices of ground-truth ROIs
98 |     gt_inds = np.where(overlaps == 1)[0]
99 |     # Indices of examples for which we try to make predictions
100 |     ex_inds = []
101 |     for i in range(1, num_classes):
102 |         ex_inds.extend( np.where((labels == i) & (overlaps >= cfg.TRAIN.BBOX_THRESH))[0] )
103 |
104 |     # Get IoU overlap between each ex ROI and gt ROI
105 |     ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :],
106 |                                    rois[gt_inds, :])
107 |
108 |     # Find which gt ROI each ex ROI has max overlap with:
109 |     # this will be the ex ROI's gt target
110 |     if ex_gt_overlaps.shape[0] != 0:
111 |         gt_assignment = ex_gt_overlaps.argmax(axis=1)
112 |     else:
113 |         gt_assignment = []
114 |     gt_rois = rois[gt_inds[gt_assignment], :]
115 |     ex_rois = rois[ex_inds, :]
116 |
117 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + cfg.EPS
118 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + cfg.EPS
119 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
120 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
121 |
122 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + cfg.EPS
123 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + cfg.EPS
124 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
125 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
126 |
127 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
128 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
129 |     targets_dw = np.log(gt_widths / ex_widths)
130 |     targets_dh = np.log(gt_heights / ex_heights)
131 |
132 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
133 |     targets[ex_inds, 0] = labels[ex_inds]
134 |     targets[ex_inds, 1] = targets_dx
135 |     targets[ex_inds, 2] = targets_dy
136 |     targets[ex_inds, 3] = targets_dw
137 |     targets[ex_inds, 4] = targets_dh
138 |     return targets
139 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import roi_pooling_op
8 | from . import roi_pooling_op_grad
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os.path as osp
3 |
4 | filename = osp.join(osp.dirname(__file__), 'roi_pooling.so')
5 | _roi_pooling_module = tf.load_op_library(filename)
6 | roi_pool = _roi_pooling_module.roi_pool
7 | roi_pool_grad = _roi_pooling_module.roi_pool_grad
8 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op_gpu.h:
--------------------------------------------------------------------------------
1 | #if !GOOGLE_CUDA
2 | #error This file must only be included when building with Cuda support
3 | #endif
4 |
5 | #ifndef TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
6 | #define TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
7 |
8 | #define EIGEN_USE_GPU
9 |
10 | #include "tensorflow/core/framework/tensor_types.h"
11 | #include "tensorflow/core/platform/types.h"
12 |
13 | namespace tensorflow {
14 |
15 | // Run the forward pass of max pooling, optionally writing the argmax indices to
16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the
17 | // argmax indices are not written.
18 | bool ROIPoolForwardLaucher(
19 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
20 |     const int width, const int channels, const int pooled_height,
21 |     const int pooled_width, const float* bottom_rois,
22 |     float* top_data, int* argmax_data, const Eigen::GpuDevice& d);
23 |
24 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 |                             const int height, const int width, const int channels, const int pooled_height,
26 |                             const int pooled_width, const float* bottom_rois,
27 |                             float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d);
28 |
29 | }  // namespace tensorflow
30 |
31 | #endif  // TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
32 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from . import roi_pooling_op
4 |
5 | @ops.RegisterGradient("RoiPool")
6 | def _roi_pool_grad(op, grad, _):
7 |     """The gradients for `roi_pool`.
8 |     Args:
9 |         op: The `roi_pool` `Operation` that we are differentiating, which we can use
10 |             to find the inputs and outputs of the original op.
11 |         grad: Gradient with respect to the output of the `roi_pool` op.
12 |     Returns:
13 |         Gradients with respect to the input of `roi_pool`.
14 |     """
15 |     data = op.inputs[0]
16 |     rois = op.inputs[1]
17 |     argmax = op.outputs[1]
18 |     pooled_height = op.get_attr('pooled_height')
19 |     pooled_width = op.get_attr('pooled_width')
20 |     spatial_scale = op.get_attr('spatial_scale')
21 |
22 |     # compute gradient
23 |     data_grad = roi_pooling_op.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale)
24 |
25 |     return [data_grad, None]  # List of one Tensor, since we have one input
26 |
--------------------------------------------------------------------------------
/lib/roi_pooling_layer/roi_pooling_op_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from . import roi_pooling_op
4 | from . import roi_pooling_op_grad
5 |
6 | import pdb
7 |
8 |
9 | def weight_variable(shape):
10 |     initial = tf.truncated_normal(shape, stddev=0.1)
11 |     return tf.Variable(initial)
12 |
13 | def conv2d(x, W):
14 |     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
15 |
16 | array = np.random.rand(32, 100, 100, 3)
17 | data = tf.convert_to_tensor(array, dtype=tf.float32)
18 | rois = tf.convert_to_tensor([[0, 10, 10, 20, 20], [31, 30, 30, 40, 40]], dtype=tf.float32)
19 |
20 | W = weight_variable([3, 3, 3, 1])
21 | h = conv2d(data, W)
22 |
23 | [y, argmax] = roi_pooling_op.roi_pool(h, rois, 6, 6, 1.0/3)
24 | pdb.set_trace()
25 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32)
26 | print(y_data, y, argmax)
27 |
28 | # Minimize the mean squared errors.
29 | loss = tf.reduce_mean(tf.square(y - y_data))
30 | optimizer = tf.train.GradientDescentOptimizer(0.5)
31 | train = optimizer.minimize(loss)
32 |
33 | init = tf.global_variables_initializer()
34 |
35 | # Launch the graph.
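# (log_device_placement=True below makes TensorFlow report the device each op
# is assigned to, which is a quick way to confirm that the custom roi_pool op
# and its registered gradient actually run on the GPU.)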
36 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 37 | sess.run(init) 38 | pdb.set_trace() 39 | for step in range(10): 40 | sess.run(train) 41 | print((step, sess.run(W))) 42 | print((sess.run(y))) 43 | 44 | #with tf.device('/gpu:0'): 45 | # result = module.roi_pool(data, rois, 1, 1, 1.0/1) 46 | # print result.eval() 47 | #with tf.device('/cpu:0'): 48 | # run(init) 49 | -------------------------------------------------------------------------------- /lib/rpn_msr/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/rpn_msr/generate.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import cv2 10 | import matplotlib.pyplot as plt 11 | 12 | from ..utils.blob import im_list_to_blob 13 | from ..utils.timer import Timer 14 | 15 | # TODO: make fast_rcnn irrelevant 16 | # >>>> obsolete, because it depends on sth outside of this project 17 | from ..fast_rcnn.config import cfg 18 | # <<<< obsolete 19 | 20 | 21 | def _vis_proposals(im, dets, thresh=0.5): 22 | """Draw detected bounding boxes.""" 23 | inds = np.where(dets[:, -1] >= thresh)[0] 24 | if len(inds) == 0: 25 | return 26 | 27 | class_name = 'obj' 28 | im = im[:, :, (2, 1, 0)] 29 | fig, ax = plt.subplots(figsize=(12, 12)) 30 | ax.imshow(im, aspect='equal') 31 | for i in inds: 32 | bbox = dets[i, :4] 33 | score = dets[i, -1] 34 | 35 | ax.add_patch( 36 | plt.Rectangle((bbox[0], bbox[1]), 37 | bbox[2] - bbox[0], 38 | bbox[3] - bbox[1], fill=False, 39 | edgecolor='red', linewidth=3.5) 40 | ) 41 | ax.text(bbox[0], bbox[1] - 2, 42 | '{:s} {:.3f}'.format(class_name, score), 43 | bbox=dict(facecolor='blue', alpha=0.5), 44 | fontsize=14, color='white') 45 | 46 | ax.set_title(('{} detections with ' 47 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 48 | thresh), 49 | fontsize=14) 50 | plt.axis('off') 51 | plt.tight_layout() 52 | plt.draw() 53 | 54 | def _get_image_blob(im): 55 | """Converts an image into a network input. 
56 |
57 |     Arguments:
58 |         im (ndarray): a color image in BGR order
59 |
60 |     Returns:
61 |         blob (ndarray): a data blob holding an image pyramid
62 |         im_scale_factors (list): list of image scales (relative to im) used
63 |             in the image pyramid
64 |     """
65 |     im_orig = im.astype(np.float32, copy=True)
66 |     im_orig -= cfg.PIXEL_MEANS
67 |
68 |     processed_ims = []
69 |
70 |     assert len(cfg.TEST.SCALES_BASE) == 1
71 |     im_scale = cfg.TEST.SCALES_BASE[0]
72 |
73 |     im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
74 |                     interpolation=cv2.INTER_LINEAR)
75 |     im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]
76 |     processed_ims.append(im)
77 |
78 |     # Create a blob to hold the input images
79 |     blob = im_list_to_blob(processed_ims)
80 |
81 |     return blob, im_info
82 |
83 | def im_proposals(net, im):
84 |     """Generate RPN proposals on a single image."""
85 |     blobs = {}
86 |     blobs['data'], blobs['im_info'] = _get_image_blob(im)
87 |     net.blobs['data'].reshape(*(blobs['data'].shape))
88 |     net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
89 |     blobs_out = net.forward(
90 |         data=blobs['data'].astype(np.float32, copy=False),
91 |         im_info=blobs['im_info'].astype(np.float32, copy=False))
92 |
93 |     scale = blobs['im_info'][0, 2]
94 |     boxes = blobs_out['rois'][:, 1:].copy() / scale
95 |     scores = blobs_out['scores'].copy()
96 |     return boxes, scores
97 |
98 | def imdb_proposals(net, imdb):
99 |     """Generate RPN proposals on all images in an imdb."""
100 |
101 |     _t = Timer()
102 |     imdb_boxes = [[] for _ in range(imdb.num_images)]
103 |     for i in range(imdb.num_images):
104 |         im = cv2.imread(imdb.image_path_at(i))
105 |         _t.tic()
106 |         imdb_boxes[i], scores = im_proposals(net, im)
107 |         _t.toc()
108 |         print('im_proposals: {:d}/{:d} {:.3f}s' \
109 |               .format(i + 1, imdb.num_images, _t.average_time))
110 |         if 0:
111 |             dets = np.hstack((imdb_boxes[i], scores))
112 |             # from IPython import embed; embed()
113 |             _vis_proposals(im, dets[:3, :], thresh=0.9)
114 |             plt.show()
115 |
116 |     return imdb_boxes
117 |
118 | def imdb_proposals_det(net, imdb):
119 |     """Generate RPN proposals on all images in an imdb."""
120 |
121 |     _t = Timer()
122 |     imdb_boxes = [[] for _ in range(imdb.num_images)]
123 |     for i in range(imdb.num_images):
124 |         im = cv2.imread(imdb.image_path_at(i))
125 |         _t.tic()
126 |         boxes, scores = im_proposals(net, im)
127 |         _t.toc()
128 |         print('im_proposals: {:d}/{:d} {:.3f}s' \
129 |               .format(i + 1, imdb.num_images, _t.average_time))
130 |         dets = np.hstack((boxes, scores))
131 |         imdb_boxes[i] = dets
132 |
133 |         if 0:
134 |             # from IPython import embed; embed()
135 |             _vis_proposals(im, dets[:3, :], thresh=0.9)
136 |             plt.show()
137 |
138 |     return imdb_boxes
139 |
--------------------------------------------------------------------------------
/lib/rpn_msr/generate_anchors.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
11 | #
12 | #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
13 | #    >> anchors
14 | #
15 | #    anchors =
16 | #
17 | #       -83   -39   100    56
18 | #      -175   -87   192   104
19 | #      -359  -183   376   200
20 | #       -55   -55    72    72
21 | #      -119  -119   136
136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in range(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 
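    For example, the 16x16 square base anchor with scales (8, 16, 32) yields
    square anchors of side 128, 256 and 512 pixels.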
91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print(time.time() - t) 104 | print(a) 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /lib/rpn_msr/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | 10 | import numpy as np 11 | import yaml 12 | # TODO: make fast_rcnn irrelevant 13 | # >>>> obsolete, because it depends on sth outside of this project 14 | from ..fast_rcnn.config import cfg 15 | # <<<< obsolete 16 | from .generate_anchors import generate_anchors 17 | 18 | # TODO: make fast_rcnn irrelevant 19 | # >>>> obsolete, because it depends on sth outside of this project 20 | from ..fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 21 | from ..fast_rcnn.nms_wrapper import nms 22 | # <<<< obsolete 23 | 24 | DEBUG = False 25 | 26 | class ProposalLayer(caffe.Layer): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 30 | """ 31 | 32 | def setup(self, bottom, top): 33 | # parse the layer parameter string, which must be valid YAML 34 | layer_params = yaml.load(self.param_str_) 35 | 36 | self._feat_stride = layer_params['feat_stride'] 37 | self._anchors = generate_anchors(cfg.TRAIN.RPN_BASE_SIZE, cfg.TRAIN.RPN_ASPECTS, cfg.TRAIN.RPN_SCALES) 38 | self._num_anchors = self._anchors.shape[0] 39 | 40 | if DEBUG: 41 | print('feat_stride: {}'.format(self._feat_stride)) 42 | print('anchors:') 43 | print(self._anchors) 44 | 45 | # rois blob: holds R regions of interest, each is a 5-tuple 46 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 47 | # rectangle (x1, y1, x2, y2) 48 | top[0].reshape(1, 5) 49 | 50 | # scores blob: holds scores for R regions of interest 51 | if len(top) > 1: 52 | top[1].reshape(1, 1, 1, 1) 53 | 54 | def forward(self, bottom, top): 55 | # Algorithm: 56 | # 57 | # for each (H, W) location i 58 | # generate A anchor boxes centered on cell i 59 | # apply predicted bbox deltas at cell i to each of the A anchors 60 | # clip predicted boxes to image 61 | # remove predicted boxes with either height or width < threshold 62 | # sort all (proposal, score) pairs by score from highest to lowest 63 | # take top pre_nms_topN proposals before NMS 64 | # apply NMS with threshold 0.7 to remaining proposals 65 | # take after_nms_topN proposals after NMS 66 | # return the top proposals (-> RoIs top, scores top) 67 | 68 | assert bottom[0].data.shape[0] == 1, \ 69 | 'Only single item batches are supported' 70 | # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 71 | cfg_key = 'TEST' 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | # the first set of _num_anchors channels are bg probs 78 | # the second set are the fg probs, which we want 79 | scores = 
bottom[0].data[:, self._num_anchors:, :, :]
80 |         bbox_deltas = bottom[1].data
81 |         im_info = bottom[2].data[0, :]
82 | 
83 |         if DEBUG:
84 |             print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
85 |             print('scale: {}'.format(im_info[2]))
86 | 
87 |         # 1. Generate proposals from bbox deltas and shifted anchors
88 |         height, width = scores.shape[-2:]
89 | 
90 |         if DEBUG:
91 |             print('score map size: {}'.format(scores.shape))
92 | 
93 |         # Enumerate all shifts
94 |         shift_x = np.arange(0, width) * self._feat_stride
95 |         shift_y = np.arange(0, height) * self._feat_stride
96 |         shift_x, shift_y = np.meshgrid(shift_x, shift_y)
97 |         shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
98 |                             shift_x.ravel(), shift_y.ravel())).transpose()
99 | 
100 |         # Enumerate all shifted anchors:
101 |         #
102 |         #   add A anchors (1, A, 4) to
103 |         #   cell K shifts (K, 1, 4) to get
104 |         #   shift anchors (K, A, 4)
105 |         #   reshape to (K*A, 4) shifted anchors
106 |         A = self._num_anchors
107 |         K = shifts.shape[0]
108 |         anchors = self._anchors.reshape((1, A, 4)) + \
109 |                   shifts.reshape((1, K, 4)).transpose((1, 0, 2))
110 |         anchors = anchors.reshape((K * A, 4))
111 | 
112 |         # Transpose and reshape predicted bbox transformations to get them
113 |         # into the same order as the anchors:
114 |         #
115 |         # bbox deltas will be (1, 4 * A, H, W) format
116 |         # transpose to (1, H, W, 4 * A)
117 |         # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
118 |         # in slowest to fastest order
119 |         bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
120 | 
121 |         # Same story for the scores:
122 |         #
123 |         # scores are (1, A, H, W) format
124 |         # transpose to (1, H, W, A)
125 |         # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
126 |         scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
127 | 
128 |         # Convert anchors into proposals via bbox transformations
129 |         proposals = bbox_transform_inv(anchors, bbox_deltas)
130 | 
131 |         # 2. clip predicted boxes to image
132 |         proposals = clip_boxes(proposals, im_info[:2])
133 | 
134 |         # 3. remove predicted boxes with either height or width < threshold
135 |         # (NOTE: convert min_size to input image scale stored in im_info[2])
136 |         keep = _filter_boxes(proposals, min_size * im_info[2])
137 |         proposals = proposals[keep, :]
138 |         scores = scores[keep]
139 | 
140 |         # 4. sort all (proposal, score) pairs by score from highest to lowest
141 |         # 5. take top pre_nms_topN (e.g. 6000)
142 |         order = scores.ravel().argsort()[::-1]
143 |         if pre_nms_topN > 0:
144 |             order = order[:pre_nms_topN]
145 |         proposals = proposals[order, :]
146 |         scores = scores[order]
147 | 
148 |         # 6. apply nms (e.g. threshold = 0.7)
149 |         # 7. take after_nms_topN (e.g. 300)
150 |         # 8. return the top proposals (-> RoIs top)
151 |         keep = nms(np.hstack((proposals, scores)), nms_thresh)
152 |         if post_nms_topN > 0:
153 |             keep = keep[:post_nms_topN]
154 |         proposals = proposals[keep, :]
155 |         scores = scores[keep]
156 |         # print(scores.shape)  # leftover debug output, disabled
157 | 
158 |         # Output rois blob
159 |         # Our RPN implementation only supports a single input image, so all
160 |         # batch inds are 0
161 |         batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
162 |         blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
163 |         top[0].reshape(*(blob.shape))
164 |         top[0].data[...] = blob
165 | 
166 |         # [Optional] output scores blob
167 |         if len(top) > 1:
168 |             top[1].reshape(*(scores.shape))
169 |             top[1].data[...] = scores
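# A standalone sketch of the shift/broadcast anchor enumeration used in
# forward() above, on a toy 2x2 feature map with stride 16 and two made-up
# base anchors (the values are illustrative, not output of generate_anchors):
import numpy as np
feat_stride, height, width = 16, 2, 2
base = np.array([[-3., -3., 18., 18.],
                 [-7., -1., 22., 16.]])                    # A = 2
sx, sy = np.meshgrid(np.arange(width) * feat_stride,
                     np.arange(height) * feat_stride)
shifts = np.vstack((sx.ravel(), sy.ravel(),
                    sx.ravel(), sy.ravel())).transpose()   # K = 4 cells
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4), then flattens to (K*A, 4)
anchors = (base.reshape((1, -1, 4)) +
           shifts.reshape((-1, 1, 4))).reshape((-1, 4))
print(anchors.shape)  # (8, 4): each base anchor placed at each of the 4 cells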
170 | 
171 |     def backward(self, top, propagate_down, bottom):
172 |         """This layer does not propagate gradients."""
173 |         pass
174 | 
175 |     def reshape(self, bottom, top):
176 |         """Reshaping happens during the call to forward."""
177 |         pass
178 | 
179 | def _filter_boxes(boxes, min_size):
180 |     """Remove all boxes with any side smaller than min_size."""
181 |     ws = boxes[:, 2] - boxes[:, 0] + 1
182 |     hs = boxes[:, 3] - boxes[:, 1] + 1
183 |     keep = np.where((ws >= min_size) & (hs >= min_size))[0]
184 |     return keep
185 | 
--------------------------------------------------------------------------------
/lib/rpn_msr/proposal_layer_tf.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import yaml
10 | 
11 | from .generate_anchors import generate_anchors
12 | 
13 | # TODO: make fast_rcnn irrelevant
14 | # >>>> obsolete, because it depends on sth outside of this project
15 | from ..fast_rcnn.config import cfg
16 | from ..fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
17 | from ..fast_rcnn.nms_wrapper import nms
18 | # <<<< obsolete
19 | 
20 | 
21 | DEBUG = False
22 | """
23 | Outputs object detection proposals by applying estimated bounding-box
24 | transformations to a set of regular boxes (called "anchors").
25 | """
26 | def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16,], anchor_scales=[8, 16, 32]):
27 |     """
28 |     Parameters
29 |     ----------
30 |     rpn_cls_prob_reshape: (1, H, W, Ax2) outputs of RPN, prob of bg or fg
31 |                           NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
32 |     rpn_bbox_pred: (1, H, W, Ax4), regressed box deltas output by the RPN
33 |     im_info: a list of [image_height, image_width, scale_ratios]
34 |     cfg_key: 'TRAIN' or 'TEST'
35 |     _feat_stride: the downsampling ratio of the feature map relative to the input image
36 |     anchor_scales: the scales applied to the base anchor (the base anchor is [16, 16])
37 | 
38 |     Returns
39 |     -------
40 |     rpn_rois : (1 x H x W x A, 5) e.g.
[0, x1, y1, x2, y2] 41 | 42 | # Algorithm: 43 | # 44 | # for each (H, W) location i 45 | # generate A anchor boxes centered on cell i 46 | # apply predicted bbox deltas at cell i to each of the A anchors 47 | # clip predicted boxes to image 48 | # remove predicted boxes with either height or width < threshold 49 | # sort all (proposal, score) pairs by score from highest to lowest 50 | # take top pre_nms_topN proposals before NMS 51 | # apply NMS with threshold 0.7 to remaining proposals 52 | # take after_nms_topN proposals after NMS 53 | # return the top proposals (-> RoIs top, scores top) 54 | #layer_params = yaml.load(self.param_str_) 55 | 56 | """ 57 | _anchors = generate_anchors(scales=np.array(anchor_scales)) 58 | _num_anchors = _anchors.shape[0] 59 | # rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape,[0,3,1,2]) #-> (1 , 2xA, H , W) 60 | # rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,1,2]) # -> (1 , Ax4, H , W) 61 | 62 | #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1]) 63 | #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1]) 64 | im_info = im_info[0] 65 | 66 | cfg_key = cfg_key.decode('ascii') 67 | assert rpn_cls_prob_reshape.shape[0] == 1, \ 68 | 'Only single item batches are supported' 69 | # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 70 | #cfg_key = 'TEST' 71 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 72 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 73 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 74 | min_size = cfg[cfg_key].RPN_MIN_SIZE 75 | 76 | height, width = rpn_cls_prob_reshape.shape[1:3] 77 | 78 | # the first set of _num_anchors channels are bg probs 79 | # the second set are the fg probs, which we want 80 | # (1, H, W, A) 81 | scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1], 82 | [1, height, width, _num_anchors]) 83 | 84 | # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!! 85 | # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt, uncomment this line 86 | # scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:] 87 | 88 | bbox_deltas = rpn_bbox_pred 89 | #im_info = bottom[2].data[0, :] 90 | 91 | if DEBUG: 92 | print('im_size: ({}, {})'.format(im_info[0], im_info[1])) 93 | print('scale: {}'.format(im_info[2])) 94 | 95 | # 1. 
Generate proposals from bbox deltas and shifted anchors
96 |     if DEBUG:
97 |         print('score map size: {}'.format(scores.shape))
98 | 
99 |     # Enumerate all shifts
100 |     shift_x = np.arange(0, width) * _feat_stride
101 |     shift_y = np.arange(0, height) * _feat_stride
102 |     shift_x, shift_y = np.meshgrid(shift_x, shift_y)
103 |     shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
104 |                         shift_x.ravel(), shift_y.ravel())).transpose()
105 | 
106 |     # Enumerate all shifted anchors:
107 |     #
108 |     #   add A anchors (1, A, 4) to
109 |     #   cell K shifts (K, 1, 4) to get
110 |     #   shift anchors (K, A, 4)
111 |     #   reshape to (K*A, 4) shifted anchors
112 |     A = _num_anchors
113 |     K = shifts.shape[0]
114 |     anchors = _anchors.reshape((1, A, 4)) + \
115 |               shifts.reshape((1, K, 4)).transpose((1, 0, 2))
116 |     anchors = anchors.reshape((K * A, 4))
117 | 
118 |     # Reshape predicted bbox transformations to get them into the same
119 |     # order as the anchors:
120 |     #
121 |     # unlike the Caffe layer, bbox deltas here are already in
122 |     # (1, H, W, 4 * A) order (NHWC), so no transpose is needed:
123 |     # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
124 |     # in slowest to fastest order
125 |     bbox_deltas = bbox_deltas.reshape((-1, 4))  # (H * W * A, 4)
126 | 
127 |     # Same story for the scores:
128 |     #
129 |     # scores are already in (1, H, W, A) order (see above),
130 |     # so no transpose is needed:
131 |     # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
132 |     scores = scores.reshape((-1, 1))
133 | 
134 |     # Convert anchors into proposals via bbox transformations
135 |     proposals = bbox_transform_inv(anchors, bbox_deltas)
136 | 
137 |     # 2. clip predicted boxes to image
138 |     proposals = clip_boxes(proposals, im_info[:2])
139 | 
140 |     # 3. remove predicted boxes with either height or width < threshold
141 |     # (NOTE: convert min_size to input image scale stored in im_info[2])
142 |     keep = _filter_boxes(proposals, min_size * im_info[2])
143 |     proposals = proposals[keep, :]
144 |     scores = scores[keep]
145 | 
146 |     # # remove irregular boxes (too wide or too tall)
147 |     # keep = _filter_irregular_boxes(proposals)
148 |     # proposals = proposals[keep, :]
149 |     # scores = scores[keep]
150 | 
151 |     # 4. sort all (proposal, score) pairs by score from highest to lowest
152 |     # 5. take top pre_nms_topN (e.g. 6000)
153 |     order = scores.ravel().argsort()[::-1]
154 |     if pre_nms_topN > 0:
155 |         order = order[:pre_nms_topN]
156 |     proposals = proposals[order, :]
157 |     scores = scores[order]
158 | 
159 |     # 6. apply nms (e.g. threshold = 0.7)
160 |     # 7. take after_nms_topN (e.g. 300)
161 |     # 8. return the top proposals (-> RoIs top)
162 |     keep = nms(np.hstack((proposals, scores)), nms_thresh)
163 |     if post_nms_topN > 0:
164 |         keep = keep[:post_nms_topN]
165 |     proposals = proposals[keep, :]
166 |     scores = scores[keep]
167 |     # Output rois blob
168 |     # Our RPN implementation only supports a single input image, so all
169 |     # batch inds are 0
170 |     batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
171 |     blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
172 |     return blob
173 |     #top[0].reshape(*(blob.shape))
174 |     #top[0].data[...] = blob
175 | 
176 |     # [Optional] output scores blob
177 |     #if len(top) > 1:
178 |     #    top[1].reshape(*(scores.shape))
179 |     #    top[1].data[...] = scores
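# A hedged sketch (not taken from this repository) of how a NumPy routine
# like proposal_layer above is typically wired into a TF1 graph via
# tf.py_func; the placeholder shapes assume A = 9 anchors. Note that py_func
# passes string inputs as bytes, which is why cfg_key is decoded above.
import tensorflow as tf
rpn_cls_prob_reshape = tf.placeholder(tf.float32, [1, None, None, 9 * 2])
rpn_bbox_pred = tf.placeholder(tf.float32, [1, None, None, 9 * 4])
im_info = tf.placeholder(tf.float32, [None, 3])
rois = tf.py_func(proposal_layer,
                  [rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                   'TEST', [16], [8, 16, 32]],
                  tf.float32)
rois = tf.reshape(rois, [-1, 5], name='rpn_rois')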
180 | 
181 | def _filter_boxes(boxes, min_size):
182 |     """Remove all boxes with any side smaller than min_size."""
183 |     ws = boxes[:, 2] - boxes[:, 0] + 1
184 |     hs = boxes[:, 3] - boxes[:, 1] + 1
185 |     keep = np.where((ws >= min_size) & (hs >= min_size))[0]
186 |     return keep
187 | 
188 | def _filter_irregular_boxes(boxes, min_ratio=0.2, max_ratio=5):
189 |     """Remove all boxes with an aspect ratio outside [min_ratio, max_ratio]."""
190 |     ws = boxes[:, 2] - boxes[:, 0] + 1
191 |     hs = boxes[:, 3] - boxes[:, 1] + 1
192 |     rs = ws / hs
193 |     keep = np.where((rs <= max_ratio) & (rs >= min_ratio))[0]
194 |     return keep
195 | 
--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 | 
15 | def find_in_path(name, path):
16 |     "Find a file in a search path"
17 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 |     for dir in path.split(os.pathsep):
19 |         binpath = pjoin(dir, name)
20 |         if os.path.exists(binpath):
21 |             return os.path.abspath(binpath)
22 |     return None
23 | 
24 | def locate_cuda():
25 |     """Locate the CUDA environment on the system
26 | 
27 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 |     and values giving the absolute path to each directory.
29 | 
30 |     Starts by looking for the CUDAHOME env variable. If not found, everything
31 |     is based on finding 'nvcc' in the PATH.
32 |     """
33 | 
34 |     # first check if the CUDAHOME env variable is in use
35 |     if 'CUDAHOME' in os.environ:
36 |         home = os.environ['CUDAHOME']
37 |         nvcc = pjoin(home, 'bin', 'nvcc')
38 |     else:
39 |         # otherwise, search the PATH for NVCC
40 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 |         if nvcc is None:
43 |             raise EnvironmentError('The nvcc binary could not be '
44 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 |         home = os.path.dirname(os.path.dirname(nvcc))
46 | 
47 |     cudaconfig = {'home':home, 'nvcc':nvcc,
48 |                   'include': pjoin(home, 'include'),
49 |                   'lib64': pjoin(home, 'lib64')}
50 |     for k, v in list(cudaconfig.items()):
51 |         if not os.path.exists(v):
52 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 | 
54 |     return cudaconfig
55 | CUDA = locate_cuda()
56 | 
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 |     numpy_include = np.get_include()
60 | except AttributeError:
61 |     numpy_include = np.get_numpy_include()
62 | 
63 | def customize_compiler_for_nvcc(self):
64 |     """inject deep into distutils to customize how the dispatch
65 |     to gcc/nvcc works.
66 | 
67 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
68 |     injected in, and still have the right customizations (i.e.
69 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
70 |     the OO route, I have this. Note, it's kind of like a weird functional
71 |     subclassing going on."""
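    # To make the dispatch below concrete: every Extension in this file passes
    # extra_compile_args as a dict rather than the usual list, e.g. (values
    # copied from ext_modules below)
    #
    #     {'gcc':  ['-Wno-unused-function'],
    #      'nvcc': ['-arch=sm_35', '--ptxas-options=-v', '-c',
    #               '--compiler-options', "'-fPIC'"]}
    #
    # and the redefined _compile() picks the 'nvcc' entry for .cu sources and
    # the 'gcc' entry for everything else.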
72 | 
73 |     # tell the compiler it can process .cu files
74 |     self.src_extensions.append('.cu')
75 | 
76 |     # save references to the default compiler_so and _compile methods
77 |     default_compiler_so = self.compiler_so
78 |     super = self._compile
79 | 
80 |     # now redefine the _compile method. This gets executed for each
81 |     # object but distutils doesn't have the ability to change compilers
82 |     # based on source extension: we add it.
83 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 |         # print(extra_postargs)  # leftover debug output, disabled
85 |         if os.path.splitext(src)[1] == '.cu':
86 |             # use the cuda for .cu files
87 |             self.set_executable('compiler_so', CUDA['nvcc'])
88 |             # use only a subset of the extra_postargs, which are 1-1 translated
89 |             # from the extra_compile_args in the Extension class
90 |             postargs = extra_postargs['nvcc']
91 |         else:
92 |             postargs = extra_postargs['gcc']
93 | 
94 |         super(obj, src, ext, cc_args, postargs, pp_opts)
95 |         # reset the default compiler_so, which we might have changed for cuda
96 |         self.compiler_so = default_compiler_so
97 | 
98 |     # inject our redefined _compile method into the class
99 |     self._compile = _compile
100 | 
101 | 
102 | # run the customize_compiler
103 | class custom_build_ext(build_ext):
104 |     def build_extensions(self):
105 |         customize_compiler_for_nvcc(self.compiler)
106 |         build_ext.build_extensions(self)
107 | 
108 | ext_modules = [
109 |     Extension(
110 |         "utils.cython_bbox",
111 |         ["utils/bbox.pyx"],
112 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
113 |         include_dirs = [numpy_include]
114 |     ),
115 |     Extension(
116 |         "utils.cython_nms",
117 |         ["utils/nms.pyx"],
118 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
119 |         include_dirs = [numpy_include]
120 |     ),
121 |     Extension(
122 |         "nms.cpu_nms",
123 |         ["nms/cpu_nms.pyx"],
124 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
125 |         include_dirs = [numpy_include]
126 |     ),
127 |     Extension('nms.gpu_nms',
128 |         ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
129 |         library_dirs=[CUDA['lib64']],
130 |         libraries=['cudart'],
131 |         language='c++',
132 |         runtime_library_dirs=[CUDA['lib64']],
133 |         # this syntax is specific to this build system
134 |         # we're only going to use certain compiler args with nvcc and not with gcc
135 |         # the implementation of this trick is in customize_compiler_for_nvcc() above
136 |         extra_compile_args={'gcc': ["-Wno-unused-function"],
137 |                             'nvcc': ['-arch=sm_35',
138 |                                      '--ptxas-options=-v',
139 |                                      '-c',
140 |                                      '--compiler-options',
141 |                                      "'-fPIC'"]},
142 |         include_dirs = [numpy_include, CUDA['include']]
143 |     ),
144 |     Extension(
145 |         'pycocotools._mask',
146 |         sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
147 |         include_dirs = [numpy_include, 'pycocotools'],
148 |         extra_compile_args={
149 |             'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
150 |     ),
151 | ]
152 | 
153 | setup(
154 |     name='fast_rcnn',
155 |     ext_modules=ext_modules,
156 |     # inject our custom trigger
157 |     cmdclass={'build_ext': custom_build_ext},
158 | )
159 | 
--------------------------------------------------------------------------------
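A usage note on the build: with the custom build_ext above, the extensions are compiled in place with the standard distutils invocation "python setup.py build_ext --inplace", run from the lib/ directory. This assumes nvcc is discoverable (on PATH or via CUDAHOME) so that locate_cuda() succeeds, and the hard-coded -arch=sm_35 may need to match the local GPU's compute capability.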
/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import cython_nms
8 | from . import cython_bbox
9 | from . import boxes_grid
10 | from . import blob
11 | from . import nms
12 | from . import timer
--------------------------------------------------------------------------------
/lib/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 | 
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 | 
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 | 
15 | def bbox_overlaps(
16 |         np.ndarray[DTYPE_t, ndim=2] boxes,
17 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 |     """
19 |     Parameters
20 |     ----------
21 |     boxes: (N, 4) ndarray of float
22 |     query_boxes: (K, 4) ndarray of float
23 |     Returns
24 |     -------
25 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 |     """
27 |     cdef unsigned int N = boxes.shape[0]
28 |     cdef unsigned int K = query_boxes.shape[0]
29 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 |     cdef DTYPE_t iw, ih, box_area
31 |     cdef DTYPE_t ua
32 |     cdef unsigned int k, n
33 |     for k in range(K):
34 |         box_area = (
35 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 |         )
38 |         for n in range(N):
39 |             iw = (
40 |                 min(boxes[n, 2], query_boxes[k, 2]) -
41 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
42 |             )
43 |             if iw > 0:
44 |                 ih = (
45 |                     min(boxes[n, 3], query_boxes[k, 3]) -
46 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
47 |                 )
48 |                 if ih > 0:
49 |                     ua = float(
50 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
51 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
52 |                         box_area - iw * ih
53 |                     )
54 |                     overlaps[n, k] = iw * ih / ua
55 |     return overlaps
56 | 
57 | def bbox_intersections(
58 |         np.ndarray[DTYPE_t, ndim=2] boxes,
59 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
60 |     """
61 |     For each query box compute the intersection ratio covered by boxes
62 | 
63 |     Parameters
64 |     ----------
65 |     boxes: (N, 4) ndarray of float
66 |     query_boxes: (K, 4) ndarray of float
67 |     Returns
68 |     -------
69 |     intersec: (N, K) ndarray of intersection ratios between boxes and query_boxes
70 |     """
71 |     cdef unsigned int N = boxes.shape[0]
72 |     cdef unsigned int K = query_boxes.shape[0]
73 |     cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
74 |     cdef DTYPE_t iw, ih, box_area
75 |     cdef DTYPE_t ua
76 |     cdef unsigned int k, n
77 |     for k in range(K):
78 |         box_area = (
79 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
80 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
81 |         )
82 |         for n in range(N):
83 |             iw = (
84 |                 min(boxes[n, 2], query_boxes[k, 2]) -
85 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
86 |             )
87 |             if iw > 0:
88 |                 ih = (
89 |                     min(boxes[n, 3], query_boxes[k, 3]) -
90 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
91 |                 )
92 |                 if ih > 0:
93 |                     intersec[n, k] = iw * ih / box_area
94 |     return intersec
--------------------------------------------------------------------------------
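A small usage sketch of bbox_overlaps, assuming the extension has been built and is imported as utils.cython_bbox (as wired up in setup.py and utils/__init__.py above); the box coordinates are made up:

import numpy as np
from utils.cython_bbox import bbox_overlaps

boxes = np.array([[0., 0., 9., 9.],
                  [5., 5., 14., 14.]])  # two 10x10 boxes (inclusive coords)
query = np.array([[0., 0., 9., 9.]])
# row 0: identical boxes -> IoU 1.0
# row 1: intersection 5*5 = 25, union 100 + 100 - 25 = 175 -> ~0.143
print(bbox_overlaps(boxes, query))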
/lib/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | """Blob helper functions."""
9 | 
10 | import numpy as np
11 | import cv2
12 | from ..fast_rcnn.config import cfg
13 | 
14 | def im_list_to_blob(ims):
15 |     """Convert a list of images into a network input.
16 | 
17 |     Assumes images are already prepared (means subtracted, BGR order, ...).
18 |     """
19 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
20 |     num_images = len(ims)
21 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
22 |                     dtype=np.float32)
23 |     for i in range(num_images):
24 |         im = ims[i]
25 |         blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
26 | 
27 |     return blob
28 | 
29 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
30 |     """Mean subtract and scale an image for use in a blob."""
31 |     im = im.astype(np.float32, copy=False)
32 |     im -= pixel_means
33 |     im_shape = im.shape
34 |     im_size_min = np.min(im_shape[0:2])
35 |     im_size_max = np.max(im_shape[0:2])
36 |     im_scale = float(target_size) / float(im_size_min)
37 |     # Prevent the biggest axis from being more than MAX_SIZE
38 |     if np.round(im_scale * im_size_max) > max_size:
39 |         im_scale = float(max_size) / float(im_size_max)
40 |     if cfg.TRAIN.RANDOM_DOWNSAMPLE:
41 |         r = 0.6 + np.random.rand() * 0.4
42 |         im_scale *= r
43 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
44 |                     interpolation=cv2.INTER_LINEAR)
45 | 
46 |     return im, im_scale
47 | 
--------------------------------------------------------------------------------
/lib/utils/boxes_grid.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Subcategory CNN
3 | # Copyright (c) 2015 CVGL Stanford
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Yu Xiang
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | import math
10 | # TODO: make fast_rcnn irrelevant
11 | # >>>> obsolete, because it depends on sth outside of this project
12 | from ..fast_rcnn.config import cfg
13 | # <<<< obsolete
14 | 
15 | def get_boxes_grid(image_height, image_width):
16 |     """
17 |     Return the boxes on the image grid.
18 |     Call this function when cfg.IS_MULTISCALE is True; otherwise, call rdl_roidb.prepare_roidb(imdb) instead.
19 |     """
20 | 
21 |     # fixed a bug: changed cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE,
22 |     # because what is needed here is a ratio around 1.0, not the actual size.
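    # Worked example for the VGGnet branch below, assuming image_height = 600
    # and max(cfg.TRAIN.SCALES_BASE) = 1.0 (illustrative values):
    #   600 -> floor(300 + 0.5) = 300 -> 150 -> 75 -> floor(37.5 + 0.5) = 38
    # i.e. the four halvings approximate VGG's overall stride of 16
    # (600 / 16 = 37.5).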
23 |     # height and width of the feature map
24 |     if cfg.NET_NAME == 'CaffeNet':
25 |         height = np.floor((image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1)
26 |         height = np.floor((height - 1) / 2.0 + 1 + 0.5)
27 |         height = np.floor((height - 1) / 2.0 + 1 + 0.5)
28 | 
29 |         width = np.floor((image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1)
30 |         width = np.floor((width - 1) / 2.0 + 1 + 0.5)
31 |         width = np.floor((width - 1) / 2.0 + 1 + 0.5)
32 |     elif cfg.NET_NAME == 'VGGnet':
33 |         height = np.floor(image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5)
34 |         height = np.floor(height / 2.0 + 0.5)
35 |         height = np.floor(height / 2.0 + 0.5)
36 |         height = np.floor(height / 2.0 + 0.5)
37 | 
38 |         width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5)
39 |         width = np.floor(width / 2.0 + 0.5)
40 |         width = np.floor(width / 2.0 + 0.5)
41 |         width = np.floor(width / 2.0 + 0.5)
42 |     else:
43 |         assert False, 'The network architecture is not supported in utils.get_boxes_grid!'
44 | 
45 |     # compute the grid box centers
46 |     h = np.arange(height)
47 |     w = np.arange(width)
48 |     y, x = np.meshgrid(h, w, indexing='ij')
49 |     centers = np.dstack((x, y))
50 |     centers = np.reshape(centers, (-1, 2))
51 |     num = centers.shape[0]
52 | 
53 |     # compute width and height of grid box
54 |     area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE
55 |     aspect = cfg.TRAIN.ASPECTS  # height / width
56 |     num_aspect = len(aspect)
57 |     widths = np.zeros((1, num_aspect), dtype=np.float32)
58 |     heights = np.zeros((1, num_aspect), dtype=np.float32)
59 |     for i in range(num_aspect):
60 |         widths[0,i] = math.sqrt(area / aspect[i])
61 |         heights[0,i] = widths[0,i] * aspect[i]
62 | 
63 |     # construct grid boxes
64 |     centers = np.repeat(centers, num_aspect, axis=0)
65 |     widths = np.tile(widths, num).transpose()
66 |     heights = np.tile(heights, num).transpose()
67 | 
68 |     x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5
69 |     x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5
70 |     y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5
71 |     y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5
72 | 
73 |     boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE
74 | 
75 |     return boxes_grid, centers[:,0], centers[:,1]
76 | 
--------------------------------------------------------------------------------
/lib/utils/nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | 
10 | def nms(dets, thresh):
11 |     x1 = dets[:, 0]
12 |     y1 = dets[:, 1]
13 |     x2 = dets[:, 2]
14 |     y2 = dets[:, 3]
15 |     scores = dets[:, 4]
16 | 
17 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
18 |     order = scores.argsort()[::-1]
19 | 
20 |     keep = []
21 |     while order.size > 0:
22 |         i = order[0]
23 |         keep.append(i)
24 |         xx1 = np.maximum(x1[i], x1[order[1:]])
25 |         yy1 = np.maximum(y1[i], y1[order[1:]])
26 |         xx2 = np.minimum(x2[i], x2[order[1:]])
27 |         yy2 = np.minimum(y2[i], y2[order[1:]])
28 | 
29 |         w = np.maximum(0.0, xx2 - xx1 + 1)
30 |         h = np.maximum(0.0, yy2 - yy1 + 1)
31 |         inter = w * h
32 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
33 | 
34 |         inds = np.where(ovr <= thresh)[0]
35 |         order = order[inds + 1]
36 | 
37 |     return keep
38 | 
--------------------------------------------------------------------------------
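For orientation, a minimal usage sketch of the pure-Python nms above (the detections are made up; each row is x1, y1, x2, y2, score, and lib/ is assumed to be the working directory):

import numpy as np
from utils.nms import nms

dets = np.array([[10., 10., 50., 50., 0.9],
                 [12., 12., 52., 52., 0.8],     # IoU ~0.83 with the first box
                 [100., 100., 150., 150., 0.7]])
print(nms(dets, 0.5))  # [0, 2]: the overlapping lower-scoring box is suppressed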
/lib/utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = 
y2[i]
105 |         iarea = areas[i]
106 |         for _j in range(_i + 1, ndets):
107 |             j = order[_j]
108 |             if suppressed[j] == 1:
109 |                 continue
110 |             xx1 = max(ix1, x1[j])
111 |             yy1 = max(iy1, y1[j])
112 |             xx2 = min(ix2, x2[j])
113 |             yy2 = min(iy2, y2[j])
114 |             w = max(0.0, xx2 - xx1 + 1)
115 |             h = max(0.0, yy2 - yy1 + 1)
116 |             inter = w * h
117 |             ovr = inter / (iarea + areas[j] - inter)
118 |             ovr1 = inter / iarea
119 |             ovr2 = inter / areas[j]
120 |             if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 |                 suppressed[j] = 1
122 | 
123 |     return keep
124 | 
--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
18 | 
19 |     def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 |         # does not normalize for multithreading
22 |         self.start_time = time.time()
23 | 
24 |     def toc(self, average=True):
25 |         self.diff = time.time() - self.start_time
26 |         self.total_time += self.diff
27 |         self.calls += 1
28 |         self.average_time = self.total_time / self.calls
29 |         if average:
30 |             return self.average_time
31 |         else:
32 |             return self.diff
33 | 
--------------------------------------------------------------------------------
/test_notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {
7 |     "collapsed": false
8 |    },
9 |    "outputs": [],
10 |    "source": [
11 |     "#!/usr/bin/env python\n",
12 |     "\n",
13 |     "# --------------------------------------------------------\n",
14 |     "# Fast R-CNN\n",
15 |     "# Copyright (c) 2015 Microsoft\n",
16 |     "# Licensed under The MIT License [see LICENSE for details]\n",
17 |     "# Written by Ross Girshick\n",
18 |     "# --------------------------------------------------------\n",
19 |     "\n",
20 |     "\"\"\"Test a Fast R-CNN network on an image database.\"\"\"\n",
21 |     "%matplotlib inline\n",
22 |     "import sys,os\n",
23 |     "\n",
24 |     "from lib.fast_rcnn.test import test_net, load_test_net\n",
25 |     "from lib.fast_rcnn.config import cfg, cfg_from_file\n",
26 |     "from lib.datasets.factory import get_imdb\n",
27 |     "from lib.networks.factory import get_network\n",
28 |     "import argparse\n",
29 |     "import pprint\n",
30 |     "import time\n",
31 |     "import tensorflow as tf\n",
32 |     "from easydict import EasyDict as edict\n",
33 |     "\n",
34 |     "if __name__ == '__main__':\n",
35 |     "#    args = parse_args()\n",
36 |     "    args=edict()\n",
37 |     "    args.cfg_file=\"./experiments/cfgs/faster_rcnn_end2end_resnet.yml\"\n",
38 |     "    args.model=\"./output/faster_rcnn_end2end_resnet_voc/voc_2007_trainval\"\n",
39 |     "    args.gpu_id=0\n",
40 |     "    args.wait=True\n",
41 |     "    args.imdb_name=\"voc_2007_test\"\n",
42 |     "    args.comp_mode=\"store_true\"\n",
43 |     "    args.network_name=\"Resnet50_test\"\n",
44 |     "    print('Called with args:')\n",
45 |     "    print(args)\n",
46 |     "\n",
47 |     "    if args.cfg_file is not None:\n",
48 |     "        cfg_from_file(args.cfg_file)\n",
49 |     "\n",
50 |     "    print('Using config:')\n",
51 |     "    pprint.pprint(cfg)\n",
52 |     "\n",
53 |     "    while not os.path.exists(args.model) and args.wait:\n",
54 |     "        print(('Waiting for {} to exist...'.format(args.model)))\n",
55 |     "        time.sleep(1000)\n",
56 |     "\n",
57 |     "    weights_filename = os.path.splitext(os.path.basename(args.model))[0]\n",
58 |     "\n",
59 |     "    imdb = get_imdb(args.imdb_name)\n",
60 |     "    imdb.competition_mode(args.comp_mode)\n",
61 |     "\n",
62 |     "    device_name = '/gpu:{:d}'.format(args.gpu_id)\n",
63 |     "    print(device_name)\n",
64 |     "    with tf.device(device_name):\n",
65 |     "        network = get_network(args.network_name)\n",
66 |     "        print(('Use network `{:s}` for testing'.format(args.network_name)))\n",
67 |     "\n",
68 |     "    cfg.GPU_ID = args.gpu_id\n",
69 |     "    # import os\n",
70 |     "    # os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\" # see issue #152\n",
71 |     "    # os.environ[\"CUDA_VISIBLE_DEVICES\"]=str(args.gpu_id)\n",
72 |     "    # start a session\n",
73 |     "    saver = tf.train.Saver()\n",
74 |     "    c = tf.ConfigProto(allow_soft_placement=True)\n",
75 |     "    c.gpu_options.visible_device_list=str(args.gpu_id)\n",
76 |     "    sess = tf.Session(config=c)\n",
77 |     "    saver.restore(sess, tf.train.latest_checkpoint(args.model))\n",
78 |     "    print((('Loading model weights from {:s}').format(args.model)))\n",
79 |     "\n",
80 |     "    test_net(sess, network, imdb, weights_filename, vis=True, thresh=0.7)\n",
81 |     "# load_test_net lets you work with a previously generated test result\n",
82 |     "# load_test_net(sess, network, imdb, weights_filename)\n"
83 |    ]
84 |   }
85 |  ],
86 |  "metadata": {
87 |   "kernelspec": {
88 |    "display_name": "Python [default]",
89 |    "language": "python",
90 |    "name": "python3"
91 |   },
92 |   "language_info": {
93 |    "codemirror_mode": {
94 |     "name": "ipython",
95 |     "version": 3
96 |    },
97 |    "file_extension": ".py",
98 |    "mimetype": "text/x-python",
99 |    "name": "python",
100 |    "nbconvert_exporter": "python",
101 |    "pygments_lexer": "ipython3",
102 |    "version": "3.5.2"
103 |   }
104 |  },
105 |  "nbformat": 4,
106 |  "nbformat_minor": 2
107 | }
108 | 
--------------------------------------------------------------------------------