├── LICENSE ├── README.md ├── data └── scripts │ └── fetch_faster_rcnn_models.sh ├── experiments ├── cfgs │ ├── mobile.yml │ ├── res101-lg.yml │ ├── res101.yml │ ├── res50.yml │ └── vgg16.yml └── scripts │ ├── convert_vgg16.sh │ ├── test.sh │ └── train.sh ├── lib ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── coco.cpython-36.pyc │ │ ├── dis_eval.cpython-36.pyc │ │ ├── ds_utils.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ ├── imdb.cpython-36.pyc │ │ ├── pascal_voc.cpython-36.pyc │ │ └── voc_eval.cpython-36.pyc │ ├── coco.py │ ├── dis_eval.py │ ├── dis_eval.pyc │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── layer_utils │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── choose_pseudo_gt.py │ ├── generate_anchors.py │ ├── generate_pseudo_gtbox.py │ ├── loss_function.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_top_layer.py │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── build.py │ │ ├── roi_pool.py │ │ ├── roi_pool_py.py │ │ └── src │ │ │ ├── cuda │ │ │ ├── roi_pooling_kernel.cu │ │ │ ├── roi_pooling_kernel.cu.o │ │ │ └── roi_pooling_kernel.h │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ └── roi_pooling_cuda.h │ └── snippets.py ├── model │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── bbox_transform.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ ├── nms_wrapper.cpython-36.pyc │ │ ├── test.cpython-36.pyc │ │ ├── test_train.cpython-36.pyc │ │ └── train_val.cpython-36.pyc │ ├── bbox_transform.py │ ├── config.py │ ├── config.pyc │ ├── nms_wrapper.py │ ├── test.py │ ├── test.pyc │ ├── test_train.py │ ├── train_val.py │ └── train_val.pyc ├── nets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── mobilenet_v1.cpython-36.pyc │ │ ├── network.cpython-36.pyc │ │ ├── resnet_v1.cpython-36.pyc │ │ └── vgg16.cpython-36.pyc │ ├── mobilenet_v1.py │ ├── network.py │ ├── resnet_v1.py │ └── vgg16.py ├── nms │ ├── __init__.py │ ├── build.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── roi_data_layer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── layer.cpython-36.pyc │ │ ├── minibatch.cpython-36.pyc │ │ └── roidb.cpython-36.pyc │ ├── layer.py │ ├── minibatch.py │ └── roidb.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── bbox.cpython-36.pyc │ ├── blob.cpython-36.pyc │ ├── timer.cpython-36.pyc │ └── visualization.cpython-36.pyc │ ├── bbox.py │ ├── blob.py │ ├── timer.py │ └── visualization.py └── tools ├── _init_paths.py ├── _init_paths.pyc ├── convert_from_tensorflow_mobile.py ├── convert_from_tensorflow_vgg.py ├── demo.py ├── reval.py ├── reval_discovery.py ├── show_boxes_results.py ├── test_net.py └── trainval_net.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xinlei Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Collaborative Learning for Weakly Supervised Object Detection 2 | 3 | If you use this code in your research, please cite: 4 | ``` 5 | @inproceedings{ijcai2018-135, 6 | title = {Collaborative Learning for Weakly Supervised Object Detection}, 7 | author = {Jiajie Wang and Jiangchao Yao and Ya Zhang and Rui Zhang}, 8 | booktitle = {Proceedings of the Twenty-Seventh International Joint Conference on 9 | Artificial Intelligence, {IJCAI-18}}, 10 | publisher = {International Joint Conferences on Artificial Intelligence Organization}, 11 | pages = {971--977}, 12 | year = {2018}, 13 | month = {7}, 14 | doi = {10.24963/ijcai.2018/135}, 15 | url = {https://doi.org/10.24963/ijcai.2018/135}, 16 | } 17 | ``` 18 | 19 | ### Prerequisites 20 | - A basic PyTorch installation. The version used is **0.2**. If you are using the older version **0.1.12**, you can check out the 0.1.12 branch. 21 | - Python packages you might not have: `cffi`, `opencv-python`, `easydict` (similar to [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn)). For `easydict`, make sure you have the right version; Xinlei uses 1.6. 22 | - [tensorboard-pytorch](https://github.com/lanpa/tensorboard-pytorch) to visualize the training and validation curves. Please build it from source to use the latest tensorflow-tensorboard. 23 | 24 | ### Installation 25 | 1. Clone the repository 26 | ```Shell 27 | git clone https://github.com/ruotianluo/pytorch-faster-rcnn.git 28 | ``` 29 | 30 | 2. Choose the `-arch` option that matches your GPU for steps 3 and 4. 31 | 32 | | GPU model | Architecture | 33 | | ------------- | ------------- | 34 | | TitanX (Maxwell/Pascal) | sm_52 | 35 | | GTX 960M | sm_50 | 36 | | GTX 1080 (Ti) | sm_61 | 37 | | Grid K520 (AWS g2.2xlarge) | sm_30 | 38 | | Tesla K80 (AWS p2.xlarge) | sm_37 | 39 | 40 | **Note**: You are welcome to contribute your settings if you have made the code work properly on other GPUs. 41 | 42 | 43 | 3. Build the RoiPooling module 44 | ``` 45 | cd pytorch-faster-rcnn/lib/layer_utils/roi_pooling/src/cuda 46 | echo "Compiling roi_pooling kernels by nvcc..." 47 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 48 | cd ../../ 49 | python build.py 50 | cd ../../../ 51 | ``` 52 | 53 | 54 | 4. Build the NMS module 55 | ``` 56 | cd lib/nms/src/cuda 57 | echo "Compiling nms kernels by nvcc..."
58 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 59 | cd ../../ 60 | python build.py 61 | cd ../../ 62 | ``` 63 | 64 | 65 | ### Setup data 66 | Please follow the instructions of py-faster-rcnn [here](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to set up VOC. The steps involve downloading data and optionally creating soft links in the ``data`` folder. Since Faster R-CNN does not rely on pre-computed proposals, it is safe to ignore the steps that set up proposals. 67 | 68 | If you find it useful, the ``data/cache`` folder created on Xinlei's side is also shared [here](http://ladoga.graphics.cs.cmu.edu/xinleic/tf-faster-rcnn/cache.tgz). 69 | 70 | 71 | ### Train your own model 72 | 1. Download pre-trained models and weights. For the pretrained [wsddn](https://www.robots.ox.ac.uk/~vgg/publications/2016/Bilen16/bilen16.pdf) model, you can find the download link [here](https://goo.gl/j7tp7N). Other pre-trained models, such as the VGG16 and Resnet V1 models, are provided by [pytorch-vgg](https://github.com/jcjohnson/pytorch-vgg.git) and [pytorch-resnet](https://github.com/ruotianluo/pytorch-resnet) (the ones with caffe in the name). You can download them to the ``data/imagenet_weights`` folder. For example, for the VGG16 model, you can set it up like: 73 | ```Shell 74 | mkdir -p data/imagenet_weights 75 | cd data/imagenet_weights 76 | python # open python in terminal and run the following Python code 77 | ``` 78 | ```Python 79 | import torch 80 | from torch.utils.model_zoo import load_url 81 | from torchvision import models 82 | 83 | sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg16-00b39a1b.pth") 84 | sd['classifier.0.weight'] = sd['classifier.1.weight'] 85 | sd['classifier.0.bias'] = sd['classifier.1.bias'] 86 | del sd['classifier.1.weight'] 87 | del sd['classifier.1.bias'] 88 | 89 | sd['classifier.3.weight'] = sd['classifier.4.weight'] 90 | sd['classifier.3.bias'] = sd['classifier.4.bias'] 91 | del sd['classifier.4.weight'] 92 | del sd['classifier.4.bias'] 93 | 94 | torch.save(sd, "vgg16.pth") 95 | ``` 96 | ```Shell 97 | cd ../.. 98 | ``` 99 | For Resnet101, you can set it up like: 100 | ```Shell 101 | mkdir -p data/imagenet_weights 102 | cd data/imagenet_weights 103 | # download from my gdrive (link in pytorch-resnet) 104 | mv resnet101-caffe.pth res101.pth 105 | cd ../.. 106 | ``` 107 | 108 | 2. Train (this also runs testing and evaluation) 109 | ```Shell 110 | ./experiments/scripts/train.sh [GPU_ID] [DATASET] [NET] [WSDDN_PRETRAINED] 111 | # Examples: 112 | ./experiments/scripts/train.sh 0 pascal_voc vgg16 path_to_wsddn_pretrained_model 113 | ``` 114 | 115 | 3. Visualization with Tensorboard 116 | ```Shell 117 | tensorboard --logdir=tensorboard/vgg16/voc_2007_trainval/ --port=7001 & 118 | ``` 119 | 120 | 4. 
Test and evaluate 121 | ```Shell 122 | ./experiments/scripts/test.sh [GPU_ID] [DATASET] [NET] [WSDDN_PRETRAINED] 123 | # Examples: 124 | ./experiments/scripts/test.sh 0 pascal_voc vgg16 path_to_wsddn_pretrained_model 125 | ``` 126 | 127 | By default, trained networks are saved under: 128 | 129 | ``` 130 | output/[NET]/[DATASET]/default/ 131 | ``` 132 | 133 | Test outputs are saved under: 134 | 135 | ``` 136 | output/[NET]/[DATASET]/default/[SNAPSHOT]/ 137 | ``` 138 | 139 | Tensorboard information for training and validation is saved under: 140 | 141 | ``` 142 | tensorboard/[NET]/[DATASET]/default/ 143 | tensorboard/[NET]/[DATASET]/default_val/ 144 | ``` 145 | 146 | ### Our results can be found [here](https://goo.gl/gP1yLd) 147 | -------------------------------------------------------------------------------- /data/scripts/fetch_faster_rcnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | NET=res101 7 | FILE=voc_0712_80k-110k.tgz 8 | # replace it with gs11655.sp.cs.cmu.edu if ladoga.graphics.cs.cmu.edu does not work 9 | URL=http://ladoga.graphics.cs.cmu.edu/xinleic/tf-faster-rcnn/$NET/$FILE 10 | CHECKSUM=cb32e9df553153d311cc5095b2f8c340 11 | 12 | if [ -f $FILE ]; then 13 | echo "File already exists. Checking md5..." 14 | os=`uname -s` 15 | if [ "$os" = "Linux" ]; then 16 | checksum=`md5sum $FILE | awk '{ print $1 }'` 17 | elif [ "$os" = "Darwin" ]; then 18 | checksum=`cat $FILE | md5` 19 | fi 20 | if [ "$checksum" = "$CHECKSUM" ]; then 21 | echo "Checksum is correct. No need to download." 22 | exit 0 23 | else 24 | echo "Checksum is incorrect. Need to download again." 25 | fi 26 | fi 27 | 28 | echo "Downloading Resnet 101 Faster R-CNN models pre-trained on VOC 07+12 (340M)..." 29 | 30 | wget $URL -O $FILE 31 | 32 | echo "Unzipping..." 33 | 34 | tar zxvf $FILE 35 | 36 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
37 | -------------------------------------------------------------------------------- /experiments/cfgs/mobile.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: mobile 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: mobile_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res101-lg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101-lg 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | SCALES: [800] 15 | MAX_SIZE: 1333 16 | TEST: 17 | HAS_RPN: True 18 | SCALES: [800] 19 | MAX_SIZE: 1333 20 | RPN_POST_NMS_TOP_N: 1000 21 | POOLING_MODE: crop 22 | ANCHOR_SCALES: [2,4,8,16,32] 23 | -------------------------------------------------------------------------------- /experiments/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: selective_search 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | SNAPSHOT_PREFIX: vgg16_faster_rcnn 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: spp 16 | -------------------------------------------------------------------------------- /experiments/scripts/convert_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=vgg16 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:2:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | 
ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | set +x 46 | NET_FINAL=${NET}_faster_rcnn_iter_${ITERS} 47 | set -x 48 | 49 | if [ ! -f ${NET_FINAL}.index ]; then 50 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 51 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 52 | --snapshot ${NET_FINAL} \ 53 | --imdb ${TRAIN_IMDB} \ 54 | --iters ${ITERS} \ 55 | --cfg experiments/cfgs/${NET}.yml \ 56 | --tag ${EXTRA_ARGS_SLUG} \ 57 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 58 | else 59 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 60 | --snapshot ${NET_FINAL} \ 61 | --imdb ${TRAIN_IMDB} \ 62 | --iters ${ITERS} \ 63 | --cfg experiments/cfgs/${NET}.yml \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 65 | fi 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /experiments/scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | unset PYTHONPATH 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | WSDDN=$4 12 | 13 | array=( $@ ) 14 | len=${#array[@]} 15 | EXTRA_ARGS=${array[@]:4:$len} 16 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 17 | 18 | case ${DATASET} in 19 | pascal_voc) 20 | TRAIN_IMDB="voc_2007_trainval" 21 | TEST_IMDB="voc_2007_test" 22 | ITERS=200000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | ITERS=110000 30 | ANCHORS="[8,16,32]" 31 | RATIOS="[0.5,1,2]" 32 | ;; 33 | coco) 34 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 35 | TEST_IMDB="coco_2014_minival" 36 | ITERS=490000 37 | ANCHORS="[4,8,16,32]" 38 | RATIOS="[0.5,1,2]" 39 | ;; 40 | *) 41 | echo "No dataset given" 42 | exit 43 | ;; 44 | esac 45 | 46 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 47 | exec &> >(tee -a "$LOG") 48 | echo Logging output to "$LOG" 49 | 50 | set +x 51 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 52 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 53 | else 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | fi 56 | set -x 57 | 58 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 59 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 60 | --imdb ${TEST_IMDB} \ 61 | --model ${NET_FINAL} \ 62 | --cfg experiments/cfgs/${NET}.yml \ 63 | --tag ${EXTRA_ARGS_SLUG} \ 64 | --net ${NET} \ 65 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 66 | ${EXTRA_ARGS} 67 | else 68 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 69 | --imdb ${TEST_IMDB} \ 70 | --model ${NET_FINAL} \ 71 | --cfg experiments/cfgs/${NET}.yml \ 72 | --net ${NET} \ 73 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 74 | ${EXTRA_ARGS} 75 | fi 76 | 77 | -------------------------------------------------------------------------------- /experiments/scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | unset PYTHONPATH 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | WSDDN=$4 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:4:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | STEPSIZE="[50000]" 22 | ITERS=200000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | STEPSIZE="[80000]" 30 | ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[350000]" 38 | ITERS=490000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | *) 43 | echo "No dataset given" 44 | exit 45 | ;; 46 | esac 47 | 48 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 49 | exec &> >(tee -a "$LOG") 50 | echo Logging output to "$LOG" 51 | 52 | set +x 53 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | else 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | fi 58 | set -x 59 | 60 | if [ ! -f ${NET_FINAL}.index ]; then 61 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 62 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 63 | --weight data/imagenet_weights/${NET}.pth \ 64 | --wsddn data/wsddn_weights/${WSDDN}.pth \ 65 | --imdb ${TRAIN_IMDB} \ 66 | --imdbval ${TEST_IMDB} \ 67 | --iters ${ITERS} \ 68 | --cfg experiments/cfgs/${NET}.yml \ 69 | --tag ${EXTRA_ARGS_SLUG} \ 70 | --net ${NET} \ 71 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 72 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 73 | else 74 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 75 | --weight data/imagenet_weights/${NET}.pth \ 76 | --wsddn data/wsddn_weights/${WSDDN}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | ./experiments/scripts/test.sh $@ 88 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/coco.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/dis_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/dis_eval.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/ds_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/ds_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/imdb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/imdb.cpython-36.pyc 
-------------------------------------------------------------------------------- /lib/datasets/__pycache__/pascal_voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/voc_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/voc_eval.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/dis_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Online Instance Classifier Refinement 3 | # Copyright (c) 2016 HUST MCLAB 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Peng Tang 6 | # -------------------------------------------------------- 7 | 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import pickle 11 | import numpy as np 12 | 13 | def parse_rec(filename): 14 | """ Parse a PASCAL VOC xml file """ 15 | tree = ET.parse(filename) 16 | objects = [] 17 | for obj in tree.findall('object'): 18 | obj_struct = {} 19 | obj_struct['name'] = obj.find('name').text 20 | obj_struct['pose'] = obj.find('pose').text 21 | obj_struct['truncated'] = int(obj.find('truncated').text) 22 | obj_struct['difficult'] = int(obj.find('difficult').text) 23 | bbox = obj.find('bndbox') 24 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 25 | int(bbox.find('ymin').text), 26 | int(bbox.find('xmax').text), 27 | int(bbox.find('ymax').text)] 28 | objects.append(obj_struct) 29 | 30 | return objects 31 | 32 | def dis_eval(detpath, 33 | annopath, 34 | imagesetfile, 35 | classname, 36 | cachedir, 37 | ovthresh=0.5): 38 | """rec, prec, ap = voc_eval(detpath, 39 | annopath, 40 | imagesetfile, 41 | classname, 42 | [ovthresh]) 43 | 44 | Top level function that does the PASCAL VOC evaluation. 45 | 46 | detpath: Path to detections 47 | detpath.format(classname) should produce the detection results file. 48 | annopath: Path to annotations 49 | annopath.format(imagename) should be the xml annotations file. 50 | imagesetfile: Text file containing the list of images, one image per line. 
51 | classname: Category name (duh) 52 | cachedir: Directory for caching the annotations 53 | [ovthresh]: Overlap threshold (default = 0.5) 54 | """ 55 | # assumes detections are in detpath.format(classname) 56 | # assumes annotations are in annopath.format(imagename) 57 | # assumes imagesetfile is a text file with each line an image name 58 | # cachedir caches the annotations in a pickle file 59 | 60 | # first load gt 61 | if not os.path.isdir(cachedir): 62 | os.mkdir(cachedir) 63 | cachefile = os.path.join(cachedir, 'annots.pkl') 64 | # read list of images 65 | with open(imagesetfile, 'r') as f: 66 | lines = f.readlines() 67 | imagenames = [x.strip() for x in lines] 68 | 69 | if not os.path.isfile(cachefile): 70 | # load annots 71 | recs = {} 72 | for i, imagename in enumerate(imagenames): 73 | recs[imagename] = parse_rec(annopath.format(imagename)) 74 | if i % 100 == 0: 75 | print('Reading annotation for {:d}/{:d}'.format( 76 | i + 1, len(imagenames))) 77 | # save 78 | print('Saving cached annotations to {:s}'.format(cachefile)) 79 | with open(cachefile, 'wb') as f: 80 | pickle.dump(recs, f) 81 | else: 82 | # load 83 | with open(cachefile, 'rb') as f: 84 | recs = pickle.load(f) 85 | 86 | # extract gt objects for this class 87 | class_recs = {} 88 | nimgs = 0.0 89 | for imagename in imagenames: 90 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 91 | bbox = np.array([x['bbox'] for x in R]) 92 | det = [False] * len(R) 93 | nimgs = nimgs + float(bbox.size > 0) 94 | class_recs[imagename] = {'bbox': bbox, 95 | 'det': det} 96 | 97 | # read dets 98 | detfile = detpath.format(classname) 99 | with open(detfile, 'r') as f: 100 | lines = f.readlines() 101 | 102 | splitlines = [x.strip().split(' ') for x in lines] 103 | image_ids = [x[0] for x in splitlines] 104 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 105 | 106 | # go down dets and mark TPs and FPs 107 | nd = len(image_ids) 108 | tp = np.zeros(nd) 109 | for d in range(nd): 110 | R = class_recs[image_ids[d]] 111 | bb = BB[d, :].astype(float) 112 | ovmax = -np.inf 113 | BBGT = R['bbox'].astype(float) 114 | 115 | if BBGT.size > 0: 116 | # compute overlaps 117 | # intersection 118 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 119 | iymin = np.maximum(BBGT[:, 1], bb[1]) 120 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 121 | iymax = np.minimum(BBGT[:, 3], bb[3]) 122 | iw = np.maximum(ixmax - ixmin + 1., 0.) 123 | ih = np.maximum(iymax - iymin + 1., 0.) 124 | inters = iw * ih 125 | 126 | # union 127 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 128 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 129 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 130 | 131 | overlaps = inters / uni 132 | ovmax = np.max(overlaps) 133 | jmax = np.argmax(overlaps) 134 | 135 | if ovmax > ovthresh: 136 | tp[d] = 1. 
137 | continue 138 | 139 | return np.sum(tp) / nimgs -------------------------------------------------------------------------------- /lib/datasets/dis_eval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/dis_eval.pyc -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | 17 | import numpy as np 18 | 19 | # Set up voc__ 20 | for year in ['2007', '2012']: 21 | for split in ['train', 'val', 'trainval', 'test']: 22 | name = 'voc_{}_{}'.format(year, split) 23 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 24 | 25 | for year in ['2007', '2012']: 26 | for split in ['train', 'val', 'trainval', 'test']: 27 | name = 'voc_{}_{}_diff'.format(year, split) 28 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year, use_diff=True)) 29 | 30 | # Set up coco_2014_ 31 | for year in ['2014']: 32 | for split in ['train', 'val', 'minival', 
'valminusminival', 'trainval']: 33 | name = 'coco_{}_{}'.format(year, split) 34 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 35 | 36 | # Set up coco_2015_ 37 | for year in ['2015']: 38 | for split in ['test', 'test-dev']: 39 | name = 'coco_{}_{}'.format(year, split) 40 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 41 | 42 | 43 | def get_imdb(name): 44 | """Get an imdb (image database) by name.""" 45 | if name not in __sets: 46 | raise KeyError('Unknown dataset: {}'.format(name)) 47 | return __sets[name]() 48 | 49 | 50 | def list_imdbs(): 51 | """List all registered imdbs.""" 52 | return list(__sets.keys()) 53 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import os.path as osp 13 | import PIL 14 | from utils.bbox import bbox_overlaps 15 | import numpy as np 16 | import scipy.sparse 17 | from model.config import cfg 18 | 19 | 20 | class imdb(object): 21 | """Image database.""" 22 | 23 | def __init__(self, name, classes=None): 24 | self._name = name 25 | self._num_classes = 0 26 | if not classes: 27 | self._classes = [] 28 | else: 29 | self._classes = classes 30 | self._image_index = [] 31 | self._obj_proposer = 'gt' 32 | self._roidb = None 33 | self._roidb_handler = self.default_roidb 34 | # Use this dict for storing dataset specific config options 35 | self.config = {} 36 | 37 | @property 38 | def name(self): 39 | return self._name 40 | 41 | @property 42 | def num_classes(self): 43 | return len(self._classes) 44 | 45 | @property 46 | def classes(self): 47 | return self._classes 48 | 49 | @property 50 | def image_index(self): 51 | return self._image_index 52 | 53 | @property 54 | def roidb_handler(self): 55 | return self._roidb_handler 56 | 57 | @roidb_handler.setter 58 | def roidb_handler(self, val): 59 | self._roidb_handler = val 60 | 61 | def set_proposal_method(self, method): 62 | method = eval('self.' + method + '_roidb') 63 | self.roidb_handler = method 64 | 65 | @property 66 | def roidb(self): 67 | # A roidb is a list of dictionaries, each with the following keys: 68 | # boxes 69 | # gt_overlaps 70 | # gt_classes 71 | # flipped 72 | if self._roidb is not None: 73 | return self._roidb 74 | self._roidb = self.roidb_handler() 75 | return self._roidb 76 | 77 | @property 78 | def cache_path(self): 79 | cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) 80 | if not os.path.exists(cache_path): 81 | os.makedirs(cache_path) 82 | return cache_path 83 | 84 | @property 85 | def num_images(self): 86 | return len(self.image_index) 87 | 88 | def image_path_at(self, i): 89 | raise NotImplementedError 90 | 91 | def default_roidb(self): 92 | raise NotImplementedError 93 | 94 | def evaluate_detections(self, all_boxes, output_dir=None): 95 | """ 96 | all_boxes is a list of length number-of-classes. 97 | Each list element is a list of length number-of-images. 98 | Each of those list elements is either an empty list [] 99 | or a numpy array of detection. 
100 | 101 | all_boxes[class][image] = [] or np.array of shape #dets x 5 102 | """ 103 | raise NotImplementedError 104 | 105 | def evaluate_discovery(self, all_boxes, output_dir=None): 106 | """ 107 | all_boxes is a list of length number-of-classes. 108 | Each list element is a list of length number-of-images. 109 | Each of those list elements is either an empty list [] 110 | or a numpy array of detection. 111 | 112 | all_boxes[class][image] = [] or np.array of shape #dets x 5 113 | """ 114 | raise NotImplementedError 115 | 116 | def _get_widths(self): 117 | return [PIL.Image.open(self.image_path_at(i)).size[0] 118 | for i in range(self.num_images)] 119 | 120 | def append_flipped_images(self): 121 | num_images = self.num_images 122 | widths = [PIL.Image.open(self.image_path_at(i)).size[0] 123 | for i in range(num_images)] 124 | for i in range(num_images): 125 | boxes = self.roidb[i]['boxes'].copy() 126 | oldx1 = boxes[:, 0].copy() 127 | oldx2 = boxes[:, 2].copy() 128 | boxes[:, 0] = widths[i] - oldx2 - 1 129 | boxes[:, 2] = widths[i] - oldx1 - 1 130 | assert (boxes[:, 2] >= boxes[:, 0]).all() 131 | entry = {'boxes' : boxes, 132 | 'labels' : self.roidb[i]['labels'], 133 | 'flipped' : True} 134 | self.roidb.append(entry) 135 | self._image_index = self._image_index * 2 136 | 137 | def evaluate_recall(self, candidate_boxes=None, thresholds=None, 138 | area='all', limit=None): 139 | """Evaluate detection proposal recall metrics. 140 | 141 | Returns: 142 | results: dictionary of results with keys 143 | 'ar': average recall 144 | 'recalls': vector recalls at each IoU overlap threshold 145 | 'thresholds': vector of IoU overlap thresholds 146 | 'gt_overlaps': vector of all ground-truth overlaps 147 | """ 148 | # Record max overlap value for each gt box 149 | # Return vector of overlap values 150 | areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3, 151 | '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} 152 | area_ranges = [[0 ** 2, 1e5 ** 2], # all 153 | [0 ** 2, 32 ** 2], # small 154 | [32 ** 2, 96 ** 2], # medium 155 | [96 ** 2, 1e5 ** 2], # large 156 | [96 ** 2, 128 ** 2], # 96-128 157 | [128 ** 2, 256 ** 2], # 128-256 158 | [256 ** 2, 512 ** 2], # 256-512 159 | [512 ** 2, 1e5 ** 2], # 512-inf 160 | ] 161 | assert area in areas, 'unknown area range: {}'.format(area) 162 | area_range = area_ranges[areas[area]] 163 | gt_overlaps = np.zeros(0) 164 | num_pos = 0 165 | for i in range(self.num_images): 166 | # Checking for max_overlaps == 1 avoids including crowd annotations 167 | # (...pretty hacking :/) 168 | max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1) 169 | gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & 170 | (max_gt_overlaps == 1))[0] 171 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 172 | gt_areas = self.roidb[i]['seg_areas'][gt_inds] 173 | valid_gt_inds = np.where((gt_areas >= area_range[0]) & 174 | (gt_areas <= area_range[1]))[0] 175 | gt_boxes = gt_boxes[valid_gt_inds, :] 176 | num_pos += len(valid_gt_inds) 177 | 178 | if candidate_boxes is None: 179 | # If candidate_boxes is not supplied, the default is to use the 180 | # non-ground-truth boxes from this roidb 181 | non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] 182 | boxes = self.roidb[i]['boxes'][non_gt_inds, :] 183 | else: 184 | boxes = candidate_boxes[i] 185 | if boxes.shape[0] == 0: 186 | continue 187 | if limit is not None and boxes.shape[0] > limit: 188 | boxes = boxes[:limit, :] 189 | 190 | overlaps = bbox_overlaps(boxes.astype(np.float), 191 | gt_boxes.astype(np.float)) 192 | 
193 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 194 | for j in range(gt_boxes.shape[0]): 195 | # find which proposal box maximally covers each gt box 196 | argmax_overlaps = overlaps.argmax(axis=0) 197 | # and get the iou amount of coverage for each gt box 198 | max_overlaps = overlaps.max(axis=0) 199 | # find which gt box is 'best' covered (i.e. 'best' = most iou) 200 | gt_ind = max_overlaps.argmax() 201 | gt_ovr = max_overlaps.max() 202 | assert (gt_ovr >= 0) 203 | # find the proposal box that covers the best covered gt box 204 | box_ind = argmax_overlaps[gt_ind] 205 | # record the iou coverage of this gt box 206 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 207 | assert (_gt_overlaps[j] == gt_ovr) 208 | # mark the proposal box and the gt box as used 209 | overlaps[box_ind, :] = -1 210 | overlaps[:, gt_ind] = -1 211 | # append recorded iou coverage level 212 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 213 | 214 | gt_overlaps = np.sort(gt_overlaps) 215 | if thresholds is None: 216 | step = 0.05 217 | thresholds = np.arange(0.5, 0.95 + 1e-5, step) 218 | recalls = np.zeros_like(thresholds) 219 | # compute recall for each iou threshold 220 | for i, t in enumerate(thresholds): 221 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 222 | # ar = 2 * np.trapz(recalls, thresholds) 223 | ar = recalls.mean() 224 | return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 225 | 'gt_overlaps': gt_overlaps} 226 | 227 | def create_roidb_from_box_list(self, box_list, gt_roidb): 228 | assert len(box_list) == self.num_images, \ 229 | 'Number of boxes must match number of ground-truth images' 230 | roidb = [] 231 | 232 | if gt_roidb is not None: 233 | for i in range(self.num_images): 234 | boxes = box_list[i] 235 | 236 | real_label = gt_roidb[i]['labels'] 237 | 238 | roidb.append({'boxes' : boxes, 239 | 'labels' : np.array([real_label], dtype=np.int32), 240 | 'flipped' : False}) 241 | else: 242 | for i in range(self.num_images): 243 | boxes = box_list[i] 244 | 245 | roidb.append({'boxes' : boxes, 246 | 'labels' : np.zeros((1, 0), dtype=np.int32), 247 | 'flipped' : False}) 248 | 249 | return roidb 250 | 251 | @staticmethod 252 | def merge_roidbs(a, b): 253 | assert len(a) == len(b) 254 | for i in range(len(a)): 255 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 256 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 257 | b[i]['gt_classes'])) 258 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 259 | b[i]['gt_overlaps']]) 260 | a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], 261 | b[i]['seg_areas'])) 262 | return a 263 | 264 | def competition_mode(self, on): 265 | """Turn competition mode on or off.""" 266 | pass 267 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 
50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | 69 | def voc_eval(detpath, 70 | annopath, 71 | imagesetfile, 72 | classname, 73 | cachedir, 74 | ovthresh=0.5, 75 | use_07_metric=False, 76 | use_diff=False): 77 | """rec, prec, ap = voc_eval(detpath, 78 | annopath, 79 | imagesetfile, 80 | classname, 81 | [ovthresh], 82 | [use_07_metric]) 83 | 84 | Top level function that does the PASCAL VOC evaluation. 85 | 86 | detpath: Path to detections 87 | detpath.format(classname) should produce the detection results file. 88 | annopath: Path to annotations 89 | annopath.format(imagename) should be the xml annotations file. 90 | imagesetfile: Text file containing the list of images, one image per line. 91 | classname: Category name (duh) 92 | cachedir: Directory for caching the annotations 93 | [ovthresh]: Overlap threshold (default = 0.5) 94 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 95 | (default False) 96 | """ 97 | # assumes detections are in detpath.format(classname) 98 | # assumes annotations are in annopath.format(imagename) 99 | # assumes imagesetfile is a text file with each line an image name 100 | # cachedir caches the annotations in a pickle file 101 | 102 | # first load gt 103 | if not os.path.isdir(cachedir): 104 | os.mkdir(cachedir) 105 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile) 106 | # read list of images 107 | with open(imagesetfile, 'r') as f: 108 | lines = f.readlines() 109 | imagenames = [x.strip() for x in lines] 110 | 111 | if not os.path.isfile(cachefile): 112 | # load annotations 113 | recs = {} 114 | for i, imagename in enumerate(imagenames): 115 | recs[imagename] = parse_rec(annopath.format(imagename)) 116 | if i % 100 == 0: 117 | print('Reading annotation for {:d}/{:d}'.format( 118 | i + 1, len(imagenames))) 119 | # save 120 | print('Saving cached annotations to {:s}'.format(cachefile)) 121 | with open(cachefile, 'wb') as f: 122 | pickle.dump(recs, f) 123 | else: 124 | # load 125 | with open(cachefile, 'rb') as f: 126 | try: 127 | recs = pickle.load(f) 128 | except: 129 | recs = pickle.load(f, encoding='bytes') 130 | 131 | # extract gt objects for this class 132 | class_recs = {} 133 | npos = 0 134 | for imagename in imagenames: 135 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 136 | bbox = np.array([x['bbox'] for x in R]) 137 | if use_diff: 138 | difficult = np.array([False for x in R]).astype(np.bool) 139 | else: 140 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 141 | det = [False] * len(R) 142 | npos = npos + sum(~difficult) 143 | class_recs[imagename] = {'bbox': bbox, 144 | 'difficult': difficult, 145 | 'det': det} 146 | 147 | # read dets 148 | detfile = detpath.format(classname) 149 | with open(detfile, 'r') as f: 150 | lines = f.readlines() 151 | 152 | splitlines = [x.strip().split(' ') for x in lines] 153 | image_ids = [x[0] for x in splitlines] 154 | confidence = np.array([float(x[1]) for x in splitlines]) 155 | BB = np.array([[float(z) for z 
in x[2:]] for x in splitlines]) 156 | 157 | nd = len(image_ids) 158 | tp = np.zeros(nd) 159 | fp = np.zeros(nd) 160 | 161 | if BB.shape[0] > 0: 162 | # sort by confidence 163 | sorted_ind = np.argsort(-confidence) 164 | sorted_scores = np.sort(-confidence) 165 | BB = BB[sorted_ind, :] 166 | image_ids = [image_ids[x] for x in sorted_ind] 167 | 168 | # go down dets and mark TPs and FPs 169 | for d in range(nd): 170 | R = class_recs[image_ids[d]] 171 | bb = BB[d, :].astype(float) 172 | ovmax = -np.inf 173 | BBGT = R['bbox'].astype(float) 174 | 175 | if BBGT.size > 0: 176 | # compute overlaps 177 | # intersection 178 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 179 | iymin = np.maximum(BBGT[:, 1], bb[1]) 180 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 181 | iymax = np.minimum(BBGT[:, 3], bb[3]) 182 | iw = np.maximum(ixmax - ixmin + 1., 0.) 183 | ih = np.maximum(iymax - iymin + 1., 0.) 184 | inters = iw * ih 185 | 186 | # union 187 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 188 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 189 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 190 | 191 | overlaps = inters / uni 192 | ovmax = np.max(overlaps) 193 | jmax = np.argmax(overlaps) 194 | 195 | if ovmax > ovthresh: 196 | if not R['difficult'][jmax]: 197 | if not R['det'][jmax]: 198 | tp[d] = 1. 199 | R['det'][jmax] = 1 200 | else: 201 | fp[d] = 1. 202 | else: 203 | fp[d] = 1. 204 | 205 | # compute precision recall 206 | fp = np.cumsum(fp) 207 | tp = np.cumsum(tp) 208 | rec = tp / float(npos) 209 | # avoid divide by zero in case the first detection matches a difficult 210 | # ground truth 211 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 212 | ap = voc_ap(rec, prec, use_07_metric) 213 | 214 | return rec, prec, ap 215 | -------------------------------------------------------------------------------- /lib/layer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/layer_utils/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from model.config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.bbox import bbox_overlaps 16 | from model.bbox_transform import bbox_transform 17 | import torch 18 | 19 | def anchor_target_layer(rpn_cls_score, gt_boxes, gt_weights, im_info, _feat_stride, all_anchors, num_anchors): 20 | """Same as the anchor target layer in original Fast/er RCNN """ 21 | A = num_anchors 22 | total_anchors = all_anchors.shape[0] 23 | K = total_anchors / num_anchors 24 | 25 | # allow boxes to sit over the edge by a small amount 26 | _allowed_border = 0 27 | 28 | # map of shape (..., H, W) 29 | height, width = rpn_cls_score.shape[1:3] 30 | 31 | # only keep anchors inside the image 32 | inds_inside = np.where( 33 | (all_anchors[:, 0] >= -_allowed_border) & 34 | (all_anchors[:, 1] >= 
-_allowed_border) & 35 | (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width 36 | (all_anchors[:, 3] < im_info[0] + _allowed_border) # height 37 | )[0] 38 | 39 | # keep only inside anchors 40 | anchors = all_anchors[inds_inside, :] 41 | 42 | # label: 1 is positive, 0 is negative, -1 is dont care 43 | labels = np.empty((len(inds_inside),), dtype=np.float32) 44 | labels.fill(-1) 45 | 46 | 47 | # overlaps between the anchors and the gt boxes 48 | # overlaps (ex, gt) 49 | overlaps = bbox_overlaps( 50 | np.ascontiguousarray(anchors, dtype=np.float), 51 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 52 | argmax_overlaps = overlaps.argmax(axis=1) 53 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 54 | ''' 55 | add weights items by pseudo scores 56 | ''' 57 | gt_weights_tile = np.tile(gt_weights.reshape(1,-1),(len(inds_inside),1)) 58 | loss_weights = gt_weights_tile[np.arange(len(inds_inside)),argmax_overlaps] 59 | ''' 60 | end of modification 61 | ''' 62 | 63 | gt_argmax_overlaps = overlaps.argmax(axis=0) 64 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 65 | np.arange(overlaps.shape[1])] 66 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 67 | 68 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 69 | # assign bg labels first so that positive labels can clobber them 70 | # first set the negatives 71 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 72 | 73 | # fg label: for each gt, anchor with highest overlap 74 | labels[gt_argmax_overlaps] = 1 75 | 76 | # fg label: above threshold IOU 77 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 78 | 79 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 80 | # assign bg labels last so that negative labels can clobber positives 81 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 82 | 83 | # subsample positive labels if we have too many 84 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 85 | fg_inds = np.where(labels == 1)[0] 86 | if len(fg_inds) > num_fg: 87 | disable_inds = npr.choice( 88 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 89 | labels[disable_inds] = -1 90 | 91 | # subsample negative labels if we have too many 92 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 93 | bg_inds = np.where(labels == 0)[0] 94 | if len(bg_inds) > num_bg: 95 | disable_inds = npr.choice( 96 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 97 | labels[disable_inds] = -1 98 | 99 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 100 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 101 | 102 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 103 | # only the positive ones have regression targets 104 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 105 | 106 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 107 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 108 | # uniform weighting of examples (given non-uniform sampling) 109 | num_examples = np.sum(labels >= 0) 110 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 111 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 112 | else: 113 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 114 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 115 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 116 | np.sum(labels == 1)) 117 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 118 | np.sum(labels == 0)) 119 | 120 | ''' 121 | put loss-weight of bg to 1.0 122 | fg to 1.0 + wsddn_prob 123 | 
''' 124 | 125 | loss_weights[labels == 0] = 1.0 126 | #loss_weights[labels == 1] = loss_weights[labels == 1] + 1.0 127 | loss_weights[labels == 1] = loss_weights[labels == 1] 128 | 129 | bbox_outside_weights[labels == 1, :] = loss_weights[labels==1].reshape(-1,1) * positive_weights 130 | bbox_outside_weights[labels == 0, :] = loss_weights[labels==0].reshape(-1,1) * negative_weights 131 | 132 | 133 | 134 | 135 | ''' 136 | bbox_outside_weights[labels == 1, :] = positive_weights 137 | bbox_outside_weights[labels == 0, :] = negative_weights 138 | 139 | 140 | end of change loss-weight 141 | ''' 142 | # map up to original set of anchors 143 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 144 | loss_weights = _unmap(loss_weights, total_anchors, inds_inside, fill=0) 145 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 146 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 147 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 148 | 149 | # labels 150 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 151 | labels = labels.reshape((1, 1, A * height, width)) 152 | rpn_labels = labels 153 | 154 | # loss_weight 155 | rpn_loss_weights = loss_weights 156 | # bbox_targets 157 | bbox_targets = bbox_targets \ 158 | .reshape((1, height, width, A * 4)) 159 | 160 | rpn_bbox_targets = bbox_targets 161 | # bbox_inside_weights 162 | bbox_inside_weights = bbox_inside_weights \ 163 | .reshape((1, height, width, A * 4)) 164 | 165 | rpn_bbox_inside_weights = bbox_inside_weights 166 | 167 | # bbox_outside_weights 168 | bbox_outside_weights = bbox_outside_weights \ 169 | .reshape((1, height, width, A * 4)) 170 | 171 | rpn_bbox_outside_weights = bbox_outside_weights 172 | return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, rpn_loss_weights 173 | 174 | 175 | def _unmap(data, count, inds, fill=0): 176 | """ Unmap a subset of item (data) back to the original set of items (of 177 | size count) """ 178 | if len(data.shape) == 1: 179 | ret = np.empty((count,), dtype=np.float32) 180 | ret.fill(fill) 181 | ret[inds] = data 182 | else: 183 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 184 | ret.fill(fill) 185 | ret[inds, :] = data 186 | return ret 187 | 188 | 189 | def _compute_targets(ex_rois, gt_rois): 190 | """Compute bounding-box regression targets for an image.""" 191 | 192 | assert ex_rois.shape[0] == gt_rois.shape[0] 193 | assert ex_rois.shape[1] == 4 194 | assert gt_rois.shape[1] == 5 195 | 196 | return bbox_transform(torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy() 197 | -------------------------------------------------------------------------------- /lib/layer_utils/choose_pseudo_gt.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | from model.config import cfg 7 | import numpy as np 8 | import numpy.random as npr 9 | from utils.bbox import bbox_overlaps 10 | from model.bbox_transform import bbox_transform 11 | import torch 12 | 13 | 14 | def choose_pseudo_gt(boxes, cls_prob, im_labels): 15 | """Get proposals with highest score. 
16 | inputs are all variables""" 17 | 18 | num_images, num_classes = im_labels.size() 19 | boxes = boxes[:,1:] 20 | assert num_images == 1, 'batch size shoud be equal to 1' 21 | im_labels_tmp = im_labels[0, :] 22 | 23 | gt_boxes = [] 24 | gt_classes = [] 25 | gt_scores = [] 26 | for i in range(num_classes): 27 | if im_labels_tmp[i].data.cpu().numpy() == 1: 28 | max_value,max_index = cls_prob[:, i].max(0) 29 | gt_boxes.append(boxes[max_index]) 30 | gt_classes.append(torch.ones(1,1)*(i+1)) # return idx=class+1 to include the background 31 | gt_scores.append(max_value.view(-1,1)) 32 | 33 | gt_boxes = torch.cat(gt_boxes) 34 | gt_classes = torch.cat(gt_classes) 35 | gt_scores = torch.cat(gt_scores) 36 | proposals = {'gt_boxes' : gt_boxes, 37 | 'gt_classes': gt_classes, 38 | 'gt_scores': gt_scores} 39 | 40 | return torch.cat([gt_boxes,gt_classes],1), proposals 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | 14 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 15 | # 16 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 17 | # >> anchors 18 | # 19 | # anchors = 20 | # 21 | # -83 -39 100 56 22 | # -175 -87 192 104 23 | # -359 -183 376 200 24 | # -55 -55 72 72 25 | # -119 -119 136 136 26 | # -247 -247 264 264 27 | # -35 -79 52 96 28 | # -79 -167 96 184 29 | # -167 -343 184 360 30 | 31 | # array([[ -83., -39., 100., 56.], 32 | # [-175., -87., 192., 104.], 33 | # [-359., -183., 376., 200.], 34 | # [ -55., -55., 72., 72.], 35 | # [-119., -119., 136., 136.], 36 | # [-247., -247., 264., 264.], 37 | # [ -35., -79., 52., 96.], 38 | # [ -79., -167., 96., 184.], 39 | # [-167., -343., 184., 360.]]) 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6)): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | return anchors 53 | 54 | 55 | def _whctrs(anchor): 56 | """ 57 | Return width, height, x center, and y center for an anchor (window). 58 | """ 59 | 60 | w = anchor[2] - anchor[0] + 1 61 | h = anchor[3] - anchor[1] + 1 62 | x_ctr = anchor[0] + 0.5 * (w - 1) 63 | y_ctr = anchor[1] + 0.5 * (h - 1) 64 | return w, h, x_ctr, y_ctr 65 | 66 | 67 | def _mkanchors(ws, hs, x_ctr, y_ctr): 68 | """ 69 | Given a vector of widths (ws) and heights (hs) around a center 70 | (x_ctr, y_ctr), output a set of anchors (windows). 
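E.g. ws = [23], hs = [12] around the reference centre (7.5, 7.5) give the window [-3.5, 2., 18.5, 13.], which is the ratio-0.5 anchor before scale enumeration.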
71 | """ 72 | 73 | ws = ws[:, np.newaxis] 74 | hs = hs[:, np.newaxis] 75 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 76 | y_ctr - 0.5 * (hs - 1), 77 | x_ctr + 0.5 * (ws - 1), 78 | y_ctr + 0.5 * (hs - 1))) 79 | return anchors 80 | 81 | 82 | def _ratio_enum(anchor, ratios): 83 | """ 84 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 85 | """ 86 | 87 | w, h, x_ctr, y_ctr = _whctrs(anchor) 88 | size = w * h 89 | size_ratios = size / ratios 90 | ws = np.round(np.sqrt(size_ratios)) 91 | hs = np.round(ws * ratios) 92 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 93 | return anchors 94 | 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | 111 | t = time.time() 112 | a = generate_anchors() 113 | print(time.time() - t) 114 | print(a) 115 | from IPython import embed; 116 | 117 | embed() 118 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_pseudo_gtbox.py: -------------------------------------------------------------------------------- 1 | from model.config import cfg 2 | import numpy as np 3 | import numpy.random as npr 4 | from utils.bbox import bbox_overlaps 5 | from model.bbox_transform import bbox_transform 6 | from model.nms_wrapper import nms 7 | import torch 8 | 9 | 10 | def generate_pseudo_gtbox(boxes, cls_prob, im_labels): 11 | """Get proposals from fuse_matrix 12 | inputs are all variables""" 13 | pre_nms_topN = 50 14 | nms_Thresh = 0.1 15 | 16 | num_images, num_classes = im_labels.size() 17 | boxes = boxes[:,1:] 18 | assert num_images == 1, 'batch size shoud be equal to 1' 19 | im_labels_tmp = im_labels[0, :] 20 | labelList = im_labels_tmp.data.nonzero().view(-1) 21 | 22 | gt_boxes = [] 23 | gt_classes = [] 24 | gt_scores = [] 25 | 26 | for i in labelList: 27 | scores, order = cls_prob[:,i].contiguous().view(-1).sort(descending=True) 28 | if pre_nms_topN > 0: 29 | order = order[:pre_nms_topN] 30 | scores = scores[:pre_nms_topN].view(-1, 1) 31 | proposals = boxes[order.data, :] 32 | 33 | keep = nms(torch.cat((proposals, scores), 1).data, nms_Thresh) 34 | proposals = proposals[keep, :] 35 | scores = scores[keep,] 36 | gt_boxes.append(proposals) 37 | gt_classes.append(torch.ones(keep.size(0),1)*(i+1)) # return idx=class+1 to include the background 38 | gt_scores.append(scores.view(-1,1)) 39 | 40 | gt_boxes = torch.cat(gt_boxes) 41 | gt_classes = torch.cat(gt_classes) 42 | gt_scores = torch.cat(gt_scores) 43 | proposals = {'gt_boxes' : gt_boxes, 44 | 'gt_classes': gt_classes, 45 | 'gt_scores': gt_scores} 46 | # print(gt_boxes.size()) 47 | # print(gt_classes.size()) 48 | # print(type(gt_boxes)) 49 | # print(type(gt_classes)) 50 | return torch.cat([gt_boxes,gt_classes],1),proposals -------------------------------------------------------------------------------- /lib/layer_utils/loss_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Thu Dec 21 16:22:56 2017 3 | 4 | @author: Jiajie 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | 16 | 17 | def 
bootstrap_cross_entropy(input, target, ishard=False, beta=0.95, weight=None, size_average=True): 18 | r"""Function that measures Cross Entropy between target and output 19 | logits with prediction consistency(bootstrap) 20 | 21 | Args: 22 | input: Variable of arbitrary shape 23 | target: Variable :math:`(N)` where each value is 24 | `0 <= targets[i] <= C-1 25 | ishard: Choose soft/hard bootstrap mode 26 | beta: Weight between ``gt`` label and prediction. In paper, 0.8 for hard and 0.95 for soft 27 | weight (Variable, optional): a manual rescaling weight 28 | if provided it's repeated to match input tensor shape 29 | size_average (bool, optional): By default, the losses are averaged 30 | over observations for each minibatch. However, if the field 31 | sizeAverage is set to False, the losses are instead summed 32 | for each minibatch. Default: ``True`` 33 | 34 | Examples:: 35 | 36 | >>> input = autograd.Variable(torch.randn(3, 5), requires_grad=True) 37 | >>> target = autograd.Variable(torch.LongTensor(3).random_(5)) 38 | >>> loss = bootstrap_cross_entropy(input, target) 39 | >>> loss.backward() 40 | """ 41 | input_prob = F.softmax(input) 42 | target_onehot = Variable(input.data.new(input.data.size()).zero_()) 43 | target_onehot.scatter_(1, target.view(-1,1), 1) 44 | # print(target_onehot) 45 | if ishard: 46 | _,idx = input_prob.max(1) 47 | target_onehot = target_onehot * beta + \ 48 | Variable(input.data.new(input.data.size()).zero_()).scatter_(1, idx.view(-1,1), 1) * (1-beta) 49 | else: 50 | target_onehot = target_onehot * beta + input_prob * (1-beta) 51 | loss = - target_onehot * F.log_softmax(input) 52 | #print(loss.size()) 53 | #print(weight.size()) 54 | #if weight is not None: 55 | # loss = loss.sum(1) * weight 56 | 57 | if size_average: 58 | if weight is not None: 59 | return (loss.sum(1) * weight).mean() 60 | return loss.sum(1).mean() 61 | else: 62 | return loss.sum() 63 | 64 | 65 | 66 | 67 | def BCE_bootstrap_with_logits(input, target, ishard=False, beta=0.95, weight=None, size_average=True): 68 | r"""Function that measures Binary Cross Entropy between target and output 69 | logits with prediction consistency(bootstrap) 70 | 71 | Args: 72 | input: Variable of arbitrary shape 73 | target: Variable of the same shape as input 74 | ishard: Choose soft/hard bootstrap mode 75 | beta: Weight between ``gt`` label and prediction. In paper, 0.8 for hard and 0.95 for soft 76 | weight (Variable, optional): a manual rescaling weight 77 | if provided it's repeated to match input tensor shape 78 | size_average (bool, optional): By default, the losses are averaged 79 | over observations for each minibatch. However, if the field 80 | sizeAverage is set to False, the losses are instead summed 81 | for each minibatch. 
Default: ``True`` 82 | 83 | Examples:: 84 | 85 | >>> input = autograd.Variable(torch.randn(3), requires_grad=True) 86 | >>> target = autograd.Variable(torch.FloatTensor(3).random_(2)) 87 | >>> loss = BCE_bootstrap_with_logits(input, target) 88 | >>> loss.backward() 89 | """ 90 | if not (target.size() == input.size()): 91 | raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size())) 92 | input_prob = torch.sigmoid(input) 93 | if ishard: 94 | target = target * beta + (input_prob>0.5) * (1-beta) 95 | else: 96 | target = target * beta + input_prob * (1-beta) 97 | print(target) 98 | max_val = (-input).clamp(min=0) 99 | loss = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log() 100 | 101 | if weight is not None: 102 | loss = loss * weight 103 | 104 | if size_average: 105 | return loss.mean() 106 | else: 107 | return loss.sum() -------------------------------------------------------------------------------- /lib/layer_utils/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | from model.nms_wrapper import nms 14 | 15 | import torch 16 | from torch.autograd import Variable 17 | 18 | 19 | def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): 20 | """A simplified version compared to fast/er RCNN 21 | For details please see the technical report 22 | """ 23 | if type(cfg_key) == bytes: 24 | cfg_key = cfg_key.decode('utf-8') 25 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 26 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 27 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 28 | 29 | # Get the scores and bounding boxes 30 | scores = rpn_cls_prob[:, :, :, num_anchors:] 31 | rpn_bbox_pred = rpn_bbox_pred.view((-1, 4)) 32 | scores = scores.contiguous().view(-1, 1) 33 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 34 | proposals = clip_boxes(proposals, im_info[:2]) 35 | 36 | # Pick the top region proposals 37 | scores, order = scores.view(-1).sort(descending=True) 38 | if pre_nms_topN > 0: 39 | order = order[:pre_nms_topN] 40 | scores = scores[:pre_nms_topN].view(-1, 1) 41 | proposals = proposals[order.data, :] 42 | 43 | # Non-maximal suppression 44 | keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) 45 | 46 | # Pick th top region proposals after NMS 47 | if post_nms_topN > 0: 48 | keep = keep[:post_nms_topN] 49 | proposals = proposals[keep, :] 50 | scores = scores[keep,] 51 | 52 | # Only support single image as input 53 | batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_()) 54 | blob = torch.cat((batch_inds, proposals), 1) 55 | 56 | return blob, scores 57 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed 
under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Sean Bell and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import numpy.random as npr 13 | from model.config import cfg 14 | from model.bbox_transform import bbox_transform 15 | from utils.bbox import bbox_overlaps 16 | 17 | 18 | import torch 19 | from torch.autograd import Variable 20 | 21 | def proposal_target_layer (rpn_rois, rpn_scores, gt_boxes, _num_classes, gt_weights): 22 | """ 23 | Assign object detection proposals to ground-truth targets. Produces proposal 24 | classification labels and bounding-box regression targets. 25 | """ 26 | 27 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 28 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 29 | all_rois = rpn_rois 30 | all_scores = rpn_scores 31 | 32 | # Include ground-truth boxes in the set of candidate rois 33 | if cfg.TRAIN.USE_GT: 34 | zeros = rpn_rois.data.new(gt_boxes.shape[0], 1) 35 | all_rois = torch.cat( 36 | (all_rois, torch.cat((zeros, gt_boxes[:, :-1]), 1)) 37 | , 0) 38 | # not sure if it a wise appending, but anyway i am not using it 39 | all_scores = torch.cat((all_scores, zeros), 0) 40 | 41 | num_images = 1 42 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 43 | fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image)) 44 | 45 | # Sample rois with classification labels and bounding box regression 46 | # targets 47 | labels, rois, roi_scores, bbox_targets, bbox_inside_weights, loss_weights = _sample_rois( 48 | all_rois, all_scores, gt_boxes, gt_weights, fg_rois_per_image, 49 | rois_per_image, _num_classes) 50 | 51 | rois = rois.view(-1, 5) 52 | roi_scores = roi_scores.view(-1) 53 | labels = labels.view(-1, 1) 54 | bbox_targets = bbox_targets.view(-1, _num_classes * 4) 55 | bbox_inside_weights = bbox_inside_weights.view(-1, _num_classes * 4) 56 | bbox_outside_weights = (bbox_inside_weights > 0).float() 57 | #print(bbox_outside_weights) 58 | bbox_outside_weights = bbox_outside_weights * loss_weights.view(-1,1) 59 | #print(bbox_outside_weights) 60 | 61 | return rois, roi_scores, labels, Variable(bbox_targets), Variable(bbox_inside_weights), Variable(bbox_outside_weights), Variable(loss_weights) 62 | 63 | 64 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 65 | """Bounding-box regression targets (bbox_target_data) are stored in a 66 | compact form N x (class, tx, ty, tw, th) 67 | 68 | This function expands those targets into the 4-of-4*K representation used 69 | by the network (i.e. only one class has non-zero targets). 
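For instance, with num_classes = 21 a foreground RoI labelled class 3 has its (tx, ty, tw, th) written to columns 12-15 of the 84-wide target row, and zeros everywhere else.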
70 | 71 | Returns: 72 | bbox_target (ndarray): N x 4K blob of regression targets 73 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 74 | """ 75 | # Inputs are tensor 76 | 77 | clss = bbox_target_data[:, 0] 78 | bbox_targets = clss.new(clss.numel(), 4 * num_classes).zero_() 79 | bbox_inside_weights = clss.new(bbox_targets.shape).zero_() 80 | inds = (clss > 0).nonzero().view(-1) 81 | if inds.numel() > 0: 82 | clss = clss[inds].contiguous().view(-1,1) 83 | dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) 84 | dim2_inds = torch.cat([4*clss, 4*clss+1, 4*clss+2, 4*clss+3], 1).long() 85 | bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:] 86 | bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds) 87 | 88 | return bbox_targets, bbox_inside_weights 89 | 90 | 91 | def _compute_targets(ex_rois, gt_rois, labels): 92 | """Compute bounding-box regression targets for an image.""" 93 | # Inputs are tensor 94 | 95 | assert ex_rois.shape[0] == gt_rois.shape[0] 96 | assert ex_rois.shape[1] == 4 97 | assert gt_rois.shape[1] == 4 98 | 99 | targets = bbox_transform(ex_rois, gt_rois) 100 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 101 | # Optionally normalize targets by a precomputed mean and stdev 102 | targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 103 | / targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 104 | return torch.cat( 105 | [labels.unsqueeze(1), targets], 1) 106 | 107 | 108 | def _sample_rois(all_rois, all_scores, gt_boxes, gt_weights, fg_rois_per_image, rois_per_image, num_classes): 109 | """Generate a random sample of RoIs comprising foreground and background 110 | examples. 111 | """ 112 | # overlaps: (rois x gt_boxes) 113 | overlaps = bbox_overlaps( 114 | all_rois[:, 1:5].data, 115 | gt_boxes[:, :4].data) 116 | max_overlaps, gt_assignment = overlaps.max(1) 117 | labels = gt_boxes[gt_assignment, [4]] 118 | ''' 119 | add weights items by pseudo scores 120 | ''' 121 | gt_weights = gt_weights.detach().data 122 | gt_weights_tile = gt_weights.view(1,-1).expand_as(overlaps) 123 | loss_weights = gt_weights_tile[torch.arange(0,overlaps.size(0)).long(), gt_assignment] 124 | #print((gt_assignment==1).sum()) 125 | #print(loss_weights) 126 | ''' 127 | end of modification 128 | ''' 129 | # Select foreground RoIs as those with >= FG_THRESH overlap 130 | fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) 131 | # Guard against the case when an image has fewer than fg_rois_per_image 132 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 133 | bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1) 134 | 135 | # Small modification to the original version where we ensure a fixed number of regions are sampled 136 | if fg_inds.numel() > 0 and bg_inds.numel() > 0: 137 | fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) 138 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] 139 | bg_rois_per_image = rois_per_image - fg_rois_per_image 140 | to_replace = bg_inds.numel() < bg_rois_per_image 141 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] 142 | elif fg_inds.numel() > 0: 143 | to_replace = fg_inds.numel() < rois_per_image 144 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), 
size=int(rois_per_image), replace=to_replace)).long().cuda()] 145 | fg_rois_per_image = rois_per_image 146 | elif bg_inds.numel() > 0: 147 | to_replace = bg_inds.numel() < rois_per_image 148 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] 149 | fg_rois_per_image = 0 150 | else: 151 | import pdb 152 | pdb.set_trace() 153 | 154 | # The indices that we're selecting (both fg and bg) 155 | keep_inds = torch.cat([fg_inds, bg_inds], 0) 156 | # Select sampled values from various arrays: 157 | labels = labels[keep_inds].contiguous() 158 | # Clamp labels for the background RoIs to 0 159 | labels[int(fg_rois_per_image):] = 0 160 | rois = all_rois[keep_inds].contiguous() 161 | roi_scores = all_scores[keep_inds].contiguous() 162 | 163 | 164 | bbox_target_data = _compute_targets( 165 | rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) 166 | 167 | bbox_targets, bbox_inside_weights = \ 168 | _get_bbox_regression_labels(bbox_target_data, num_classes) 169 | 170 | ''' 171 | modified by jiajie 172 | ''' 173 | #loss_weights = loss_weights[keep_inds].contiguous() + 1.0 174 | loss_weights = loss_weights[keep_inds].contiguous() 175 | loss_weights[int(fg_rois_per_image):] = 1.0 176 | ''' 177 | end of modification 178 | ''' 179 | 180 | #bbox_outside_weights[labels == 1, :] = loss_weights[labels==1].reshape(-1,1) * positive_weights 181 | #bbox_outside_weights[labels == 0, :] = loss_weights[labels==0].reshape(-1,1) * negative_weights 182 | 183 | return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, loss_weights 184 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_top_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | import numpy.random as npr 14 | 15 | import torch 16 | 17 | def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors): 18 | """A layer that just selects the top region proposals 19 | without using non-maximal suppression, 20 | For details please see the technical report 21 | """ 22 | rpn_top_n = cfg.TEST.RPN_TOP_N 23 | 24 | scores = rpn_cls_prob[:, :, :, num_anchors:] 25 | 26 | rpn_bbox_pred = rpn_bbox_pred.view(-1, 4) 27 | scores = scores.contiguous().view(-1, 1) 28 | 29 | length = scores.size(0) 30 | if length < rpn_top_n: 31 | # Random selection, maybe unnecessary and loses good proposals 32 | # But such case rarely happens 33 | top_inds = torch.from_numpy(npr.choice(length, size=rpn_top_n, replace=True)).long().cuda() 34 | else: 35 | top_inds = scores.sort(0, descending=True)[1] 36 | top_inds = top_inds[:rpn_top_n] 37 | top_inds = top_inds.view(rpn_top_n) 38 | 39 | # Do the selection here 40 | anchors = anchors[top_inds, :].contiguous() 41 | rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous() 42 | scores = scores[top_inds].contiguous() 43 | 44 | # Convert anchors into proposals via bbox transformations 45 | proposals = 
bbox_transform_inv(anchors, rpn_bbox_pred) 46 | 47 | # Clip predicted boxes to image 48 | proposals = clip_boxes(proposals, im_info[:2]) 49 | 50 | # Output rois blob 51 | # Our RPN implementation only supports a single input image, so all 52 | # batch inds are 0 53 | batch_inds = proposals.data.new(proposals.size(0), 1).zero_() 54 | blob = torch.cat([batch_inds, proposals], 1) 55 | return blob, scores 56 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/layer_utils/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width) 20 | argmax = torch.IntTensor(num_rois, num_channels, self.pooled_height, self.pooled_width).zero_() 21 | 22 | if not features.is_cuda: 23 | _features = features.permute(0, 2, 3, 1) 24 | roi_pooling.roi_pooling_forward(self.pooled_height, self.pooled_width, self.spatial_scale, 25 | _features, rois, output) 26 | # output = output.cuda() 27 | else: 28 | output = output.cuda() 29 | argmax = argmax.cuda() 30 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 31 | features, rois, output, argmax) 32 | self.output = output 33 | self.argmax = argmax 34 | self.rois = rois 35 | self.feature_size = features.size() 36 | 37 | return output 38 | 39 | def backward(self, grad_output): 40 | 
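# Backward is only implemented for the CUDA path: the argmax indices saved in
# forward() route each pooled gradient back to the single feature cell that won
# the max, so grad_input has the same N x C x H x W shape as the input features.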
assert(self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 46 | grad_output, self.rois, grad_input, self.argmax) 47 | 48 | # print grad_input 49 | 50 | return grad_input, None 51 | 52 | 53 | class RoIPool(torch.nn.Module): 54 | def __init__(self, pooled_height, pooled_width, spatial_scale): 55 | super(RoIPool, self).__init__() 56 | 57 | self.pooled_width = int(pooled_width) 58 | self.pooled_height = int(pooled_height) 59 | self.spatial_scale = float(spatial_scale) 60 | 61 | def forward(self, features, rois): 62 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 63 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 18 | 19 | for roi_ind, roi in enumerate(rois): 20 | batch_ind = int(roi[0].data[0]) 21 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 22 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 23 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 24 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 25 | bin_size_w = float(roi_width) / float(self.pooled_width) 26 | bin_size_h = float(roi_height) / float(self.pooled_height) 27 | 28 | for ph in range(self.pooled_height): 29 | hstart = int(np.floor(ph * bin_size_h)) 30 | hend = int(np.ceil((ph + 1) * bin_size_h)) 31 | hstart = min(data_height, max(0, hstart + roi_start_h)) 32 | hend = min(data_height, max(0, hend + roi_start_h)) 33 | for pw in range(self.pooled_width): 34 | wstart = int(np.floor(pw * bin_size_w)) 35 | wend = int(np.ceil((pw + 1) * bin_size_w)) 36 | wstart = min(data_width, max(0, wstart + roi_start_w)) 37 | wend = min(data_width, max(0, wend + roi_start_w)) 38 | 39 | is_empty = (hend <= hstart) or(wend <= wstart) 40 | if is_empty: 41 | outputs[roi_ind, :, ph, pw] = 0 42 | else: 43 | data = features[batch_ind] 44 | outputs[roi_ind, :, ph, pw] = torch.max( 45 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) 46 | 47 | return outputs 48 | 49 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "roi_pooling_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void 
ROIPoolForward(const int nthreads, const float* bottom_data, 16 | const float spatial_scale, const int height, const int width, 17 | const int channels, const int pooled_height, const int pooled_width, 18 | const float* bottom_rois, float* top_data, int* argmax_data) 19 | { 20 | CUDA_1D_KERNEL_LOOP(index, nthreads) 21 | { 22 | // (n, c, ph, pw) is an element in the pooled output 23 | int n = index; 24 | int pw = n % pooled_width; 25 | n /= pooled_width; 26 | int ph = n % pooled_height; 27 | n /= pooled_height; 28 | int c = n % channels; 29 | n /= channels; 30 | 31 | bottom_rois += n * 5; 32 | int roi_batch_ind = bottom_rois[0]; 33 | int roi_start_w = round(bottom_rois[1] * spatial_scale); 34 | int roi_start_h = round(bottom_rois[2] * spatial_scale); 35 | int roi_end_w = round(bottom_rois[3] * spatial_scale); 36 | int roi_end_h = round(bottom_rois[4] * spatial_scale); 37 | 38 | // Force malformed ROIs to be 1x1 39 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 40 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 41 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 42 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 43 | 44 | int hstart = (int)(floor((float)(ph) * bin_size_h)); 45 | int wstart = (int)(floor((float)(pw) * bin_size_w)); 46 | int hend = (int)(ceil((float)(ph + 1) * bin_size_h)); 47 | int wend = (int)(ceil((float)(pw + 1) * bin_size_w)); 48 | 49 | // Add roi offsets and clip to input boundaries 50 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), height); 51 | hend = fminf(fmaxf(hend + roi_start_h, 0), height); 52 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), width); 53 | wend = fminf(fmaxf(wend + roi_start_w, 0), width); 54 | bool is_empty = (hend <= hstart) || (wend <= wstart); 55 | 56 | // Define an empty pooling region to be zero 57 | float maxval = is_empty ? 
0 : -FLT_MAX; 58 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 59 | int maxidx = -1; 60 | bottom_data += roi_batch_ind * channels * height * width; 61 | for (int h = hstart; h < hend; ++h) { 62 | for (int w = wstart; w < wend; ++w) { 63 | // int bottom_index = (h * width + w) * channels + c; 64 | int bottom_index = (c * height + h) * width + w; 65 | if (bottom_data[bottom_index] > maxval) { 66 | maxval = bottom_data[bottom_index]; 67 | maxidx = bottom_index; 68 | } 69 | } 70 | } 71 | top_data[index] = maxval; 72 | if (argmax_data != NULL) 73 | argmax_data[index] = maxidx; 74 | } 75 | } 76 | 77 | 78 | int ROIPoolForwardLaucher( 79 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 80 | const int width, const int channels, const int pooled_height, 81 | const int pooled_width, const float* bottom_rois, 82 | float* top_data, int* argmax_data, cudaStream_t stream) 83 | { 84 | const int kThreadsPerBlock = 1024; 85 | const int output_size = num_rois * pooled_height * pooled_width * channels; 86 | cudaError_t err; 87 | 88 | 89 | ROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 90 | output_size, bottom_data, spatial_scale, height, width, channels, pooled_height, 91 | pooled_width, bottom_rois, top_data, argmax_data); 92 | 93 | err = cudaGetLastError(); 94 | if(cudaSuccess != err) 95 | { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | 104 | __global__ void ROIPoolBackward(const int nthreads, const float* top_diff, 105 | const int* argmax_data, const int num_rois, const float spatial_scale, 106 | const int height, const int width, const int channels, 107 | const int pooled_height, const int pooled_width, float* bottom_diff, 108 | const float* bottom_rois) { 109 | CUDA_1D_KERNEL_LOOP(index, nthreads) 110 | { 111 | 112 | // (n, c, ph, pw) is an element in the pooled output 113 | int n = index; 114 | int w = n % width; 115 | n /= width; 116 | int h = n % height; 117 | n /= height; 118 | int c = n % channels; 119 | n /= channels; 120 | 121 | float gradient = 0; 122 | // Accumulate gradient over all ROIs that pooled this element 123 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) 124 | { 125 | const float* offset_bottom_rois = bottom_rois + roi_n * 5; 126 | int roi_batch_ind = offset_bottom_rois[0]; 127 | // Skip if ROI's batch index doesn't match n 128 | if (n != roi_batch_ind) { 129 | continue; 130 | } 131 | 132 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 133 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 134 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 135 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 136 | 137 | // Skip if ROI doesn't include (h, w) 138 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 139 | h >= roi_start_h && h <= roi_end_h); 140 | if (!in_roi) { 141 | continue; 142 | } 143 | 144 | int offset = roi_n * pooled_height * pooled_width * channels; 145 | const float* offset_top_diff = top_diff + offset; 146 | const int* offset_argmax_data = argmax_data + offset; 147 | 148 | // Compute feasible set of pooled units that could have pooled 149 | // this bottom unit 150 | 151 | // Force malformed ROIs to be 1x1 152 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 153 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 154 | 155 | float bin_size_h = (float)(roi_height) / 
(float)(pooled_height); 156 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 157 | 158 | int phstart = floor((float)(h - roi_start_h) / bin_size_h); 159 | int phend = ceil((float)(h - roi_start_h + 1) / bin_size_h); 160 | int pwstart = floor((float)(w - roi_start_w) / bin_size_w); 161 | int pwend = ceil((float)(w - roi_start_w + 1) / bin_size_w); 162 | 163 | phstart = fminf(fmaxf(phstart, 0), pooled_height); 164 | phend = fminf(fmaxf(phend, 0), pooled_height); 165 | pwstart = fminf(fmaxf(pwstart, 0), pooled_width); 166 | pwend = fminf(fmaxf(pwend, 0), pooled_width); 167 | 168 | for (int ph = phstart; ph < phend; ++ph) { 169 | for (int pw = pwstart; pw < pwend; ++pw) { 170 | if (offset_argmax_data[(c * pooled_height + ph) * pooled_width + pw] == index) 171 | { 172 | gradient += offset_top_diff[(c * pooled_height + ph) * pooled_width + pw]; 173 | } 174 | } 175 | } 176 | } 177 | bottom_diff[index] = gradient; 178 | } 179 | } 180 | 181 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 182 | const int height, const int width, const int channels, const int pooled_height, 183 | const int pooled_width, const float* bottom_rois, 184 | float* bottom_diff, const int* argmax_data, cudaStream_t stream) 185 | { 186 | const int kThreadsPerBlock = 1024; 187 | const int output_size = batch_size * height * width * channels; 188 | cudaError_t err; 189 | 190 | ROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 191 | output_size, top_diff, argmax_data, num_rois, spatial_scale, height, width, channels, pooled_height, 192 | pooled_width, bottom_diff, bottom_rois); 193 | 194 | err = cudaGetLastError(); 195 | if(cudaSuccess != err) 196 | { 197 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 198 | exit( -1 ); 199 | } 200 | 201 | return 1; 202 | } 203 | 204 | 205 | #ifdef __cplusplus 206 | } 207 | #endif 208 | 209 | 210 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | 
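A minimal usage sketch of the RoIPool wrapper defined in roi_pool.py above (illustration only, not a file in this repository; it assumes lib/ is on PYTHONPATH, that the _ext.roi_pooling extension has been built with build.py, and that a CUDA device is available):

# illustration only: shapes, RoIs and the stride-16 scale are hypothetical values
import torch
from torch.autograd import Variable
from layer_utils.roi_pooling.roi_pool import RoIPool

# a 1 x 512 x 38 x 50 conv feature map and two RoIs given as [batch_ind, x1, y1, x2, y2]
features = Variable(torch.randn(1, 512, 38, 50).cuda())
rois = Variable(torch.cuda.FloatTensor([[0,  0,  0, 159, 159],
                                        [0, 32, 48, 223, 191]]))

# pool every RoI to 7 x 7 on a stride-16 feature map
roi_pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
pooled = roi_pool(features, rois)   # -> 2 x 512 x 7 x 7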
-------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 
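/* Note: this CPU path expects NHWC features (roi_pool.py permutes to
   N x H x W x C before calling it), writes the pooled output
   channel-major per RoI, and is forward-only: no argmax is recorded. */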
99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | 
bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/layer_utils/snippets.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from layer_utils.generate_anchors import generate_anchors 12 | 13 | def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 14 | """ A wrapper function to generate anchors given different scales 15 | Also return the number of anchors in variable 'length' 16 | """ 17 | anchors = generate_anchors(ratios=np.array(anchor_ratios), scales=np.array(anchor_scales)) 18 | A = anchors.shape[0] 19 | shift_x = np.arange(0, width) * feat_stride 20 | shift_y = np.arange(0, height) * feat_stride 21 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 22 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 23 | K = shifts.shape[0] 24 | # width changes faster, so here it is H, W, C 25 | anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 26 | anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 27 | length = np.int32(anchors.shape[0]) 28 | 29 | return anchors, length 30 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import config 2 | -------------------------------------------------------------------------------- /lib/model/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__init__.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/bbox_transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/bbox_transform.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/nms_wrapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/nms_wrapper.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/test.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/test.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/test_train.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/test_train.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/train_val.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/train_val.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see 
LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import torch 13 | 14 | def bbox_transform(ex_rois, gt_rois): 15 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 16 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 17 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 18 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 19 | 20 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 21 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 22 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 23 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 24 | 25 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 26 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 27 | targets_dw = torch.log(gt_widths / ex_widths) 28 | targets_dh = torch.log(gt_heights / ex_heights) 29 | 30 | targets = torch.stack( 31 | (targets_dx, targets_dy, targets_dw, targets_dh), 1) 32 | return targets 33 | 34 | 35 | def bbox_transform_inv(boxes, deltas): 36 | # Input should be both tensor or both Variable and on the same device 37 | if len(boxes) == 0: 38 | return deltas.detach() * 0 39 | 40 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 41 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 42 | ctr_x = boxes[:, 0] + 0.5 * widths 43 | ctr_y = boxes[:, 1] + 0.5 * heights 44 | 45 | dx = deltas[:, 0::4] 46 | dy = deltas[:, 1::4] 47 | dw = deltas[:, 2::4] 48 | dh = deltas[:, 3::4] 49 | 50 | pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1) 51 | pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1) 52 | pred_w = torch.exp(dw) * widths.unsqueeze(1) 53 | pred_h = torch.exp(dh) * heights.unsqueeze(1) 54 | 55 | pred_boxes = torch.cat(\ 56 | [_.unsqueeze(2) for _ in [pred_ctr_x - 0.5 * pred_w,\ 57 | pred_ctr_y - 0.5 * pred_h,\ 58 | pred_ctr_x + 0.5 * pred_w,\ 59 | pred_ctr_y + 0.5 * pred_h]], 2).view(len(boxes), -1) 60 | 61 | return pred_boxes 62 | 63 | 64 | def clip_boxes(boxes, im_shape): 65 | """ 66 | Clip boxes to image boundaries. 
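Coordinates are clamped to [0, width - 1] and [0, height - 1].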
67 | boxes must be tensor or Variable, im_shape can be anything but Variable 68 | """ 69 | 70 | if not hasattr(boxes, 'data'): 71 | boxes_ = boxes.numpy() 72 | 73 | boxes = boxes.view(boxes.size(0), -1, 4) 74 | boxes = torch.stack(\ 75 | [boxes[:,:,0].clamp(0, im_shape[1] - 1), 76 | boxes[:,:,1].clamp(0, im_shape[0] - 1), 77 | boxes[:,:,2].clamp(0, im_shape[1] - 1), 78 | boxes[:,:,3].clamp(0, im_shape[0] - 1)], 2).view(boxes.size(0), -1) 79 | 80 | return boxes 81 | -------------------------------------------------------------------------------- /lib/model/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/config.pyc -------------------------------------------------------------------------------- /lib/model/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/model/test.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/test.pyc -------------------------------------------------------------------------------- /lib/model/test_train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import cv2 11 | import numpy as np 12 | try: 13 | import cPickle as pickle 14 | except ImportError: 15 | import pickle 16 | import os 17 | import math 18 | 19 | from utils.timer import Timer 20 | from model.nms_wrapper import nms 21 | from utils.blob import im_list_to_blob 22 | 23 | from model.config import cfg, get_output_dir 24 | from model.bbox_transform import clip_boxes, bbox_transform_inv 25 | 26 | import torch 27 | 28 | def _get_image_blob(im): 29 | """Converts an image into a network input. 
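The scale for each pyramid level is target_size / min(height, width), capped so that the longer side never exceeds cfg.TEST.MAX_SIZE. For instance, assuming cfg.TEST.SCALES = (600,) and cfg.TEST.MAX_SIZE = 1000, a 480 x 640 image is resized by 600 / 480 = 1.25 (since 1.25 * 640 = 800 <= 1000), while a 480 x 1300 image would be capped at 1000 / 1300 ~ 0.77.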
30 | Arguments: 31 | im (ndarray): a color image in BGR order 32 | Returns: 33 | blob (ndarray): a data blob holding an image pyramid 34 | im_scale_factors (list): list of image scales (relative to im) used 35 | in the image pyramid 36 | """ 37 | im_orig = im.astype(np.float32, copy=True) 38 | im_orig -= cfg.PIXEL_MEANS 39 | 40 | im_shape = im_orig.shape 41 | im_size_min = np.min(im_shape[0:2]) 42 | im_size_max = np.max(im_shape[0:2]) 43 | 44 | processed_ims = [] 45 | im_scale_factors = [] 46 | 47 | for target_size in cfg.TEST.SCALES: 48 | im_scale = float(target_size) / float(im_size_min) 49 | # Prevent the biggest axis from being more than MAX_SIZE 50 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 51 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 52 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 53 | interpolation=cv2.INTER_LINEAR) 54 | im_scale_factors.append(im_scale) 55 | processed_ims.append(im) 56 | 57 | # Create a blob to hold the input images 58 | blob = im_list_to_blob(processed_ims) 59 | 60 | return blob, np.array(im_scale_factors) 61 | 62 | def _get_rois_blob(im_rois, im_scale_factors): 63 | """Converts RoIs into network inputs. 64 | Arguments: 65 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 66 | im_scale_factors (list): scale factors as returned by _get_image_blob 67 | Returns: 68 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 69 | """ 70 | rois_blob_real = [] 71 | 72 | for i in range(len(im_scale_factors)): 73 | rois, levels = _project_im_rois(im_rois, np.array([im_scale_factors[i]])) 74 | rois_blob = np.hstack((levels, rois)) 75 | rois_blob_real.append(rois_blob.astype(np.float32, copy=False)) 76 | 77 | return rois_blob_real 78 | 79 | 80 | def _project_im_rois(im_rois, scales): 81 | """Project image RoIs into the image pyramid built by _get_image_blob. 
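Each RoI is assigned to the pyramid level whose scaled area is closest to 224 x 224. As an illustration, a 100 x 100 RoI with scales (0.5, 1.0, 2.0) has scaled areas 2500, 10000 and 40000, so the third scale is chosen (40000 is closest to 50176); with a single scale every RoI simply uses level 0.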
82 | Arguments: 83 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 84 | scales (list): scale factors as returned by _get_image_blob 85 | Returns: 86 | rois (ndarray): R x 4 matrix of projected RoI coordinates 87 | levels (list): image pyramid levels used by each projected RoI 88 | """ 89 | im_rois = im_rois.astype(np.float, copy=False) 90 | 91 | if len(scales) > 1: 92 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 93 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 94 | areas = widths * heights 95 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 96 | diff_areas = np.abs(scaled_areas - 224 * 224) 97 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 98 | else: 99 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 100 | 101 | rois = im_rois * scales[levels] 102 | 103 | return rois, levels 104 | 105 | 106 | def _get_blobs(im, rois): 107 | """Convert an image and RoIs within that image into network inputs.""" 108 | blobs = {} 109 | blobs['data'], im_scale_factors = _get_image_blob(im) 110 | blobs['boxes'] = _get_rois_blob(rois, im_scale_factors) 111 | 112 | return blobs, im_scale_factors 113 | 114 | def _clip_boxes(boxes, im_shape): 115 | """Clip boxes to image boundaries.""" 116 | # x1 >= 0 117 | boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) 118 | # y1 >= 0 119 | boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) 120 | # x2 < im_shape[1] 121 | boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) 122 | # y2 < im_shape[0] 123 | boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) 124 | return boxes 125 | 126 | def _rescale_boxes(boxes, inds, scales): 127 | """Rescale boxes according to image rescaling.""" 128 | for i in range(boxes.shape[0]): 129 | boxes[i,:] = boxes[i,:] / scales[int(inds[i])] 130 | 131 | return boxes 132 | 133 | def im_detect(net, im, boxes): 134 | blobs, im_scales = _get_blobs(im,boxes) 135 | assert len(im_scales) == 1, "Only single-image batch implemented" 136 | 137 | im_blob = blobs['data'] 138 | blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32) 139 | cfg.DEDUP_BOXES=1.0/16.0 140 | for i in range(len(blobs['data'])): 141 | if cfg.DEDUP_BOXES > 0: 142 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 143 | hashes = np.round(blobs['boxes'][i] * cfg.DEDUP_BOXES).dot(v) 144 | _, index, inv_index = np.unique(hashes, return_index=True, 145 | return_inverse=True) 146 | blobs['boxes'][i] = blobs['boxes'][i][index, :] 147 | boxes_tmp = boxes[index, :].copy() 148 | else: 149 | boxes_tmp = boxes.copy() 150 | 151 | # TODO 152 | # change the blobs['im_info'], now is an array 153 | cls_prob, bbox_prob, fuse_prob, image_prob, scores_fast, bbox_pred_fast, rois = net.test_image(blobs['data'][i:i+1,:], blobs['im_info'], blobs['boxes'][i]) 154 | ''' 155 | WSDDN 156 | ''' 157 | scores_tmp = fuse_prob 158 | pred_boxes = np.tile(boxes_tmp, (1, fuse_prob.shape[1])) 159 | 160 | ''' 161 | Faster rcnn 162 | ''' 163 | boxes_fast = rois[:, 1:5] / im_scales[0] 164 | scores_fast = np.reshape(scores_fast, [scores_fast.shape[0], -1]) 165 | bbox_pred_fast = np.reshape(bbox_pred_fast, [bbox_pred_fast.shape[0], -1]) 166 | 167 | if cfg.TEST.BBOX_REG: 168 | # Apply bounding-box regression deltas 169 | box_deltas = bbox_pred_fast 170 | pred_boxes_fast = bbox_transform_inv(torch.from_numpy(boxes_fast), torch.from_numpy(box_deltas)).numpy() 171 | pred_boxes_fast = _clip_boxes(pred_boxes_fast, im.shape) 172 | else: 173 | # Simply repeat the boxes, once for each class 174 | pred_boxes = np.tile(boxes_fast, (1, 
scores_fast.shape[1])) 175 | 176 | 177 | 178 | cfg.TEST.USE_FLIPPED=True 179 | if cfg.TEST.USE_FLIPPED: 180 | blobs['data'][i:i+1] = blobs['data'][i:i+1][:, :, ::-1, :] 181 | width = blobs['data'][i:i+1].shape[2] 182 | oldx1 = blobs['boxes'][i][:, 1].copy() 183 | oldx2 = blobs['boxes'][i][:, 3].copy() 184 | blobs['boxes'][i][:, 1] = width - oldx2 - 1 185 | blobs['boxes'][i][:, 3] = width - oldx1 - 1 186 | assert (blobs['boxes'][i][:, 3] >= blobs['boxes'][i][:, 1]).all() 187 | 188 | cls_prob, bbox_prob, fuse_prob, image_prob, _ , _ , _= net.test_image(blobs['data'][i:i+1,:], blobs['im_info'], blobs['boxes'][i]) 189 | scores_tmp += fuse_prob 190 | 191 | if cfg.DEDUP_BOXES > 0: 192 | # Map scores and predictions back to the original set of boxes 193 | scores_tmp = scores_tmp[inv_index, :] 194 | pred_boxes = pred_boxes[inv_index, :] 195 | 196 | if i == 0: 197 | scores = np.copy(scores_tmp) 198 | else: 199 | scores += scores_tmp 200 | 201 | scores /= len(blobs['data']) * (1. + cfg.TEST.USE_FLIPPED) 202 | 203 | return scores, pred_boxes, scores_fast, pred_boxes_fast 204 | 205 | def apply_nms(all_boxes, thresh): 206 | """Apply non-maximum suppression to all predicted boxes output by the 207 | test_net method. 208 | """ 209 | num_classes = len(all_boxes) 210 | num_images = len(all_boxes[0]) 211 | nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] 212 | for cls_ind in range(num_classes): 213 | for im_ind in range(num_images): 214 | dets = all_boxes[cls_ind][im_ind] 215 | if dets == []: 216 | continue 217 | 218 | x1 = dets[:, 0] 219 | y1 = dets[:, 1] 220 | x2 = dets[:, 2] 221 | y2 = dets[:, 3] 222 | scores = dets[:, 4] 223 | inds = np.where((x2 > x1) & (y2 > y1))[0] 224 | dets = dets[inds,:] 225 | if dets == []: 226 | continue 227 | 228 | keep = nms(torch.from_numpy(dets), thresh).numpy() 229 | if len(keep) == 0: 230 | continue 231 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 232 | return nms_boxes 233 | 234 | def test_train_net(net, imdb, weights_filename, max_per_image=100, thresh=0.): 235 | np.random.seed(cfg.RNG_SEED) 236 | """Test a Fast R-CNN network on an image database.""" 237 | num_images = len(imdb.image_index) 238 | # all detections are collected into: 239 | # all_boxes[cls][image] = N x 5 array of detections in 240 | # (x1, y1, x2, y2, score) 241 | all_boxes = [[[] for _ in range(num_images)] 242 | for _ in range(imdb.num_classes)] 243 | 244 | all_boxes_fast = [[[] for _ in range(num_images)] 245 | for _ in range(imdb.num_classes+1)] 246 | 247 | output_dir = get_output_dir(imdb, weights_filename) #voc_2007_test/default(tag)/vgg16_faster_rcnn_iter_15000 248 | 249 | # timers 250 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 251 | 252 | roidb = imdb.roidb 253 | 254 | 255 | for i in range(num_images): 256 | im = cv2.imread(imdb.image_path_at(i)) 257 | 258 | _t['im_detect'].tic() 259 | scores, boxes, scores_fast, boxes_fast = im_detect(net, im, roidb[i]['boxes']) 260 | _t['im_detect'].toc() 261 | 262 | _t['misc'].tic() 263 | 264 | 265 | for j in range(0, imdb.num_classes): 266 | inds = np.argmax(scores[:, j]) 267 | all_boxes[j][i] = \ 268 | np.hstack((boxes[inds, j*4:(j+1)*4].reshape(1, -1), 269 | np.array([[scores[inds, j]]]))) 270 | ''' 271 | start of faster part 272 | ''' 273 | # skip j = 0, because it's the background class 274 | for j in range(1, imdb.num_classes+1): 275 | inds = np.argmax(scores_fast[:, j]) 276 | all_boxes_fast[j][i] = \ 277 | np.hstack((boxes_fast[inds, j*4:(j+1)*4].reshape(1, -1), 278 | np.array([[scores_fast[inds, j]]]))) 279 | ''' 
280 | end of faster part 281 | ''' 282 | 283 | 284 | _t['misc'].toc() 285 | 286 | print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 287 | .format(i + 1, num_images, _t['im_detect'].average_time(), 288 | _t['misc'].average_time())) 289 | 290 | 291 | output_dir_ws = output_dir + '/' + 'wsddn' 292 | if not os.path.exists(output_dir_ws): 293 | os.makedirs(output_dir_ws) 294 | det_file = os.path.join(output_dir_ws, 'discovery.pkl') 295 | with open(det_file, 'wb') as f: 296 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) 297 | 298 | print('Evaluating detections') 299 | imdb.evaluate_discovery(all_boxes, output_dir_ws) 300 | 301 | 302 | 303 | all_boxes_fast = all_boxes_fast[1:] # filter the background boxes 304 | output_dir_fast = output_dir + '/' + 'faster' 305 | if not os.path.exists(output_dir_fast): 306 | os.makedirs(output_dir_fast) 307 | det_file = os.path.join(output_dir_fast, 'discovery.pkl') 308 | with open(det_file, 'wb') as f: 309 | pickle.dump(all_boxes_fast, f, pickle.HIGHEST_PROTOCOL) 310 | 311 | print('Evaluating detections') 312 | imdb.evaluate_discovery(all_boxes_fast, output_dir_fast) 313 | 314 | -------------------------------------------------------------------------------- /lib/model/train_val.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/train_val.pyc -------------------------------------------------------------------------------- /lib/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__init__.py -------------------------------------------------------------------------------- /lib/nets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/resnet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/resnet_v1.cpython-36.pyc -------------------------------------------------------------------------------- 
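For reference, each discovery.pkl written by test_train_net above stores the all_boxes structure directly, i.e. one 1 x 5 (x1, y1, x2, y2, score) array per class and image. A minimal sketch of reading one back, assuming a hypothetical output directory from a VOC 2007 run (the real path is whatever get_output_dir returned, with /wsddn or /faster appended):

import pickle

# Hypothetical path, for illustration only.
with open('output/vgg16/voc_2007_test/default/wsddn/discovery.pkl', 'rb') as f:
    all_boxes = pickle.load(f)

num_classes = len(all_boxes)    # one list per class
num_images = len(all_boxes[0])  # one entry per image
print(all_boxes[0][0])          # 1 x 5 array: x1, y1, x2, y2, score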
/lib/nets/__pycache__/vgg16.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/vgg16.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/mobilenet_v1.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | 15 | import numpy as np 16 | from collections import namedtuple, OrderedDict 17 | 18 | from nets.network import Network 19 | from model.config import cfg 20 | 21 | # The following is adapted from: 22 | # https://github.com/tensorflow/models/blob/master/slim/nets/mobilenet_v1.py 23 | 24 | # Conv and DepthSepConv named tuple define layers of the MobileNet architecture 25 | # Conv defines 3x3 convolution layers 26 | # DepthSepConv defines 3x3 depthwise convolution followed by 1x1 convolution. 27 | # stride is the stride of the convolution 28 | # depth is the number of channels or filters in a layer 29 | Conv = namedtuple('Conv', ['kernel', 'stride', 'depth']) 30 | DepthSepConv = namedtuple('DepthSepConv', ['kernel', 'stride', 'depth']) 31 | 32 | # _CONV_DEFS specifies the MobileNet body 33 | _CONV_DEFS = [ 34 | Conv(kernel=3, stride=2, depth=32), 35 | DepthSepConv(kernel=3, stride=1, depth=64), 36 | DepthSepConv(kernel=3, stride=2, depth=128), 37 | DepthSepConv(kernel=3, stride=1, depth=128), 38 | DepthSepConv(kernel=3, stride=2, depth=256), 39 | DepthSepConv(kernel=3, stride=1, depth=256), 40 | DepthSepConv(kernel=3, stride=2, depth=512), 41 | DepthSepConv(kernel=3, stride=1, depth=512), 42 | DepthSepConv(kernel=3, stride=1, depth=512), 43 | DepthSepConv(kernel=3, stride=1, depth=512), 44 | DepthSepConv(kernel=3, stride=1, depth=512), 45 | DepthSepConv(kernel=3, stride=1, depth=512), 46 | # use stride 1 for the 13th layer 47 | DepthSepConv(kernel=3, stride=1, depth=1024), 48 | DepthSepConv(kernel=3, stride=1, depth=1024) 49 | ] 50 | 51 | def mobilenet_v1_base(final_endpoint='Conv2d_13_pointwise', 52 | min_depth=8, 53 | depth_multiplier=1.0, 54 | conv_defs=None, 55 | output_stride=None): 56 | """Mobilenet v1. 57 | 58 | Constructs a Mobilenet v1 network from inputs to the given final endpoint. 59 | 60 | Args: 61 | inputs: a tensor of shape [batch_size, height, width, channels]. 62 | final_endpoint: specifies the endpoint to construct the network up to. It 63 | can be one of ['Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', 64 | 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5_pointwise', 65 | 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', 66 | 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', 67 | 'Conv2d_12_pointwise', 'Conv2d_13_pointwise']. 68 | min_depth: Minimum depth value (number of channels) for all convolution ops. 69 | Enforced when depth_multiplier < 1, and not an active constraint when 70 | depth_multiplier >= 1. 
71 | depth_multiplier: Float multiplier for the depth (number of channels) 72 | for all convolution ops. The value must be greater than zero. Typical 73 | usage will be to set this value in (0, 1) to reduce the number of 74 | parameters or computation cost of the model. 75 | conv_defs: A list of ConvDef namedtuples specifying the net architecture. 76 | output_stride: An integer that specifies the requested ratio of input to 77 | output spatial resolution. If not None, then we invoke atrous convolution 78 | if necessary to prevent the network from reducing the spatial resolution 79 | of the activation maps. Allowed values are 8 (accurate fully convolutional 80 | mode), 16 (fast fully convolutional mode), 32 (classification mode). 81 | scope: Optional variable_scope. 82 | 83 | Returns: 84 | tensor_out: output tensor corresponding to the final_endpoint. 85 | end_points: a set of activations for external use, for example summaries or 86 | losses. 87 | 88 | Raises: 89 | ValueError: if final_endpoint is not set to one of the predefined values, 90 | or depth_multiplier <= 0, or the target output_stride is not 91 | allowed. 92 | """ 93 | depth = lambda d: max(int(d * depth_multiplier), min_depth) 94 | end_points = OrderedDict() 95 | 96 | # Used to find thinned depths for each layer. 97 | if depth_multiplier <= 0: 98 | raise ValueError('depth_multiplier is not greater than zero.') 99 | 100 | if conv_defs is None: 101 | conv_defs = _CONV_DEFS 102 | 103 | if output_stride is not None and output_stride not in [8, 16, 32]: 104 | raise ValueError('Only allowed output_stride values are 8, 16, 32.') 105 | 106 | def conv_bn(in_channels, out_channels, kernel_size=3, stride=1): 107 | return nn.Sequential( 108 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, (kernel_size - 1) // 2, bias=False), 109 | nn.BatchNorm2d(out_channels), 110 | nn.ReLU6(inplace=True) 111 | ) 112 | 113 | def conv_dw(in_channels, kernel_size=3, stride=1, dilation=1): 114 | return nn.Sequential( 115 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, (kernel_size - 1) // 2,\ 116 | groups=in_channels, dilation=dilation, bias=False), 117 | nn.BatchNorm2d(in_channels), 118 | nn.ReLU6(inplace=True) 119 | ) 120 | 121 | def conv_pw(in_channels, out_channels, kernel_size=3, stride=1, dilation=1): 122 | return nn.Sequential( 123 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, 0, bias=False), 124 | nn.BatchNorm2d(out_channels), 125 | nn.ReLU6(inplace=True), 126 | ) 127 | 128 | # The current_stride variable keeps track of the output stride of the 129 | # activations, i.e., the running product of convolution strides up to the 130 | # current network layer. This allows us to invoke atrous convolution 131 | # whenever applying the next convolution would result in the activations 132 | # having output stride larger than the target output_stride. 133 | current_stride = 1 134 | 135 | # The atrous convolution rate parameter. 136 | rate = 1 137 | 138 | in_channels = 3 139 | for i, conv_def in enumerate(conv_defs): 140 | end_point_base = 'Conv2d_%d' % i 141 | 142 | if output_stride is not None and current_stride == output_stride: 143 | # If we have reached the target output_stride, then we need to employ 144 | # atrous convolution with stride=1 and multiply the atrous rate by the 145 | # current unit's stride for use in subsequent layers. 
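# As a concrete example, with output_stride=8: current_stride reaches 8 after
# the stride-2 DepthSepConv with depth 256, so the later stride-2 definition
# (depth 512) is applied with layer_stride=1 while rate doubles from 1 to 2;
# from then on the depthwise convolutions dilate (layer_rate=2) instead of
# downsampling, keeping the overall output stride at 8.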
146 | layer_stride = 1 147 | layer_rate = rate 148 | rate *= conv_def.stride 149 | else: 150 | layer_stride = conv_def.stride 151 | layer_rate = 1 152 | current_stride *= conv_def.stride 153 | 154 | out_channels = depth(conv_def.depth) 155 | if isinstance(conv_def, Conv): 156 | end_point = end_point_base 157 | end_points[end_point] = conv_bn(in_channels, out_channels, conv_def.kernel, 158 | stride=conv_def.stride) 159 | if end_point == final_endpoint: 160 | return nn.Sequential(end_points) 161 | 162 | elif isinstance(conv_def, DepthSepConv): 163 | end_points[end_point_base] = nn.Sequential(OrderedDict([ 164 | ('depthwise', conv_dw(in_channels, conv_def.kernel, stride=layer_stride, dilation=layer_rate)), 165 | ('pointwise', conv_pw(in_channels, out_channels, 1, stride=1))])) 166 | 167 | if end_point_base + '_pointwise' == final_endpoint: 168 | return nn.Sequential(end_points) 169 | else: 170 | raise ValueError('Unknown convolution type %s for layer %d' 171 | % (conv_def.ltype, i)) 172 | in_channels = out_channels 173 | raise ValueError('Unknown final endpoint %s' % final_endpoint) 174 | 175 | class mobilenetv1(Network): 176 | def __init__(self): 177 | Network.__init__(self) 178 | self._feat_stride = [16, ] 179 | self._feat_compress = [1. / float(self._feat_stride[0]), ] 180 | self._depth_multiplier = cfg.MOBILENET.DEPTH_MULTIPLIER 181 | self._net_conv_channels = 512 182 | self._fc7_channels = 1024 183 | 184 | def init_weights(self): 185 | def normal_init(m, mean, stddev, truncated=False): 186 | """ 187 | weight initalizer: truncated normal and random normal. 188 | """ 189 | if m.__class__.__name__.find('Conv') == -1: 190 | return 191 | if truncated: 192 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 193 | else: 194 | m.weight.data.normal_(mean, stddev) 195 | if m.bias is not None: m.bias.data.zero_() 196 | 197 | self.mobilenet.apply(lambda m: normal_init(m, 0, 0.09, True)) 198 | normal_init(self.rpn_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 199 | normal_init(self.rpn_cls_score_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 200 | normal_init(self.rpn_bbox_pred_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 201 | normal_init(self.cls_score_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 202 | normal_init(self.bbox_pred_net, 0, 0.001, cfg.TRAIN.TRUNCATED) 203 | 204 | def _image_to_head(self): 205 | net_conv = self._layers['head'](self._image) 206 | self._act_summaries['conv'] = net_conv 207 | 208 | return net_conv 209 | 210 | def _head_to_tail(self, pool5): 211 | fc7 = self._layers['tail'](pool5) 212 | fc7 = fc7.mean(3).mean(2) 213 | return fc7 214 | 215 | def _init_head_tail(self): 216 | self.mobilenet = mobilenet_v1_base() 217 | 218 | # Fix blocks 219 | assert (0 <= cfg.MOBILENET.FIXED_LAYERS <= 12) 220 | for m in list(self.mobilenet.children())[:cfg.MOBILENET.FIXED_LAYERS]: 221 | for p in m.parameters(): 222 | p.requires_grad = False 223 | 224 | def set_bn_fix(m): 225 | classname = m.__class__.__name__ 226 | if classname.find('BatchNorm') != -1: 227 | for p in m.parameters(): p.requires_grad=False 228 | 229 | self.mobilenet.apply(set_bn_fix) 230 | 231 | # Add weight decay 232 | def l2_regularizer(m, wd): 233 | if m.__class__.__name__.find('Conv') != -1: 234 | m.weight.weight_decay = cfg.MOBILENET.WEIGHT_DECAY 235 | if cfg.MOBILENET.REGU_DEPTH: 236 | self.mobilenet.apply(lambda x: l2_regularizer(x, cfg.MOBILENET.WEIGHT_DECAY)) 237 | else: 238 | self.mobilenet.apply(lambda x: l2_regularizer(x, 0)) 239 | # always set the first conv layer 240 | list(self.mobilenet.children())[0].apply(lambda x: 
l2_regularizer(x, cfg.MOBILENET.WEIGHT_DECAY)) 241 | 242 | # Build mobilenet. 243 | self._layers['head'] = nn.Sequential(*list(self.mobilenet.children())[:12]) 244 | self._layers['tail'] = nn.Sequential(*list(self.mobilenet.children())[12:]) 245 | 246 | def load_pretrained_cnn(self, state_dict): 247 | # TODO 248 | print('Warning: No available pretrained model yet') 249 | return 250 | self.mobilenet.load_state_dict({k: state_dict[k] for k in list(self.resnet.state_dict())}) 251 | -------------------------------------------------------------------------------- /lib/nets/resnet_v1.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from nets.network import Network 11 | from model.config import cfg 12 | 13 | import utils.timer 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | from torch.autograd import Variable 19 | import math 20 | import torch.utils.model_zoo as model_zoo 21 | 22 | 23 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 24 | 'resnet152'] 25 | 26 | 27 | model_urls = { 28 | 'resnet18': 'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth', 29 | 'resnet34': 'https://s3.amazonaws.com/pytorch/models/resnet34-333f7ec4.pth', 30 | 'resnet50': 'https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth', 31 | 'resnet101': 'https://s3.amazonaws.com/pytorch/models/resnet101-5d3b4d8f.pth', 32 | 'resnet152': 'https://s3.amazonaws.com/pytorch/models/resnet152-b121ed2d.pth', 33 | } 34 | 35 | 36 | def conv3x3(in_planes, out_planes, stride=1): 37 | "3x3 convolution with padding" 38 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 39 | padding=1, bias=False) 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | residual = self.downsample(x) 67 | 68 | out += residual 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 80 | self.bn1 = nn.BatchNorm2d(planes) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 82 | padding=1, bias=False) 83 | self.bn2 = nn.BatchNorm2d(planes) 84 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 85 | self.bn3 = nn.BatchNorm2d(planes * 4) 86 | self.relu = nn.ReLU(inplace=True) 87 | self.downsample = downsample 88 | 
self.stride = stride 89 | 90 | def forward(self, x): 91 | residual = x 92 | 93 | out = self.conv1(x) 94 | out = self.bn1(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv2(out) 98 | out = self.bn2(out) 99 | out = self.relu(out) 100 | 101 | out = self.conv3(out) 102 | out = self.bn3(out) 103 | 104 | if self.downsample is not None: 105 | residual = self.downsample(x) 106 | 107 | out += residual 108 | out = self.relu(out) 109 | 110 | return out 111 | 112 | 113 | class ResNet(nn.Module): 114 | def __init__(self, block, layers, num_classes=1000): 115 | self.inplanes = 64 116 | super(ResNet, self).__init__() 117 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 118 | bias=False) 119 | self.bn1 = nn.BatchNorm2d(64) 120 | self.relu = nn.ReLU(inplace=True) 121 | # maxpool different from pytorch-resnet, to match tf-faster-rcnn 122 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 123 | self.layer1 = self._make_layer(block, 64, layers[0]) 124 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 125 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 126 | # use stride 1 for the last conv4 layer (same as tf-faster-rcnn) 127 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) 128 | 129 | for m in self.modules(): 130 | if isinstance(m, nn.Conv2d): 131 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 132 | m.weight.data.normal_(0, math.sqrt(2. / n)) 133 | elif isinstance(m, nn.BatchNorm2d): 134 | m.weight.data.fill_(1) 135 | m.bias.data.zero_() 136 | 137 | def _make_layer(self, block, planes, blocks, stride=1): 138 | downsample = None 139 | if stride != 1 or self.inplanes != planes * block.expansion: 140 | downsample = nn.Sequential( 141 | nn.Conv2d(self.inplanes, planes * block.expansion, 142 | kernel_size=1, stride=stride, bias=False), 143 | nn.BatchNorm2d(planes * block.expansion), 144 | ) 145 | 146 | layers = [] 147 | layers.append(block(self.inplanes, planes, stride, downsample)) 148 | self.inplanes = planes * block.expansion 149 | for i in range(1, blocks): 150 | layers.append(block(self.inplanes, planes)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def resnet18(pretrained=False): 155 | """Constructs a ResNet-18 model. 156 | Args: 157 | pretrained (bool): If True, returns a model pre-trained on ImageNet 158 | """ 159 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 160 | if pretrained: 161 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 162 | return model 163 | 164 | 165 | def resnet34(pretrained=False): 166 | """Constructs a ResNet-34 model. 167 | Args: 168 | pretrained (bool): If True, returns a model pre-trained on ImageNet 169 | """ 170 | model = ResNet(BasicBlock, [3, 4, 6, 3]) 171 | if pretrained: 172 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 173 | return model 174 | 175 | 176 | def resnet50(pretrained=False): 177 | """Constructs a ResNet-50 model. 178 | Args: 179 | pretrained (bool): If True, returns a model pre-trained on ImageNet 180 | """ 181 | model = ResNet(Bottleneck, [3, 4, 6, 3]) 182 | if pretrained: 183 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 184 | return model 185 | 186 | 187 | def resnet101(pretrained=False): 188 | """Constructs a ResNet-101 model. 
189 | Args: 190 | pretrained (bool): If True, returns a model pre-trained on ImageNet 191 | """ 192 | model = ResNet(Bottleneck, [3, 4, 23, 3]) 193 | if pretrained: 194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 195 | return model 196 | 197 | 198 | def resnet152(pretrained=False): 199 | """Constructs a ResNet-152 model. 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 8, 36, 3]) 204 | if pretrained: 205 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 206 | return model 207 | 208 | class resnetv1(Network): 209 | def __init__(self, num_layers=50): 210 | Network.__init__(self) 211 | self._feat_stride = [16, ] 212 | self._feat_compress = [1. / float(self._feat_stride[0]), ] 213 | self._num_layers = num_layers 214 | self._net_conv_channels = 1024 215 | self._fc7_channels = 2048 216 | 217 | def _crop_pool_layer(self, bottom, rois): 218 | return Network._crop_pool_layer(self, bottom, rois, cfg.RESNET.MAX_POOL) 219 | 220 | def _image_to_head(self): 221 | net_conv = self._layers['head'](self._image) 222 | self._act_summaries['conv'] = net_conv 223 | 224 | return net_conv 225 | 226 | def _head_to_tail(self, pool5): 227 | fc7 = self.resnet.layer4(pool5).mean(3).mean(2) # average pooling after layer4 228 | return fc7 229 | 230 | def _init_head_tail(self): 231 | # choose different blocks for different number of layers 232 | if self._num_layers == 50: 233 | self.resnet = resnet50() 234 | 235 | elif self._num_layers == 101: 236 | self.resnet = resnet101() 237 | 238 | elif self._num_layers == 152: 239 | self.resnet = resnet152() 240 | 241 | else: 242 | # other numbers are not supported 243 | raise NotImplementedError 244 | 245 | # Fix blocks 246 | for p in self.resnet.bn1.parameters(): p.requires_grad=False 247 | for p in self.resnet.conv1.parameters(): p.requires_grad=False 248 | assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4) 249 | if cfg.RESNET.FIXED_BLOCKS >= 3: 250 | for p in self.resnet.layer3.parameters(): p.requires_grad=False 251 | if cfg.RESNET.FIXED_BLOCKS >= 2: 252 | for p in self.resnet.layer2.parameters(): p.requires_grad=False 253 | if cfg.RESNET.FIXED_BLOCKS >= 1: 254 | for p in self.resnet.layer1.parameters(): p.requires_grad=False 255 | 256 | def set_bn_fix(m): 257 | classname = m.__class__.__name__ 258 | if classname.find('BatchNorm') != -1: 259 | for p in m.parameters(): p.requires_grad=False 260 | 261 | self.resnet.apply(set_bn_fix) 262 | 263 | # Build resnet. 
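# The 'head' covers conv1 through layer3 (overall stride 16, matching
# self._feat_stride); layer4 is applied later in _head_to_tail, where its
# output is average-pooled into the fc7 feature.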
264 | self._layers['head'] = nn.Sequential(self.resnet.conv1, self.resnet.bn1,self.resnet.relu, 265 | self.resnet.maxpool,self.resnet.layer1,self.resnet.layer2,self.resnet.layer3) 266 | 267 | def train(self, mode=True): 268 | # Override train so that the training mode is set as we want 269 | nn.Module.train(self, mode) 270 | if mode: 271 | # Set fixed blocks to be in eval mode (not really doing anything) 272 | self.resnet.eval() 273 | if cfg.RESNET.FIXED_BLOCKS <= 3: 274 | self.resnet.layer4.train() 275 | if cfg.RESNET.FIXED_BLOCKS <= 2: 276 | self.resnet.layer3.train() 277 | if cfg.RESNET.FIXED_BLOCKS <= 1: 278 | self.resnet.layer2.train() 279 | if cfg.RESNET.FIXED_BLOCKS == 0: 280 | self.resnet.layer1.train() 281 | 282 | # Set batchnorm always in eval mode during training 283 | def set_bn_eval(m): 284 | classname = m.__class__.__name__ 285 | if classname.find('BatchNorm') != -1: 286 | m.eval() 287 | 288 | self.resnet.apply(set_bn_eval) 289 | 290 | def load_pretrained_cnn(self, state_dict): 291 | self.resnet.load_state_dict({k: state_dict[k] for k in list(self.resnet.state_dict())}) 292 | -------------------------------------------------------------------------------- /lib/nets/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from nets.network import Network 11 | from model.config import cfg 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.autograd import Variable 17 | import math 18 | import torchvision.models as models 19 | 20 | class vgg16(Network): 21 | def __init__(self): 22 | Network.__init__(self) 23 | self._feat_stride = [16, ] 24 | self._feat_compress = [1. 
/ float(self._feat_stride[0]), ] 25 | self._net_conv_channels = 512 26 | self._fc7_channels = 4096 27 | 28 | def _init_head_tail(self): 29 | self.vgg = models.vgg16() 30 | # Remove fc8 31 | self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1]) 32 | 33 | # Fix the layers before conv3: 34 | for layer in range(10): 35 | for p in self.vgg.features[layer].parameters(): p.requires_grad = False 36 | 37 | # not using the last maxpool layer 38 | self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1]) 39 | 40 | def _image_to_head(self): 41 | net_conv = self._layers['head'](self._image) 42 | self._act_summaries['conv'] = net_conv 43 | 44 | return net_conv 45 | 46 | def _head_to_tail(self, pool5): 47 | pool5_flat = pool5.view(pool5.size(0), -1) 48 | fc7 = self.vgg.classifier(pool5_flat) 49 | 50 | return fc7 51 | 52 | def load_pretrained_cnn(self, state_dict): 53 | self.vgg.load_state_dict({k:v for k,v in state_dict.items() if k in self.vgg.state_dict()}) -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = 
torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 
78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = 
fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); 
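Both the CPU and GPU kernels above are reached from Python through model.nms_wrapper.nms, which expects an N x 5 tensor of (x1, y1, x2, y2, score) rows. A minimal sketch, assuming the _ext.nms extension has been built with lib/nms/build.py:

import torch
from model.nms_wrapper import nms

# Two heavily overlapping boxes plus one disjoint box; scores in the last column.
dets = torch.FloatTensor([[ 10.,  10., 100., 100., 0.95],
                          [ 12.,  12., 102., 102., 0.90],
                          [200., 200., 260., 260., 0.80]])
keep = nms(dets, 0.5)  # LongTensor of indices into dets
# Box 1 overlaps box 0 with IoU ~ 0.92 > 0.5, so only indices 0 and 2 survive.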
-------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 
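In this PyTorch port it is a plain Python class: construct it with a roidb and the number of classes, then call forward() to get the next minibatch. A minimal usage sketch, assuming the roidb has been prepared beforehand (e.g. by roi_data_layer.roidb.prepare_roidb):

    layer = RoIDataLayer(roidb, imdb.num_classes)
    blobs = layer.forward()
    # blobs holds 'data', 'boxes', 'im_info' and 'labels' (see minibatch.py)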
11 | """ 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from model.config import cfg 17 | from roi_data_layer.minibatch import get_minibatch 18 | import numpy as np 19 | import time 20 | 21 | class RoIDataLayer(object): 22 | """Fast R-CNN data layer used for training.""" 23 | 24 | def __init__(self, roidb, num_classes, random=False): 25 | """Set the roidb to be used by this layer during training.""" 26 | self._roidb = roidb 27 | self._num_classes = num_classes 28 | # Also set a random flag 29 | self._random = random 30 | self._shuffle_roidb_inds() 31 | 32 | def _shuffle_roidb_inds(self): 33 | """Randomly permute the training roidb.""" 34 | # If the random flag is set, 35 | # then the database is shuffled according to system time 36 | # Useful for the validation set 37 | if self._random: 38 | st0 = np.random.get_state() 39 | millis = int(round(time.time() * 1000)) % 4294967295 40 | np.random.seed(millis) 41 | 42 | if cfg.TRAIN.ASPECT_GROUPING: 43 | raise NotImplementedError 44 | ''' 45 | widths = np.array([r['width'] for r in self._roidb]) 46 | heights = np.array([r['height'] for r in self._roidb]) 47 | horz = (widths >= heights) 48 | vert = np.logical_not(horz) 49 | horz_inds = np.where(horz)[0] 50 | vert_inds = np.where(vert)[0] 51 | inds = np.hstack(( 52 | np.random.permutation(horz_inds), 53 | np.random.permutation(vert_inds))) 54 | inds = np.reshape(inds, (-1, 2)) 55 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 56 | inds = np.reshape(inds[row_perm, :], (-1,)) 57 | self._perm = inds 58 | ''' 59 | else: 60 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 61 | # Restore the random state 62 | if self._random: 63 | np.random.set_state(st0) 64 | 65 | self._cur = 0 66 | 67 | def _get_next_minibatch_inds(self): 68 | """Return the roidb indices for the next minibatch.""" 69 | 70 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 71 | self._shuffle_roidb_inds() 72 | 73 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 74 | self._cur += cfg.TRAIN.IMS_PER_BATCH 75 | 76 | return db_inds 77 | 78 | def _get_next_minibatch(self): 79 | """Return the blobs to be used for the next minibatch. 80 | 81 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 82 | separate process and made available through self._blob_queue. 
83 | """ 84 | db_inds = self._get_next_minibatch_inds() 85 | minibatch_db = [self._roidb[i] for i in db_inds] 86 | return get_minibatch(minibatch_db, self._num_classes) 87 | 88 | def forward(self): 89 | """Get blobs and copy them into this layer's top blob vector.""" 90 | blobs = self._get_next_minibatch() 91 | return blobs 92 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | import cv2 16 | from model.config import cfg 17 | from utils.blob import prep_im_for_blob, im_list_to_blob 18 | 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | # Sample random scales to use for each image in this batch 23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 24 | size=num_images) 25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 26 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 27 | format(num_images, cfg.TRAIN.BATCH_SIZE) 28 | 29 | # Get the input image blob, formatted for caffe 30 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 31 | 32 | blobs = {'data': im_blob} 33 | 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | 37 | # gt boxes: (x1, y1, x2, y2, cls) 38 | #if cfg.TRAIN.USE_ALL_GT: 39 | # Include all ground truth boxes 40 | # gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 41 | #else: 42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 43 | # gt_inds = np.where(roidb[0]['gt_classes'] != 0 & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 44 | #gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 45 | #gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 46 | #gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 47 | boxes = roidb[0]['boxes'] * im_scales[0] 48 | batch_ind = 0 * np.ones((boxes.shape[0], 1)) 49 | boxes = np.hstack((batch_ind, boxes)) 50 | DEDUP_BOXES=1./16. 51 | if DEDUP_BOXES > 0: 52 | v = np.array([1,1e3, 1e6, 1e9, 1e12]) 53 | hashes = np.round(boxes * DEDUP_BOXES).dot(v) 54 | _, index, inv_index = np.unique(hashes, return_index=True, 55 | return_inverse=True) 56 | boxes = boxes[index, :] 57 | 58 | blobs['boxes'] = boxes 59 | blobs['im_info'] = np.array( 60 | [im_blob.shape[1], im_blob.shape[2], im_scales[0]], 61 | dtype=np.float32) 62 | blobs['labels'] = roidb[0]['labels'] 63 | 64 | return blobs 65 | 66 | def _get_image_blob(roidb, scale_inds): 67 | """Builds an input blob from the images in the roidb at the specified 68 | scales. 
69 | """ 70 | num_images = len(roidb) 71 | processed_ims = [] 72 | im_scales = [] 73 | for i in range(num_images): 74 | im = cv2.imread(roidb[i]['image']) 75 | if roidb[i]['flipped']: 76 | im = im[:, ::-1, :] 77 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 78 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 79 | cfg.TRAIN.MAX_SIZE) 80 | im_scales.append(im_scale) 81 | processed_ims.append(im) 82 | 83 | # Create a blob to hold the input images 84 | blob = im_list_to_blob(processed_ims) 85 | 86 | return blob, im_scales 87 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | 14 | def prepare_roidb(imdb): 15 | """Enrich the imdb's roidb by adding some derived quantities that 16 | are useful for training. This function precomputes the maximum 17 | overlap, taken over ground-truth boxes, between each ROI and 18 | each ground-truth box. The class with maximum overlap is also 19 | recorded. 20 | """ 21 | roidb = imdb.roidb 22 | for i in range(len(imdb.image_index)): 23 | roidb[i]['image'] = imdb.image_path_at(i) 24 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/bbox.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/bbox.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/blob.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/blob.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/timer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/timer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/visualization.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/visualization.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/bbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def bbox_overlaps(boxes, query_boxes): 5 | """ 6 | Parameters 7 | ---------- 8 | boxes: (N, 4) ndarray or tensor or variable 9 | query_boxes: (K, 4) ndarray or tensor or variable 10 | Returns 11 | ------- 12 | overlaps: (N, K) overlap between boxes and query_boxes 13 | """ 14 | if isinstance(boxes, np.ndarray): 15 | boxes = torch.from_numpy(boxes) 16 | query_boxes = torch.from_numpy(query_boxes) 17 | out_fn = lambda x: x.numpy() # If input is ndarray, turn the overlaps back to ndarray when return 18 | else: 19 | out_fn = lambda x: x 20 | 21 | box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \ 22 | (boxes[:, 3] - boxes[:, 1] + 1) 23 | query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \ 24 | (query_boxes[:, 3] - query_boxes[:, 1] + 1) 25 | 26 | iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) - torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0) 27 | ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) - torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0) 28 | ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih 29 | overlaps = iw * ih / ua 30 | return out_fn(overlaps) -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 | """Convert a list of images into a network input. 19 | 20 | Assumes images are already prepared (means subtracted, BGR order, ...). 
21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | im_shape = im.shape 38 | im_size_min = np.min(im_shape[0:2]) 39 | im_size_max = np.max(im_shape[0:2]) 40 | im_scale = float(target_size) / float(im_size_min) 41 | # Prevent the biggest axis from being more than MAX_SIZE 42 | if np.round(im_scale * im_size_max) > max_size: 43 | im_scale = float(max_size) / float(im_size_max) 44 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 45 | interpolation=cv2.INTER_LINEAR) 46 | 47 | return im, im_scale 48 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | import torch 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self._total_time = {} 15 | self._calls = {} 16 | self._start_time = {} 17 | self._diff = {} 18 | self._average_time = {} 19 | 20 | def tic(self, name='default'): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | torch.cuda.synchronize() 24 | self._start_time[name] = time.time() 25 | 26 | def toc(self, name='default', average=True): 27 | torch.cuda.synchronize() 28 | self._diff[name] = time.time() - self._start_time[name] 29 | self._total_time[name] = self._total_time.get(name, 0.)
+ self._diff[name] 30 | self._calls[name] = self._calls.get(name, 0 ) + 1 31 | self._average_time[name] = self._total_time[name] / self._calls[name] 32 | if average: 33 | return self._average_time[name] 34 | else: 35 | return self._diff[name] 36 | 37 | def average_time(self, name='default'): 38 | return self._average_time[name] 39 | 40 | def total_time(self, name='default'): 41 | return self._total_time[name] 42 | 43 | timer = Timer() 44 | -------------------------------------------------------------------------------- /lib/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from six.moves import range 12 | import PIL.Image as Image 13 | import PIL.ImageColor as ImageColor 14 | import PIL.ImageDraw as ImageDraw 15 | import PIL.ImageFont as ImageFont 16 | 17 | STANDARD_COLORS = [ 18 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 19 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 20 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 21 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 22 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 23 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 24 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 25 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 26 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 27 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 28 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 29 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 30 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 31 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 32 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 33 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 34 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 35 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 36 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 37 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 38 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 39 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 40 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 41 | ] 42 | 43 | NUM_COLORS = len(STANDARD_COLORS) 44 | 45 | try: 46 | FONT = ImageFont.truetype('arial.ttf', 24) 47 | except IOError: 48 | FONT = ImageFont.load_default() 49 | 50 | def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4): 51 | draw = ImageDraw.Draw(image) 52 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 53 | draw.line([(left, top), (left, bottom), (right, bottom), 54 | (right, top), (left, top)], width=thickness, fill=color) 55 | text_bottom = bottom 56 | # Reverse list and 
print from bottom to top. 57 | text_width, text_height = font.getsize(display_str) 58 | margin = np.ceil(0.05 * text_height) 59 | draw.rectangle( 60 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 61 | text_bottom)], 62 | fill=color) 63 | draw.text( 64 | (left + margin, text_bottom - text_height - margin), 65 | display_str, 66 | fill='black', 67 | font=font) 68 | 69 | return image 70 | 71 | def draw_bounding_boxes(image, gt_boxes, im_info): 72 | num_boxes = gt_boxes.shape[0] 73 | gt_boxes_new = gt_boxes.copy() 74 | gt_boxes_new[:,:4] = np.round(gt_boxes_new[:,:4].copy() / im_info[2]) 75 | disp_image = Image.fromarray(np.uint8(image[0])) 76 | 77 | for i in range(num_boxes): 78 | this_class = int(gt_boxes_new[i, 4]) 79 | disp_image = _draw_single_box(disp_image, 80 | gt_boxes_new[i, 0], 81 | gt_boxes_new[i, 1], 82 | gt_boxes_new[i, 2], 83 | gt_boxes_new[i, 3], 84 | 'N%02d-C%02d' % (i, this_class), 85 | FONT, 86 | color=STANDARD_COLORS[this_class % NUM_COLORS]) 87 | 88 | image[0, :] = np.array(disp_image) 89 | return image -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '..', 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, '..', 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /tools/_init_paths.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/tools/_init_paths.pyc -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_mobile.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['mobilenet' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('moving_mean', 'running_mean'),\ 32 | ('moving_variance', 'running_var'),\ 33 | ('weights', 'weight'),\ 34 | ('biases', 'bias'),\ 35 | ('/BatchNorm', '.1'),\ 36 | ('_pointwise/', '.pointwise.0.'),\ 37 | ('_depthwise/depthwise_', '.depthwise.0.'),\ 38 | ('_pointwise.1', '.pointwise.1'),\ 39 | ('_depthwise.1', '.depthwise.1'),\ 40 | ('Conv2d_0/', 'Conv2d_0.0.'),\ 41 | ('mobilenet/rpn_conv/3x3', 
'rpn_net'),\ 42 | ('mobilenet/rpn_cls_score', 'rpn_cls_score_net'),\ 43 | ('mobilenet/cls_score', 'cls_score_net'),\ 44 | ('mobilenet/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 45 | ('mobilenet/bbox_pred', 'bbox_pred_net'),\ 46 | ('gamma', 'weight'),\ 47 | ('beta', 'bias'),\ 48 | ('/', '.')]) 49 | 50 | for a, b in dummy_replace.items(): 51 | for k in list(var_dict.keys()): 52 | if a in k: 53 | var_dict[k.replace(a,b)] = var_dict[k] 54 | del var_dict[k] 55 | 56 | # print set(var_dict.keys()) - set(x.keys()) 57 | # print set(x.keys()) - set(var_dict.keys()) 58 | 59 | for k in list(var_dict.keys()): 60 | if var_dict[k].ndim == 4: 61 | if 'depthwise' in k: 62 | var_dict[k] = var_dict[k].transpose((2, 3, 0, 1)).copy(order='C') 63 | else: 64 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 65 | if var_dict[k].ndim == 2: 66 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 67 | # assert x[k].shape == var_dict[k].shape, k 68 | 69 | for k in list(var_dict.keys()): 70 | var_dict[k] = torch.from_numpy(var_dict[k]) 71 | 72 | 73 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 74 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_vgg.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['vgg' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('weights', 'weight'),\ 32 | ('biases', 'bias'),\ 33 | ('vgg/rpn_conv/3x3', 'rpn_net'),\ 34 | ('vgg/rpn_cls_score', 'rpn_cls_score_net'),\ 35 | ('vgg/cls_score', 'cls_score_net'),\ 36 | ('vgg/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 37 | ('vgg/bbox_pred', 'bbox_pred_net'),\ 38 | ('/', '.')]) 39 | 40 | for a, b in dummy_replace.items(): 41 | for k in list(var_dict.keys()): 42 | if a in k: 43 | var_dict[k.replace(a,b)] = var_dict[k] 44 | del var_dict[k] 45 | 46 | layer_map = OrderedDict([ 47 | ('conv1.conv1_1', 'features.0'),\ 48 | ('conv1.conv1_2', 'features.2'),\ 49 | ('conv2.conv2_1', 'features.5'),\ 50 | ('conv2.conv2_2', 'features.7'),\ 51 | ('conv3.conv3_1', 'features.10'),\ 52 | ('conv3.conv3_2', 'features.12'),\ 53 | ('conv3.conv3_3', 'features.14'),\ 54 | ('conv4.conv4_1', 'features.17'),\ 55 | ('conv4.conv4_2', 'features.19'),\ 56 | ('conv4.conv4_3', 'features.21'),\ 57 | ('conv5.conv5_1', 'features.24'),\ 58 | ('conv5.conv5_2', 'features.26'),\ 59 | ('conv5.conv5_3', 'features.28'),\ 60 | ('fc6', 'classifier.0'),\ 61 | ('fc7', 'classifier.3')]) 62 | 63 | for a, b in layer_map.items(): 64 | for k in list(var_dict.keys()): 65 | if a in k: 66 | var_dict[k.replace(a,b)] = var_dict[k] 67 | del var_dict[k] 68 | 69 | 
for k in list(var_dict.keys()): 70 | if 'classifier.0' in k: 71 | if var_dict[k].ndim == 2: # weight 72 | var_dict[k] = var_dict[k].reshape(7,7,512,4096).transpose((3, 2, 0, 1)).reshape(4096, -1).copy(order='C') 73 | else: 74 | if var_dict[k].ndim == 4: 75 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 76 | if var_dict[k].ndim == 2: 77 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 78 | # assert x[k].shape == var_dict[k].shape, k 79 | 80 | for k in list(var_dict.keys()): 81 | var_dict[k] = torch.from_numpy(var_dict[k]) 82 | 83 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 84 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Tensorflow Faster R-CNN 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Xinlei Chen, based on code from Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """ 10 | Demo script showing detections in sample images. 11 | 12 | See README.md for installation instructions before running. 13 | """ 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | import _init_paths 19 | from model.config import cfg 20 | from model.test import im_detect 21 | from model.nms_wrapper import nms 22 | 23 | from utils.timer import Timer 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | import os, cv2 27 | import argparse 28 | 29 | from nets.vgg16 import vgg16 30 | from nets.resnet_v1 import resnetv1 31 | 32 | import torch 33 | 34 | CLASSES = ('__background__', 35 | 'aeroplane', 'bicycle', 'bird', 'boat', 36 | 'bottle', 'bus', 'car', 'cat', 'chair', 37 | 'cow', 'diningtable', 'dog', 'horse', 38 | 'motorbike', 'person', 'pottedplant', 39 | 'sheep', 'sofa', 'train', 'tvmonitor') 40 | 41 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),'res101': ('res101_faster_rcnn_iter_%d.pth',)} 42 | DATASETS= {'pascal_voc': ('voc_2007_trainval',),'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 43 | 44 | def vis_detections(im, class_name, dets, thresh=0.5): 45 | """Draw detected bounding boxes.""" 46 | inds = np.where(dets[:, -1] >= thresh)[0] 47 | if len(inds) == 0: 48 | return 49 | 50 | im = im[:, :, (2, 1, 0)] 51 | fig, ax = plt.subplots(figsize=(12, 12)) 52 | ax.imshow(im, aspect='equal') 53 | for i in inds: 54 | bbox = dets[i, :4] 55 | score = dets[i, -1] 56 | 57 | ax.add_patch( 58 | plt.Rectangle((bbox[0], bbox[1]), 59 | bbox[2] - bbox[0], 60 | bbox[3] - bbox[1], fill=False, 61 | edgecolor='red', linewidth=3.5) 62 | ) 63 | ax.text(bbox[0], bbox[1] - 2, 64 | '{:s} {:.3f}'.format(class_name, score), 65 | bbox=dict(facecolor='blue', alpha=0.5), 66 | fontsize=14, color='white') 67 | 68 | ax.set_title(('{} detections with ' 69 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 70 | thresh), 71 | fontsize=14) 72 | plt.axis('off') 73 | plt.tight_layout() 74 | plt.draw() 75 | 76 | def demo(net, image_name): 77 | """Detect object classes in an image using pre-computed object proposals.""" 78 | 79 | # Load the demo image 80 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 81 | im = cv2.imread(im_file) 82 | 83 | # Detect all object classes and regress object bounds 84 | timer = Timer() 85 | timer.tic() 86 | scores, boxes 
= im_detect(net, im) 87 | timer.toc() 88 | print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0])) 89 | 90 | # Visualize detections for each class 91 | CONF_THRESH = 0.8 92 | NMS_THRESH = 0.3 93 | for cls_ind, cls in enumerate(CLASSES[1:]): 94 | cls_ind += 1 # because we skipped background 95 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 96 | cls_scores = scores[:, cls_ind] 97 | dets = np.hstack((cls_boxes, 98 | cls_scores[:, np.newaxis])).astype(np.float32) 99 | keep = nms(torch.from_numpy(dets), NMS_THRESH) 100 | dets = dets[keep.numpy(), :] 101 | vis_detections(im, cls, dets, thresh=CONF_THRESH) 102 | 103 | def parse_args(): 104 | """Parse input arguments.""" 105 | parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo') 106 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16 res101]', 107 | choices=NETS.keys(), default='res101') 108 | parser.add_argument('--dataset', dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]', 109 | choices=DATASETS.keys(), default='pascal_voc_0712') 110 | args = parser.parse_args() 111 | 112 | return args 113 | 114 | if __name__ == '__main__': 115 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 116 | args = parse_args() 117 | 118 | # model path 119 | demonet = args.demo_net 120 | dataset = args.dataset 121 | saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default', 122 | NETS[demonet][0] %(70000 if dataset == 'pascal_voc' else 110000)) 123 | 124 | 125 | if not os.path.isfile(saved_model): 126 | raise IOError(('{:s} not found.\nDid you download the proper networks from ' 127 | 'our server and place them properly?').format(saved_model)) 128 | 129 | # load network 130 | if demonet == 'vgg16': 131 | net = vgg16() 132 | elif demonet == 'res101': 133 | net = resnetv1(num_layers=101) 134 | else: 135 | raise NotImplementedError 136 | net.create_architecture(21, 137 | tag='default', anchor_scales=[8, 16, 32]) 138 | 139 | net.load_state_dict(torch.load(saved_model)) 140 | 141 | net.eval() 142 | net.cuda() 143 | 144 | print('Loaded network {:s}'.format(saved_model)) 145 | 146 | im_names = ['000456.jpg', '000542.jpg', '001150.jpg', 147 | '001763.jpg', '004545.jpg'] 148 | for im_name in im_names: 149 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 150 | print('Demo for data/demo/{}'.format(im_name)) 151 | demo(net, im_name) 152 | 153 | plt.show() 154 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | # Reval = re-eval. Re-evaluate saved detections. 
11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import _init_paths 16 | from model.test import apply_nms 17 | from model.config import cfg 18 | from datasets.factory import get_imdb 19 | import pickle 20 | import os, sys, argparse 21 | import numpy as np 22 | import pprint 23 | 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Re-evaluate results') 30 | parser.add_argument('output_dir', nargs=1, help='results directory', 31 | type=str) 32 | parser.add_argument('--imdb', dest='imdb_name', 33 | help='dataset to re-evaluate', 34 | default='voc_2007_test', type=str) 35 | parser.add_argument('--matlab', dest='matlab_eval', 36 | help='use matlab for evaluation', 37 | action='store_true') 38 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 39 | action='store_true') 40 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 41 | action='store_true') 42 | 43 | if len(sys.argv) == 1: 44 | parser.print_help() 45 | sys.exit(1) 46 | 47 | args = parser.parse_args() 48 | return args 49 | 50 | 51 | def from_dets(imdb_name, output_dir, args): 52 | imdb = get_imdb(imdb_name) 53 | imdb.competition_mode(args.comp_mode) 54 | imdb.config['matlab_eval'] = args.matlab_eval 55 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 56 | dets = pickle.load(f) 57 | 58 | if args.apply_nms: 59 | print('Applying NMS to all detections') 60 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 61 | else: 62 | nms_dets = dets 63 | 64 | print('Evaluating detections') 65 | imdb.evaluate_detections(nms_dets, output_dir) 66 | 67 | 68 | if __name__ == '__main__': 69 | args = parse_args() 70 | pprint.pprint(args) 71 | output_dir = os.path.abspath(args.output_dir[0]) 72 | imdb_name = args.imdb_name 73 | from_dets(imdb_name, output_dir, args) 74 | -------------------------------------------------------------------------------- /tools/reval_discovery.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | # Reval = re-eval. Re-evaluate saved detections. 
11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import _init_paths 16 | from model.test import apply_nms 17 | from model.config import cfg 18 | from datasets.factory import get_imdb 19 | import pickle 20 | import os, sys, argparse 21 | import numpy as np 22 | import pprint 23 | 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Re-evaluate results') 30 | parser.add_argument('output_dir', nargs=1, help='results directory', 31 | type=str) 32 | parser.add_argument('--imdb', dest='imdb_name', 33 | help='dataset to re-evaluate', 34 | default='voc_2007_test', type=str) 35 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 36 | action='store_true') 37 | 38 | if len(sys.argv) == 1: 39 | parser.print_help() 40 | sys.exit(1) 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def from_dets(imdb_name, output_dir, args): 47 | imdb = get_imdb(imdb_name) 48 | imdb.competition_mode(args.comp_mode) 49 | with open(os.path.join(output_dir, 'discovery.pkl'), 'rb') as f: 50 | dets = pickle.load(f) 51 | 52 | 53 | print('Evaluating detections') 54 | imdb.evaluate_discovery(dets, output_dir) 55 | 56 | 57 | if __name__ == '__main__': 58 | args = parse_args() 59 | pprint.pprint(args) 60 | output_dir = os.path.abspath(args.output_dir[0]) 61 | imdb_name = args.imdb_name 62 | from_dets(imdb_name, output_dir, args) 63 | -------------------------------------------------------------------------------- /tools/show_boxes_results.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Dec 25 01:50:15 2017 4 | 5 | @author: jjwang 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | 13 | import _init_paths 14 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 15 | from datasets.factory import get_imdb 16 | import datasets.imdb 17 | import argparse 18 | import pprint 19 | import numpy as np 20 | import sys 21 | import os 22 | import pickle as pickle 23 | import cv2 24 | from matplotlib import pyplot as plt 25 | 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='show the images and the resulting detection boxes') 32 | parser.add_argument('--box', default='/DATA3_DB7/data/jjwang/workspace/wsFaster-rcnn/output/vgg16/voc_2007_test/WSDDN_PRE_50000/vgg16_faster_rcnn_iter_90000/wsddn/detections.pkl', help='boxes pkl file to load') 33 | parser.add_argument('--thr', default=0.1, type=float, help='score threshold for drawing a detection') 34 | 35 | if len(sys.argv) == 1: 36 | parser.print_help() 37 | sys.exit(1) 38 | 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | 44 | 45 | 46 | if __name__ == '__main__': 47 | args = parse_args() 48 | print('Called with args:') 49 | print(args) 50 | 51 | 52 | 53 | with open(args.box, 'rb') as fid: 54 | try: 55 | content = pickle.load(fid) 56 | except: 57 | content = pickle.load(fid, encoding='bytes') 58 | 59 | 60 | boxpathList = args.box.split('/') 61 | save_base = '/'.join(boxpathList[-5:-1]) 62 | save_path = os.path.join('../cache',save_base) 63 | save_path = os.path.join(save_path, boxpathList[-1].split('.')[0]) 64 | if not os.path.exists(save_path): 65 | os.makedirs(save_path) 66 | save_path = '../cache/' + save_path 67 | imdbname =
boxpathList[-5] 68 | print('getting imdb {:s}'.format(imdbname)) 69 | imdb = get_imdb('voc_2007_test') 70 | 71 | for idx in range(len(imdb.image_index)): 72 | im = cv2.imread(imdb.image_path_at(idx)) 73 | im = im[:,:,::-1] 74 | height, width, depth = im.shape 75 | dpi = 80 76 | plt.figure(figsize=(width/dpi,height/dpi),dpi=dpi) 77 | colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist() 78 | plt.imshow(im) # plot the image for matplotlib 79 | currentAxis = plt.gca() 80 | plt.axis('off') 81 | # scale each detection back up to the image 82 | # scale = torch.Tensor([rgb_image.shape[1::-1], rgb_image.shape[1::-1]]) 83 | for i in range(20): 84 | for j in range(len(content[i][idx])): 85 | score = content[i][idx][j][-1] 86 | if score > args.thr: # draw only detections above the --thr score threshold 87 | label_name = imdb._classes[i] 88 | display_txt = '%s: %.2f'%(label_name, score) 89 | pt = content[i][idx][j][:-1] 90 | coords = (pt[0], pt[1]), pt[2]-pt[0]+1, pt[3]-pt[1]+1 91 | color = colors[i] 92 | currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2)) 93 | currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor':color, 'alpha':0.5}) 94 | 95 | plt.savefig(save_path + '/' + imdb.image_index[idx] + '.jpg') 96 | plt.close() 97 | if idx % 500 == 0: 98 | print(idx) 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.test import test_net 12 | from model.test_train import test_train_net 13 | from model.config import cfg, cfg_from_file, cfg_from_list 14 | from datasets.factory import get_imdb 15 | import argparse 16 | import pprint 17 | import time, os, sys 18 | 19 | from nets.vgg16 import vgg16 20 | from nets.resnet_v1 import resnetv1 21 | from nets.mobilenet_v1 import mobilenetv1 22 | 23 | import torch 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 30 | parser.add_argument('--cfg', dest='cfg_file', 31 | help='optional config file', default=None, type=str) 32 | parser.add_argument('--model', dest='model', 33 | help='model to test', 34 | default=None, type=str) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to test', 37 | default='voc_2007_test', type=str) 38 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 39 | action='store_true') 40 | parser.add_argument('--num_dets', dest='max_per_image', 41 | help='max number of detections per image', 42 | default=100, type=int) 43 | parser.add_argument('--tag', dest='tag', 44 | help='tag of the model', 45 | default='', type=str) 46 | parser.add_argument('--net', dest='net', 47 | help='vgg16, res50, res101, res152, mobile', 48 | default='res50', type=str) 49 | parser.add_argument('--set', dest='set_cfgs', 50 | help='set config keys', default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | if len(sys.argv) == 1: 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | args = parser.parse_args() 58 | return args 59 | 60 | if __name__ ==
'__main__': 61 | args = parse_args() 62 | 63 | print('Called with args:') 64 | print(args) 65 | 66 | if args.cfg_file is not None: 67 | cfg_from_file(args.cfg_file) 68 | if args.set_cfgs is not None: 69 | cfg_from_list(args.set_cfgs) 70 | 71 | print('Using config:') 72 | pprint.pprint(cfg) 73 | 74 | if 1: # always use CUDA 75 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 76 | else: 77 | torch.set_default_tensor_type('torch.FloatTensor') 78 | 79 | # If a model checkpoint is given, derive the output filename from it; 80 | # otherwise fall back to the initialization weights 81 | if args.model: 82 | filename = os.path.splitext(os.path.basename(args.model))[0] 83 | else: 84 | filename = os.path.splitext(os.path.basename(args.weight))[0] 85 | 86 | tag = args.tag 87 | tag = tag if tag else 'default' 88 | filename = tag + '/' + filename # e.g. default/vgg16_faster_rcnn_iter_15000 89 | 90 | imdb = get_imdb(args.imdb_name) 91 | imdb.competition_mode(args.comp_mode) 92 | 93 | # load network 94 | if args.net == 'vgg16': 95 | net = vgg16() 96 | elif args.net == 'res50': 97 | net = resnetv1(num_layers=50) 98 | elif args.net == 'res101': 99 | net = resnetv1(num_layers=101) 100 | elif args.net == 'res152': 101 | net = resnetv1(num_layers=152) 102 | elif args.net == 'mobile': 103 | net = mobilenetv1() 104 | else: 105 | raise NotImplementedError 106 | 107 | # load model 108 | net.create_architecture(imdb.num_classes, tag='default') 109 | 110 | net.eval() 111 | net.cuda() 112 | 113 | if args.model: 114 | print(('Loading model checkpoint from {:s}').format(args.model)) 115 | net.load_state_dict(torch.load(args.model)) 116 | print('Loaded.') 117 | else: 118 | print(('Loading initial weights from {:s}').format(args.weight)) 119 | print('Loaded.') 120 | 121 | if args.imdb_name[-4:] == 'test': 122 | test_net(net, imdb, filename, max_per_image=args.max_per_image) 123 | else: 124 | test_train_net(net, imdb, filename, max_per_image=args.max_per_image) -------------------------------------------------------------------------------- /tools/trainval_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # Modified to train WSDDN 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import _init_paths 12 | from model.train_val import get_training_roidb, train_net 13 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 14 | from datasets.factory import get_imdb 15 | import datasets.imdb 16 | import argparse 17 | import pprint 18 | import numpy as np 19 | import sys 20 | import torch 21 | 22 | from nets.vgg16 import vgg16 23 | from nets.resnet_v1 import resnetv1 24 | from nets.mobilenet_v1 import mobilenetv1 25 | 26 | def parse_args(): 27 | """ 28 | Parse input arguments 29 | """ 30 | parser = argparse.ArgumentParser(description='Train a Faster R-CNN in a weakly supervised setting with WSDDN modules') 31 | parser.add_argument('--cfg', dest='cfg_file', 32 | help='optional config file', 33 | default=None, type=str) 34 | parser.add_argument('--weight', dest='weight', 35 | help='initialize with pretrained model weights', 36 | type=str) 37 | parser.add_argument('--wsddn', dest='wsddn', 38 | help='initialize with
pretrained wsddn model weights', 39 | type=str) 40 | parser.add_argument('--imdb', dest='imdb_name', 41 | help='dataset to train on', 42 | default='voc_2007_trainval', type=str) 43 | parser.add_argument('--imdbval', dest='imdbval_name', 44 | help='dataset to validate on', 45 | default='voc_2007_test', type=str) 46 | parser.add_argument('--iters', dest='max_iters', 47 | help='number of iterations to train', 48 | default=70000, type=int) 49 | parser.add_argument('--tag', dest='tag', 50 | help='tag of the model', 51 | default=None, type=str) 52 | parser.add_argument('--net', dest='net', 53 | help='vgg16, res50, res101, res152, mobile', 54 | default='res50', type=str) 55 | parser.add_argument('--set', dest='set_cfgs', 56 | help='set config keys', default=None, 57 | nargs=argparse.REMAINDER) 58 | 59 | if len(sys.argv) == 1: 60 | parser.print_help() 61 | sys.exit(1) 62 | 63 | args = parser.parse_args() 64 | return args 65 | 66 | 67 | def combined_roidb(imdb_names): 68 | """ 69 | Combine multiple roidbs 70 | """ 71 | 72 | def get_roidb(imdb_name): 73 | imdb = get_imdb(imdb_name) 74 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 75 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 76 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 77 | roidb = get_training_roidb(imdb) 78 | return roidb 79 | 80 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 81 | roidb = roidbs[0] 82 | if len(roidbs) > 1: 83 | for r in roidbs[1:]: 84 | roidb.extend(r) 85 | tmp = get_imdb(imdb_names.split('+')[1]) 86 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 87 | else: 88 | imdb = get_imdb(imdb_names) 89 | return imdb, roidb 90 | 91 | 92 | if __name__ == '__main__': 93 | args = parse_args() 94 | 95 | print('Called with args:') 96 | print(args) 97 | 98 | if 1: # Always cuda 99 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 100 | else: 101 | torch.set_default_tensor_type('torch.FloatTensor') 102 | 103 | if args.cfg_file is not None: 104 | cfg_from_file(args.cfg_file) 105 | if args.set_cfgs is not None: 106 | cfg_from_list(args.set_cfgs) 107 | 108 | print('Using config:') 109 | pprint.pprint(cfg) 110 | 111 | np.random.seed(cfg.RNG_SEED) 112 | 113 | # train set 114 | imdb, roidb = combined_roidb(args.imdb_name) 115 | print('{:d} roidb entries'.format(len(roidb))) 116 | 117 | # output directory where the models are saved 118 | output_dir = get_output_dir(imdb, args.tag) 119 | print('Output will be saved to `{:s}`'.format(output_dir)) 120 | 121 | # tensorboard directory where the summaries are saved during training 122 | tb_dir = get_output_tb_dir(imdb, args.tag) 123 | print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir)) 124 | 125 | # also add the validation set, but with no flipping images 126 | orgflip = cfg.TRAIN.USE_FLIPPED 127 | cfg.TRAIN.USE_FLIPPED = False 128 | _, valroidb = combined_roidb(args.imdbval_name) 129 | print('{:d} validation roidb entries'.format(len(valroidb))) 130 | cfg.TRAIN.USE_FLIPPED = orgflip 131 | 132 | # load network 133 | if args.net == 'vgg16': 134 | net = vgg16() 135 | elif args.net == 'res50': 136 | net = resnetv1(num_layers=50) 137 | elif args.net == 'res101': 138 | net = resnetv1(num_layers=101) 139 | elif args.net == 'res152': 140 | net = resnetv1(num_layers=152) 141 | elif args.net == 'mobile': 142 | net = mobilenetv1() 143 | else: 144 | raise NotImplementedError 145 | 146 | train_net(net, imdb, roidb, valroidb, output_dir, tb_dir, 147 | pretrained_model=args.weight, 148 | wsddn_premodel=args.wsddn, 149 | 
max_iters=args.max_iters) 150 | --------------------------------------------------------------------------------
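A quick sanity check for the IoU helper in lib/utils/bbox.py above (a minimal sketch, not a file in the repository; it assumes numpy and torch are installed and that lib/ is on the PYTHONPATH, e.g. by importing tools/_init_paths.py first):

    import numpy as np
    from utils.bbox import bbox_overlaps  # lib/utils/bbox.py

    boxes = np.array([[0., 0., 9., 9.],
                      [5., 5., 14., 14.]], dtype=np.float32)
    queries = np.array([[0., 0., 9., 9.]], dtype=np.float32)

    # ndarray in, ndarray out: a (2, 1) IoU matrix between boxes and queries.
    print(bbox_overlaps(boxes, queries))  # approx. [[1.0], [0.143]]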