├── LICENSE ├── README.md ├── data └── scripts │ └── fetch_faster_rcnn_models.sh ├── experiments ├── cfgs │ ├── mobile.yml │ ├── res101-lg.yml │ ├── res101.yml │ ├── res50.yml │ └── vgg16.yml └── scripts │ ├── convert_vgg16.sh │ ├── test.sh │ └── train.sh ├── lib ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── coco.cpython-36.pyc │ │ ├── dis_eval.cpython-36.pyc │ │ ├── ds_utils.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ ├── imdb.cpython-36.pyc │ │ ├── pascal_voc.cpython-36.pyc │ │ └── voc_eval.cpython-36.pyc │ ├── coco.py │ ├── dis_eval.py │ ├── dis_eval.pyc │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── layer_utils │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── choose_pseudo_gt.py │ ├── generate_anchors.py │ ├── generate_pseudo_gtbox.py │ ├── loss_function.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_top_layer.py │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── build.py │ │ ├── roi_pool.py │ │ ├── roi_pool_py.py │ │ └── src │ │ │ ├── cuda │ │ │ ├── roi_pooling_kernel.cu │ │ │ ├── roi_pooling_kernel.cu.o │ │ │ └── roi_pooling_kernel.h │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ └── roi_pooling_cuda.h │ └── snippets.py ├── model │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── bbox_transform.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ ├── nms_wrapper.cpython-36.pyc │ │ ├── test.cpython-36.pyc │ │ ├── test_train.cpython-36.pyc │ │ └── train_val.cpython-36.pyc │ ├── bbox_transform.py │ ├── config.py │ ├── config.pyc │ ├── nms_wrapper.py │ ├── test.py │ ├── test.pyc │ ├── test_train.py │ ├── train_val.py │ └── train_val.pyc ├── nets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── mobilenet_v1.cpython-36.pyc │ │ ├── network.cpython-36.pyc │ │ ├── resnet_v1.cpython-36.pyc │ │ └── vgg16.cpython-36.pyc │ ├── mobilenet_v1.py │ ├── network.py │ ├── resnet_v1.py │ └── vgg16.py ├── nms │ ├── __init__.py │ ├── build.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── roi_data_layer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── layer.cpython-36.pyc │ │ ├── minibatch.cpython-36.pyc │ │ └── roidb.cpython-36.pyc │ ├── layer.py │ ├── minibatch.py │ └── roidb.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── bbox.cpython-36.pyc │ ├── blob.cpython-36.pyc │ ├── timer.cpython-36.pyc │ └── visualization.cpython-36.pyc │ ├── bbox.py │ ├── blob.py │ ├── timer.py │ └── visualization.py └── tools ├── _init_paths.py ├── _init_paths.pyc ├── convert_from_tensorflow_mobile.py ├── convert_from_tensorflow_vgg.py ├── demo.py ├── reval.py ├── reval_discovery.py ├── show_boxes_results.py ├── test_net.py └── trainval_net.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xinlei Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Collaborative Learning for Weakly Supervised Object Detection 2 | 3 | If you use this code in your research, please cite: 4 | ``` 5 | @inproceedings{ijcai2018-135, 6 | title = {Collaborative Learning for Weakly Supervised Object Detection}, 7 | author = {Jiajie Wang and Jiangchao Yao and Ya Zhang and Rui Zhang}, 8 | booktitle = {Proceedings of the Twenty-Seventh International Joint Conference on 9 | Artificial Intelligence, {IJCAI-18}}, 10 | publisher = {International Joint Conferences on Artificial Intelligence Organization}, 11 | pages = {971--977}, 12 | year = {2018}, 13 | month = {7}, 14 | doi = {10.24963/ijcai.2018/135}, 15 | url = {https://doi.org/10.24963/ijcai.2018/135}, 16 | } 17 | ``` 18 | 19 | ### Prerequisites 20 | - A basic PyTorch installation. The version used is **0.2**. If you are using the older version **0.1.12**, you can check out the 0.1.12 branch. 21 | - Python packages you might not have: `cffi`, `opencv-python`, `easydict` (similar to [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn)). For `easydict`, make sure you have the right version; Xinlei uses 1.6. 22 | - [tensorboard-pytorch](https://github.com/lanpa/tensorboard-pytorch) to visualize the training and validation curves. Please build it from source to use the latest tensorflow-tensorboard. 23 | 24 | ### Installation 25 | 1. Clone the repository 26 | ```Shell 27 | git clone https://github.com/ruotianluo/pytorch-faster-rcnn.git 28 | ``` 29 | 30 | 2. Choose the `-arch` option that matches your GPU for steps 3 and 4. 31 | 32 | | GPU model | Architecture | 33 | | ------------- | ------------- | 34 | | TitanX (Maxwell/Pascal) | sm_52 | 35 | | GTX 960M | sm_50 | 36 | | GTX 1080 (Ti) | sm_61 | 37 | | Grid K520 (AWS g2.2xlarge) | sm_30 | 38 | | Tesla K80 (AWS p2.xlarge) | sm_37 | 39 | 40 | **Note**: You are welcome to contribute your settings if you have made the code work properly on other GPUs. 41 | 42 | 43 | 3. Build the RoiPooling module 44 | ``` 45 | cd pytorch-faster-rcnn/lib/layer_utils/roi_pooling/src/cuda 46 | echo "Compiling roi_pooling kernels by nvcc..." 47 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 48 | cd ../../ 49 | python build.py 50 | cd ../../../ 51 | ``` 52 | 53 | 54 | 4. Build the NMS module 55 | ``` 56 | cd lib/nms/src/cuda 57 | echo "Compiling nms kernels by nvcc..."
58 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 59 | cd ../../ 60 | python build.py 61 | cd ../../ 62 | ``` 63 | 64 | 65 | ### Setup data 66 | Please follow the instructions of py-faster-rcnn [here](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to set up VOC. The steps involve downloading data and optionally creating soft links in the ``data`` folder. Since Faster R-CNN does not rely on pre-computed proposals, it is safe to ignore the steps that set up proposals. 67 | 68 | If you find it useful, the ``data/cache`` folder created on Xinlei's side is also shared [here](http://ladoga.graphics.cs.cmu.edu/xinleic/tf-faster-rcnn/cache.tgz). 69 | 70 | 71 | ### Train your own model 72 | 1. Download pre-trained models and weights. For the pretrained [wsddn](https://www.robots.ox.ac.uk/~vgg/publications/2016/Bilen16/bilen16.pdf) model, you can find the download link [here](https://goo.gl/j7tp7N). Other pre-trained models, such as the VGG16 and Resnet V1 models, are provided by [pytorch-vgg](https://github.com/jcjohnson/pytorch-vgg.git) and [pytorch-resnet](https://github.com/ruotianluo/pytorch-resnet) (the ones with caffe in the name). You can download them to the ``data/imagenet_weights`` folder. For example, for the VGG16 model, you can set it up like: 73 | ```Shell 74 | mkdir -p data/imagenet_weights 75 | cd data/imagenet_weights 76 | python # open python in terminal and run the following Python code 77 | ``` 78 | ```Python 79 | import torch 80 | from torch.utils.model_zoo import load_url 81 | from torchvision import models 82 | 83 | sd = load_url("https://s3-us-west-2.amazonaws.com/jcjohns-models/vgg16-00b39a1b.pth") 84 | sd['classifier.0.weight'] = sd['classifier.1.weight'] 85 | sd['classifier.0.bias'] = sd['classifier.1.bias'] 86 | del sd['classifier.1.weight'] 87 | del sd['classifier.1.bias'] 88 | 89 | sd['classifier.3.weight'] = sd['classifier.4.weight'] 90 | sd['classifier.3.bias'] = sd['classifier.4.bias'] 91 | del sd['classifier.4.weight'] 92 | del sd['classifier.4.bias'] 93 | 94 | torch.save(sd, "vgg16.pth") 95 | ``` 96 | ```Shell 97 | cd ../.. 98 | ``` 99 | For Resnet101, you can set it up like: 100 | ```Shell 101 | mkdir -p data/imagenet_weights 102 | cd data/imagenet_weights 103 | # download from my gdrive (link in pytorch-resnet) 104 | mv resnet101-caffe.pth res101.pth 105 | cd ../.. 106 | ``` 107 | 108 | 2. Train (this also runs testing and evaluation) 109 | ```Shell 110 | ./experiments/scripts/train.sh [GPU_ID] [DATASET] [NET] [WSDDN_PRETRAINED] 111 | # Examples: 112 | ./experiments/scripts/train.sh 0 pascal_voc vgg16 path_to_wsddn_pretrained_model 113 | ``` 114 | 115 | 3. Visualization with Tensorboard 116 | ```Shell 117 | tensorboard --logdir=tensorboard/vgg16/voc_2007_trainval/ --port=7001 & 118 | ``` 119 | 120 | 4. 
Test and evaluate 121 | ```Shell 122 | ./experiments/scripts/test.sh [GPU_ID] [DATASET] [NET] [WSDDN_PRETRAINED] 123 | # Examples: 124 | ./experiments/scripts/test.sh 0 pascal_voc vgg16 path_to_wsddn_pretrained_model 125 | ``` 126 | 127 | By default, trained networks are saved under: 128 | 129 | ``` 130 | output/[NET]/[DATASET]/default/ 131 | ``` 132 | 133 | Test outputs are saved under: 134 | 135 | ``` 136 | output/[NET]/[DATASET]/default/[SNAPSHOT]/ 137 | ``` 138 | 139 | Tensorboard information for training and validation is saved under: 140 | 141 | ``` 142 | tensorboard/[NET]/[DATASET]/default/ 143 | tensorboard/[NET]/[DATASET]/default_val/ 144 | ``` 145 | 146 | ### Our results can be found [here](https://goo.gl/gP1yLd) 147 | -------------------------------------------------------------------------------- /data/scripts/fetch_faster_rcnn_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )" 4 | cd $DIR 5 | 6 | NET=res101 7 | FILE=voc_0712_80k-110k.tgz 8 | # replace it with gs11655.sp.cs.cmu.edu if ladoga.graphics.cs.cmu.edu does not work 9 | URL=http://ladoga.graphics.cs.cmu.edu/xinleic/tf-faster-rcnn/$NET/$FILE 10 | CHECKSUM=cb32e9df553153d311cc5095b2f8c340 11 | 12 | if [ -f $FILE ]; then 13 | echo "File already exists. Checking md5..." 14 | os=`uname -s` 15 | if [ "$os" = "Linux" ]; then 16 | checksum=`md5sum $FILE | awk '{ print $1 }'` 17 | elif [ "$os" = "Darwin" ]; then 18 | checksum=`cat $FILE | md5` 19 | fi 20 | if [ "$checksum" = "$CHECKSUM" ]; then 21 | echo "Checksum is correct. No need to download." 22 | exit 0 23 | else 24 | echo "Checksum is incorrect. Need to download again." 25 | fi 26 | fi 27 | 28 | echo "Downloading Resnet 101 Faster R-CNN models pre-trained on VOC 07+12 (340M)..." 29 | 30 | wget $URL -O $FILE 31 | 32 | echo "Unzipping..." 33 | 34 | tar zxvf $FILE 35 | 36 | echo "Done. Please run this command again to verify that checksum = $CHECKSUM."
37 | -------------------------------------------------------------------------------- /experiments/cfgs/mobile.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: mobile 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: mobile_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res101-lg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101-lg 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | SCALES: [800] 15 | MAX_SIZE: 1333 16 | TEST: 17 | HAS_RPN: True 18 | SCALES: [800] 19 | MAX_SIZE: 1333 20 | RPN_POST_NMS_TOP_N: 1000 21 | POOLING_MODE: crop 22 | ANCHOR_SCALES: [2,4,8,16,32] 23 | -------------------------------------------------------------------------------- /experiments/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: selective_search 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | SNAPSHOT_PREFIX: vgg16_faster_rcnn 13 | TEST: 14 | HAS_RPN: True 15 | POOLING_MODE: spp 16 | -------------------------------------------------------------------------------- /experiments/scripts/convert_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=vgg16 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:2:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | 
ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | set +x 46 | NET_FINAL=${NET}_faster_rcnn_iter_${ITERS} 47 | set -x 48 | 49 | if [ ! -f ${NET_FINAL}.index ]; then 50 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 51 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 52 | --snapshot ${NET_FINAL} \ 53 | --imdb ${TRAIN_IMDB} \ 54 | --iters ${ITERS} \ 55 | --cfg experiments/cfgs/${NET}.yml \ 56 | --tag ${EXTRA_ARGS_SLUG} \ 57 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 58 | else 59 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \ 60 | --snapshot ${NET_FINAL} \ 61 | --imdb ${TRAIN_IMDB} \ 62 | --iters ${ITERS} \ 63 | --cfg experiments/cfgs/${NET}.yml \ 64 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS} 65 | fi 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /experiments/scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | unset PYTHONPATH 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | WSDDN=$4 12 | 13 | array=( $@ ) 14 | len=${#array[@]} 15 | EXTRA_ARGS=${array[@]:4:$len} 16 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 17 | 18 | case ${DATASET} in 19 | pascal_voc) 20 | TRAIN_IMDB="voc_2007_trainval" 21 | TEST_IMDB="voc_2007_test" 22 | ITERS=200000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | ITERS=110000 30 | ANCHORS="[8,16,32]" 31 | RATIOS="[0.5,1,2]" 32 | ;; 33 | coco) 34 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 35 | TEST_IMDB="coco_2014_minival" 36 | ITERS=490000 37 | ANCHORS="[4,8,16,32]" 38 | RATIOS="[0.5,1,2]" 39 | ;; 40 | *) 41 | echo "No dataset given" 42 | exit 43 | ;; 44 | esac 45 | 46 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 47 | exec &> >(tee -a "$LOG") 48 | echo Logging output to "$LOG" 49 | 50 | set +x 51 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 52 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 53 | else 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | fi 56 | set -x 57 | 58 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 59 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 60 | --imdb ${TEST_IMDB} \ 61 | --model ${NET_FINAL} \ 62 | --cfg experiments/cfgs/${NET}.yml \ 63 | --tag ${EXTRA_ARGS_SLUG} \ 64 | --net ${NET} \ 65 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 66 | ${EXTRA_ARGS} 67 | else 68 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/test_net.py \ 69 | --imdb ${TEST_IMDB} \ 70 | --model ${NET_FINAL} \ 71 | --cfg experiments/cfgs/${NET}.yml \ 72 | --net ${NET} \ 73 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 74 | ${EXTRA_ARGS} 75 | fi 76 | 77 | -------------------------------------------------------------------------------- /experiments/scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | unset PYTHONPATH 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=$3 11 | WSDDN=$4 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:4:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | STEPSIZE="[50000]" 22 | ITERS=200000 23 | ANCHORS="[8,16,32]" 24 | RATIOS="[0.5,1,2]" 25 | ;; 26 | pascal_voc_0712) 27 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 28 | TEST_IMDB="voc_2007_test" 29 | STEPSIZE="[80000]" 30 | ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[350000]" 38 | ITERS=490000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | *) 43 | echo "No dataset given" 44 | exit 45 | ;; 46 | esac 47 | 48 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 49 | exec &> >(tee -a "$LOG") 50 | echo Logging output to "$LOG" 51 | 52 | set +x 53 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | else 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | fi 58 | set -x 59 | 60 | if [ ! -f ${NET_FINAL}.index ]; then 61 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 62 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 63 | --weight data/imagenet_weights/${NET}.pth \ 64 | --wsddn data/wsddn_weights/${WSDDN}.pth \ 65 | --imdb ${TRAIN_IMDB} \ 66 | --imdbval ${TEST_IMDB} \ 67 | --iters ${ITERS} \ 68 | --cfg experiments/cfgs/${NET}.yml \ 69 | --tag ${EXTRA_ARGS_SLUG} \ 70 | --net ${NET} \ 71 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 72 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 73 | else 74 | CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/trainval_net.py \ 75 | --weight data/imagenet_weights/${NET}.pth \ 76 | --wsddn data/wsddn_weights/${WSDDN}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | ./experiments/scripts/test.sh $@ 88 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/coco.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/dis_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/dis_eval.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/ds_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/ds_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/imdb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/imdb.cpython-36.pyc 
-------------------------------------------------------------------------------- /lib/datasets/__pycache__/pascal_voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/voc_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/__pycache__/voc_eval.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/dis_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Online Instance Classifier Refinement 3 | # Copyright (c) 2016 HUST MCLAB 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Peng Tang 6 | # -------------------------------------------------------- 7 | 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import pickle 11 | import numpy as np 12 | 13 | def parse_rec(filename): 14 | """ Parse a PASCAL VOC xml file """ 15 | tree = ET.parse(filename) 16 | objects = [] 17 | for obj in tree.findall('object'): 18 | obj_struct = {} 19 | obj_struct['name'] = obj.find('name').text 20 | obj_struct['pose'] = obj.find('pose').text 21 | obj_struct['truncated'] = int(obj.find('truncated').text) 22 | obj_struct['difficult'] = int(obj.find('difficult').text) 23 | bbox = obj.find('bndbox') 24 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 25 | int(bbox.find('ymin').text), 26 | int(bbox.find('xmax').text), 27 | int(bbox.find('ymax').text)] 28 | objects.append(obj_struct) 29 | 30 | return objects 31 | 32 | def dis_eval(detpath, 33 | annopath, 34 | imagesetfile, 35 | classname, 36 | cachedir, 37 | ovthresh=0.5): 38 | """rec, prec, ap = voc_eval(detpath, 39 | annopath, 40 | imagesetfile, 41 | classname, 42 | [ovthresh]) 43 | 44 | Top level function that does the PASCAL VOC evaluation. 45 | 46 | detpath: Path to detections 47 | detpath.format(classname) should produce the detection results file. 48 | annopath: Path to annotations 49 | annopath.format(imagename) should be the xml annotations file. 50 | imagesetfile: Text file containing the list of images, one image per line. 
51 | classname: Category name (duh) 52 | cachedir: Directory for caching the annotations 53 | [ovthresh]: Overlap threshold (default = 0.5) 54 | """ 55 | # assumes detections are in detpath.format(classname) 56 | # assumes annotations are in annopath.format(imagename) 57 | # assumes imagesetfile is a text file with each line an image name 58 | # cachedir caches the annotations in a pickle file 59 | 60 | # first load gt 61 | if not os.path.isdir(cachedir): 62 | os.mkdir(cachedir) 63 | cachefile = os.path.join(cachedir, 'annots.pkl') 64 | # read list of images 65 | with open(imagesetfile, 'r') as f: 66 | lines = f.readlines() 67 | imagenames = [x.strip() for x in lines] 68 | 69 | if not os.path.isfile(cachefile): 70 | # load annots 71 | recs = {} 72 | for i, imagename in enumerate(imagenames): 73 | recs[imagename] = parse_rec(annopath.format(imagename)) 74 | if i % 100 == 0: 75 | print('Reading annotation for {:d}/{:d}'.format( 76 | i + 1, len(imagenames))) 77 | # save 78 | print('Saving cached annotations to {:s}'.format(cachefile)) 79 | with open(cachefile, 'wb') as f: 80 | pickle.dump(recs, f) 81 | else: 82 | # load 83 | with open(cachefile, 'rb') as f: 84 | recs = pickle.load(f) 85 | 86 | # extract gt objects for this class 87 | class_recs = {} 88 | nimgs = 0.0 89 | for imagename in imagenames: 90 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 91 | bbox = np.array([x['bbox'] for x in R]) 92 | det = [False] * len(R) 93 | nimgs = nimgs + float(bbox.size > 0) 94 | class_recs[imagename] = {'bbox': bbox, 95 | 'det': det} 96 | 97 | # read dets 98 | detfile = detpath.format(classname) 99 | with open(detfile, 'r') as f: 100 | lines = f.readlines() 101 | 102 | splitlines = [x.strip().split(' ') for x in lines] 103 | image_ids = [x[0] for x in splitlines] 104 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 105 | 106 | # go down dets and mark TPs and FPs 107 | nd = len(image_ids) 108 | tp = np.zeros(nd) 109 | for d in range(nd): 110 | R = class_recs[image_ids[d]] 111 | bb = BB[d, :].astype(float) 112 | ovmax = -np.inf 113 | BBGT = R['bbox'].astype(float) 114 | 115 | if BBGT.size > 0: 116 | # compute overlaps 117 | # intersection 118 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 119 | iymin = np.maximum(BBGT[:, 1], bb[1]) 120 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 121 | iymax = np.minimum(BBGT[:, 3], bb[3]) 122 | iw = np.maximum(ixmax - ixmin + 1., 0.) 123 | ih = np.maximum(iymax - iymin + 1., 0.) 124 | inters = iw * ih 125 | 126 | # union 127 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 128 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 129 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 130 | 131 | overlaps = inters / uni 132 | ovmax = np.max(overlaps) 133 | jmax = np.argmax(overlaps) 134 | 135 | if ovmax > ovthresh: 136 | tp[d] = 1. 
137 | continue 138 | 139 | return np.sum(tp) / nimgs -------------------------------------------------------------------------------- /lib/datasets/dis_eval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/datasets/dis_eval.pyc -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | 17 | import numpy as np 18 | 19 | # Set up voc__ 20 | for year in ['2007', '2012']: 21 | for split in ['train', 'val', 'trainval', 'test']: 22 | name = 'voc_{}_{}'.format(year, split) 23 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 24 | 25 | for year in ['2007', '2012']: 26 | for split in ['train', 'val', 'trainval', 'test']: 27 | name = 'voc_{}_{}_diff'.format(year, split) 28 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year, use_diff=True)) 29 | 30 | # Set up coco_2014_ 31 | for year in ['2014']: 32 | for split in ['train', 'val', 'minival', 
'valminusminival', 'trainval']: 33 | name = 'coco_{}_{}'.format(year, split) 34 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 35 | 36 | # Set up coco_2015_ 37 | for year in ['2015']: 38 | for split in ['test', 'test-dev']: 39 | name = 'coco_{}_{}'.format(year, split) 40 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 41 | 42 | 43 | def get_imdb(name): 44 | """Get an imdb (image database) by name.""" 45 | if name not in __sets: 46 | raise KeyError('Unknown dataset: {}'.format(name)) 47 | return __sets[name]() 48 | 49 | 50 | def list_imdbs(): 51 | """List all registered imdbs.""" 52 | return list(__sets.keys()) 53 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import os.path as osp 13 | import PIL 14 | from utils.bbox import bbox_overlaps 15 | import numpy as np 16 | import scipy.sparse 17 | from model.config import cfg 18 | 19 | 20 | class imdb(object): 21 | """Image database.""" 22 | 23 | def __init__(self, name, classes=None): 24 | self._name = name 25 | self._num_classes = 0 26 | if not classes: 27 | self._classes = [] 28 | else: 29 | self._classes = classes 30 | self._image_index = [] 31 | self._obj_proposer = 'gt' 32 | self._roidb = None 33 | self._roidb_handler = self.default_roidb 34 | # Use this dict for storing dataset specific config options 35 | self.config = {} 36 | 37 | @property 38 | def name(self): 39 | return self._name 40 | 41 | @property 42 | def num_classes(self): 43 | return len(self._classes) 44 | 45 | @property 46 | def classes(self): 47 | return self._classes 48 | 49 | @property 50 | def image_index(self): 51 | return self._image_index 52 | 53 | @property 54 | def roidb_handler(self): 55 | return self._roidb_handler 56 | 57 | @roidb_handler.setter 58 | def roidb_handler(self, val): 59 | self._roidb_handler = val 60 | 61 | def set_proposal_method(self, method): 62 | method = eval('self.' + method + '_roidb') 63 | self.roidb_handler = method 64 | 65 | @property 66 | def roidb(self): 67 | # A roidb is a list of dictionaries, each with the following keys: 68 | # boxes 69 | # gt_overlaps 70 | # gt_classes 71 | # flipped 72 | if self._roidb is not None: 73 | return self._roidb 74 | self._roidb = self.roidb_handler() 75 | return self._roidb 76 | 77 | @property 78 | def cache_path(self): 79 | cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) 80 | if not os.path.exists(cache_path): 81 | os.makedirs(cache_path) 82 | return cache_path 83 | 84 | @property 85 | def num_images(self): 86 | return len(self.image_index) 87 | 88 | def image_path_at(self, i): 89 | raise NotImplementedError 90 | 91 | def default_roidb(self): 92 | raise NotImplementedError 93 | 94 | def evaluate_detections(self, all_boxes, output_dir=None): 95 | """ 96 | all_boxes is a list of length number-of-classes. 97 | Each list element is a list of length number-of-images. 98 | Each of those list elements is either an empty list [] 99 | or a numpy array of detection. 
100 | 101 | all_boxes[class][image] = [] or np.array of shape #dets x 5 102 | """ 103 | raise NotImplementedError 104 | 105 | def evaluate_discovery(self, all_boxes, output_dir=None): 106 | """ 107 | all_boxes is a list of length number-of-classes. 108 | Each list element is a list of length number-of-images. 109 | Each of those list elements is either an empty list [] 110 | or a numpy array of detection. 111 | 112 | all_boxes[class][image] = [] or np.array of shape #dets x 5 113 | """ 114 | raise NotImplementedError 115 | 116 | def _get_widths(self): 117 | return [PIL.Image.open(self.image_path_at(i)).size[0] 118 | for i in range(self.num_images)] 119 | 120 | def append_flipped_images(self): 121 | num_images = self.num_images 122 | widths = [PIL.Image.open(self.image_path_at(i)).size[0] 123 | for i in range(num_images)] 124 | for i in range(num_images): 125 | boxes = self.roidb[i]['boxes'].copy() 126 | oldx1 = boxes[:, 0].copy() 127 | oldx2 = boxes[:, 2].copy() 128 | boxes[:, 0] = widths[i] - oldx2 - 1 129 | boxes[:, 2] = widths[i] - oldx1 - 1 130 | assert (boxes[:, 2] >= boxes[:, 0]).all() 131 | entry = {'boxes' : boxes, 132 | 'labels' : self.roidb[i]['labels'], 133 | 'flipped' : True} 134 | self.roidb.append(entry) 135 | self._image_index = self._image_index * 2 136 | 137 | def evaluate_recall(self, candidate_boxes=None, thresholds=None, 138 | area='all', limit=None): 139 | """Evaluate detection proposal recall metrics. 140 | 141 | Returns: 142 | results: dictionary of results with keys 143 | 'ar': average recall 144 | 'recalls': vector recalls at each IoU overlap threshold 145 | 'thresholds': vector of IoU overlap thresholds 146 | 'gt_overlaps': vector of all ground-truth overlaps 147 | """ 148 | # Record max overlap value for each gt box 149 | # Return vector of overlap values 150 | areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3, 151 | '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} 152 | area_ranges = [[0 ** 2, 1e5 ** 2], # all 153 | [0 ** 2, 32 ** 2], # small 154 | [32 ** 2, 96 ** 2], # medium 155 | [96 ** 2, 1e5 ** 2], # large 156 | [96 ** 2, 128 ** 2], # 96-128 157 | [128 ** 2, 256 ** 2], # 128-256 158 | [256 ** 2, 512 ** 2], # 256-512 159 | [512 ** 2, 1e5 ** 2], # 512-inf 160 | ] 161 | assert area in areas, 'unknown area range: {}'.format(area) 162 | area_range = area_ranges[areas[area]] 163 | gt_overlaps = np.zeros(0) 164 | num_pos = 0 165 | for i in range(self.num_images): 166 | # Checking for max_overlaps == 1 avoids including crowd annotations 167 | # (...pretty hacking :/) 168 | max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1) 169 | gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) & 170 | (max_gt_overlaps == 1))[0] 171 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 172 | gt_areas = self.roidb[i]['seg_areas'][gt_inds] 173 | valid_gt_inds = np.where((gt_areas >= area_range[0]) & 174 | (gt_areas <= area_range[1]))[0] 175 | gt_boxes = gt_boxes[valid_gt_inds, :] 176 | num_pos += len(valid_gt_inds) 177 | 178 | if candidate_boxes is None: 179 | # If candidate_boxes is not supplied, the default is to use the 180 | # non-ground-truth boxes from this roidb 181 | non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0] 182 | boxes = self.roidb[i]['boxes'][non_gt_inds, :] 183 | else: 184 | boxes = candidate_boxes[i] 185 | if boxes.shape[0] == 0: 186 | continue 187 | if limit is not None and boxes.shape[0] > limit: 188 | boxes = boxes[:limit, :] 189 | 190 | overlaps = bbox_overlaps(boxes.astype(np.float), 191 | gt_boxes.astype(np.float)) 192 | 
193 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 194 | for j in range(gt_boxes.shape[0]): 195 | # find which proposal box maximally covers each gt box 196 | argmax_overlaps = overlaps.argmax(axis=0) 197 | # and get the iou amount of coverage for each gt box 198 | max_overlaps = overlaps.max(axis=0) 199 | # find which gt box is 'best' covered (i.e. 'best' = most iou) 200 | gt_ind = max_overlaps.argmax() 201 | gt_ovr = max_overlaps.max() 202 | assert (gt_ovr >= 0) 203 | # find the proposal box that covers the best covered gt box 204 | box_ind = argmax_overlaps[gt_ind] 205 | # record the iou coverage of this gt box 206 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 207 | assert (_gt_overlaps[j] == gt_ovr) 208 | # mark the proposal box and the gt box as used 209 | overlaps[box_ind, :] = -1 210 | overlaps[:, gt_ind] = -1 211 | # append recorded iou coverage level 212 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 213 | 214 | gt_overlaps = np.sort(gt_overlaps) 215 | if thresholds is None: 216 | step = 0.05 217 | thresholds = np.arange(0.5, 0.95 + 1e-5, step) 218 | recalls = np.zeros_like(thresholds) 219 | # compute recall for each iou threshold 220 | for i, t in enumerate(thresholds): 221 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 222 | # ar = 2 * np.trapz(recalls, thresholds) 223 | ar = recalls.mean() 224 | return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 225 | 'gt_overlaps': gt_overlaps} 226 | 227 | def create_roidb_from_box_list(self, box_list, gt_roidb): 228 | assert len(box_list) == self.num_images, \ 229 | 'Number of boxes must match number of ground-truth images' 230 | roidb = [] 231 | 232 | if gt_roidb is not None: 233 | for i in range(self.num_images): 234 | boxes = box_list[i] 235 | 236 | real_label = gt_roidb[i]['labels'] 237 | 238 | roidb.append({'boxes' : boxes, 239 | 'labels' : np.array([real_label], dtype=np.int32), 240 | 'flipped' : False}) 241 | else: 242 | for i in range(self.num_images): 243 | boxes = box_list[i] 244 | 245 | roidb.append({'boxes' : boxes, 246 | 'labels' : np.zeros((1, 0), dtype=np.int32), 247 | 'flipped' : False}) 248 | 249 | return roidb 250 | 251 | @staticmethod 252 | def merge_roidbs(a, b): 253 | assert len(a) == len(b) 254 | for i in range(len(a)): 255 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 256 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 257 | b[i]['gt_classes'])) 258 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 259 | b[i]['gt_overlaps']]) 260 | a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], 261 | b[i]['seg_areas'])) 262 | return a 263 | 264 | def competition_mode(self, on): 265 | """Turn competition mode on or off.""" 266 | pass 267 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 6 | so that it's consistent with those computed by Jan Hosang (see: 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 8 | computing/research/object-recognition-and-scene-understanding/how- 9 | good-are-detection-proposals-really/) 10 | 11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
13 | """ 14 | 15 | def munge(src_dir): 16 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 17 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 18 | 19 | files = os.listdir(src_dir) 20 | for fn in files: 21 | base, ext = os.path.splitext(fn) 22 | # first 14 chars / first 22 chars / all chars + .mat 23 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 24 | first = base[:14] 25 | second = base[:22] 26 | dst_dir = os.path.join('MCG', 'mat', first, second) 27 | if not os.path.exists(dst_dir): 28 | os.makedirs(dst_dir) 29 | src = os.path.join(src_dir, fn) 30 | dst = os.path.join(dst_dir, fn) 31 | print 'MV: {} -> {}'.format(src, dst) 32 | os.rename(src, dst) 33 | 34 | if __name__ == '__main__': 35 | # src_dir should look something like: 36 | # src_dir = 'MCG-COCO-val2014-boxes' 37 | src_dir = sys.argv[1] 38 | munge(src_dir) 39 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 
50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | 69 | def voc_eval(detpath, 70 | annopath, 71 | imagesetfile, 72 | classname, 73 | cachedir, 74 | ovthresh=0.5, 75 | use_07_metric=False, 76 | use_diff=False): 77 | """rec, prec, ap = voc_eval(detpath, 78 | annopath, 79 | imagesetfile, 80 | classname, 81 | [ovthresh], 82 | [use_07_metric]) 83 | 84 | Top level function that does the PASCAL VOC evaluation. 85 | 86 | detpath: Path to detections 87 | detpath.format(classname) should produce the detection results file. 88 | annopath: Path to annotations 89 | annopath.format(imagename) should be the xml annotations file. 90 | imagesetfile: Text file containing the list of images, one image per line. 91 | classname: Category name (duh) 92 | cachedir: Directory for caching the annotations 93 | [ovthresh]: Overlap threshold (default = 0.5) 94 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 95 | (default False) 96 | """ 97 | # assumes detections are in detpath.format(classname) 98 | # assumes annotations are in annopath.format(imagename) 99 | # assumes imagesetfile is a text file with each line an image name 100 | # cachedir caches the annotations in a pickle file 101 | 102 | # first load gt 103 | if not os.path.isdir(cachedir): 104 | os.mkdir(cachedir) 105 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile) 106 | # read list of images 107 | with open(imagesetfile, 'r') as f: 108 | lines = f.readlines() 109 | imagenames = [x.strip() for x in lines] 110 | 111 | if not os.path.isfile(cachefile): 112 | # load annotations 113 | recs = {} 114 | for i, imagename in enumerate(imagenames): 115 | recs[imagename] = parse_rec(annopath.format(imagename)) 116 | if i % 100 == 0: 117 | print('Reading annotation for {:d}/{:d}'.format( 118 | i + 1, len(imagenames))) 119 | # save 120 | print('Saving cached annotations to {:s}'.format(cachefile)) 121 | with open(cachefile, 'wb') as f: 122 | pickle.dump(recs, f) 123 | else: 124 | # load 125 | with open(cachefile, 'rb') as f: 126 | try: 127 | recs = pickle.load(f) 128 | except: 129 | recs = pickle.load(f, encoding='bytes') 130 | 131 | # extract gt objects for this class 132 | class_recs = {} 133 | npos = 0 134 | for imagename in imagenames: 135 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 136 | bbox = np.array([x['bbox'] for x in R]) 137 | if use_diff: 138 | difficult = np.array([False for x in R]).astype(np.bool) 139 | else: 140 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 141 | det = [False] * len(R) 142 | npos = npos + sum(~difficult) 143 | class_recs[imagename] = {'bbox': bbox, 144 | 'difficult': difficult, 145 | 'det': det} 146 | 147 | # read dets 148 | detfile = detpath.format(classname) 149 | with open(detfile, 'r') as f: 150 | lines = f.readlines() 151 | 152 | splitlines = [x.strip().split(' ') for x in lines] 153 | image_ids = [x[0] for x in splitlines] 154 | confidence = np.array([float(x[1]) for x in splitlines]) 155 | BB = np.array([[float(z) for z 
in x[2:]] for x in splitlines]) 156 | 157 | nd = len(image_ids) 158 | tp = np.zeros(nd) 159 | fp = np.zeros(nd) 160 | 161 | if BB.shape[0] > 0: 162 | # sort by confidence 163 | sorted_ind = np.argsort(-confidence) 164 | sorted_scores = np.sort(-confidence) 165 | BB = BB[sorted_ind, :] 166 | image_ids = [image_ids[x] for x in sorted_ind] 167 | 168 | # go down dets and mark TPs and FPs 169 | for d in range(nd): 170 | R = class_recs[image_ids[d]] 171 | bb = BB[d, :].astype(float) 172 | ovmax = -np.inf 173 | BBGT = R['bbox'].astype(float) 174 | 175 | if BBGT.size > 0: 176 | # compute overlaps 177 | # intersection 178 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 179 | iymin = np.maximum(BBGT[:, 1], bb[1]) 180 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 181 | iymax = np.minimum(BBGT[:, 3], bb[3]) 182 | iw = np.maximum(ixmax - ixmin + 1., 0.) 183 | ih = np.maximum(iymax - iymin + 1., 0.) 184 | inters = iw * ih 185 | 186 | # union 187 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 188 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 189 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 190 | 191 | overlaps = inters / uni 192 | ovmax = np.max(overlaps) 193 | jmax = np.argmax(overlaps) 194 | 195 | if ovmax > ovthresh: 196 | if not R['difficult'][jmax]: 197 | if not R['det'][jmax]: 198 | tp[d] = 1. 199 | R['det'][jmax] = 1 200 | else: 201 | fp[d] = 1. 202 | else: 203 | fp[d] = 1. 204 | 205 | # compute precision recall 206 | fp = np.cumsum(fp) 207 | tp = np.cumsum(tp) 208 | rec = tp / float(npos) 209 | # avoid divide by zero in case the first detection matches a difficult 210 | # ground truth 211 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 212 | ap = voc_ap(rec, prec, use_07_metric) 213 | 214 | return rec, prec, ap 215 | -------------------------------------------------------------------------------- /lib/layer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/layer_utils/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from model.config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.bbox import bbox_overlaps 16 | from model.bbox_transform import bbox_transform 17 | import torch 18 | 19 | def anchor_target_layer(rpn_cls_score, gt_boxes, gt_weights, im_info, _feat_stride, all_anchors, num_anchors): 20 | """Same as the anchor target layer in original Fast/er RCNN """ 21 | A = num_anchors 22 | total_anchors = all_anchors.shape[0] 23 | K = total_anchors / num_anchors 24 | 25 | # allow boxes to sit over the edge by a small amount 26 | _allowed_border = 0 27 | 28 | # map of shape (..., H, W) 29 | height, width = rpn_cls_score.shape[1:3] 30 | 31 | # only keep anchors inside the image 32 | inds_inside = np.where( 33 | (all_anchors[:, 0] >= -_allowed_border) & 34 | (all_anchors[:, 1] >= 
-_allowed_border) & 35 | (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width 36 | (all_anchors[:, 3] < im_info[0] + _allowed_border) # height 37 | )[0] 38 | 39 | # keep only inside anchors 40 | anchors = all_anchors[inds_inside, :] 41 | 42 | # label: 1 is positive, 0 is negative, -1 is dont care 43 | labels = np.empty((len(inds_inside),), dtype=np.float32) 44 | labels.fill(-1) 45 | 46 | 47 | # overlaps between the anchors and the gt boxes 48 | # overlaps (ex, gt) 49 | overlaps = bbox_overlaps( 50 | np.ascontiguousarray(anchors, dtype=np.float), 51 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 52 | argmax_overlaps = overlaps.argmax(axis=1) 53 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 54 | ''' 55 | add weights items by pseudo scores 56 | ''' 57 | gt_weights_tile = np.tile(gt_weights.reshape(1,-1),(len(inds_inside),1)) 58 | loss_weights = gt_weights_tile[np.arange(len(inds_inside)),argmax_overlaps] 59 | ''' 60 | end of modification 61 | ''' 62 | 63 | gt_argmax_overlaps = overlaps.argmax(axis=0) 64 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 65 | np.arange(overlaps.shape[1])] 66 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 67 | 68 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 69 | # assign bg labels first so that positive labels can clobber them 70 | # first set the negatives 71 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 72 | 73 | # fg label: for each gt, anchor with highest overlap 74 | labels[gt_argmax_overlaps] = 1 75 | 76 | # fg label: above threshold IOU 77 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 78 | 79 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 80 | # assign bg labels last so that negative labels can clobber positives 81 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 82 | 83 | # subsample positive labels if we have too many 84 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 85 | fg_inds = np.where(labels == 1)[0] 86 | if len(fg_inds) > num_fg: 87 | disable_inds = npr.choice( 88 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 89 | labels[disable_inds] = -1 90 | 91 | # subsample negative labels if we have too many 92 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 93 | bg_inds = np.where(labels == 0)[0] 94 | if len(bg_inds) > num_bg: 95 | disable_inds = npr.choice( 96 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 97 | labels[disable_inds] = -1 98 | 99 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 100 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 101 | 102 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 103 | # only the positive ones have regression targets 104 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 105 | 106 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 107 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 108 | # uniform weighting of examples (given non-uniform sampling) 109 | num_examples = np.sum(labels >= 0) 110 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 111 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 112 | else: 113 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 114 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 115 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 116 | np.sum(labels == 1)) 117 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 118 | np.sum(labels == 0)) 119 | 120 | ''' 121 | put loss-weight of bg to 1.0 122 | fg to 1.0 + wsddn_prob 123 | 
''' 124 | 125 | loss_weights[labels == 0] = 1.0 126 | #loss_weights[labels == 1] = loss_weights[labels == 1] + 1.0 127 | loss_weights[labels == 1] = loss_weights[labels == 1] 128 | 129 | bbox_outside_weights[labels == 1, :] = loss_weights[labels==1].reshape(-1,1) * positive_weights 130 | bbox_outside_weights[labels == 0, :] = loss_weights[labels==0].reshape(-1,1) * negative_weights 131 | 132 | 133 | 134 | 135 | ''' 136 | bbox_outside_weights[labels == 1, :] = positive_weights 137 | bbox_outside_weights[labels == 0, :] = negative_weights 138 | 139 | 140 | end of change loss-weight 141 | ''' 142 | # map up to original set of anchors 143 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 144 | loss_weights = _unmap(loss_weights, total_anchors, inds_inside, fill=0) 145 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 146 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 147 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 148 | 149 | # labels 150 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 151 | labels = labels.reshape((1, 1, A * height, width)) 152 | rpn_labels = labels 153 | 154 | # loss_weight 155 | rpn_loss_weights = loss_weights 156 | # bbox_targets 157 | bbox_targets = bbox_targets \ 158 | .reshape((1, height, width, A * 4)) 159 | 160 | rpn_bbox_targets = bbox_targets 161 | # bbox_inside_weights 162 | bbox_inside_weights = bbox_inside_weights \ 163 | .reshape((1, height, width, A * 4)) 164 | 165 | rpn_bbox_inside_weights = bbox_inside_weights 166 | 167 | # bbox_outside_weights 168 | bbox_outside_weights = bbox_outside_weights \ 169 | .reshape((1, height, width, A * 4)) 170 | 171 | rpn_bbox_outside_weights = bbox_outside_weights 172 | return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, rpn_loss_weights 173 | 174 | 175 | def _unmap(data, count, inds, fill=0): 176 | """ Unmap a subset of item (data) back to the original set of items (of 177 | size count) """ 178 | if len(data.shape) == 1: 179 | ret = np.empty((count,), dtype=np.float32) 180 | ret.fill(fill) 181 | ret[inds] = data 182 | else: 183 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 184 | ret.fill(fill) 185 | ret[inds, :] = data 186 | return ret 187 | 188 | 189 | def _compute_targets(ex_rois, gt_rois): 190 | """Compute bounding-box regression targets for an image.""" 191 | 192 | assert ex_rois.shape[0] == gt_rois.shape[0] 193 | assert ex_rois.shape[1] == 4 194 | assert gt_rois.shape[1] == 5 195 | 196 | return bbox_transform(torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy() 197 | -------------------------------------------------------------------------------- /lib/layer_utils/choose_pseudo_gt.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | from model.config import cfg 7 | import numpy as np 8 | import numpy.random as npr 9 | from utils.bbox import bbox_overlaps 10 | from model.bbox_transform import bbox_transform 11 | import torch 12 | 13 | 14 | def choose_pseudo_gt(boxes, cls_prob, im_labels): 15 | """Get proposals with highest score. 
16 | inputs are all variables""" 17 | 18 | num_images, num_classes = im_labels.size() 19 | boxes = boxes[:,1:] 20 | assert num_images == 1, 'batch size shoud be equal to 1' 21 | im_labels_tmp = im_labels[0, :] 22 | 23 | gt_boxes = [] 24 | gt_classes = [] 25 | gt_scores = [] 26 | for i in range(num_classes): 27 | if im_labels_tmp[i].data.cpu().numpy() == 1: 28 | max_value,max_index = cls_prob[:, i].max(0) 29 | gt_boxes.append(boxes[max_index]) 30 | gt_classes.append(torch.ones(1,1)*(i+1)) # return idx=class+1 to include the background 31 | gt_scores.append(max_value.view(-1,1)) 32 | 33 | gt_boxes = torch.cat(gt_boxes) 34 | gt_classes = torch.cat(gt_classes) 35 | gt_scores = torch.cat(gt_scores) 36 | proposals = {'gt_boxes' : gt_boxes, 37 | 'gt_classes': gt_classes, 38 | 'gt_scores': gt_scores} 39 | 40 | return torch.cat([gt_boxes,gt_classes],1), proposals 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | 14 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 15 | # 16 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 17 | # >> anchors 18 | # 19 | # anchors = 20 | # 21 | # -83 -39 100 56 22 | # -175 -87 192 104 23 | # -359 -183 376 200 24 | # -55 -55 72 72 25 | # -119 -119 136 136 26 | # -247 -247 264 264 27 | # -35 -79 52 96 28 | # -79 -167 96 184 29 | # -167 -343 184 360 30 | 31 | # array([[ -83., -39., 100., 56.], 32 | # [-175., -87., 192., 104.], 33 | # [-359., -183., 376., 200.], 34 | # [ -55., -55., 72., 72.], 35 | # [-119., -119., 136., 136.], 36 | # [-247., -247., 264., 264.], 37 | # [ -35., -79., 52., 96.], 38 | # [ -79., -167., 96., 184.], 39 | # [-167., -343., 184., 360.]]) 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6)): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | return anchors 53 | 54 | 55 | def _whctrs(anchor): 56 | """ 57 | Return width, height, x center, and y center for an anchor (window). 58 | """ 59 | 60 | w = anchor[2] - anchor[0] + 1 61 | h = anchor[3] - anchor[1] + 1 62 | x_ctr = anchor[0] + 0.5 * (w - 1) 63 | y_ctr = anchor[1] + 0.5 * (h - 1) 64 | return w, h, x_ctr, y_ctr 65 | 66 | 67 | def _mkanchors(ws, hs, x_ctr, y_ctr): 68 | """ 69 | Given a vector of widths (ws) and heights (hs) around a center 70 | (x_ctr, y_ctr), output a set of anchors (windows). 
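E.g. ws = [23], hs = [12] around the reference centre (7.5, 7.5) give the window [-3.5, 2., 18.5, 13.], which is the ratio-0.5 anchor before scale enumeration.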
71 | """ 72 | 73 | ws = ws[:, np.newaxis] 74 | hs = hs[:, np.newaxis] 75 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 76 | y_ctr - 0.5 * (hs - 1), 77 | x_ctr + 0.5 * (ws - 1), 78 | y_ctr + 0.5 * (hs - 1))) 79 | return anchors 80 | 81 | 82 | def _ratio_enum(anchor, ratios): 83 | """ 84 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 85 | """ 86 | 87 | w, h, x_ctr, y_ctr = _whctrs(anchor) 88 | size = w * h 89 | size_ratios = size / ratios 90 | ws = np.round(np.sqrt(size_ratios)) 91 | hs = np.round(ws * ratios) 92 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 93 | return anchors 94 | 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | 111 | t = time.time() 112 | a = generate_anchors() 113 | print(time.time() - t) 114 | print(a) 115 | from IPython import embed; 116 | 117 | embed() 118 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_pseudo_gtbox.py: -------------------------------------------------------------------------------- 1 | from model.config import cfg 2 | import numpy as np 3 | import numpy.random as npr 4 | from utils.bbox import bbox_overlaps 5 | from model.bbox_transform import bbox_transform 6 | from model.nms_wrapper import nms 7 | import torch 8 | 9 | 10 | def generate_pseudo_gtbox(boxes, cls_prob, im_labels): 11 | """Get proposals from fuse_matrix 12 | inputs are all variables""" 13 | pre_nms_topN = 50 14 | nms_Thresh = 0.1 15 | 16 | num_images, num_classes = im_labels.size() 17 | boxes = boxes[:,1:] 18 | assert num_images == 1, 'batch size shoud be equal to 1' 19 | im_labels_tmp = im_labels[0, :] 20 | labelList = im_labels_tmp.data.nonzero().view(-1) 21 | 22 | gt_boxes = [] 23 | gt_classes = [] 24 | gt_scores = [] 25 | 26 | for i in labelList: 27 | scores, order = cls_prob[:,i].contiguous().view(-1).sort(descending=True) 28 | if pre_nms_topN > 0: 29 | order = order[:pre_nms_topN] 30 | scores = scores[:pre_nms_topN].view(-1, 1) 31 | proposals = boxes[order.data, :] 32 | 33 | keep = nms(torch.cat((proposals, scores), 1).data, nms_Thresh) 34 | proposals = proposals[keep, :] 35 | scores = scores[keep,] 36 | gt_boxes.append(proposals) 37 | gt_classes.append(torch.ones(keep.size(0),1)*(i+1)) # return idx=class+1 to include the background 38 | gt_scores.append(scores.view(-1,1)) 39 | 40 | gt_boxes = torch.cat(gt_boxes) 41 | gt_classes = torch.cat(gt_classes) 42 | gt_scores = torch.cat(gt_scores) 43 | proposals = {'gt_boxes' : gt_boxes, 44 | 'gt_classes': gt_classes, 45 | 'gt_scores': gt_scores} 46 | # print(gt_boxes.size()) 47 | # print(gt_classes.size()) 48 | # print(type(gt_boxes)) 49 | # print(type(gt_classes)) 50 | return torch.cat([gt_boxes,gt_classes],1),proposals -------------------------------------------------------------------------------- /lib/layer_utils/loss_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Thu Dec 21 16:22:56 2017 3 | 4 | @author: Jiajie 5 | """ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | 16 | 17 | def 
bootstrap_cross_entropy(input, target, ishard=False, beta=0.95, weight=None, size_average=True): 18 | r"""Function that measures Cross Entropy between target and output 19 | logits with prediction consistency(bootstrap) 20 | 21 | Args: 22 | input: Variable of arbitrary shape 23 | target: Variable :math:`(N)` where each value is 24 | `0 <= targets[i] <= C-1 25 | ishard: Choose soft/hard bootstrap mode 26 | beta: Weight between ``gt`` label and prediction. In paper, 0.8 for hard and 0.95 for soft 27 | weight (Variable, optional): a manual rescaling weight 28 | if provided it's repeated to match input tensor shape 29 | size_average (bool, optional): By default, the losses are averaged 30 | over observations for each minibatch. However, if the field 31 | sizeAverage is set to False, the losses are instead summed 32 | for each minibatch. Default: ``True`` 33 | 34 | Examples:: 35 | 36 | >>> input = autograd.Variable(torch.randn(3, 5), requires_grad=True) 37 | >>> target = autograd.Variable(torch.LongTensor(3).random_(5)) 38 | >>> loss = bootstrap_cross_entropy(input, target) 39 | >>> loss.backward() 40 | """ 41 | input_prob = F.softmax(input) 42 | target_onehot = Variable(input.data.new(input.data.size()).zero_()) 43 | target_onehot.scatter_(1, target.view(-1,1), 1) 44 | # print(target_onehot) 45 | if ishard: 46 | _,idx = input_prob.max(1) 47 | target_onehot = target_onehot * beta + \ 48 | Variable(input.data.new(input.data.size()).zero_()).scatter_(1, idx.view(-1,1), 1) * (1-beta) 49 | else: 50 | target_onehot = target_onehot * beta + input_prob * (1-beta) 51 | loss = - target_onehot * F.log_softmax(input) 52 | #print(loss.size()) 53 | #print(weight.size()) 54 | #if weight is not None: 55 | # loss = loss.sum(1) * weight 56 | 57 | if size_average: 58 | if weight is not None: 59 | return (loss.sum(1) * weight).mean() 60 | return loss.sum(1).mean() 61 | else: 62 | return loss.sum() 63 | 64 | 65 | 66 | 67 | def BCE_bootstrap_with_logits(input, target, ishard=False, beta=0.95, weight=None, size_average=True): 68 | r"""Function that measures Binary Cross Entropy between target and output 69 | logits with prediction consistency(bootstrap) 70 | 71 | Args: 72 | input: Variable of arbitrary shape 73 | target: Variable of the same shape as input 74 | ishard: Choose soft/hard bootstrap mode 75 | beta: Weight between ``gt`` label and prediction. In paper, 0.8 for hard and 0.95 for soft 76 | weight (Variable, optional): a manual rescaling weight 77 | if provided it's repeated to match input tensor shape 78 | size_average (bool, optional): By default, the losses are averaged 79 | over observations for each minibatch. However, if the field 80 | sizeAverage is set to False, the losses are instead summed 81 | for each minibatch. 
Default: ``True`` 82 | 83 | Examples:: 84 | 85 | >>> input = autograd.Variable(torch.randn(3), requires_grad=True) 86 | >>> target = autograd.Variable(torch.FloatTensor(3).random_(2)) 87 | >>> loss = BCE_bootstrap_with_logits(input, target) 88 | >>> loss.backward() 89 | """ 90 | if not (target.size() == input.size()): 91 | raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size())) 92 | input_prob = torch.sigmoid(input) 93 | if ishard: 94 | target = target * beta + (input_prob>0.5) * (1-beta) 95 | else: 96 | target = target * beta + input_prob * (1-beta) 97 | print(target) 98 | max_val = (-input).clamp(min=0) 99 | loss = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log() 100 | 101 | if weight is not None: 102 | loss = loss * weight 103 | 104 | if size_average: 105 | return loss.mean() 106 | else: 107 | return loss.sum() -------------------------------------------------------------------------------- /lib/layer_utils/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | from model.nms_wrapper import nms 14 | 15 | import torch 16 | from torch.autograd import Variable 17 | 18 | 19 | def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors): 20 | """A simplified version compared to fast/er RCNN 21 | For details please see the technical report 22 | """ 23 | if type(cfg_key) == bytes: 24 | cfg_key = cfg_key.decode('utf-8') 25 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 26 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 27 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 28 | 29 | # Get the scores and bounding boxes 30 | scores = rpn_cls_prob[:, :, :, num_anchors:] 31 | rpn_bbox_pred = rpn_bbox_pred.view((-1, 4)) 32 | scores = scores.contiguous().view(-1, 1) 33 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 34 | proposals = clip_boxes(proposals, im_info[:2]) 35 | 36 | # Pick the top region proposals 37 | scores, order = scores.view(-1).sort(descending=True) 38 | if pre_nms_topN > 0: 39 | order = order[:pre_nms_topN] 40 | scores = scores[:pre_nms_topN].view(-1, 1) 41 | proposals = proposals[order.data, :] 42 | 43 | # Non-maximal suppression 44 | keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh) 45 | 46 | # Pick th top region proposals after NMS 47 | if post_nms_topN > 0: 48 | keep = keep[:post_nms_topN] 49 | proposals = proposals[keep, :] 50 | scores = scores[keep,] 51 | 52 | # Only support single image as input 53 | batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_()) 54 | blob = torch.cat((batch_inds, proposals), 1) 55 | 56 | return blob, scores 57 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed 
under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Sean Bell and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import numpy.random as npr 13 | from model.config import cfg 14 | from model.bbox_transform import bbox_transform 15 | from utils.bbox import bbox_overlaps 16 | 17 | 18 | import torch 19 | from torch.autograd import Variable 20 | 21 | def proposal_target_layer (rpn_rois, rpn_scores, gt_boxes, _num_classes, gt_weights): 22 | """ 23 | Assign object detection proposals to ground-truth targets. Produces proposal 24 | classification labels and bounding-box regression targets. 25 | """ 26 | 27 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 28 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 29 | all_rois = rpn_rois 30 | all_scores = rpn_scores 31 | 32 | # Include ground-truth boxes in the set of candidate rois 33 | if cfg.TRAIN.USE_GT: 34 | zeros = rpn_rois.data.new(gt_boxes.shape[0], 1) 35 | all_rois = torch.cat( 36 | (all_rois, torch.cat((zeros, gt_boxes[:, :-1]), 1)) 37 | , 0) 38 | # not sure if it a wise appending, but anyway i am not using it 39 | all_scores = torch.cat((all_scores, zeros), 0) 40 | 41 | num_images = 1 42 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 43 | fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image)) 44 | 45 | # Sample rois with classification labels and bounding box regression 46 | # targets 47 | labels, rois, roi_scores, bbox_targets, bbox_inside_weights, loss_weights = _sample_rois( 48 | all_rois, all_scores, gt_boxes, gt_weights, fg_rois_per_image, 49 | rois_per_image, _num_classes) 50 | 51 | rois = rois.view(-1, 5) 52 | roi_scores = roi_scores.view(-1) 53 | labels = labels.view(-1, 1) 54 | bbox_targets = bbox_targets.view(-1, _num_classes * 4) 55 | bbox_inside_weights = bbox_inside_weights.view(-1, _num_classes * 4) 56 | bbox_outside_weights = (bbox_inside_weights > 0).float() 57 | #print(bbox_outside_weights) 58 | bbox_outside_weights = bbox_outside_weights * loss_weights.view(-1,1) 59 | #print(bbox_outside_weights) 60 | 61 | return rois, roi_scores, labels, Variable(bbox_targets), Variable(bbox_inside_weights), Variable(bbox_outside_weights), Variable(loss_weights) 62 | 63 | 64 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 65 | """Bounding-box regression targets (bbox_target_data) are stored in a 66 | compact form N x (class, tx, ty, tw, th) 67 | 68 | This function expands those targets into the 4-of-4*K representation used 69 | by the network (i.e. only one class has non-zero targets). 
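For instance, with num_classes = 21 a foreground RoI labelled class 3 has its (tx, ty, tw, th) written to columns 12-15 of the 84-wide target row, and zeros everywhere else.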
70 | 71 | Returns: 72 | bbox_target (ndarray): N x 4K blob of regression targets 73 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 74 | """ 75 | # Inputs are tensor 76 | 77 | clss = bbox_target_data[:, 0] 78 | bbox_targets = clss.new(clss.numel(), 4 * num_classes).zero_() 79 | bbox_inside_weights = clss.new(bbox_targets.shape).zero_() 80 | inds = (clss > 0).nonzero().view(-1) 81 | if inds.numel() > 0: 82 | clss = clss[inds].contiguous().view(-1,1) 83 | dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4) 84 | dim2_inds = torch.cat([4*clss, 4*clss+1, 4*clss+2, 4*clss+3], 1).long() 85 | bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:] 86 | bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds) 87 | 88 | return bbox_targets, bbox_inside_weights 89 | 90 | 91 | def _compute_targets(ex_rois, gt_rois, labels): 92 | """Compute bounding-box regression targets for an image.""" 93 | # Inputs are tensor 94 | 95 | assert ex_rois.shape[0] == gt_rois.shape[0] 96 | assert ex_rois.shape[1] == 4 97 | assert gt_rois.shape[1] == 4 98 | 99 | targets = bbox_transform(ex_rois, gt_rois) 100 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 101 | # Optionally normalize targets by a precomputed mean and stdev 102 | targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 103 | / targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 104 | return torch.cat( 105 | [labels.unsqueeze(1), targets], 1) 106 | 107 | 108 | def _sample_rois(all_rois, all_scores, gt_boxes, gt_weights, fg_rois_per_image, rois_per_image, num_classes): 109 | """Generate a random sample of RoIs comprising foreground and background 110 | examples. 111 | """ 112 | # overlaps: (rois x gt_boxes) 113 | overlaps = bbox_overlaps( 114 | all_rois[:, 1:5].data, 115 | gt_boxes[:, :4].data) 116 | max_overlaps, gt_assignment = overlaps.max(1) 117 | labels = gt_boxes[gt_assignment, [4]] 118 | ''' 119 | add weights items by pseudo scores 120 | ''' 121 | gt_weights = gt_weights.detach().data 122 | gt_weights_tile = gt_weights.view(1,-1).expand_as(overlaps) 123 | loss_weights = gt_weights_tile[torch.arange(0,overlaps.size(0)).long(), gt_assignment] 124 | #print((gt_assignment==1).sum()) 125 | #print(loss_weights) 126 | ''' 127 | end of modification 128 | ''' 129 | # Select foreground RoIs as those with >= FG_THRESH overlap 130 | fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1) 131 | # Guard against the case when an image has fewer than fg_rois_per_image 132 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 133 | bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1) 134 | 135 | # Small modification to the original version where we ensure a fixed number of regions are sampled 136 | if fg_inds.numel() > 0 and bg_inds.numel() > 0: 137 | fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel()) 138 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()] 139 | bg_rois_per_image = rois_per_image - fg_rois_per_image 140 | to_replace = bg_inds.numel() < bg_rois_per_image 141 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()] 142 | elif fg_inds.numel() > 0: 143 | to_replace = fg_inds.numel() < rois_per_image 144 | fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), 
size=int(rois_per_image), replace=to_replace)).long().cuda()] 145 | fg_rois_per_image = rois_per_image 146 | elif bg_inds.numel() > 0: 147 | to_replace = bg_inds.numel() < rois_per_image 148 | bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()] 149 | fg_rois_per_image = 0 150 | else: 151 | import pdb 152 | pdb.set_trace() 153 | 154 | # The indices that we're selecting (both fg and bg) 155 | keep_inds = torch.cat([fg_inds, bg_inds], 0) 156 | # Select sampled values from various arrays: 157 | labels = labels[keep_inds].contiguous() 158 | # Clamp labels for the background RoIs to 0 159 | labels[int(fg_rois_per_image):] = 0 160 | rois = all_rois[keep_inds].contiguous() 161 | roi_scores = all_scores[keep_inds].contiguous() 162 | 163 | 164 | bbox_target_data = _compute_targets( 165 | rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data) 166 | 167 | bbox_targets, bbox_inside_weights = \ 168 | _get_bbox_regression_labels(bbox_target_data, num_classes) 169 | 170 | ''' 171 | modified by jiajie 172 | ''' 173 | #loss_weights = loss_weights[keep_inds].contiguous() + 1.0 174 | loss_weights = loss_weights[keep_inds].contiguous() 175 | loss_weights[int(fg_rois_per_image):] = 1.0 176 | ''' 177 | end of modification 178 | ''' 179 | 180 | #bbox_outside_weights[labels == 1, :] = loss_weights[labels==1].reshape(-1,1) * positive_weights 181 | #bbox_outside_weights[labels == 0, :] = loss_weights[labels==0].reshape(-1,1) * negative_weights 182 | 183 | return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, loss_weights 184 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_top_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | import numpy.random as npr 14 | 15 | import torch 16 | 17 | def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors): 18 | """A layer that just selects the top region proposals 19 | without using non-maximal suppression, 20 | For details please see the technical report 21 | """ 22 | rpn_top_n = cfg.TEST.RPN_TOP_N 23 | 24 | scores = rpn_cls_prob[:, :, :, num_anchors:] 25 | 26 | rpn_bbox_pred = rpn_bbox_pred.view(-1, 4) 27 | scores = scores.contiguous().view(-1, 1) 28 | 29 | length = scores.size(0) 30 | if length < rpn_top_n: 31 | # Random selection, maybe unnecessary and loses good proposals 32 | # But such case rarely happens 33 | top_inds = torch.from_numpy(npr.choice(length, size=rpn_top_n, replace=True)).long().cuda() 34 | else: 35 | top_inds = scores.sort(0, descending=True)[1] 36 | top_inds = top_inds[:rpn_top_n] 37 | top_inds = top_inds.view(rpn_top_n) 38 | 39 | # Do the selection here 40 | anchors = anchors[top_inds, :].contiguous() 41 | rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous() 42 | scores = scores[top_inds].contiguous() 43 | 44 | # Convert anchors into proposals via bbox transformations 45 | proposals = 
bbox_transform_inv(anchors, rpn_bbox_pred) 46 | 47 | # Clip predicted boxes to image 48 | proposals = clip_boxes(proposals, im_info[:2]) 49 | 50 | # Output rois blob 51 | # Our RPN implementation only supports a single input image, so all 52 | # batch inds are 0 53 | batch_inds = proposals.data.new(proposals.size(0), 1).zero_() 54 | blob = torch.cat([batch_inds, proposals], 1) 55 | return blob, scores 56 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/layer_utils/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/roi_pooling_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from ._ext import roi_pooling 4 | 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.output = None 12 | self.argmax = None 13 | self.rois = None 14 | self.feature_size = None 15 | 16 | def forward(self, features, rois): 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | output = torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width) 20 | argmax = torch.IntTensor(num_rois, num_channels, self.pooled_height, self.pooled_width).zero_() 21 | 22 | if not features.is_cuda: 23 | _features = features.permute(0, 2, 3, 1) 24 | roi_pooling.roi_pooling_forward(self.pooled_height, self.pooled_width, self.spatial_scale, 25 | _features, rois, output) 26 | # output = output.cuda() 27 | else: 28 | output = output.cuda() 29 | argmax = argmax.cuda() 30 | roi_pooling.roi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 31 | features, rois, output, argmax) 32 | self.output = output 33 | self.argmax = argmax 34 | self.rois = rois 35 | self.feature_size = features.size() 36 | 37 | return output 38 | 39 | def backward(self, grad_output): 40 | 
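# Backward is only implemented for the CUDA path: the argmax indices saved in
# forward() route each pooled gradient back to the single feature cell that won
# the max, so grad_input has the same N x C x H x W shape as the input features.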
assert(self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | roi_pooling.roi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 46 | grad_output, self.rois, grad_input, self.argmax) 47 | 48 | # print grad_input 49 | 50 | return grad_input, None 51 | 52 | 53 | class RoIPool(torch.nn.Module): 54 | def __init__(self, pooled_height, pooled_width, spatial_scale): 55 | super(RoIPool, self).__init__() 56 | 57 | self.pooled_width = int(pooled_width) 58 | self.pooled_height = int(pooled_height) 59 | self.spatial_scale = float(spatial_scale) 60 | 61 | def forward(self, features, rois): 62 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 63 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/roi_pool_py.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | batch_size, num_channels, data_height, data_width = features.size() 16 | num_rois = rois.size()[0] 17 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 18 | 19 | for roi_ind, roi in enumerate(rois): 20 | batch_ind = int(roi[0].data[0]) 21 | roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round( 22 | roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int) 23 | roi_width = max(roi_end_w - roi_start_w + 1, 1) 24 | roi_height = max(roi_end_h - roi_start_h + 1, 1) 25 | bin_size_w = float(roi_width) / float(self.pooled_width) 26 | bin_size_h = float(roi_height) / float(self.pooled_height) 27 | 28 | for ph in range(self.pooled_height): 29 | hstart = int(np.floor(ph * bin_size_h)) 30 | hend = int(np.ceil((ph + 1) * bin_size_h)) 31 | hstart = min(data_height, max(0, hstart + roi_start_h)) 32 | hend = min(data_height, max(0, hend + roi_start_h)) 33 | for pw in range(self.pooled_width): 34 | wstart = int(np.floor(pw * bin_size_w)) 35 | wend = int(np.ceil((pw + 1) * bin_size_w)) 36 | wstart = min(data_width, max(0, wstart + roi_start_w)) 37 | wend = min(data_width, max(0, wend + roi_start_w)) 38 | 39 | is_empty = (hend <= hstart) or(wend <= wstart) 40 | if is_empty: 41 | outputs[roi_ind, :, ph, pw] = 0 42 | else: 43 | data = features[batch_ind] 44 | outputs[roi_ind, :, ph, pw] = torch.max( 45 | torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1) 46 | 47 | return outputs 48 | 49 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "roi_pooling_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void 
ROIPoolForward(const int nthreads, const float* bottom_data, 16 | const float spatial_scale, const int height, const int width, 17 | const int channels, const int pooled_height, const int pooled_width, 18 | const float* bottom_rois, float* top_data, int* argmax_data) 19 | { 20 | CUDA_1D_KERNEL_LOOP(index, nthreads) 21 | { 22 | // (n, c, ph, pw) is an element in the pooled output 23 | int n = index; 24 | int pw = n % pooled_width; 25 | n /= pooled_width; 26 | int ph = n % pooled_height; 27 | n /= pooled_height; 28 | int c = n % channels; 29 | n /= channels; 30 | 31 | bottom_rois += n * 5; 32 | int roi_batch_ind = bottom_rois[0]; 33 | int roi_start_w = round(bottom_rois[1] * spatial_scale); 34 | int roi_start_h = round(bottom_rois[2] * spatial_scale); 35 | int roi_end_w = round(bottom_rois[3] * spatial_scale); 36 | int roi_end_h = round(bottom_rois[4] * spatial_scale); 37 | 38 | // Force malformed ROIs to be 1x1 39 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 40 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 41 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 42 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 43 | 44 | int hstart = (int)(floor((float)(ph) * bin_size_h)); 45 | int wstart = (int)(floor((float)(pw) * bin_size_w)); 46 | int hend = (int)(ceil((float)(ph + 1) * bin_size_h)); 47 | int wend = (int)(ceil((float)(pw + 1) * bin_size_w)); 48 | 49 | // Add roi offsets and clip to input boundaries 50 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), height); 51 | hend = fminf(fmaxf(hend + roi_start_h, 0), height); 52 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), width); 53 | wend = fminf(fmaxf(wend + roi_start_w, 0), width); 54 | bool is_empty = (hend <= hstart) || (wend <= wstart); 55 | 56 | // Define an empty pooling region to be zero 57 | float maxval = is_empty ? 
0 : -FLT_MAX; 58 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 59 | int maxidx = -1; 60 | bottom_data += roi_batch_ind * channels * height * width; 61 | for (int h = hstart; h < hend; ++h) { 62 | for (int w = wstart; w < wend; ++w) { 63 | // int bottom_index = (h * width + w) * channels + c; 64 | int bottom_index = (c * height + h) * width + w; 65 | if (bottom_data[bottom_index] > maxval) { 66 | maxval = bottom_data[bottom_index]; 67 | maxidx = bottom_index; 68 | } 69 | } 70 | } 71 | top_data[index] = maxval; 72 | if (argmax_data != NULL) 73 | argmax_data[index] = maxidx; 74 | } 75 | } 76 | 77 | 78 | int ROIPoolForwardLaucher( 79 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 80 | const int width, const int channels, const int pooled_height, 81 | const int pooled_width, const float* bottom_rois, 82 | float* top_data, int* argmax_data, cudaStream_t stream) 83 | { 84 | const int kThreadsPerBlock = 1024; 85 | const int output_size = num_rois * pooled_height * pooled_width * channels; 86 | cudaError_t err; 87 | 88 | 89 | ROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 90 | output_size, bottom_data, spatial_scale, height, width, channels, pooled_height, 91 | pooled_width, bottom_rois, top_data, argmax_data); 92 | 93 | err = cudaGetLastError(); 94 | if(cudaSuccess != err) 95 | { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | 104 | __global__ void ROIPoolBackward(const int nthreads, const float* top_diff, 105 | const int* argmax_data, const int num_rois, const float spatial_scale, 106 | const int height, const int width, const int channels, 107 | const int pooled_height, const int pooled_width, float* bottom_diff, 108 | const float* bottom_rois) { 109 | CUDA_1D_KERNEL_LOOP(index, nthreads) 110 | { 111 | 112 | // (n, c, ph, pw) is an element in the pooled output 113 | int n = index; 114 | int w = n % width; 115 | n /= width; 116 | int h = n % height; 117 | n /= height; 118 | int c = n % channels; 119 | n /= channels; 120 | 121 | float gradient = 0; 122 | // Accumulate gradient over all ROIs that pooled this element 123 | for (int roi_n = 0; roi_n < num_rois; ++roi_n) 124 | { 125 | const float* offset_bottom_rois = bottom_rois + roi_n * 5; 126 | int roi_batch_ind = offset_bottom_rois[0]; 127 | // Skip if ROI's batch index doesn't match n 128 | if (n != roi_batch_ind) { 129 | continue; 130 | } 131 | 132 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 133 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 134 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 135 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 136 | 137 | // Skip if ROI doesn't include (h, w) 138 | const bool in_roi = (w >= roi_start_w && w <= roi_end_w && 139 | h >= roi_start_h && h <= roi_end_h); 140 | if (!in_roi) { 141 | continue; 142 | } 143 | 144 | int offset = roi_n * pooled_height * pooled_width * channels; 145 | const float* offset_top_diff = top_diff + offset; 146 | const int* offset_argmax_data = argmax_data + offset; 147 | 148 | // Compute feasible set of pooled units that could have pooled 149 | // this bottom unit 150 | 151 | // Force malformed ROIs to be 1x1 152 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 153 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 154 | 155 | float bin_size_h = (float)(roi_height) / 
(float)(pooled_height); 156 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 157 | 158 | int phstart = floor((float)(h - roi_start_h) / bin_size_h); 159 | int phend = ceil((float)(h - roi_start_h + 1) / bin_size_h); 160 | int pwstart = floor((float)(w - roi_start_w) / bin_size_w); 161 | int pwend = ceil((float)(w - roi_start_w + 1) / bin_size_w); 162 | 163 | phstart = fminf(fmaxf(phstart, 0), pooled_height); 164 | phend = fminf(fmaxf(phend, 0), pooled_height); 165 | pwstart = fminf(fmaxf(pwstart, 0), pooled_width); 166 | pwend = fminf(fmaxf(pwend, 0), pooled_width); 167 | 168 | for (int ph = phstart; ph < phend; ++ph) { 169 | for (int pw = pwstart; pw < pwend; ++pw) { 170 | if (offset_argmax_data[(c * pooled_height + ph) * pooled_width + pw] == index) 171 | { 172 | gradient += offset_top_diff[(c * pooled_height + ph) * pooled_width + pw]; 173 | } 174 | } 175 | } 176 | } 177 | bottom_diff[index] = gradient; 178 | } 179 | } 180 | 181 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 182 | const int height, const int width, const int channels, const int pooled_height, 183 | const int pooled_width, const float* bottom_rois, 184 | float* bottom_diff, const int* argmax_data, cudaStream_t stream) 185 | { 186 | const int kThreadsPerBlock = 1024; 187 | const int output_size = batch_size * height * width * channels; 188 | cudaError_t err; 189 | 190 | ROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 191 | output_size, top_diff, argmax_data, num_rois, spatial_scale, height, width, channels, pooled_height, 192 | pooled_width, bottom_diff, bottom_rois); 193 | 194 | err = cudaGetLastError(); 195 | if(cudaSuccess != err) 196 | { 197 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 198 | exit( -1 ); 199 | } 200 | 201 | return 1; 202 | } 203 | 204 | 205 | #ifdef __cplusplus 206 | } 207 | #endif 208 | 209 | 210 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/cuda/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | 
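A minimal usage sketch of the RoIPool wrapper defined in roi_pool.py above (illustration only, not a file in this repository; it assumes lib/ is on PYTHONPATH, that the _ext.roi_pooling extension has been built with build.py, and that a CUDA device is available):

# illustration only: shapes, RoIs and the stride-16 scale are hypothetical values
import torch
from torch.autograd import Variable
from layer_utils.roi_pooling.roi_pool import RoIPool

# a 1 x 512 x 38 x 50 conv feature map and two RoIs given as [batch_ind, x1, y1, x2, y2]
features = Variable(torch.randn(1, 512, 38, 50).cuda())
rois = Variable(torch.cuda.FloatTensor([[0,  0,  0, 159, 159],
                                        [0, 32, 48, 223, 191]]))

# pool every RoI to 7 x 7 on a stride-16 feature map
roi_pool = RoIPool(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
pooled = roi_pool(features, rois)   # -> 2 x 512 x 7 x 7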
-------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 
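/* Note: this CPU path expects NHWC features (roi_pool.py permutes to
   N x H x W x C before calling it), writes the pooled output
   channel-major per RoI, and is forward-only: no argmax is recorded. */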
99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | int batch_size = THCudaTensor_size(state, features, 0); 27 | if (batch_size != 1) 28 | { 29 | return 0; 30 | } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | if (batch_size != 1) 70 | { 71 | return 0; 72 | } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | 
bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/layer_utils/snippets.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from layer_utils.generate_anchors import generate_anchors 12 | 13 | def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 14 | """ A wrapper function to generate anchors given different scales 15 | Also return the number of anchors in variable 'length' 16 | """ 17 | anchors = generate_anchors(ratios=np.array(anchor_ratios), scales=np.array(anchor_scales)) 18 | A = anchors.shape[0] 19 | shift_x = np.arange(0, width) * feat_stride 20 | shift_y = np.arange(0, height) * feat_stride 21 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 22 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 23 | K = shifts.shape[0] 24 | # width changes faster, so here it is H, W, C 25 | anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 26 | anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 27 | length = np.int32(anchors.shape[0]) 28 | 29 | return anchors, length 30 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import config 2 | -------------------------------------------------------------------------------- /lib/model/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__init__.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/bbox_transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/bbox_transform.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/nms_wrapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/nms_wrapper.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/test.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/test.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/test_train.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/test_train.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/train_val.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/__pycache__/train_val.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see 
LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import torch 13 | 14 | def bbox_transform(ex_rois, gt_rois): 15 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 16 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 17 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 18 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 19 | 20 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 21 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 22 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 23 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 24 | 25 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 26 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 27 | targets_dw = torch.log(gt_widths / ex_widths) 28 | targets_dh = torch.log(gt_heights / ex_heights) 29 | 30 | targets = torch.stack( 31 | (targets_dx, targets_dy, targets_dw, targets_dh), 1) 32 | return targets 33 | 34 | 35 | def bbox_transform_inv(boxes, deltas): 36 | # Input should be both tensor or both Variable and on the same device 37 | if len(boxes) == 0: 38 | return deltas.detach() * 0 39 | 40 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 41 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 42 | ctr_x = boxes[:, 0] + 0.5 * widths 43 | ctr_y = boxes[:, 1] + 0.5 * heights 44 | 45 | dx = deltas[:, 0::4] 46 | dy = deltas[:, 1::4] 47 | dw = deltas[:, 2::4] 48 | dh = deltas[:, 3::4] 49 | 50 | pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1) 51 | pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1) 52 | pred_w = torch.exp(dw) * widths.unsqueeze(1) 53 | pred_h = torch.exp(dh) * heights.unsqueeze(1) 54 | 55 | pred_boxes = torch.cat(\ 56 | [_.unsqueeze(2) for _ in [pred_ctr_x - 0.5 * pred_w,\ 57 | pred_ctr_y - 0.5 * pred_h,\ 58 | pred_ctr_x + 0.5 * pred_w,\ 59 | pred_ctr_y + 0.5 * pred_h]], 2).view(len(boxes), -1) 60 | 61 | return pred_boxes 62 | 63 | 64 | def clip_boxes(boxes, im_shape): 65 | """ 66 | Clip boxes to image boundaries. 
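Coordinates are clamped to [0, width - 1] and [0, height - 1].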
67 | boxes must be tensor or Variable, im_shape can be anything but Variable 68 | """ 69 | 70 | if not hasattr(boxes, 'data'): 71 | boxes_ = boxes.numpy() 72 | 73 | boxes = boxes.view(boxes.size(0), -1, 4) 74 | boxes = torch.stack(\ 75 | [boxes[:,:,0].clamp(0, im_shape[1] - 1), 76 | boxes[:,:,1].clamp(0, im_shape[0] - 1), 77 | boxes[:,:,2].clamp(0, im_shape[1] - 1), 78 | boxes[:,:,3].clamp(0, im_shape[0] - 1)], 2).view(boxes.size(0), -1) 79 | 80 | return boxes 81 | -------------------------------------------------------------------------------- /lib/model/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/config.pyc -------------------------------------------------------------------------------- /lib/model/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/model/test.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/test.pyc -------------------------------------------------------------------------------- /lib/model/test_train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import cv2 11 | import numpy as np 12 | try: 13 | import cPickle as pickle 14 | except ImportError: 15 | import pickle 16 | import os 17 | import math 18 | 19 | from utils.timer import Timer 20 | from model.nms_wrapper import nms 21 | from utils.blob import im_list_to_blob 22 | 23 | from model.config import cfg, get_output_dir 24 | from model.bbox_transform import clip_boxes, bbox_transform_inv 25 | 26 | import torch 27 | 28 | def _get_image_blob(im): 29 | """Converts an image into a network input. 
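The scale for each pyramid level is target_size / min(height, width), capped so that the longer side never exceeds cfg.TEST.MAX_SIZE. For instance, assuming cfg.TEST.SCALES = (600,) and cfg.TEST.MAX_SIZE = 1000, a 480 x 640 image is resized by 600 / 480 = 1.25 (since 1.25 * 640 = 800 <= 1000), while a 480 x 1300 image would be capped at 1000 / 1300 ~ 0.77.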
30 | Arguments: 31 | im (ndarray): a color image in BGR order 32 | Returns: 33 | blob (ndarray): a data blob holding an image pyramid 34 | im_scale_factors (list): list of image scales (relative to im) used 35 | in the image pyramid 36 | """ 37 | im_orig = im.astype(np.float32, copy=True) 38 | im_orig -= cfg.PIXEL_MEANS 39 | 40 | im_shape = im_orig.shape 41 | im_size_min = np.min(im_shape[0:2]) 42 | im_size_max = np.max(im_shape[0:2]) 43 | 44 | processed_ims = [] 45 | im_scale_factors = [] 46 | 47 | for target_size in cfg.TEST.SCALES: 48 | im_scale = float(target_size) / float(im_size_min) 49 | # Prevent the biggest axis from being more than MAX_SIZE 50 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 51 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 52 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 53 | interpolation=cv2.INTER_LINEAR) 54 | im_scale_factors.append(im_scale) 55 | processed_ims.append(im) 56 | 57 | # Create a blob to hold the input images 58 | blob = im_list_to_blob(processed_ims) 59 | 60 | return blob, np.array(im_scale_factors) 61 | 62 | def _get_rois_blob(im_rois, im_scale_factors): 63 | """Converts RoIs into network inputs. 64 | Arguments: 65 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 66 | im_scale_factors (list): scale factors as returned by _get_image_blob 67 | Returns: 68 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 69 | """ 70 | rois_blob_real = [] 71 | 72 | for i in range(len(im_scale_factors)): 73 | rois, levels = _project_im_rois(im_rois, np.array([im_scale_factors[i]])) 74 | rois_blob = np.hstack((levels, rois)) 75 | rois_blob_real.append(rois_blob.astype(np.float32, copy=False)) 76 | 77 | return rois_blob_real 78 | 79 | 80 | def _project_im_rois(im_rois, scales): 81 | """Project image RoIs into the image pyramid built by _get_image_blob. 
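Each RoI is assigned to the pyramid level whose scaled area is closest to 224 x 224. As an illustration, a 100 x 100 RoI with scales (0.5, 1.0, 2.0) has scaled areas 2500, 10000 and 40000, so the third scale is chosen (40000 is closest to 50176); with a single scale every RoI simply uses level 0.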
82 | Arguments: 83 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 84 | scales (list): scale factors as returned by _get_image_blob 85 | Returns: 86 | rois (ndarray): R x 4 matrix of projected RoI coordinates 87 | levels (list): image pyramid levels used by each projected RoI 88 | """ 89 | im_rois = im_rois.astype(np.float, copy=False) 90 | 91 | if len(scales) > 1: 92 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 93 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 94 | areas = widths * heights 95 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 96 | diff_areas = np.abs(scaled_areas - 224 * 224) 97 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 98 | else: 99 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 100 | 101 | rois = im_rois * scales[levels] 102 | 103 | return rois, levels 104 | 105 | 106 | def _get_blobs(im, rois): 107 | """Convert an image and RoIs within that image into network inputs.""" 108 | blobs = {} 109 | blobs['data'], im_scale_factors = _get_image_blob(im) 110 | blobs['boxes'] = _get_rois_blob(rois, im_scale_factors) 111 | 112 | return blobs, im_scale_factors 113 | 114 | def _clip_boxes(boxes, im_shape): 115 | """Clip boxes to image boundaries.""" 116 | # x1 >= 0 117 | boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) 118 | # y1 >= 0 119 | boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) 120 | # x2 < im_shape[1] 121 | boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) 122 | # y2 < im_shape[0] 123 | boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) 124 | return boxes 125 | 126 | def _rescale_boxes(boxes, inds, scales): 127 | """Rescale boxes according to image rescaling.""" 128 | for i in range(boxes.shape[0]): 129 | boxes[i,:] = boxes[i,:] / scales[int(inds[i])] 130 | 131 | return boxes 132 | 133 | def im_detect(net, im, boxes): 134 | blobs, im_scales = _get_blobs(im,boxes) 135 | assert len(im_scales) == 1, "Only single-image batch implemented" 136 | 137 | im_blob = blobs['data'] 138 | blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32) 139 | cfg.DEDUP_BOXES=1.0/16.0 140 | for i in range(len(blobs['data'])): 141 | if cfg.DEDUP_BOXES > 0: 142 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 143 | hashes = np.round(blobs['boxes'][i] * cfg.DEDUP_BOXES).dot(v) 144 | _, index, inv_index = np.unique(hashes, return_index=True, 145 | return_inverse=True) 146 | blobs['boxes'][i] = blobs['boxes'][i][index, :] 147 | boxes_tmp = boxes[index, :].copy() 148 | else: 149 | boxes_tmp = boxes.copy() 150 | 151 | # TODO 152 | # change the blobs['im_info'], now is an array 153 | cls_prob, bbox_prob, fuse_prob, image_prob, scores_fast, bbox_pred_fast, rois = net.test_image(blobs['data'][i:i+1,:], blobs['im_info'], blobs['boxes'][i]) 154 | ''' 155 | WSDDN 156 | ''' 157 | scores_tmp = fuse_prob 158 | pred_boxes = np.tile(boxes_tmp, (1, fuse_prob.shape[1])) 159 | 160 | ''' 161 | Faster rcnn 162 | ''' 163 | boxes_fast = rois[:, 1:5] / im_scales[0] 164 | scores_fast = np.reshape(scores_fast, [scores_fast.shape[0], -1]) 165 | bbox_pred_fast = np.reshape(bbox_pred_fast, [bbox_pred_fast.shape[0], -1]) 166 | 167 | if cfg.TEST.BBOX_REG: 168 | # Apply bounding-box regression deltas 169 | box_deltas = bbox_pred_fast 170 | pred_boxes_fast = bbox_transform_inv(torch.from_numpy(boxes_fast), torch.from_numpy(box_deltas)).numpy() 171 | pred_boxes_fast = _clip_boxes(pred_boxes_fast, im.shape) 172 | else: 173 | # Simply repeat the boxes, once for each class 174 | pred_boxes = np.tile(boxes_fast, (1, 
scores_fast.shape[1])) 175 | 176 | 177 | 178 | cfg.TEST.USE_FLIPPED=True 179 | if cfg.TEST.USE_FLIPPED: 180 | blobs['data'][i:i+1] = blobs['data'][i:i+1][:, :, ::-1, :] 181 | width = blobs['data'][i:i+1].shape[2] 182 | oldx1 = blobs['boxes'][i][:, 1].copy() 183 | oldx2 = blobs['boxes'][i][:, 3].copy() 184 | blobs['boxes'][i][:, 1] = width - oldx2 - 1 185 | blobs['boxes'][i][:, 3] = width - oldx1 - 1 186 | assert (blobs['boxes'][i][:, 3] >= blobs['boxes'][i][:, 1]).all() 187 | 188 | cls_prob, bbox_prob, fuse_prob, image_prob, _ , _ , _= net.test_image(blobs['data'][i:i+1,:], blobs['im_info'], blobs['boxes'][i]) 189 | scores_tmp += fuse_prob 190 | 191 | if cfg.DEDUP_BOXES > 0: 192 | # Map scores and predictions back to the original set of boxes 193 | scores_tmp = scores_tmp[inv_index, :] 194 | pred_boxes = pred_boxes[inv_index, :] 195 | 196 | if i == 0: 197 | scores = np.copy(scores_tmp) 198 | else: 199 | scores += scores_tmp 200 | 201 | scores /= len(blobs['data']) * (1. + cfg.TEST.USE_FLIPPED) 202 | 203 | return scores, pred_boxes, scores_fast, pred_boxes_fast 204 | 205 | def apply_nms(all_boxes, thresh): 206 | """Apply non-maximum suppression to all predicted boxes output by the 207 | test_net method. 208 | """ 209 | num_classes = len(all_boxes) 210 | num_images = len(all_boxes[0]) 211 | nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] 212 | for cls_ind in range(num_classes): 213 | for im_ind in range(num_images): 214 | dets = all_boxes[cls_ind][im_ind] 215 | if dets == []: 216 | continue 217 | 218 | x1 = dets[:, 0] 219 | y1 = dets[:, 1] 220 | x2 = dets[:, 2] 221 | y2 = dets[:, 3] 222 | scores = dets[:, 4] 223 | inds = np.where((x2 > x1) & (y2 > y1))[0] 224 | dets = dets[inds,:] 225 | if dets == []: 226 | continue 227 | 228 | keep = nms(torch.from_numpy(dets), thresh).numpy() 229 | if len(keep) == 0: 230 | continue 231 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 232 | return nms_boxes 233 | 234 | def test_train_net(net, imdb, weights_filename, max_per_image=100, thresh=0.): 235 | np.random.seed(cfg.RNG_SEED) 236 | """Test a Fast R-CNN network on an image database.""" 237 | num_images = len(imdb.image_index) 238 | # all detections are collected into: 239 | # all_boxes[cls][image] = N x 5 array of detections in 240 | # (x1, y1, x2, y2, score) 241 | all_boxes = [[[] for _ in range(num_images)] 242 | for _ in range(imdb.num_classes)] 243 | 244 | all_boxes_fast = [[[] for _ in range(num_images)] 245 | for _ in range(imdb.num_classes+1)] 246 | 247 | output_dir = get_output_dir(imdb, weights_filename) #voc_2007_test/default(tag)/vgg16_faster_rcnn_iter_15000 248 | 249 | # timers 250 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 251 | 252 | roidb = imdb.roidb 253 | 254 | 255 | for i in range(num_images): 256 | im = cv2.imread(imdb.image_path_at(i)) 257 | 258 | _t['im_detect'].tic() 259 | scores, boxes, scores_fast, boxes_fast = im_detect(net, im, roidb[i]['boxes']) 260 | _t['im_detect'].toc() 261 | 262 | _t['misc'].tic() 263 | 264 | 265 | for j in range(0, imdb.num_classes): 266 | inds = np.argmax(scores[:, j]) 267 | all_boxes[j][i] = \ 268 | np.hstack((boxes[inds, j*4:(j+1)*4].reshape(1, -1), 269 | np.array([[scores[inds, j]]]))) 270 | ''' 271 | start of faster part 272 | ''' 273 | # skip j = 0, because it's the background class 274 | for j in range(1, imdb.num_classes+1): 275 | inds = np.argmax(scores_fast[:, j]) 276 | all_boxes_fast[j][i] = \ 277 | np.hstack((boxes_fast[inds, j*4:(j+1)*4].reshape(1, -1), 278 | np.array([[scores_fast[inds, j]]]))) 279 | ''' 
280 | end of faster part 281 | ''' 282 | 283 | 284 | _t['misc'].toc() 285 | 286 | print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 287 | .format(i + 1, num_images, _t['im_detect'].average_time(), 288 | _t['misc'].average_time())) 289 | 290 | 291 | output_dir_ws = output_dir + '/' + 'wsddn' 292 | if not os.path.exists(output_dir_ws): 293 | os.makedirs(output_dir_ws) 294 | det_file = os.path.join(output_dir_ws, 'discovery.pkl') 295 | with open(det_file, 'wb') as f: 296 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) 297 | 298 | print('Evaluating detections') 299 | imdb.evaluate_discovery(all_boxes, output_dir_ws) 300 | 301 | 302 | 303 | all_boxes_fast = all_boxes_fast[1:] # filter the background boxes 304 | output_dir_fast = output_dir + '/' + 'faster' 305 | if not os.path.exists(output_dir_fast): 306 | os.makedirs(output_dir_fast) 307 | det_file = os.path.join(output_dir_fast, 'discovery.pkl') 308 | with open(det_file, 'wb') as f: 309 | pickle.dump(all_boxes_fast, f, pickle.HIGHEST_PROTOCOL) 310 | 311 | print('Evaluating detections') 312 | imdb.evaluate_discovery(all_boxes_fast, output_dir_fast) 313 | 314 | -------------------------------------------------------------------------------- /lib/model/train_val.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/model/train_val.pyc -------------------------------------------------------------------------------- /lib/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__init__.py -------------------------------------------------------------------------------- /lib/nets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/resnet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/resnet_v1.cpython-36.pyc -------------------------------------------------------------------------------- 
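For reference, each discovery.pkl written by test_train_net above stores the all_boxes structure directly, i.e. one 1 x 5 (x1, y1, x2, y2, score) array per class and image. A minimal sketch of reading one back, assuming a hypothetical output directory from a VOC 2007 run (the real path is whatever get_output_dir returned, with /wsddn or /faster appended):

import pickle

# Hypothetical path, for illustration only.
with open('output/vgg16/voc_2007_test/default/wsddn/discovery.pkl', 'rb') as f:
    all_boxes = pickle.load(f)

num_classes = len(all_boxes)    # one list per class
num_images = len(all_boxes[0])  # one entry per image
print(all_boxes[0][0])          # 1 x 5 array: x1, y1, x2, y2, score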
/lib/nets/__pycache__/vgg16.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nets/__pycache__/vgg16.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/mobilenet_v1.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | 15 | import numpy as np 16 | from collections import namedtuple, OrderedDict 17 | 18 | from nets.network import Network 19 | from model.config import cfg 20 | 21 | # The following is adapted from: 22 | # https://github.com/tensorflow/models/blob/master/slim/nets/mobilenet_v1.py 23 | 24 | # Conv and DepthSepConv named tuple define layers of the MobileNet architecture 25 | # Conv defines 3x3 convolution layers 26 | # DepthSepConv defines 3x3 depthwise convolution followed by 1x1 convolution. 27 | # stride is the stride of the convolution 28 | # depth is the number of channels or filters in a layer 29 | Conv = namedtuple('Conv', ['kernel', 'stride', 'depth']) 30 | DepthSepConv = namedtuple('DepthSepConv', ['kernel', 'stride', 'depth']) 31 | 32 | # _CONV_DEFS specifies the MobileNet body 33 | _CONV_DEFS = [ 34 | Conv(kernel=3, stride=2, depth=32), 35 | DepthSepConv(kernel=3, stride=1, depth=64), 36 | DepthSepConv(kernel=3, stride=2, depth=128), 37 | DepthSepConv(kernel=3, stride=1, depth=128), 38 | DepthSepConv(kernel=3, stride=2, depth=256), 39 | DepthSepConv(kernel=3, stride=1, depth=256), 40 | DepthSepConv(kernel=3, stride=2, depth=512), 41 | DepthSepConv(kernel=3, stride=1, depth=512), 42 | DepthSepConv(kernel=3, stride=1, depth=512), 43 | DepthSepConv(kernel=3, stride=1, depth=512), 44 | DepthSepConv(kernel=3, stride=1, depth=512), 45 | DepthSepConv(kernel=3, stride=1, depth=512), 46 | # use stride 1 for the 13th layer 47 | DepthSepConv(kernel=3, stride=1, depth=1024), 48 | DepthSepConv(kernel=3, stride=1, depth=1024) 49 | ] 50 | 51 | def mobilenet_v1_base(final_endpoint='Conv2d_13_pointwise', 52 | min_depth=8, 53 | depth_multiplier=1.0, 54 | conv_defs=None, 55 | output_stride=None): 56 | """Mobilenet v1. 57 | 58 | Constructs a Mobilenet v1 network from inputs to the given final endpoint. 59 | 60 | Args: 61 | inputs: a tensor of shape [batch_size, height, width, channels]. 62 | final_endpoint: specifies the endpoint to construct the network up to. It 63 | can be one of ['Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', 64 | 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5_pointwise', 65 | 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', 66 | 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', 67 | 'Conv2d_12_pointwise', 'Conv2d_13_pointwise']. 68 | min_depth: Minimum depth value (number of channels) for all convolution ops. 69 | Enforced when depth_multiplier < 1, and not an active constraint when 70 | depth_multiplier >= 1. 
71 | depth_multiplier: Float multiplier for the depth (number of channels) 72 | for all convolution ops. The value must be greater than zero. Typical 73 | usage will be to set this value in (0, 1) to reduce the number of 74 | parameters or computation cost of the model. 75 | conv_defs: A list of ConvDef namedtuples specifying the net architecture. 76 | output_stride: An integer that specifies the requested ratio of input to 77 | output spatial resolution. If not None, then we invoke atrous convolution 78 | if necessary to prevent the network from reducing the spatial resolution 79 | of the activation maps. Allowed values are 8 (accurate fully convolutional 80 | mode), 16 (fast fully convolutional mode), 32 (classification mode). 81 | scope: Optional variable_scope. 82 | 83 | Returns: 84 | tensor_out: output tensor corresponding to the final_endpoint. 85 | end_points: a set of activations for external use, for example summaries or 86 | losses. 87 | 88 | Raises: 89 | ValueError: if final_endpoint is not set to one of the predefined values, 90 | or depth_multiplier <= 0, or the target output_stride is not 91 | allowed. 92 | """ 93 | depth = lambda d: max(int(d * depth_multiplier), min_depth) 94 | end_points = OrderedDict() 95 | 96 | # Used to find thinned depths for each layer. 97 | if depth_multiplier <= 0: 98 | raise ValueError('depth_multiplier is not greater than zero.') 99 | 100 | if conv_defs is None: 101 | conv_defs = _CONV_DEFS 102 | 103 | if output_stride is not None and output_stride not in [8, 16, 32]: 104 | raise ValueError('Only allowed output_stride values are 8, 16, 32.') 105 | 106 | def conv_bn(in_channels, out_channels, kernel_size=3, stride=1): 107 | return nn.Sequential( 108 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, (kernel_size - 1) // 2, bias=False), 109 | nn.BatchNorm2d(out_channels), 110 | nn.ReLU6(inplace=True) 111 | ) 112 | 113 | def conv_dw(in_channels, kernel_size=3, stride=1, dilation=1): 114 | return nn.Sequential( 115 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, (kernel_size - 1) // 2,\ 116 | groups=in_channels, dilation=dilation, bias=False), 117 | nn.BatchNorm2d(in_channels), 118 | nn.ReLU6(inplace=True) 119 | ) 120 | 121 | def conv_pw(in_channels, out_channels, kernel_size=3, stride=1, dilation=1): 122 | return nn.Sequential( 123 | nn.Conv2d(in_channels, out_channels, kernel_size, stride, 0, bias=False), 124 | nn.BatchNorm2d(out_channels), 125 | nn.ReLU6(inplace=True), 126 | ) 127 | 128 | # The current_stride variable keeps track of the output stride of the 129 | # activations, i.e., the running product of convolution strides up to the 130 | # current network layer. This allows us to invoke atrous convolution 131 | # whenever applying the next convolution would result in the activations 132 | # having output stride larger than the target output_stride. 133 | current_stride = 1 134 | 135 | # The atrous convolution rate parameter. 136 | rate = 1 137 | 138 | in_channels = 3 139 | for i, conv_def in enumerate(conv_defs): 140 | end_point_base = 'Conv2d_%d' % i 141 | 142 | if output_stride is not None and current_stride == output_stride: 143 | # If we have reached the target output_stride, then we need to employ 144 | # atrous convolution with stride=1 and multiply the atrous rate by the 145 | # current unit's stride for use in subsequent layers. 
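# As a concrete example, with output_stride=8: current_stride reaches 8 after
# the stride-2 DepthSepConv with depth 256, so the later stride-2 definition
# (depth 512) is applied with layer_stride=1 while rate doubles from 1 to 2;
# from then on the depthwise convolutions dilate (layer_rate=2) instead of
# downsampling, keeping the overall output stride at 8.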
146 | layer_stride = 1 147 | layer_rate = rate 148 | rate *= conv_def.stride 149 | else: 150 | layer_stride = conv_def.stride 151 | layer_rate = 1 152 | current_stride *= conv_def.stride 153 | 154 | out_channels = depth(conv_def.depth) 155 | if isinstance(conv_def, Conv): 156 | end_point = end_point_base 157 | end_points[end_point] = conv_bn(in_channels, out_channels, conv_def.kernel, 158 | stride=conv_def.stride) 159 | if end_point == final_endpoint: 160 | return nn.Sequential(end_points) 161 | 162 | elif isinstance(conv_def, DepthSepConv): 163 | end_points[end_point_base] = nn.Sequential(OrderedDict([ 164 | ('depthwise', conv_dw(in_channels, conv_def.kernel, stride=layer_stride, dilation=layer_rate)), 165 | ('pointwise', conv_pw(in_channels, out_channels, 1, stride=1))])) 166 | 167 | if end_point_base + '_pointwise' == final_endpoint: 168 | return nn.Sequential(end_points) 169 | else: 170 | raise ValueError('Unknown convolution type %s for layer %d' 171 | % (conv_def.ltype, i)) 172 | in_channels = out_channels 173 | raise ValueError('Unknown final endpoint %s' % final_endpoint) 174 | 175 | class mobilenetv1(Network): 176 | def __init__(self): 177 | Network.__init__(self) 178 | self._feat_stride = [16, ] 179 | self._feat_compress = [1. / float(self._feat_stride[0]), ] 180 | self._depth_multiplier = cfg.MOBILENET.DEPTH_MULTIPLIER 181 | self._net_conv_channels = 512 182 | self._fc7_channels = 1024 183 | 184 | def init_weights(self): 185 | def normal_init(m, mean, stddev, truncated=False): 186 | """ 187 | weight initalizer: truncated normal and random normal. 188 | """ 189 | if m.__class__.__name__.find('Conv') == -1: 190 | return 191 | if truncated: 192 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation 193 | else: 194 | m.weight.data.normal_(mean, stddev) 195 | if m.bias is not None: m.bias.data.zero_() 196 | 197 | self.mobilenet.apply(lambda m: normal_init(m, 0, 0.09, True)) 198 | normal_init(self.rpn_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 199 | normal_init(self.rpn_cls_score_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 200 | normal_init(self.rpn_bbox_pred_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 201 | normal_init(self.cls_score_net, 0, 0.01, cfg.TRAIN.TRUNCATED) 202 | normal_init(self.bbox_pred_net, 0, 0.001, cfg.TRAIN.TRUNCATED) 203 | 204 | def _image_to_head(self): 205 | net_conv = self._layers['head'](self._image) 206 | self._act_summaries['conv'] = net_conv 207 | 208 | return net_conv 209 | 210 | def _head_to_tail(self, pool5): 211 | fc7 = self._layers['tail'](pool5) 212 | fc7 = fc7.mean(3).mean(2) 213 | return fc7 214 | 215 | def _init_head_tail(self): 216 | self.mobilenet = mobilenet_v1_base() 217 | 218 | # Fix blocks 219 | assert (0 <= cfg.MOBILENET.FIXED_LAYERS <= 12) 220 | for m in list(self.mobilenet.children())[:cfg.MOBILENET.FIXED_LAYERS]: 221 | for p in m.parameters(): 222 | p.requires_grad = False 223 | 224 | def set_bn_fix(m): 225 | classname = m.__class__.__name__ 226 | if classname.find('BatchNorm') != -1: 227 | for p in m.parameters(): p.requires_grad=False 228 | 229 | self.mobilenet.apply(set_bn_fix) 230 | 231 | # Add weight decay 232 | def l2_regularizer(m, wd): 233 | if m.__class__.__name__.find('Conv') != -1: 234 | m.weight.weight_decay = cfg.MOBILENET.WEIGHT_DECAY 235 | if cfg.MOBILENET.REGU_DEPTH: 236 | self.mobilenet.apply(lambda x: l2_regularizer(x, cfg.MOBILENET.WEIGHT_DECAY)) 237 | else: 238 | self.mobilenet.apply(lambda x: l2_regularizer(x, 0)) 239 | # always set the first conv layer 240 | list(self.mobilenet.children())[0].apply(lambda x: 
l2_regularizer(x, cfg.MOBILENET.WEIGHT_DECAY)) 241 | 242 | # Build mobilenet. 243 | self._layers['head'] = nn.Sequential(*list(self.mobilenet.children())[:12]) 244 | self._layers['tail'] = nn.Sequential(*list(self.mobilenet.children())[12:]) 245 | 246 | def load_pretrained_cnn(self, state_dict): 247 | # TODO 248 | print('Warning: No available pretrained model yet') 249 | return 250 | self.mobilenet.load_state_dict({k: state_dict[k] for k in list(self.resnet.state_dict())}) 251 | -------------------------------------------------------------------------------- /lib/nets/resnet_v1.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from nets.network import Network 11 | from model.config import cfg 12 | 13 | import utils.timer 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | from torch.autograd import Variable 19 | import math 20 | import torch.utils.model_zoo as model_zoo 21 | 22 | 23 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 24 | 'resnet152'] 25 | 26 | 27 | model_urls = { 28 | 'resnet18': 'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth', 29 | 'resnet34': 'https://s3.amazonaws.com/pytorch/models/resnet34-333f7ec4.pth', 30 | 'resnet50': 'https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth', 31 | 'resnet101': 'https://s3.amazonaws.com/pytorch/models/resnet101-5d3b4d8f.pth', 32 | 'resnet152': 'https://s3.amazonaws.com/pytorch/models/resnet152-b121ed2d.pth', 33 | } 34 | 35 | 36 | def conv3x3(in_planes, out_planes, stride=1): 37 | "3x3 convolution with padding" 38 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 39 | padding=1, bias=False) 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | residual = self.downsample(x) 67 | 68 | out += residual 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 80 | self.bn1 = nn.BatchNorm2d(planes) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 82 | padding=1, bias=False) 83 | self.bn2 = nn.BatchNorm2d(planes) 84 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 85 | self.bn3 = nn.BatchNorm2d(planes * 4) 86 | self.relu = nn.ReLU(inplace=True) 87 | self.downsample = downsample 88 | 
self.stride = stride 89 | 90 | def forward(self, x): 91 | residual = x 92 | 93 | out = self.conv1(x) 94 | out = self.bn1(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv2(out) 98 | out = self.bn2(out) 99 | out = self.relu(out) 100 | 101 | out = self.conv3(out) 102 | out = self.bn3(out) 103 | 104 | if self.downsample is not None: 105 | residual = self.downsample(x) 106 | 107 | out += residual 108 | out = self.relu(out) 109 | 110 | return out 111 | 112 | 113 | class ResNet(nn.Module): 114 | def __init__(self, block, layers, num_classes=1000): 115 | self.inplanes = 64 116 | super(ResNet, self).__init__() 117 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 118 | bias=False) 119 | self.bn1 = nn.BatchNorm2d(64) 120 | self.relu = nn.ReLU(inplace=True) 121 | # maxpool different from pytorch-resnet, to match tf-faster-rcnn 122 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 123 | self.layer1 = self._make_layer(block, 64, layers[0]) 124 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 125 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 126 | # use stride 1 for the last conv4 layer (same as tf-faster-rcnn) 127 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) 128 | 129 | for m in self.modules(): 130 | if isinstance(m, nn.Conv2d): 131 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 132 | m.weight.data.normal_(0, math.sqrt(2. / n)) 133 | elif isinstance(m, nn.BatchNorm2d): 134 | m.weight.data.fill_(1) 135 | m.bias.data.zero_() 136 | 137 | def _make_layer(self, block, planes, blocks, stride=1): 138 | downsample = None 139 | if stride != 1 or self.inplanes != planes * block.expansion: 140 | downsample = nn.Sequential( 141 | nn.Conv2d(self.inplanes, planes * block.expansion, 142 | kernel_size=1, stride=stride, bias=False), 143 | nn.BatchNorm2d(planes * block.expansion), 144 | ) 145 | 146 | layers = [] 147 | layers.append(block(self.inplanes, planes, stride, downsample)) 148 | self.inplanes = planes * block.expansion 149 | for i in range(1, blocks): 150 | layers.append(block(self.inplanes, planes)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def resnet18(pretrained=False): 155 | """Constructs a ResNet-18 model. 156 | Args: 157 | pretrained (bool): If True, returns a model pre-trained on ImageNet 158 | """ 159 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 160 | if pretrained: 161 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 162 | return model 163 | 164 | 165 | def resnet34(pretrained=False): 166 | """Constructs a ResNet-34 model. 167 | Args: 168 | pretrained (bool): If True, returns a model pre-trained on ImageNet 169 | """ 170 | model = ResNet(BasicBlock, [3, 4, 6, 3]) 171 | if pretrained: 172 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 173 | return model 174 | 175 | 176 | def resnet50(pretrained=False): 177 | """Constructs a ResNet-50 model. 178 | Args: 179 | pretrained (bool): If True, returns a model pre-trained on ImageNet 180 | """ 181 | model = ResNet(Bottleneck, [3, 4, 6, 3]) 182 | if pretrained: 183 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 184 | return model 185 | 186 | 187 | def resnet101(pretrained=False): 188 | """Constructs a ResNet-101 model. 
189 | Args: 190 | pretrained (bool): If True, returns a model pre-trained on ImageNet 191 | """ 192 | model = ResNet(Bottleneck, [3, 4, 23, 3]) 193 | if pretrained: 194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 195 | return model 196 | 197 | 198 | def resnet152(pretrained=False): 199 | """Constructs a ResNet-152 model. 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 8, 36, 3]) 204 | if pretrained: 205 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 206 | return model 207 | 208 | class resnetv1(Network): 209 | def __init__(self, num_layers=50): 210 | Network.__init__(self) 211 | self._feat_stride = [16, ] 212 | self._feat_compress = [1. / float(self._feat_stride[0]), ] 213 | self._num_layers = num_layers 214 | self._net_conv_channels = 1024 215 | self._fc7_channels = 2048 216 | 217 | def _crop_pool_layer(self, bottom, rois): 218 | return Network._crop_pool_layer(self, bottom, rois, cfg.RESNET.MAX_POOL) 219 | 220 | def _image_to_head(self): 221 | net_conv = self._layers['head'](self._image) 222 | self._act_summaries['conv'] = net_conv 223 | 224 | return net_conv 225 | 226 | def _head_to_tail(self, pool5): 227 | fc7 = self.resnet.layer4(pool5).mean(3).mean(2) # average pooling after layer4 228 | return fc7 229 | 230 | def _init_head_tail(self): 231 | # choose different blocks for different number of layers 232 | if self._num_layers == 50: 233 | self.resnet = resnet50() 234 | 235 | elif self._num_layers == 101: 236 | self.resnet = resnet101() 237 | 238 | elif self._num_layers == 152: 239 | self.resnet = resnet152() 240 | 241 | else: 242 | # other numbers are not supported 243 | raise NotImplementedError 244 | 245 | # Fix blocks 246 | for p in self.resnet.bn1.parameters(): p.requires_grad=False 247 | for p in self.resnet.conv1.parameters(): p.requires_grad=False 248 | assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4) 249 | if cfg.RESNET.FIXED_BLOCKS >= 3: 250 | for p in self.resnet.layer3.parameters(): p.requires_grad=False 251 | if cfg.RESNET.FIXED_BLOCKS >= 2: 252 | for p in self.resnet.layer2.parameters(): p.requires_grad=False 253 | if cfg.RESNET.FIXED_BLOCKS >= 1: 254 | for p in self.resnet.layer1.parameters(): p.requires_grad=False 255 | 256 | def set_bn_fix(m): 257 | classname = m.__class__.__name__ 258 | if classname.find('BatchNorm') != -1: 259 | for p in m.parameters(): p.requires_grad=False 260 | 261 | self.resnet.apply(set_bn_fix) 262 | 263 | # Build resnet. 
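# The 'head' covers conv1 through layer3 (overall stride 16, matching
# self._feat_stride); layer4 is applied later in _head_to_tail, where its
# output is average-pooled into the fc7 feature.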
264 | self._layers['head'] = nn.Sequential(self.resnet.conv1, self.resnet.bn1,self.resnet.relu, 265 | self.resnet.maxpool,self.resnet.layer1,self.resnet.layer2,self.resnet.layer3) 266 | 267 | def train(self, mode=True): 268 | # Override train so that the training mode is set as we want 269 | nn.Module.train(self, mode) 270 | if mode: 271 | # Set fixed blocks to be in eval mode (not really doing anything) 272 | self.resnet.eval() 273 | if cfg.RESNET.FIXED_BLOCKS <= 3: 274 | self.resnet.layer4.train() 275 | if cfg.RESNET.FIXED_BLOCKS <= 2: 276 | self.resnet.layer3.train() 277 | if cfg.RESNET.FIXED_BLOCKS <= 1: 278 | self.resnet.layer2.train() 279 | if cfg.RESNET.FIXED_BLOCKS == 0: 280 | self.resnet.layer1.train() 281 | 282 | # Set batchnorm always in eval mode during training 283 | def set_bn_eval(m): 284 | classname = m.__class__.__name__ 285 | if classname.find('BatchNorm') != -1: 286 | m.eval() 287 | 288 | self.resnet.apply(set_bn_eval) 289 | 290 | def load_pretrained_cnn(self, state_dict): 291 | self.resnet.load_state_dict({k: state_dict[k] for k in list(self.resnet.state_dict())}) 292 | -------------------------------------------------------------------------------- /lib/nets/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from nets.network import Network 11 | from model.config import cfg 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.autograd import Variable 17 | import math 18 | import torchvision.models as models 19 | 20 | class vgg16(Network): 21 | def __init__(self): 22 | Network.__init__(self) 23 | self._feat_stride = [16, ] 24 | self._feat_compress = [1. 
/ float(self._feat_stride[0]), ] 25 | self._net_conv_channels = 512 26 | self._fc7_channels = 4096 27 | 28 | def _init_head_tail(self): 29 | self.vgg = models.vgg16() 30 | # Remove fc8 31 | self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1]) 32 | 33 | # Fix the layers before conv3: 34 | for layer in range(10): 35 | for p in self.vgg.features[layer].parameters(): p.requires_grad = False 36 | 37 | # not using the last maxpool layer 38 | self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1]) 39 | 40 | def _image_to_head(self): 41 | net_conv = self._layers['head'](self._image) 42 | self._act_summaries['conv'] = net_conv 43 | 44 | return net_conv 45 | 46 | def _head_to_tail(self, pool5): 47 | pool5_flat = pool5.view(pool5.size(0), -1) 48 | fc7 = self.vgg.classifier(pool5_flat) 49 | 50 | return fc7 51 | 52 | def load_pretrained_cnn(self, state_dict): 53 | self.vgg.load_state_dict({k:v for k,v in state_dict.items() if k in self.vgg.state_dict()}) -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = 
torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 
78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = 
fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); 
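Both the CPU and GPU kernels above are reached from Python through model.nms_wrapper.nms, which expects an N x 5 tensor of (x1, y1, x2, y2, score) rows. A minimal sketch, assuming the _ext.nms extension has been built with lib/nms/build.py:

import torch
from model.nms_wrapper import nms

# Two heavily overlapping boxes plus one disjoint box; scores in the last column.
dets = torch.FloatTensor([[ 10.,  10., 100., 100., 0.95],
                          [ 12.,  12., 102., 102., 0.90],
                          [200., 200., 260., 260., 0.80]])
keep = nms(dets, 0.5)  # LongTensor of indices into dets
# Box 1 overlaps box 0 with IoU ~ 0.92 > 0.5, so only indices 0 and 2 survive.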
-------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """The data layer used during training to train a Fast R-CNN network. 9 | 10 | RoIDataLayer implements a Caffe Python layer. 
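In this PyTorch port it is a plain Python class: construct it with a roidb and the number of classes, then call forward() to get the next minibatch. A minimal usage sketch, assuming the roidb has been prepared beforehand (e.g. by roi_data_layer.roidb.prepare_roidb):

    layer = RoIDataLayer(roidb, imdb.num_classes)
    blobs = layer.forward()
    # blobs holds 'data', 'boxes', 'im_info' and 'labels' (see minibatch.py)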
11 | """ 12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | from model.config import cfg 17 | from roi_data_layer.minibatch import get_minibatch 18 | import numpy as np 19 | import time 20 | 21 | class RoIDataLayer(object): 22 | """Fast R-CNN data layer used for training.""" 23 | 24 | def __init__(self, roidb, num_classes, random=False): 25 | """Set the roidb to be used by this layer during training.""" 26 | self._roidb = roidb 27 | self._num_classes = num_classes 28 | # Also set a random flag 29 | self._random = random 30 | self._shuffle_roidb_inds() 31 | 32 | def _shuffle_roidb_inds(self): 33 | """Randomly permute the training roidb.""" 34 | # If the random flag is set, 35 | # then the database is shuffled according to system time 36 | # Useful for the validation set 37 | if self._random: 38 | st0 = np.random.get_state() 39 | millis = int(round(time.time() * 1000)) % 4294967295 40 | np.random.seed(millis) 41 | 42 | if cfg.TRAIN.ASPECT_GROUPING: 43 | raise NotImplementedError 44 | ''' 45 | widths = np.array([r['width'] for r in self._roidb]) 46 | heights = np.array([r['height'] for r in self._roidb]) 47 | horz = (widths >= heights) 48 | vert = np.logical_not(horz) 49 | horz_inds = np.where(horz)[0] 50 | vert_inds = np.where(vert)[0] 51 | inds = np.hstack(( 52 | np.random.permutation(horz_inds), 53 | np.random.permutation(vert_inds))) 54 | inds = np.reshape(inds, (-1, 2)) 55 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 56 | inds = np.reshape(inds[row_perm, :], (-1,)) 57 | self._perm = inds 58 | ''' 59 | else: 60 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 61 | # Restore the random state 62 | if self._random: 63 | np.random.set_state(st0) 64 | 65 | self._cur = 0 66 | 67 | def _get_next_minibatch_inds(self): 68 | """Return the roidb indices for the next minibatch.""" 69 | 70 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 71 | self._shuffle_roidb_inds() 72 | 73 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 74 | self._cur += cfg.TRAIN.IMS_PER_BATCH 75 | 76 | return db_inds 77 | 78 | def _get_next_minibatch(self): 79 | """Return the blobs to be used for the next minibatch. 80 | 81 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 82 | separate process and made available through self._blob_queue. 
83 | """ 84 | db_inds = self._get_next_minibatch_inds() 85 | minibatch_db = [self._roidb[i] for i in db_inds] 86 | return get_minibatch(minibatch_db, self._num_classes) 87 | 88 | def forward(self): 89 | """Get blobs and copy them into this layer's top blob vector.""" 90 | blobs = self._get_next_minibatch() 91 | return blobs 92 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | import cv2 16 | from model.config import cfg 17 | from utils.blob import prep_im_for_blob, im_list_to_blob 18 | 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | # Sample random scales to use for each image in this batch 23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 24 | size=num_images) 25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 26 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 27 | format(num_images, cfg.TRAIN.BATCH_SIZE) 28 | 29 | # Get the input image blob, formatted for caffe 30 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 31 | 32 | blobs = {'data': im_blob} 33 | 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | 37 | # gt boxes: (x1, y1, x2, y2, cls) 38 | #if cfg.TRAIN.USE_ALL_GT: 39 | # Include all ground truth boxes 40 | # gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 41 | #else: 42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 43 | # gt_inds = np.where(roidb[0]['gt_classes'] != 0 & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 44 | #gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 45 | #gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 46 | #gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 47 | boxes = roidb[0]['boxes'] * im_scales[0] 48 | batch_ind = 0 * np.ones((boxes.shape[0], 1)) 49 | boxes = np.hstack((batch_ind, boxes)) 50 | DEDUP_BOXES=1./16. 51 | if DEDUP_BOXES > 0: 52 | v = np.array([1,1e3, 1e6, 1e9, 1e12]) 53 | hashes = np.round(boxes * DEDUP_BOXES).dot(v) 54 | _, index, inv_index = np.unique(hashes, return_index=True, 55 | return_inverse=True) 56 | boxes = boxes[index, :] 57 | 58 | blobs['boxes'] = boxes 59 | blobs['im_info'] = np.array( 60 | [im_blob.shape[1], im_blob.shape[2], im_scales[0]], 61 | dtype=np.float32) 62 | blobs['labels'] = roidb[0]['labels'] 63 | 64 | return blobs 65 | 66 | def _get_image_blob(roidb, scale_inds): 67 | """Builds an input blob from the images in the roidb at the specified 68 | scales. 
69 | """ 70 | num_images = len(roidb) 71 | processed_ims = [] 72 | im_scales = [] 73 | for i in range(num_images): 74 | im = cv2.imread(roidb[i]['image']) 75 | if roidb[i]['flipped']: 76 | im = im[:, ::-1, :] 77 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 78 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 79 | cfg.TRAIN.MAX_SIZE) 80 | im_scales.append(im_scale) 81 | processed_ims.append(im) 82 | 83 | # Create a blob to hold the input images 84 | blob = im_list_to_blob(processed_ims) 85 | 86 | return blob, im_scales 87 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | 14 | def prepare_roidb(imdb): 15 | """Enrich the imdb's roidb by adding some derived quantities that 16 | are useful for training. This function precomputes the maximum 17 | overlap, taken over ground-truth boxes, between each ROI and 18 | each ground-truth box. The class with maximum overlap is also 19 | recorded. 20 | """ 21 | roidb = imdb.roidb 22 | for i in range(len(imdb.image_index)): 23 | roidb[i]['image'] = imdb.image_path_at(i) 24 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/bbox.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/bbox.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/blob.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/blob.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/timer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/timer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/visualization.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/lib/utils/__pycache__/visualization.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/bbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def bbox_overlaps(boxes, query_boxes): 5 | """ 6 | Parameters 7 | ---------- 8 | boxes: (N, 4) ndarray or tensor or variable 9 | query_boxes: (K, 4) ndarray or tensor or variable 10 | Returns 11 | ------- 12 | overlaps: (N, K) overlap between boxes and query_boxes 13 | """ 14 | if isinstance(boxes, np.ndarray): 15 | boxes = torch.from_numpy(boxes) 16 | query_boxes = torch.from_numpy(query_boxes) 17 | out_fn = lambda x: x.numpy() # If input is ndarray, turn the overlaps back to ndarray when return 18 | else: 19 | out_fn = lambda x: x 20 | 21 | box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \ 22 | (boxes[:, 3] - boxes[:, 1] + 1) 23 | query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \ 24 | (query_boxes[:, 3] - query_boxes[:, 1] + 1) 25 | 26 | iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) - torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0) 27 | ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) - torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0) 28 | ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih 29 | overlaps = iw * ih / ua 30 | return out_fn(overlaps) -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 | """Convert a list of images into a network input. 19 | 20 | Assumes images are already prepared (means subtracted, BGR order, ...). 
21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | im_shape = im.shape 38 | im_size_min = np.min(im_shape[0:2]) 39 | im_size_max = np.max(im_shape[0:2]) 40 | im_scale = float(target_size) / float(im_size_min) 41 | # Prevent the biggest axis from being more than MAX_SIZE 42 | if np.round(im_scale * im_size_max) > max_size: 43 | im_scale = float(max_size) / float(im_size_max) 44 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 45 | interpolation=cv2.INTER_LINEAR) 46 | 47 | return im, im_scale 48 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | import torch 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self._total_time = {} 15 | self._calls = {} 16 | self._start_time = {} 17 | self._diff = {} 18 | self._average_time = {} 19 | 20 | def tic(self, name='default'): 21 | # using time.time instead of time.clock because time.clock 22 | # does not normalize for multithreading 23 | torch.cuda.synchronize() 24 | self._start_time[name] = time.time() 25 | 26 | def toc(self, name='default', average=True): 27 | torch.cuda.synchronize() 28 | self._diff[name] = time.time() - self._start_time[name] 29 | self._total_time[name] = self._total_time.get(name, 0.)
+ self._diff[name] 30 | self._calls[name] = self._calls.get(name, 0 ) + 1 31 | self._average_time[name] = self._total_time[name] / self._calls[name] 32 | if average: 33 | return self._average_time[name] 34 | else: 35 | return self._diff[name] 36 | 37 | def average_time(self, name='default'): 38 | return self._average_time[name] 39 | 40 | def total_time(self, name='default'): 41 | return self._total_time[name] 42 | 43 | timer = Timer() 44 | -------------------------------------------------------------------------------- /lib/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from six.moves import range 12 | import PIL.Image as Image 13 | import PIL.ImageColor as ImageColor 14 | import PIL.ImageDraw as ImageDraw 15 | import PIL.ImageFont as ImageFont 16 | 17 | STANDARD_COLORS = [ 18 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 19 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 20 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 21 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 22 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 23 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 24 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 25 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 26 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 27 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 28 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 29 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 30 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 31 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 32 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 33 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 34 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 35 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 36 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 37 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 38 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 39 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 40 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 41 | ] 42 | 43 | NUM_COLORS = len(STANDARD_COLORS) 44 | 45 | try: 46 | FONT = ImageFont.truetype('arial.ttf', 24) 47 | except IOError: 48 | FONT = ImageFont.load_default() 49 | 50 | def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4): 51 | draw = ImageDraw.Draw(image) 52 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 53 | draw.line([(left, top), (left, bottom), (right, bottom), 54 | (right, top), (left, top)], width=thickness, fill=color) 55 | text_bottom = bottom 56 | # Reverse list and 
print from bottom to top. 57 | text_width, text_height = font.getsize(display_str) 58 | margin = np.ceil(0.05 * text_height) 59 | draw.rectangle( 60 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 61 | text_bottom)], 62 | fill=color) 63 | draw.text( 64 | (left + margin, text_bottom - text_height - margin), 65 | display_str, 66 | fill='black', 67 | font=font) 68 | 69 | return image 70 | 71 | def draw_bounding_boxes(image, gt_boxes, im_info): 72 | num_boxes = gt_boxes.shape[0] 73 | gt_boxes_new = gt_boxes.copy() 74 | gt_boxes_new[:,:4] = np.round(gt_boxes_new[:,:4].copy() / im_info[2]) 75 | disp_image = Image.fromarray(np.uint8(image[0])) 76 | 77 | for i in range(num_boxes): 78 | this_class = int(gt_boxes_new[i, 4]) 79 | disp_image = _draw_single_box(disp_image, 80 | gt_boxes_new[i, 0], 81 | gt_boxes_new[i, 1], 82 | gt_boxes_new[i, 2], 83 | gt_boxes_new[i, 3], 84 | 'N%02d-C%02d' % (i, this_class), 85 | FONT, 86 | color=STANDARD_COLORS[this_class % NUM_COLORS]) 87 | 88 | image[0, :] = np.array(disp_image) 89 | return image -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '..', 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, '..', 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /tools/_init_paths.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sunarker/Collaborative-Learning-for-Weakly-Supervised-Object-Detection/6af3d84b70222a4dbc75b81e004c430e4307a108/tools/_init_paths.pyc -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_mobile.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['mobilenet' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('moving_mean', 'running_mean'),\ 32 | ('moving_variance', 'running_var'),\ 33 | ('weights', 'weight'),\ 34 | ('biases', 'bias'),\ 35 | ('/BatchNorm', '.1'),\ 36 | ('_pointwise/', '.pointwise.0.'),\ 37 | ('_depthwise/depthwise_', '.depthwise.0.'),\ 38 | ('_pointwise.1', '.pointwise.1'),\ 39 | ('_depthwise.1', '.depthwise.1'),\ 40 | ('Conv2d_0/', 'Conv2d_0.0.'),\ 41 | ('mobilenet/rpn_conv/3x3', 
'rpn_net'),\ 42 | ('mobilenet/rpn_cls_score', 'rpn_cls_score_net'),\ 43 | ('mobilenet/cls_score', 'cls_score_net'),\ 44 | ('mobilenet/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 45 | ('mobilenet/bbox_pred', 'bbox_pred_net'),\ 46 | ('gamma', 'weight'),\ 47 | ('beta', 'bias'),\ 48 | ('/', '.')]) 49 | 50 | for a, b in dummy_replace.items(): 51 | for k in list(var_dict.keys()): 52 | if a in k: 53 | var_dict[k.replace(a,b)] = var_dict[k] 54 | del var_dict[k] 55 | 56 | # print set(var_dict.keys()) - set(x.keys()) 57 | # print set(x.keys()) - set(var_dict.keys()) 58 | 59 | for k in list(var_dict.keys()): 60 | if var_dict[k].ndim == 4: 61 | if 'depthwise' in k: 62 | var_dict[k] = var_dict[k].transpose((2, 3, 0, 1)).copy(order='C') 63 | else: 64 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 65 | if var_dict[k].ndim == 2: 66 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 67 | # assert x[k].shape == var_dict[k].shape, k 68 | 69 | for k in list(var_dict.keys()): 70 | var_dict[k] = torch.from_numpy(var_dict[k]) 71 | 72 | 73 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 74 | -------------------------------------------------------------------------------- /tools/convert_from_tensorflow_vgg.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python import pywrap_tensorflow 3 | from collections import OrderedDict 4 | import re 5 | import torch 6 | 7 | import argparse 8 | parser = argparse.ArgumentParser(description='Convert tf-faster-rcnn model to pytorch-faster-rcnn model') 9 | parser.add_argument('--tensorflow_model', 10 | help='the path of tensorflow_model', 11 | default=None, type=str) 12 | 13 | args = parser.parse_args() 14 | 15 | reader = pywrap_tensorflow.NewCheckpointReader(args.tensorflow_model) 16 | var_to_shape_map = reader.get_variable_to_shape_map() 17 | var_dict = {k:reader.get_tensor(k) for k in var_to_shape_map.keys()} 18 | 19 | del var_dict['Variable'] 20 | 21 | for k in list(var_dict.keys()): 22 | if 'Momentum' in k: 23 | del var_dict[k] 24 | 25 | for k in list(var_dict.keys()): 26 | if k.find('/') >= 0: 27 | var_dict['vgg' + k[k.find('/'):]] = var_dict[k] 28 | del var_dict[k] 29 | 30 | dummy_replace = OrderedDict([ 31 | ('weights', 'weight'),\ 32 | ('biases', 'bias'),\ 33 | ('vgg/rpn_conv/3x3', 'rpn_net'),\ 34 | ('vgg/rpn_cls_score', 'rpn_cls_score_net'),\ 35 | ('vgg/cls_score', 'cls_score_net'),\ 36 | ('vgg/rpn_bbox_pred', 'rpn_bbox_pred_net'),\ 37 | ('vgg/bbox_pred', 'bbox_pred_net'),\ 38 | ('/', '.')]) 39 | 40 | for a, b in dummy_replace.items(): 41 | for k in list(var_dict.keys()): 42 | if a in k: 43 | var_dict[k.replace(a,b)] = var_dict[k] 44 | del var_dict[k] 45 | 46 | layer_map = OrderedDict([ 47 | ('conv1.conv1_1', 'features.0'),\ 48 | ('conv1.conv1_2', 'features.2'),\ 49 | ('conv2.conv2_1', 'features.5'),\ 50 | ('conv2.conv2_2', 'features.7'),\ 51 | ('conv3.conv3_1', 'features.10'),\ 52 | ('conv3.conv3_2', 'features.12'),\ 53 | ('conv3.conv3_3', 'features.14'),\ 54 | ('conv4.conv4_1', 'features.17'),\ 55 | ('conv4.conv4_2', 'features.19'),\ 56 | ('conv4.conv4_3', 'features.21'),\ 57 | ('conv5.conv5_1', 'features.24'),\ 58 | ('conv5.conv5_2', 'features.26'),\ 59 | ('conv5.conv5_3', 'features.28'),\ 60 | ('fc6', 'classifier.0'),\ 61 | ('fc7', 'classifier.3')]) 62 | 63 | for a, b in layer_map.items(): 64 | for k in list(var_dict.keys()): 65 | if a in k: 66 | var_dict[k.replace(a,b)] = var_dict[k] 67 | del var_dict[k] 68 | 69 | 
for k in list(var_dict.keys()): 70 | if 'classifier.0' in k: 71 | if var_dict[k].ndim == 2: # weight 72 | var_dict[k] = var_dict[k].reshape(7,7,512,4096).transpose((3, 2, 0, 1)).reshape(4096, -1).copy(order='C') 73 | else: 74 | if var_dict[k].ndim == 4: 75 | var_dict[k] = var_dict[k].transpose((3, 2, 0, 1)).copy(order='C') 76 | if var_dict[k].ndim == 2: 77 | var_dict[k] = var_dict[k].transpose((1, 0)).copy(order='C') 78 | # assert x[k].shape == var_dict[k].shape, k 79 | 80 | for k in list(var_dict.keys()): 81 | var_dict[k] = torch.from_numpy(var_dict[k]) 82 | 83 | torch.save(var_dict, args.tensorflow_model[:args.tensorflow_model.find('.ckpt')]+'.pth') 84 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Tensorflow Faster R-CNN 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Xinlei Chen, based on code from Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | """ 10 | Demo script showing detections in sample images. 11 | 12 | See README.md for installation instructions before running. 13 | """ 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | import _init_paths 19 | from model.config import cfg 20 | from model.test import im_detect 21 | from model.nms_wrapper import nms 22 | 23 | from utils.timer import Timer 24 | import matplotlib.pyplot as plt 25 | import numpy as np 26 | import os, cv2 27 | import argparse 28 | 29 | from nets.vgg16 import vgg16 30 | from nets.resnet_v1 import resnetv1 31 | 32 | import torch 33 | 34 | CLASSES = ('__background__', 35 | 'aeroplane', 'bicycle', 'bird', 'boat', 36 | 'bottle', 'bus', 'car', 'cat', 'chair', 37 | 'cow', 'diningtable', 'dog', 'horse', 38 | 'motorbike', 'person', 'pottedplant', 39 | 'sheep', 'sofa', 'train', 'tvmonitor') 40 | 41 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),'res101': ('res101_faster_rcnn_iter_%d.pth',)} 42 | DATASETS= {'pascal_voc': ('voc_2007_trainval',),'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 43 | 44 | def vis_detections(im, class_name, dets, thresh=0.5): 45 | """Draw detected bounding boxes.""" 46 | inds = np.where(dets[:, -1] >= thresh)[0] 47 | if len(inds) == 0: 48 | return 49 | 50 | im = im[:, :, (2, 1, 0)] 51 | fig, ax = plt.subplots(figsize=(12, 12)) 52 | ax.imshow(im, aspect='equal') 53 | for i in inds: 54 | bbox = dets[i, :4] 55 | score = dets[i, -1] 56 | 57 | ax.add_patch( 58 | plt.Rectangle((bbox[0], bbox[1]), 59 | bbox[2] - bbox[0], 60 | bbox[3] - bbox[1], fill=False, 61 | edgecolor='red', linewidth=3.5) 62 | ) 63 | ax.text(bbox[0], bbox[1] - 2, 64 | '{:s} {:.3f}'.format(class_name, score), 65 | bbox=dict(facecolor='blue', alpha=0.5), 66 | fontsize=14, color='white') 67 | 68 | ax.set_title(('{} detections with ' 69 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 70 | thresh), 71 | fontsize=14) 72 | plt.axis('off') 73 | plt.tight_layout() 74 | plt.draw() 75 | 76 | def demo(net, image_name): 77 | """Detect object classes in an image using pre-computed object proposals.""" 78 | 79 | # Load the demo image 80 | im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) 81 | im = cv2.imread(im_file) 82 | 83 | # Detect all object classes and regress object bounds 84 | timer = Timer() 85 | timer.tic() 86 | scores, boxes 
= im_detect(net, im) 87 | timer.toc() 88 | print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0])) 89 | 90 | # Visualize detections for each class 91 | CONF_THRESH = 0.8 92 | NMS_THRESH = 0.3 93 | for cls_ind, cls in enumerate(CLASSES[1:]): 94 | cls_ind += 1 # because we skipped background 95 | cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] 96 | cls_scores = scores[:, cls_ind] 97 | dets = np.hstack((cls_boxes, 98 | cls_scores[:, np.newaxis])).astype(np.float32) 99 | keep = nms(torch.from_numpy(dets), NMS_THRESH) 100 | dets = dets[keep.numpy(), :] 101 | vis_detections(im, cls, dets, thresh=CONF_THRESH) 102 | 103 | def parse_args(): 104 | """Parse input arguments.""" 105 | parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo') 106 | parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16 res101]', 107 | choices=NETS.keys(), default='res101') 108 | parser.add_argument('--dataset', dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]', 109 | choices=DATASETS.keys(), default='pascal_voc_0712') 110 | args = parser.parse_args() 111 | 112 | return args 113 | 114 | if __name__ == '__main__': 115 | cfg.TEST.HAS_RPN = True # Use RPN for proposals 116 | args = parse_args() 117 | 118 | # model path 119 | demonet = args.demo_net 120 | dataset = args.dataset 121 | saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default', 122 | NETS[demonet][0] %(70000 if dataset == 'pascal_voc' else 110000)) 123 | 124 | 125 | if not os.path.isfile(saved_model): 126 | raise IOError(('{:s} not found.\nDid you download the proper networks from ' 127 | 'our server and place them properly?').format(saved_model)) 128 | 129 | # load network 130 | if demonet == 'vgg16': 131 | net = vgg16() 132 | elif demonet == 'res101': 133 | net = resnetv1(num_layers=101) 134 | else: 135 | raise NotImplementedError 136 | net.create_architecture(21, 137 | tag='default', anchor_scales=[8, 16, 32]) 138 | 139 | net.load_state_dict(torch.load(saved_model)) 140 | 141 | net.eval() 142 | net.cuda() 143 | 144 | print('Loaded network {:s}'.format(saved_model)) 145 | 146 | im_names = ['000456.jpg', '000542.jpg', '001150.jpg', 147 | '001763.jpg', '004545.jpg'] 148 | for im_name in im_names: 149 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 150 | print('Demo for data/demo/{}'.format(im_name)) 151 | demo(net, im_name) 152 | 153 | plt.show() 154 | -------------------------------------------------------------------------------- /tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | # Reval = re-eval. Re-evaluate saved detections. 
11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import _init_paths 16 | from model.test import apply_nms 17 | from model.config import cfg 18 | from datasets.factory import get_imdb 19 | import pickle 20 | import os, sys, argparse 21 | import numpy as np 22 | import pprint 23 | 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Re-evaluate results') 30 | parser.add_argument('output_dir', nargs=1, help='results directory', 31 | type=str) 32 | parser.add_argument('--imdb', dest='imdb_name', 33 | help='dataset to re-evaluate', 34 | default='voc_2007_test', type=str) 35 | parser.add_argument('--matlab', dest='matlab_eval', 36 | help='use matlab for evaluation', 37 | action='store_true') 38 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 39 | action='store_true') 40 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 41 | action='store_true') 42 | 43 | if len(sys.argv) == 1: 44 | parser.print_help() 45 | sys.exit(1) 46 | 47 | args = parser.parse_args() 48 | return args 49 | 50 | 51 | def from_dets(imdb_name, output_dir, args): 52 | imdb = get_imdb(imdb_name) 53 | imdb.competition_mode(args.comp_mode) 54 | imdb.config['matlab_eval'] = args.matlab_eval 55 | with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: 56 | dets = pickle.load(f) 57 | 58 | if args.apply_nms: 59 | print('Applying NMS to all detections') 60 | nms_dets = apply_nms(dets, cfg.TEST.NMS) 61 | else: 62 | nms_dets = dets 63 | 64 | print('Evaluating detections') 65 | imdb.evaluate_detections(nms_dets, output_dir) 66 | 67 | 68 | if __name__ == '__main__': 69 | args = parse_args() 70 | pprint.pprint(args) 71 | output_dir = os.path.abspath(args.output_dir[0]) 72 | imdb_name = args.imdb_name 73 | from_dets(imdb_name, output_dir, args) 74 | -------------------------------------------------------------------------------- /tools/reval_discovery.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | # Reval = re-eval. Re-evaluate saved detections. 
11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import _init_paths 16 | from model.test import apply_nms 17 | from model.config import cfg 18 | from datasets.factory import get_imdb 19 | import pickle 20 | import os, sys, argparse 21 | import numpy as np 22 | import pprint 23 | 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Re-evaluate results') 30 | parser.add_argument('output_dir', nargs=1, help='results directory', 31 | type=str) 32 | parser.add_argument('--imdb', dest='imdb_name', 33 | help='dataset to re-evaluate', 34 | default='voc_2007_test', type=str) 35 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 36 | action='store_true') 37 | 38 | if len(sys.argv) == 1: 39 | parser.print_help() 40 | sys.exit(1) 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def from_dets(imdb_name, output_dir, args): 47 | imdb = get_imdb(imdb_name) 48 | imdb.competition_mode(args.comp_mode) 49 | with open(os.path.join(output_dir, 'discovery.pkl'), 'rb') as f: 50 | dets = pickle.load(f) 51 | 52 | 53 | print('Evaluating detections') 54 | imdb.evaluate_discovery(dets, output_dir) 55 | 56 | 57 | if __name__ == '__main__': 58 | args = parse_args() 59 | pprint.pprint(args) 60 | output_dir = os.path.abspath(args.output_dir[0]) 61 | imdb_name = args.imdb_name 62 | from_dets(imdb_name, output_dir, args) 63 | -------------------------------------------------------------------------------- /tools/show_boxes_results.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Dec 25 01:50:15 2017 4 | 5 | @author: jjwang 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | 13 | import _init_paths 14 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 15 | from datasets.factory import get_imdb 16 | import datasets.imdb 17 | import argparse 18 | import pprint 19 | import numpy as np 20 | import sys 21 | import os 22 | import pickle as pickle 23 | import cv2 24 | from matplotlib import pyplot as plt 25 | 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='show the images and the resulting detection boxes') 32 | parser.add_argument('--box', default='/DATA3_DB7/data/jjwang/workspace/wsFaster-rcnn/output/vgg16/voc_2007_test/WSDDN_PRE_50000/vgg16_faster_rcnn_iter_90000/wsddn/detections.pkl', help='boxes pkl file to load') 33 | parser.add_argument('--thr', default=0.1, type=float, help='score threshold for drawing a detection') 34 | 35 | if len(sys.argv) == 1: 36 | parser.print_help() 37 | sys.exit(1) 38 | 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | 44 | 45 | 46 | if __name__ == '__main__': 47 | args = parse_args() 48 | print('Called with args:') 49 | print(args) 50 | 51 | 52 | 53 | with open(args.box, 'rb') as fid: 54 | try: 55 | content = pickle.load(fid) 56 | except: 57 | content = pickle.load(fid, encoding='bytes') 58 | 59 | 60 | boxpathList = args.box.split('/') 61 | save_base = '/'.join(boxpathList[-5:-1]) 62 | save_path = os.path.join('../cache',save_base) 63 | save_path = os.path.join(save_path, boxpathList[-1].split('.')[0]) 64 | if not os.path.exists(save_path): 65 | os.makedirs(save_path) 66 | save_path = '../cache/' + save_path 67 | imdbname =
boxpathList[-5] 68 | print('getting imdb {:s}'.format(imdbname)) 69 | imdb = get_imdb('voc_2007_test') 70 | 71 | for idx in range(len(imdb.image_index)): 72 | im = cv2.imread(imdb.image_path_at(idx)) 73 | im = im[:,:,::-1] 74 | height, width, depth = im.shape 75 | dpi = 80 76 | plt.figure(figsize=(width/dpi,height/dpi),dpi=dpi) 77 | colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist() 78 | plt.imshow(im) # plot the image for matplotlib 79 | currentAxis = plt.gca() 80 | plt.axis('off') 81 | # scale each detection back up to the image 82 | # scale = torch.Tensor([rgb_image.shape[1::-1], rgb_image.shape[1::-1]]) 83 | for i in range(20): 84 | for j in range(len(content[i][idx])): 85 | score = content[i][idx][j][-1] 86 | if score > args.thr: # draw only detections above the --thr score threshold 87 | label_name = imdb._classes[i] 88 | display_txt = '%s: %.2f'%(label_name, score) 89 | pt = content[i][idx][j][:-1] 90 | coords = (pt[0], pt[1]), pt[2]-pt[0]+1, pt[3]-pt[1]+1 91 | color = colors[i] 92 | currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2)) 93 | currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor':color, 'alpha':0.5}) 94 | 95 | plt.savefig(save_path + '/' + imdb.image_index[idx] + '.jpg') 96 | plt.close() 97 | if idx % 500 == 0: 98 | print(idx) 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import _init_paths 11 | from model.test import test_net 12 | from model.test_train import test_train_net 13 | from model.config import cfg, cfg_from_file, cfg_from_list 14 | from datasets.factory import get_imdb 15 | import argparse 16 | import pprint 17 | import time, os, sys 18 | 19 | from nets.vgg16 import vgg16 20 | from nets.resnet_v1 import resnetv1 21 | from nets.mobilenet_v1 import mobilenetv1 22 | 23 | import torch 24 | 25 | def parse_args(): 26 | """ 27 | Parse input arguments 28 | """ 29 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 30 | parser.add_argument('--cfg', dest='cfg_file', 31 | help='optional config file', default=None, type=str) 32 | parser.add_argument('--model', dest='model', 33 | help='model to test', 34 | default=None, type=str) 35 | parser.add_argument('--imdb', dest='imdb_name', 36 | help='dataset to test', 37 | default='voc_2007_test', type=str) 38 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 39 | action='store_true') 40 | parser.add_argument('--num_dets', dest='max_per_image', 41 | help='max number of detections per image', 42 | default=100, type=int) 43 | parser.add_argument('--tag', dest='tag', 44 | help='tag of the model', 45 | default='', type=str) 46 | parser.add_argument('--net', dest='net', 47 | help='vgg16, res50, res101, res152, mobile', 48 | default='res50', type=str) 49 | parser.add_argument('--set', dest='set_cfgs', 50 | help='set config keys', default=None, 51 | nargs=argparse.REMAINDER) 52 | 53 | if len(sys.argv) == 1: 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | args = parser.parse_args() 58 | return args 59 | 60 | if __name__ ==
'__main__': 61 | args = parse_args() 62 | 63 | print('Called with args:') 64 | print(args) 65 | 66 | if args.cfg_file is not None: 67 | cfg_from_file(args.cfg_file) 68 | if args.set_cfgs is not None: 69 | cfg_from_list(args.set_cfgs) 70 | 71 | print('Using config:') 72 | pprint.pprint(cfg) 73 | 74 | if 1: # always use CUDA 75 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 76 | else: 77 | torch.set_default_tensor_type('torch.FloatTensor') 78 | 79 | # If a model checkpoint is given, derive the output filename from it; 80 | # otherwise fall back to the initialization weights 81 | if args.model: 82 | filename = os.path.splitext(os.path.basename(args.model))[0] 83 | else: 84 | filename = os.path.splitext(os.path.basename(args.weight))[0] 85 | 86 | tag = args.tag 87 | tag = tag if tag else 'default' 88 | filename = tag + '/' + filename # e.g. default/vgg16_faster_rcnn_iter_15000 89 | 90 | imdb = get_imdb(args.imdb_name) 91 | imdb.competition_mode(args.comp_mode) 92 | 93 | # load network 94 | if args.net == 'vgg16': 95 | net = vgg16() 96 | elif args.net == 'res50': 97 | net = resnetv1(num_layers=50) 98 | elif args.net == 'res101': 99 | net = resnetv1(num_layers=101) 100 | elif args.net == 'res152': 101 | net = resnetv1(num_layers=152) 102 | elif args.net == 'mobile': 103 | net = mobilenetv1() 104 | else: 105 | raise NotImplementedError 106 | 107 | # load model 108 | net.create_architecture(imdb.num_classes, tag='default') 109 | 110 | net.eval() 111 | net.cuda() 112 | 113 | if args.model: 114 | print(('Loading model checkpoint from {:s}').format(args.model)) 115 | net.load_state_dict(torch.load(args.model)) 116 | print('Loaded.') 117 | else: 118 | print(('Loading initial weights from {:s}').format(args.weight)) 119 | print('Loaded.') 120 | 121 | if args.imdb_name[-4:] == 'test': 122 | test_net(net, imdb, filename, max_per_image=args.max_per_image) 123 | else: 124 | test_train_net(net, imdb, filename, max_per_image=args.max_per_image) -------------------------------------------------------------------------------- /tools/trainval_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick 5 | # Modified to train WSDDN 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import _init_paths 12 | from model.train_val import get_training_roidb, train_net 13 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir 14 | from datasets.factory import get_imdb 15 | import datasets.imdb 16 | import argparse 17 | import pprint 18 | import numpy as np 19 | import sys 20 | import torch 21 | 22 | from nets.vgg16 import vgg16 23 | from nets.resnet_v1 import resnetv1 24 | from nets.mobilenet_v1 import mobilenetv1 25 | 26 | def parse_args(): 27 | """ 28 | Parse input arguments 29 | """ 30 | parser = argparse.ArgumentParser(description='Train a Faster R-CNN in a weakly supervised setting with WSDDN modules') 31 | parser.add_argument('--cfg', dest='cfg_file', 32 | help='optional config file', 33 | default=None, type=str) 34 | parser.add_argument('--weight', dest='weight', 35 | help='initialize with pretrained model weights', 36 | type=str) 37 | parser.add_argument('--wsddn', dest='wsddn', 38 | help='initialize with
pretrained wsddn model weights', 39 | type=str) 40 | parser.add_argument('--imdb', dest='imdb_name', 41 | help='dataset to train on', 42 | default='voc_2007_trainval', type=str) 43 | parser.add_argument('--imdbval', dest='imdbval_name', 44 | help='dataset to validate on', 45 | default='voc_2007_test', type=str) 46 | parser.add_argument('--iters', dest='max_iters', 47 | help='number of iterations to train', 48 | default=70000, type=int) 49 | parser.add_argument('--tag', dest='tag', 50 | help='tag of the model', 51 | default=None, type=str) 52 | parser.add_argument('--net', dest='net', 53 | help='vgg16, res50, res101, res152, mobile', 54 | default='res50', type=str) 55 | parser.add_argument('--set', dest='set_cfgs', 56 | help='set config keys', default=None, 57 | nargs=argparse.REMAINDER) 58 | 59 | if len(sys.argv) == 1: 60 | parser.print_help() 61 | sys.exit(1) 62 | 63 | args = parser.parse_args() 64 | return args 65 | 66 | 67 | def combined_roidb(imdb_names): 68 | """ 69 | Combine multiple roidbs 70 | """ 71 | 72 | def get_roidb(imdb_name): 73 | imdb = get_imdb(imdb_name) 74 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 75 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 76 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 77 | roidb = get_training_roidb(imdb) 78 | return roidb 79 | 80 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 81 | roidb = roidbs[0] 82 | if len(roidbs) > 1: 83 | for r in roidbs[1:]: 84 | roidb.extend(r) 85 | tmp = get_imdb(imdb_names.split('+')[1]) 86 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 87 | else: 88 | imdb = get_imdb(imdb_names) 89 | return imdb, roidb 90 | 91 | 92 | if __name__ == '__main__': 93 | args = parse_args() 94 | 95 | print('Called with args:') 96 | print(args) 97 | 98 | if 1: # Always cuda 99 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 100 | else: 101 | torch.set_default_tensor_type('torch.FloatTensor') 102 | 103 | if args.cfg_file is not None: 104 | cfg_from_file(args.cfg_file) 105 | if args.set_cfgs is not None: 106 | cfg_from_list(args.set_cfgs) 107 | 108 | print('Using config:') 109 | pprint.pprint(cfg) 110 | 111 | np.random.seed(cfg.RNG_SEED) 112 | 113 | # train set 114 | imdb, roidb = combined_roidb(args.imdb_name) 115 | print('{:d} roidb entries'.format(len(roidb))) 116 | 117 | # output directory where the models are saved 118 | output_dir = get_output_dir(imdb, args.tag) 119 | print('Output will be saved to `{:s}`'.format(output_dir)) 120 | 121 | # tensorboard directory where the summaries are saved during training 122 | tb_dir = get_output_tb_dir(imdb, args.tag) 123 | print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir)) 124 | 125 | # also add the validation set, but with no flipping images 126 | orgflip = cfg.TRAIN.USE_FLIPPED 127 | cfg.TRAIN.USE_FLIPPED = False 128 | _, valroidb = combined_roidb(args.imdbval_name) 129 | print('{:d} validation roidb entries'.format(len(valroidb))) 130 | cfg.TRAIN.USE_FLIPPED = orgflip 131 | 132 | # load network 133 | if args.net == 'vgg16': 134 | net = vgg16() 135 | elif args.net == 'res50': 136 | net = resnetv1(num_layers=50) 137 | elif args.net == 'res101': 138 | net = resnetv1(num_layers=101) 139 | elif args.net == 'res152': 140 | net = resnetv1(num_layers=152) 141 | elif args.net == 'mobile': 142 | net = mobilenetv1() 143 | else: 144 | raise NotImplementedError 145 | 146 | train_net(net, imdb, roidb, valroidb, output_dir, tb_dir, 147 | pretrained_model=args.weight, 148 | wsddn_premodel=args.wsddn, 149 | 
max_iters=args.max_iters) 150 | --------------------------------------------------------------------------------
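A quick sanity check for the IoU helper in lib/utils/bbox.py above (a minimal sketch, not a file in the repository; it assumes numpy and torch are installed and that lib/ is on the PYTHONPATH, e.g. by importing tools/_init_paths.py first):

    import numpy as np
    from utils.bbox import bbox_overlaps  # lib/utils/bbox.py

    boxes = np.array([[0., 0., 9., 9.],
                      [5., 5., 14., 14.]], dtype=np.float32)
    queries = np.array([[0., 0., 9., 9.]], dtype=np.float32)

    # ndarray in, ndarray out: a (2, 1) IoU matrix between boxes and queries.
    print(bbox_overlaps(boxes, queries))  # approx. [[1.0], [0.143]]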