├── .gitignore
├── LICENSE
├── README.md
├── _init_paths.py
├── cfgs
    ├── res101.yml
    ├── res101_ls.yml
    ├── res50.yml
    └── vgg16.yml
├── demo.py
├── images
    ├── img1.jpg
    ├── img1_det.jpg
    ├── img1_det_res101.jpg
    ├── img2.jpg
    ├── img2_det.jpg
    ├── img2_det_res101.jpg
    ├── img3.jpg
    ├── img3_det.jpg
    ├── img3_det_res101.jpg
    ├── img4.jpg
    ├── img4_det.jpg
    └── img4_det_res101.jpg
├── lib
    ├── Makefile
    ├── datasets
    │   ├── VOCdevkit-matlab-wrapper
    │   │   ├── get_voc_opts.m
    │   │   ├── voc_eval.m
    │   │   └── xVOCap.m
    │   ├── __init__.py
    │   ├── coco.py
    │   ├── ds_utils.py
    │   ├── factory.py
    │   ├── imagenet.py
    │   ├── imdb.py
    │   ├── pascal_voc.py
    │   ├── pascal_voc_rbg.py
    │   ├── tools
    │   │   └── mcg_munge.py
    │   ├── vg.py
    │   ├── vg_eval.py
    │   └── voc_eval.py
    ├── make.sh
    ├── model
    │   ├── __init__.py
    │   ├── couplenet
    │   │   ├── __init__.py
    │   │   ├── couplenet.py
    │   │   └── resnet_atrous.py
    │   ├── faster_rcnn
    │   │   ├── __init__.py
    │   │   ├── faster_rcnn.py
    │   │   ├── resnet.py
    │   │   └── vgg16.py
    │   ├── nms
    │   │   ├── .gitignore
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── nms
    │   │   │   │   └── __init__.py
    │   │   ├── build.py
    │   │   ├── make.sh
    │   │   ├── nms_cpu.py
    │   │   ├── nms_gpu.py
    │   │   ├── nms_kernel.cu
    │   │   ├── nms_wrapper.py
    │   │   └── src
    │   │   │   ├── nms_cuda.c
    │   │   │   ├── nms_cuda.h
    │   │   │   ├── nms_cuda_kernel.cu
    │   │   │   └── nms_cuda_kernel.h
    │   ├── psroi_pooling
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── psroi_pooling
    │   │   │   │   └── __init__.py
    │   │   ├── build.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   └── psroi_pooling.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── psroi_pool.py
    │   │   └── src
    │   │   │   ├── cuda
    │   │   │       ├── psroi_pooling_kernel.cu
    │   │   │       └── psroi_pooling_kernel.h
    │   │   │   ├── psroi_pooling_cuda.c
    │   │   │   └── psroi_pooling_cuda.h
    │   ├── rfcn
    │   │   ├── __init__.py
    │   │   ├── resnet_atrous.py
    │   │   └── rfcn.py
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align
    │   │   │   │   └── __init__.py
    │   │   ├── build.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align.py
    │   │   ├── make.sh
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align.py
    │   │   └── src
    │   │   │   ├── roi_align.c
    │   │   │   ├── roi_align.h
    │   │   │   ├── roi_align_cuda.c
    │   │   │   ├── roi_align_cuda.h
    │   │   │   ├── roi_align_kernel.cu
    │   │   │   └── roi_align_kernel.h
    │   ├── roi_crop
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   ├── crop_resize
    │   │   │   │   └── __init__.py
    │   │   │   └── roi_crop
    │   │   │   │   └── __init__.py
    │   │   ├── build.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   ├── crop_resize.py
    │   │   │   ├── gridgen.py
    │   │   │   └── roi_crop.py
    │   │   ├── make.sh
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   ├── gridgen.py
    │   │   │   └── roi_crop.py
    │   │   └── src
    │   │   │   ├── roi_crop.c
    │   │   │   ├── roi_crop.h
    │   │   │   ├── roi_crop_cuda.c
    │   │   │   ├── roi_crop_cuda.h
    │   │   │   ├── roi_crop_cuda_kernel.cu
    │   │   │   └── roi_crop_cuda_kernel.h
    │   ├── roi_pooling
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pooling
    │   │   │   │   └── __init__.py
    │   │   ├── build.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pool.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pool.py
    │   │   └── src
    │   │   │   ├── roi_pooling.c
    │   │   │   ├── roi_pooling.h
    │   │   │   ├── roi_pooling_cuda.c
    │   │   │   ├── roi_pooling_cuda.h
    │   │   │   ├── roi_pooling_kernel.cu
    │   │   │   └── roi_pooling_kernel.h
    │   ├── rpn
    │   │   ├── __init__.py
    │   │   ├── anchor_target_layer.py
    │   │   ├── bbox_transform.py
    │   │   ├── generate_anchors.py
    │   │   ├── proposal_layer.py
    │   │   ├── proposal_target_layer_cascade.py
    │   │   └── rpn.py
    │   └── utils
    │   │   ├── .gitignore
    │   │   ├── __init__.py
    │   │   ├── bbox.c
    │   │   ├── bbox.pyx
    │   │   ├── blob.py
    │   │   ├── config.py
    │   │   ├── logger.py
    │   │   └── net_utils.py
    ├── pycocotools
    │   ├── UPSTREAM_REV
    │   ├── __init__.py
    │   ├── _mask.c
    │   ├── _mask.pyx
    │   ├── coco.py
    │   ├── cocoeval.py
    │   ├── license.txt
    │   ├── mask.py
    │   ├── maskApi.c
    │   └── maskApi.h
    ├── roi_data_layer
    │   ├── __init__.py
    │   ├── minibatch.py
    │   ├── roibatchLoader.py
    │   └── roidb.py
    └── setup.py
├── requirements.txt
├── test_net.py
└── trainval_net.py


/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | .idea/
3 | *.pyc
4 | *~
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Jianwei Yang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # A Pytorch Implementation of R-FCN/CoupleNet
  2 | 
  3 | This repo has moved to [princewang1994/RFCN_CoupleNet.pytorch](https://github.com/princewang1994/RFCN_CoupleNet.pytorch), it will stop updating here.
  4 | 
  5 | ## Introduction
  6 | 
  7 | This project is a PyTorch implementation of R-FCN and CoupleNet. A large part of the code is adapted from [jwyang/faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch). The R-FCN structure follows [Caffe R-FCN](https://github.com/daijifeng001/R-FCN) and [Py-R-FCN](https://github.com/YuwenXiong/py-R-FCN).
  8 | 
  9 | - For R-FCN, mAP@0.5 reached 73.2 in VOC2007 trainval dataset
 10 | - For CoupleNet, mAP@0.5 reached 75.2 in VOC2007 trainval dataset
 11 | 
 12 | ## R-FCN
 13 | 
 14 | arXiv:1605.06409: [R-FCN: Object Detection via Region-based Fully Convolutional Networks](https://arxiv.org/abs/1605.06409)
 15 | 
 16 | ![15063403082127](http://princepicbed.oss-cn-beijing.aliyuncs.com/blog_201807132042010817.jpg)
 17 | 
 18 | This repo has following modification compare to [jwyang/faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch):
 19 | 
 20 | - **R-FCN architecture**: We referred to the original [Caffe version](https://github.com/daijifeng001/R-FCN) of R-FCN; the main structure of R-FCN is shown in the following figure.
 21 | - **PS-RoIPooling with CUDA**: based on another PyTorch implementation of R-FCN (pytorch_RFCN). I have modified it to support multi-image training (batch sizes larger than 1 are supported).
 22 | - **Multi-scale training:** As in the original paper, each image is randomly resized to one of several resolutions (400, 500, 600, 700, 800) during training, while at test time we use a fixed input size (600). This gives a 1.2 mAP gain in our experiments.
 23 | - **OHEM:** This repo implements the Online Hard Example Mining (OHEM) method from the paper. It is disabled by default; set `OHEM: True` in `cfgs/res101.yml` to enable it. Unfortunately, it caused a slight performance degradation in my experiments.
 24 | 
 25 | ![](http://princepicbed.oss-cn-beijing.aliyuncs.com/blog_20180817160334.jpg)
 26 | 
 27 | ## CoupleNet
 28 | 
 29 | arXiv:1708.02863:[CoupleNet: Coupling Global Structure with Local Parts for Object Detection](https://arxiv.org/abs/1708.02863)
 30 | 
 31 | ![](http://princepicbed.oss-cn-beijing.aliyuncs.com/blog_20180816205255.png)
 32 | 
 33 | - Making changes based on R-FCN
 34 | - Implement local/global FCN in CoupleNet
 35 | 
 36 | ## Tutorial
 37 | 
 38 | * [R-FCN blog](http://blog.prince2015.club/2018/07/13/R-FCN/)
 39 | 
 40 | ## Benchmarking
 41 | 
 42 | We benchmark our code on PASCAL VOC using two different architectures: R-FCN and CoupleNet. The results are shown below:
 43 | 
 44 | 1). PASCAL VOC 2007 (Train: 07_trainval - Test: 07_test, scale=400, 500, 600, 700, 800)
 45 | 
 46 | model    | #GPUs | batch size | lr        | lr_decay | max_epoch     |  time/epoch | mem/GPU | mAP
 47 | ---------|--------|-----|--------|-----|-----|-------|--------|-----
 48 | [R-FCN](https://drive.google.com/file/d/1JMh0gguOozEEIRijQxkQnMKLTAp2_iu5/view?usp=sharing)  | 1 | 2 | 4e-3 | 8   | 20  |  0.88 hr | 3000 MB  | 73.8
 49 | CoupleNet  | 1 | 2 | 4e-3 | 8   | 20 |  0.60 hr | 8900 MB  | 75.2
 50 | 
 51 | - The pretrained model for R-FCN (VOC2007) has been released; see the `Test` section below.
 52 | 
 53 | 
 54 | ## Preparation
 55 | 
 56 | 
 57 | First of all, clone the code
 58 | ```
 59 | $ git clone https://github.com/princewang1994/R-FCN.pytorch.git
 60 | ```
 61 | 
 62 | Then, create a folder:
 63 | ```
 64 | $ cd R-FCN.pytorch && mkdir data
 65 | $ cd data
 66 | $ ln -s $VOC_DEVKIT_ROOT .
 67 | ```
 68 | 
 69 | ### prerequisites
 70 | 
 71 | * Python 3.6
 72 | * PyTorch 0.3.0 (**0.4.0 is NOT supported because of some errors**)
 73 | * CUDA 8.0 or higher
 74 | 
 75 | ### Data Preparation
 76 | 
 77 | * **PASCAL_VOC 07+12**: Please follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare the VOC datasets (any equivalent guide works as well). After downloading the data, create symlinks to it in the `data/` folder.
 78 | * **Pretrained ResNet**: download from [here](https://drive.google.com/file/d/1I4Jmh2bU6BJVnwqfg5EDe8KGGdec2UE8/view?usp=sharing) and put it to `$RFCN_ROOT/data/pretrained_model/resnet101_caffe.pth`.
 79 | 
 80 | 
 81 | ### Compilation
 82 | 
 83 | As pointed out by [ruotianluo/pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn), choose the right `-arch` in `make.sh` file, to compile the cuda code:
 84 | 
 85 | | GPU model  | Architecture |
 86 | | ------------- | ------------- |
 87 | | TitanX (Maxwell/Pascal) | sm_52 |
 88 | | GTX 960M | sm_50 |
 89 | | GTX 1080 (Ti) | sm_61 |
 90 | | Grid K520 (AWS g2.2xlarge) | sm_30 |
 91 | | Tesla K80 (AWS p2.xlarge) | sm_37 |
 92 | 
 93 | More details about setting the architecture can be found [here](https://developer.nvidia.com/cuda-gpus) or [here](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
 94 | 
 95 | Install all the python dependencies using pip:
 96 | ```
 97 | $ pip install -r requirements.txt
 98 | ```
 99 | 
100 | Compile the cuda dependencies using following simple commands:
101 | 
102 | ```
103 | $ cd lib
104 | $ sh make.sh
105 | ```
106 | 
107 | It will compile all the modules you need, including NMS, ROI_Pooling, ROI_Align and ROI_Crop. The default version is compiled with Python 2.7; please recompile it yourself if you are using a different Python version.
108 | 
109 | ## Train
110 | 
111 | To train a R-FCN model with ResNet101 on pascal_voc, simply run:
112 | ```
113 | $ CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net.py \
114 | 				   --arch rfcn \
115 |                    --dataset pascal_voc --net res101 \
116 |                    --bs $BATCH_SIZE --nw $WORKER_NUMBER \
117 |                    --lr $LEARNING_RATE --lr_decay_step $DECAY_STEP \
118 |                    --cuda
119 | ```
120 | 
121 | - Set `--s` to distinguish different experiments (sessions).
122 | - For CoupleNet training, replace `--arch rfcn` with `--arch couplenet`. Other arguments should be adjusted for your machine (e.g. a larger learning rate for a bigger batch size).
123 | - Models are saved to `$RFCN_ROOT/save`.
124 | 
125 | ## Test
126 | 
127 | If you want to evaluate the detection performance of a pre-trained model on the pascal_voc test set, simply run
128 | ```
129 | $ python test_net.py --dataset pascal_voc --arch rfcn \
130 | 				   --net res101 \
131 |                    --checksession $SESSION \
132 |                    --checkepoch $EPOCH \
133 |                    --checkpoint $CHECKPOINT \
134 |                    --cuda
135 | ```
136 | - Specify the model session (`--s` in the training phase), checkepoch and checkpoint, e.g., SESSION=1, EPOCH=6, CHECKPOINT=5010.
137 | 
138 | ###  Pretrained Model
139 | 
140 | - R-FCN VOC2007: [faster_rcnn_2_12_5010.pth](https://drive.google.com/file/d/1JMh0gguOozEEIRijQxkQnMKLTAp2_iu5/view?usp=sharing)
141 | 
142 | Download it from the link above and put it at `save/rfcn/res101/pascal_voc/faster_rcnn_2_12_5010.pth`. Then set `$SESSION=2, $EPOCH=12, $CHECKPOINT=5010` in the test command. It should reach 73.2 mAP.
143 | 
144 | ## Demo
145 | 
146 | Below are some detection results:
147 | 
148 | <div style="color:#0000FF" align="center">
149 | <img src="images/img3_det_res101.jpg" width="430"/> <img src="images/img4_det_res101.jpg" width="430"/>
150 | </div>
151 | 
152 | ## Going to do
153 | 
154 | - Keep updating the architectures to reach the state of the art
155 | - More benchmarking in VOC0712/COCO
156 | - ~~RFCN Pretrained model for VOC07~~
157 | - CoupleNet pretrained model for VOC07
158 | - Adapt to fit PyTorch 0.4.0
159 | 
160 | ## Acknowledgement
161 | 
162 | This project is written by [Prince Wang](https://github.com/princewang1994), with thanks to [jwyang](https://github.com/jwyang), the author of the faster-rcnn.pytorch code.
163 | 


--------------------------------------------------------------------------------
/_init_paths.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | def add_path(path):
 5 |     if path not in sys.path:
 6 |         sys.path.insert(0, path)
 7 | 
# Directory that contains this file; the paths below are built relative to it.
this_dir = osp.dirname(__file__)

# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)

# Also put <this_dir>/data/coco/PythonAPI at the front of sys.path.
coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')
add_path(coco_path)
16 | 


--------------------------------------------------------------------------------
/cfgs/res101.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   DOUBLE_BIAS: False
13 |   LEARNING_RATE: 0.001
14 |   OHEM: False
15 | TEST:
16 |   HAS_RPN: True
17 | POOLING_SIZE: 7
18 | POOLING_MODE: align
19 | CROP_RESIZE_WITH_MAX_POOL: False
20 | 


--------------------------------------------------------------------------------
/cfgs/res101_ls.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   SCALES: [800]
13 |   DOUBLE_BIAS: False
14 |   LEARNING_RATE: 0.001
15 | TEST:
16 |   HAS_RPN: True
17 |   SCALES: [800]
18 |   MAX_SIZE: 1200
19 |   RPN_POST_NMS_TOP_N: 1000
20 | POOLING_SIZE: 7
21 | POOLING_MODE: align
22 | CROP_RESIZE_WITH_MAX_POOL: False
23 | 


--------------------------------------------------------------------------------
/cfgs/res50.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res50
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   # IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   WEIGHT_DECAY: 0.0001
13 |   DOUBLE_BIAS: False
14 |   SNAPSHOT_PREFIX: res50_faster_rcnn
15 | TEST:
16 |   HAS_RPN: True
17 | POOLING_MODE: crop
18 | 


--------------------------------------------------------------------------------
/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: vgg16
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   BATCH_SIZE: 256
10 |   LEARNING_RATE: 0.01
11 | TEST:
12 |   HAS_RPN: True
13 | POOLING_MODE: align
14 | CROP_RESIZE_WITH_MAX_POOL: False
15 | 


--------------------------------------------------------------------------------
/images/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img1.jpg


--------------------------------------------------------------------------------
/images/img1_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img1_det.jpg


--------------------------------------------------------------------------------
/images/img1_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img1_det_res101.jpg


--------------------------------------------------------------------------------
/images/img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img2.jpg


--------------------------------------------------------------------------------
/images/img2_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img2_det.jpg


--------------------------------------------------------------------------------
/images/img2_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img2_det_res101.jpg


--------------------------------------------------------------------------------
/images/img3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img3.jpg


--------------------------------------------------------------------------------
/images/img3_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img3_det.jpg


--------------------------------------------------------------------------------
/images/img3_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img3_det_res101.jpg


--------------------------------------------------------------------------------
/images/img4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img4.jpg


--------------------------------------------------------------------------------
/images/img4_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img4_det.jpg


--------------------------------------------------------------------------------
/images/img4_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img4_det_res101.jpg


--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
 1 | CUDA_PATH="/usr/local/cuda/"
 2 | CUDA_ARCH="-gencode arch=compute_61,code=sm_61"
 3 | ROOT=`pwd`
 4 | 
 5 | all: build_ext nms roi_pooling roi_align roi_crop psroi_pooling
 6 | 	ls
 7 | 
 8 | build_ext:
 9 | 	python setup.py build_ext --inplace
10 | 	rm -rf build
11 | 
12 | nms:
13 | 	# compile NMS
14 | 	cd model/nms/src; \
15 | 	echo "Compiling nms kernels by nvcc..."; \
16 | 	nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
17 | 		 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
18 | 
19 | 
20 | #cd ../
21 | #python build.py
22 | #
23 | ## compile roi_pooling
24 | #cd ../../
25 | #cd model/roi_pooling/src
26 | #echo "Compiling roi pooling kernels by nvcc..."
27 | #nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
28 | #	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
29 | #cd ../
30 | #python build.py
31 | #
32 | ## compile roi_align
33 | #cd ../../
34 | #cd model/roi_align/src
35 | #echo "Compiling roi align kernels by nvcc..."
36 | #nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
37 | #	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
38 | #cd ../
39 | #python build.py
40 | #
41 | ## compile roi_crop
42 | #cd ../../
43 | #cd model/roi_crop/src
44 | #echo "Compiling roi crop kernels by nvcc..."
45 | #nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
46 | #	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
47 | #cd ../
48 | #python build.py
49 | #
50 | ## compile roi_crop
51 | #cd ../../
52 | #cd model/psroi_pooling/src/cuda
53 | #echo "Compiling psroi pooling kernels by nvcc..."
54 | #nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu \
55 | #	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
56 | #cd ../../
57 | #python build.py
58 | 
59 | #clean:
60 | #    rm model/nms/src/*.o
61 | #    rm model/roi_pooling/src/*.o
62 | #    rm model/roi_align/src/*.o
63 | #    rm model/roi_crop/src/*.o
64 | #    rm model/psroi_pooling/src/cuda/*.o
65 | 
66 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Run the devkit's VOCinit from PATH and return VOCopts.
%
%   Temporarily changes into PATH, adds its 'VOCcode' folder to the MATLAB
%   path and runs VOCinit. The working directory and the MATLAB path are
%   restored before returning and before raising an error.
%   NOTE(review): VOCopts is presumably assigned by the VOCinit script;
%   confirm against the devkit.

tmp = pwd;
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo the path and directory changes before reporting the failure.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);
15 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detections for every class in VOCopts.classes.
%
%   path       - devkit directory passed to get_voc_opts
%   comp_id    - competition id forwarded to the per-class evaluation
%   test_set   - stored in VOCopts.testset before evaluating
%   output_dir - directory forwarded to voc_eval_cls for its output files
%
%   Returns a struct array with one entry per class (see voc_eval_cls) and
%   prints each class AP followed by the mean AP, scaled by 100.

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
% Collect the per-class APs into a column vector.
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 | 
function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate one class and save its precision/recall results.
%
%   Returns a struct with fields recall, prec, ap and ap_auc. The same
%   values are saved to <output_dir>/<cls>_pr.mat. When evaluation runs,
%   the current figure is also printed to <output_dir>/<cls>_pr.jpg.

test_set = VOCopts.testset;
% Characters 4..end of the dataset name are used as the year.
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is not used anywhere else in this function.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

% Defaults reported when evaluation is skipped.
recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate unless this is the 'test' set of a year after 2007.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
function ap = xVOCap(rec,prec)
% XVOCAP  Average precision as the area under the precision/recall curve.
% From the PASCAL VOC 2011 devkit
%
%   rec and prec are concatenated vertically with scalars below, so they
%   must be column vectors (or empty).

% Pad the curve with sentinel points at recall 0 and recall 1.
mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% Sweep right to left so each precision is the maximum of itself and all
% precisions to its right.
for i=numel(mpre)-1:-1:1
    mpre(i)=max(mpre(i),mpre(i+1));
end
% Indices where recall changes; sum recall step times precision there.
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 | 


--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast/er R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Ross Girshick
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | 
12 | 
def unique_boxes(boxes, scale=1.0):
  """Return the sorted indices of the first occurrence of each distinct box.

  Each box is multiplied by ``scale``, rounded, and collapsed into a single
  number by weighting its four coordinates with 1, 1e3, 1e6 and 1e9; boxes
  that collapse to the same number count as duplicates.
  """
  weights = np.array([1, 1e3, 1e6, 1e9])
  keys = np.round(boxes * scale).dot(weights)
  first_seen = np.unique(keys, return_index=True)[1]
  return np.sort(first_seen)
19 | 
20 | 
def xywh_to_xyxy(boxes):
  """Turn rows of [x y w h] into rows of [x1 y1 x2 y2] (inclusive corners)."""
  top_left = boxes[:, 0:2]
  extent = boxes[:, 2:4]
  # The far corner is inclusive, hence the -1.
  bottom_right = top_left + extent - 1
  return np.hstack((top_left, bottom_right))
24 | 
25 | 
def xyxy_to_xywh(boxes):
  """Turn rows of [x1 y1 x2 y2] (inclusive corners) into rows of [x y w h]."""
  top_left = boxes[:, 0:2]
  bottom_right = boxes[:, 2:4]
  # Both corners are inclusive, so the size is the difference plus one.
  extent = bottom_right - top_left + 1
  return np.hstack((top_left, extent))
29 | 
30 | 
def validate_boxes(boxes, width=0, height=0):
  """Assert that every [x1 y1 x2 y2] row is well-formed and inside the image.

  Raises AssertionError when a coordinate is negative, a corner pair is
  inverted, or ``x2``/``y2`` is not strictly below ``width``/``height``.
  With the default ``width=0, height=0`` any non-empty set of boxes fails
  the bounds checks, so callers are expected to pass the image size.
  """
  x1, y1, x2, y2 = (boxes[:, col] for col in range(4))
  # Top-left corner must be non-negative.
  assert (x1 >= 0).all()
  assert (y1 >= 0).all()
  # Bottom-right corner must not precede the top-left corner.
  assert (x2 >= x1).all()
  assert (y2 >= y1).all()
  # Bottom-right corner must lie inside the image.
  assert (x2 < width).all()
  assert (y2 < height).all()
43 | 
44 | 
def filter_small_boxes(boxes, min_size):
  """Return the indices of boxes at least ``min_size`` wide and tall.

  Args:
    boxes: array whose rows start with [x1 y1 x2 y2].
    min_size: smallest accepted value for both ``x2 - x1`` and ``y2 - y1``.

  Returns:
    1-D array of row indices, in ascending order, of the boxes to keep.

  Width and height are measured as plain coordinate differences (no +1).
  Both sides use the same inclusive threshold; previously height used a
  strict ``>``, so a box exactly ``min_size`` tall was dropped while one
  exactly ``min_size`` wide was kept.
  """
  w = boxes[:, 2] - boxes[:, 0]
  h = boxes[:, 3] - boxes[:, 1]
  keep = np.where((w >= min_size) & (h >= min_size))[0]
  return keep
50 | 


--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Factory method for easily getting imdbs by name."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | __sets = {}
14 | from datasets.pascal_voc import pascal_voc
15 | from datasets.coco import coco
16 | from datasets.imagenet import imagenet
17 | from datasets.vg import vg
18 | 
19 | import numpy as np
20 | 
21 | # Set up voc_<year>_<split>
22 | for year in ['2007', '2012']:
23 |   for split in ['train', 'val', 'trainval', 'test']:
24 |     name = 'voc_{}_{}'.format(year, split)
25 |     __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
26 | 
27 | # Set up coco_2014_<split>
28 | for year in ['2014']:
29 |   for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
30 |     name = 'coco_{}_{}'.format(year, split)
31 |     __sets[name] = (lambda split=split, year=year: coco(split, year))
32 | 
33 | # Set up coco_2014_cap_<split>
34 | for year in ['2014']:
35 |   for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']:
36 |     name = 'coco_{}_{}'.format(year, split)
37 |     __sets[name] = (lambda split=split, year=year: coco(split, year))
38 | 
39 | # Set up coco_2015_<split>
40 | for year in ['2015']:
41 |   for split in ['test', 'test-dev']:
42 |     name = 'coco_{}_{}'.format(year, split)
43 |     __sets[name] = (lambda split=split, year=year: coco(split, year))
44 | 
45 | # Set up vg_<split>
46 | # for version in ['1600-400-20']:
47 | #     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
48 | #         name = 'vg_{}_{}'.format(version,split)
49 | #         __sets[name] = (lambda split=split, version=version: vg(version, split))
50 | for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
51 |     for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
52 |         name = 'vg_{}_{}'.format(version,split)
53 |         __sets[name] = (lambda split=split, version=version: vg(version, split))
54 |         
55 | # set up image net.
56 | for split in ['train', 'val', 'val1', 'val2', 'test']:
57 |     name = 'imagenet_{}'.format(split)
58 |     devkit_path = 'data/imagenet/ILSVRC/devkit'
59 |     data_path = 'data/imagenet/ILSVRC'
60 |     __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))
61 | 
def get_imdb(name):
  """Build and return the imdb (image database) registered under ``name``.

  The registered factory is called on every lookup, so each call returns
  whatever a fresh invocation of that factory produces.

  Raises:
    KeyError: if ``name`` is not a registered dataset.
  """
  if name in __sets:
    return __sets[name]()
  raise KeyError('Unknown dataset: {}'.format(name))
67 | 
68 | 
def list_imdbs():
  """Return the names of all registered imdbs as a new list."""
  return list(__sets)
72 | 


--------------------------------------------------------------------------------
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import sys
 4 | 
 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from
 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
 7 | so that it's consistent with those computed by Jan Hosang (see:
 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
 9 |   computing/research/object-recognition-and-scene-understanding/how-
10 |   good-are-detection-proposals-really/)
11 | 
12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
14 | """
15 | 
def munge(src_dir):
    """Move every file in ``src_dir`` into the nested ``MCG/mat`` layout.

    A file named ``<base><ext>`` is moved to
    ``MCG/mat/<base[:14]>/<base[:22]>/<base><ext>``, relative to the
    current working directory. For example:

        ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
        -> ./MCG/mat/COCO_val2014_0/COCO_val2014_000000193/COCO_val2014_000000193401.mat

    Destination directories are created on demand and each move is printed.
    """
    for entry in os.listdir(src_dir):
        stem = os.path.splitext(entry)[0]
        # first 14 chars / first 22 chars of the name form the two levels
        target_dir = os.path.join('MCG', 'mat', stem[:14], stem[:22])
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        origin = os.path.join(src_dir, entry)
        target = os.path.join(target_dir, entry)
        print('MV: {} -> {}'.format(origin, target))
        os.rename(origin, target)
34 | 
if __name__ == '__main__':
    # Usage: pass the source directory as the first command-line argument,
    # e.g. 'MCG-COCO-val2014-boxes'.
    munge(sys.argv[1])
40 | 


--------------------------------------------------------------------------------
/lib/datasets/vg_eval.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | # --------------------------------------------------------
  3 | # Fast/er R-CNN
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Bharath Hariharan
  6 | # --------------------------------------------------------
  7 | 
  8 | import xml.etree.ElementTree as ET
  9 | import os
 10 | import numpy as np
 11 | from .voc_eval import voc_ap
 12 | 
 13 | def vg_eval( detpath,
 14 |              gt_roidb,
 15 |              image_index,
 16 |              classindex,
 17 |              ovthresh=0.5,
 18 |              use_07_metric=False,
 19 |              eval_attributes=False):
 20 |     """rec, prec, ap, sorted_scores, npos = voc_eval(
 21 |                                 detpath, 
 22 |                                 gt_roidb,
 23 |                                 image_index,
 24 |                                 classindex,
 25 |                                 [ovthresh],
 26 |                                 [use_07_metric])
 27 | 
 28 |     Top level function that does the Visual Genome evaluation.
 29 | 
 30 |     detpath: Path to detections
 31 |     gt_roidb: List of ground truth structs.
 32 |     image_index: List of image ids.
 33 |     classindex: Category index
 34 |     [ovthresh]: Overlap threshold (default = 0.5)
 35 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 36 |         (default False)
 37 |     """
 38 |     # extract gt objects for this class
 39 |     class_recs = {}
 40 |     npos = 0
 41 |     for item,imagename in zip(gt_roidb,image_index):
 42 |         if eval_attributes:
 43 |             bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :]
 44 |         else:
 45 |             bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :]
 46 |         difficult = np.zeros((bbox.shape[0],)).astype(np.bool)
 47 |         det = [False] * bbox.shape[0]
 48 |         npos = npos + sum(~difficult)        
 49 |         class_recs[str(imagename)] = {'bbox': bbox,
 50 |                                  'difficult': difficult,
 51 |                                  'det': det}
 52 |     if npos == 0:
 53 |         # No ground truth examples
 54 |         return 0,0,0,0,npos
 55 | 
 56 |     # read dets
 57 |     with open(detpath, 'r') as f:
 58 |         lines = f.readlines()
 59 |     if len(lines) == 0:
 60 |         # No detection examples
 61 |         return 0,0,0,0,npos
 62 | 
 63 |     splitlines = [x.strip().split(' ') for x in lines]
 64 |     image_ids = [x[0] for x in splitlines]
 65 |     confidence = np.array([float(x[1]) for x in splitlines])
 66 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
 67 | 
 68 |     # sort by confidence
 69 |     sorted_ind = np.argsort(-confidence)
 70 |     sorted_scores = -np.sort(-confidence)
 71 |     BB = BB[sorted_ind, :]
 72 |     image_ids = [image_ids[x] for x in sorted_ind]
 73 | 
 74 |     # go down dets and mark TPs and FPs
 75 |     nd = len(image_ids)
 76 |     tp = np.zeros(nd)
 77 |     fp = np.zeros(nd)
 78 |     for d in range(nd):
 79 |         R = class_recs[image_ids[d]]
 80 |         bb = BB[d, :].astype(float)
 81 |         ovmax = -np.inf
 82 |         BBGT = R['bbox'].astype(float)
 83 | 
 84 |         if BBGT.size > 0:
 85 |             # compute overlaps
 86 |             # intersection
 87 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
 88 |             iymin = np.maximum(BBGT[:, 1], bb[1])
 89 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
 90 |             iymax = np.minimum(BBGT[:, 3], bb[3])
 91 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
 92 |             ih = np.maximum(iymax - iymin + 1., 0.)
 93 |             inters = iw * ih
 94 | 
 95 |             # union
 96 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
 97 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
 98 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
 99 | 
100 |             overlaps = inters / uni
101 |             ovmax = np.max(overlaps)
102 |             jmax = np.argmax(overlaps)
103 | 
104 |         if ovmax > ovthresh:
105 |             if not R['difficult'][jmax]:
106 |                 if not R['det'][jmax]:
107 |                     tp[d] = 1.
108 |                     R['det'][jmax] = 1
109 |                 else:
110 |                     fp[d] = 1.
111 |         else:
112 |             fp[d] = 1.
113 | 
114 |     # compute precision recall
115 |     fp = np.cumsum(fp)
116 |     tp = np.cumsum(tp)
117 |     rec = tp / float(npos)
118 |     # avoid divide by zero in case the first detection matches a difficult
119 |     # ground truth
120 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
121 |     ap = voc_ap(rec, prec, use_07_metric)
122 |     
123 |     return rec, prec, ap, sorted_scores, npos
124 | 


--------------------------------------------------------------------------------
/lib/datasets/voc_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import xml.etree.ElementTree as ET
 11 | import os
 12 | import pickle
 13 | import numpy as np
 14 | 
 15 | def parse_rec(filename):
 16 |   """ Parse a PASCAL VOC xml file """
 17 |   tree = ET.parse(filename)
 18 |   objects = []
 19 |   for obj in tree.findall('object'):
 20 |     obj_struct = {}
 21 |     obj_struct['name'] = obj.find('name').text
 22 |     obj_struct['pose'] = obj.find('pose').text
 23 |     obj_struct['truncated'] = int(obj.find('truncated').text)
 24 |     obj_struct['difficult'] = int(obj.find('difficult').text)
 25 |     bbox = obj.find('bndbox')
 26 |     obj_struct['bbox'] = [int(bbox.find('xmin').text),
 27 |                           int(bbox.find('ymin').text),
 28 |                           int(bbox.find('xmax').text),
 29 |                           int(bbox.find('ymax').text)]
 30 |     objects.append(obj_struct)
 31 | 
 32 |   return objects
 33 | 
 34 | 
 35 | def voc_ap(rec, prec, use_07_metric=False):
 36 |   """ ap = voc_ap(rec, prec, [use_07_metric])
 37 |   Compute VOC AP given precision and recall.
 38 |   If use_07_metric is true, uses the
 39 |   VOC 07 11 point method (default:False).
 40 |   """
 41 |   if use_07_metric:
 42 |     # 11 point metric
 43 |     ap = 0.
 44 |     for t in np.arange(0., 1.1, 0.1):
 45 |       if np.sum(rec >= t) == 0:
 46 |         p = 0
 47 |       else:
 48 |         p = np.max(prec[rec >= t])
 49 |       ap = ap + p / 11.
 50 |   else:
 51 |     # correct AP calculation
 52 |     # first append sentinel values at the end
 53 |     mrec = np.concatenate(([0.], rec, [1.]))
 54 |     mpre = np.concatenate(([0.], prec, [0.]))
 55 | 
 56 |     # compute the precision envelope
 57 |     for i in range(mpre.size - 1, 0, -1):
 58 |       mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 59 | 
 60 |     # to calculate area under PR curve, look for points
 61 |     # where X axis (recall) changes value
 62 |     i = np.where(mrec[1:] != mrec[:-1])[0]
 63 | 
 64 |     # and sum (\Delta recall) * prec
 65 |     ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 66 |   return ap
 67 | 
 68 | 
 69 | def voc_eval(detpath,
 70 |              annopath,
 71 |              imagesetfile,
 72 |              classname,
 73 |              cachedir,
 74 |              ovthresh=0.5,
 75 |              use_07_metric=False):
 76 |   """rec, prec, ap = voc_eval(detpath,
 77 |                               annopath,
 78 |                               imagesetfile,
 79 |                               classname,
 80 |                               [ovthresh],
 81 |                               [use_07_metric])
 82 | 
 83 |   Top level function that does the PASCAL VOC evaluation.
 84 | 
 85 |   detpath: Path to detections
 86 |       detpath.format(classname) should produce the detection results file.
 87 |   annopath: Path to annotations
 88 |       annopath.format(imagename) should be the xml annotations file.
 89 |   imagesetfile: Text file containing the list of images, one image per line.
 90 |   classname: Category name (duh)
 91 |   cachedir: Directory for caching the annotations
 92 |   [ovthresh]: Overlap threshold (default = 0.5)
 93 |   [use_07_metric]: Whether to use VOC07's 11 point AP computation
 94 |       (default False)
 95 |   """
 96 |   # assumes detections are in detpath.format(classname)
 97 |   # assumes annotations are in annopath.format(imagename)
 98 |   # assumes imagesetfile is a text file with each line an image name
 99 |   # cachedir caches the annotations in a pickle file
100 | 
101 |   # first load gt
102 |   if not os.path.isdir(cachedir):
103 |     os.mkdir(cachedir)
104 |   cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
105 |   # read list of images
106 |   with open(imagesetfile, 'r') as f:
107 |     lines = f.readlines()
108 |   imagenames = [x.strip() for x in lines]
109 | 
110 |   if not os.path.isfile(cachefile):
111 |     # load annotations
112 |     recs = {}
113 |     for i, imagename in enumerate(imagenames):
114 |       recs[imagename] = parse_rec(annopath.format(imagename))
115 |       if i % 100 == 0:
116 |         print('Reading annotation for {:d}/{:d}'.format(
117 |           i + 1, len(imagenames)))
118 |     # save
119 |     print('Saving cached annotations to {:s}'.format(cachefile))
120 |     with open(cachefile, 'wb') as f:
121 |       pickle.dump(recs, f)
122 |   else:
123 |     # load
124 |     with open(cachefile, 'rb') as f:
125 |       try:
126 |         recs = pickle.load(f)
127 |       except:
128 |         recs = pickle.load(f, encoding='bytes')
129 | 
130 |   # extract gt objects for this class
131 |   class_recs = {}
132 |   npos = 0
133 |   for imagename in imagenames:
134 |     R = [obj for obj in recs[imagename] if obj['name'] == classname]
135 |     bbox = np.array([x['bbox'] for x in R])
136 |     difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
137 |     det = [False] * len(R)
138 |     npos = npos + sum(~difficult)
139 |     class_recs[imagename] = {'bbox': bbox,
140 |                              'difficult': difficult,
141 |                              'det': det}
142 | 
143 |   # read dets
144 |   detfile = detpath.format(classname)
145 |   with open(detfile, 'r') as f:
146 |     lines = f.readlines()
147 | 
148 |   splitlines = [x.strip().split(' ') for x in lines]
149 |   image_ids = [x[0] for x in splitlines]
150 |   confidence = np.array([float(x[1]) for x in splitlines])
151 |   BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
152 | 
153 |   nd = len(image_ids)
154 |   tp = np.zeros(nd)
155 |   fp = np.zeros(nd)
156 | 
157 |   if BB.shape[0] > 0:
158 |     # sort by confidence
159 |     sorted_ind = np.argsort(-confidence)
160 |     sorted_scores = np.sort(-confidence)
161 |     BB = BB[sorted_ind, :]
162 |     image_ids = [image_ids[x] for x in sorted_ind]
163 | 
164 |     # go down dets and mark TPs and FPs
165 |     for d in range(nd):
166 |       R = class_recs[image_ids[d]]
167 |       bb = BB[d, :].astype(float)
168 |       ovmax = -np.inf
169 |       BBGT = R['bbox'].astype(float)
170 | 
171 |       if BBGT.size > 0:
172 |         # compute overlaps
173 |         # intersection
174 |         ixmin = np.maximum(BBGT[:, 0], bb[0])
175 |         iymin = np.maximum(BBGT[:, 1], bb[1])
176 |         ixmax = np.minimum(BBGT[:, 2], bb[2])
177 |         iymax = np.minimum(BBGT[:, 3], bb[3])
178 |         iw = np.maximum(ixmax - ixmin + 1., 0.)
179 |         ih = np.maximum(iymax - iymin + 1., 0.)
180 |         inters = iw * ih
181 | 
182 |         # union
183 |         uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
184 |                (BBGT[:, 2] - BBGT[:, 0] + 1.) *
185 |                (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
186 | 
187 |         overlaps = inters / uni
188 |         ovmax = np.max(overlaps)
189 |         jmax = np.argmax(overlaps)
190 | 
191 |       if ovmax > ovthresh:
192 |         if not R['difficult'][jmax]:
193 |           if not R['det'][jmax]:
194 |             tp[d] = 1.
195 |             R['det'][jmax] = 1
196 |           else:
197 |             fp[d] = 1.
198 |       else:
199 |         fp[d] = 1.
200 | 
201 |   # compute precision recall
202 |   fp = np.cumsum(fp)
203 |   tp = np.cumsum(tp)
204 |   rec = tp / float(npos)
205 |   # avoid divide by zero in case the first detection matches a difficult
206 |   # ground truth
207 |   prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
208 |   ap = voc_ap(rec, prec, use_07_metric)
209 | 
210 |   return rec, prec, ap
211 | 


--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | export CUDA_PATH=/usr/local/cuda/
 6 | 
 7 | python setup.py build_ext --inplace
 8 | rm -rf build
 9 | 
10 | CUDA_ARCH="-gencode arch=compute_61,code=sm_61"
11 | 
12 | # clean build file
13 | rm model/nms/src/*.o
14 | rm model/roi_pooling/src/*.o
15 | rm model/roi_align/src/*.o
16 | rm model/roi_crop/src/*.o
17 | rm model/psroi_pooling/src/cuda/*.o
18 | 
19 | 
20 | # compile NMS
21 | cd model/nms/src
22 | echo "Compiling nms kernels by nvcc..."
23 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
24 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
25 | 
26 | cd ../
27 | python build.py
28 | 
29 | # compile roi_pooling
30 | cd ../../
31 | cd model/roi_pooling/src
32 | echo "Compiling roi pooling kernels by nvcc..."
33 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
34 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
35 | cd ../
36 | python build.py
37 | 
38 | # compile roi_align
39 | cd ../../
40 | cd model/roi_align/src
41 | echo "Compiling roi align kernels by nvcc..."
42 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
43 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
44 | cd ../
45 | python build.py
46 | 
47 | # compile roi_crop
48 | cd ../../
49 | cd model/roi_crop/src
50 | echo "Compiling roi crop kernels by nvcc..."
51 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
52 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
53 | cd ../
54 | python build.py
55 | 
56 | # compile roi_crop
57 | cd ../../
58 | cd model/psroi_pooling/src/cuda
59 | echo "Compiling psroi pooling kernels by nvcc..."
60 | nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu \
61 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
62 | cd ../../
63 | python build.py
64 | 


--------------------------------------------------------------------------------
/lib/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/__init__.py


--------------------------------------------------------------------------------
/lib/model/couplenet/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet_atrous import resnet


--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import resnet
2 | from .vgg16 import vgg16


--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | import torchvision.models as models
  7 | from torch.autograd import Variable
  8 | import numpy as np
  9 | from model.utils.config import cfg
 10 | from model.rpn.rpn import _RPN
 11 | from model.roi_pooling.modules.roi_pool import _RoIPooling
 12 | from model.roi_crop.modules.roi_crop import _RoICrop
 13 | from model.roi_align.modules.roi_align import RoIAlignAvg
 14 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
 15 | import time
 16 | import pdb
 17 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta
 18 | 
 19 | class _fasterRCNN(nn.Module):
 20 |     """ faster RCNN """
 21 |     def __init__(self, classes, class_agnostic):
 22 |         super(_fasterRCNN, self).__init__()
 23 |         self.classes = classes
 24 |         self.n_classes = len(classes)
 25 |         self.class_agnostic = class_agnostic
 26 |         # loss
 27 |         self.RCNN_loss_cls = 0
 28 |         self.RCNN_loss_bbox = 0
 29 | 
 30 |         # define rpn
 31 |         self.RCNN_rpn = _RPN(self.dout_base_model)
 32 |         self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
 33 |         self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
 34 |         self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
 35 | 
 36 |         self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
 37 |         self.RCNN_roi_crop = _RoICrop()
 38 | 
 39 |     def forward(self, im_data, im_info, gt_boxes, num_boxes):
 40 |         batch_size = im_data.size(0)
 41 | 
 42 |         im_info = im_info.data
 43 |         gt_boxes = gt_boxes.data
 44 |         num_boxes = num_boxes.data
 45 | 
 46 |         # feed image data to base model to obtain base feature map
 47 |         base_feat = self.RCNN_base(im_data)
 48 | 
 49 |         # feed base feature map tp RPN to obtain rois
 50 |         rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
 51 | 
 52 |         # if it is training phrase, then use ground trubut bboxes for refining
 53 |         if self.training:
 54 |             roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
 55 |             rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
 56 | 
 57 |             rois_label = Variable(rois_label.view(-1).long())
 58 |             rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
 59 |             rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
 60 |             rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
 61 |         else:
 62 |             rois_label = None
 63 |             rois_target = None
 64 |             rois_inside_ws = None
 65 |             rois_outside_ws = None
 66 |             rpn_loss_cls = 0
 67 |             rpn_loss_bbox = 0
 68 | 
 69 |         rois = Variable(rois)
 70 |         # do roi pooling based on predicted rois
 71 | 
 72 |         if cfg.POOLING_MODE == 'crop':
 73 |             # pdb.set_trace()
 74 |             # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
 75 |             grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
 76 |             grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
 77 |             pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
 78 |             if cfg.CROP_RESIZE_WITH_MAX_POOL:
 79 |                 pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
 80 |         elif cfg.POOLING_MODE == 'align':
 81 |             pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
 82 |         elif cfg.POOLING_MODE == 'pool':
 83 |             pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
 84 | 
 85 |         # feed pooled features to top model
 86 |         pooled_feat = self._head_to_tail(pooled_feat)
 87 | 
 88 |         # compute bbox offset
 89 |         bbox_pred = self.RCNN_bbox_pred(pooled_feat)
 90 |         if self.training and not self.class_agnostic:
 91 |             # select the corresponding columns according to roi labels
 92 |             bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
 93 |             bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
 94 |             bbox_pred = bbox_pred_select.squeeze(1)
 95 | 
 96 |         # compute object classification probability
 97 |         cls_score = self.RCNN_cls_score(pooled_feat)
 98 |         cls_prob = F.softmax(cls_score)
 99 | 
100 |         RCNN_loss_cls = 0
101 |         RCNN_loss_bbox = 0
102 | 
103 |         if self.training:
104 |             # classification loss
105 |             RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
106 | 
107 |             # bounding box regression L1 loss
108 |             RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
109 | 
110 | 
111 |         cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
112 |         bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
113 | 
114 |         return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
115 | 
116 |     def _init_weights(self):
117 |         def normal_init(m, mean, stddev, truncated=False):
118 |             """
119 |             weight initalizer: truncated normal and random normal.
120 |             """
121 |             # x is a parameter
122 |             if truncated:
123 |                 m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
124 |             else:
125 |                 m.weight.data.normal_(mean, stddev)
126 |                 m.bias.data.zero_()
127 | 
128 |         normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
129 |         normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
130 |         normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
131 |         normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
132 |         normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)
133 | 
134 |     def create_architecture(self):
135 |         self._init_modules()
136 |         self._init_weights()
137 | 


--------------------------------------------------------------------------------
/lib/model/faster_rcnn/vgg16.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Tensorflow Faster R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Xinlei Chen
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN
17 | import pdb
18 | 
class vgg16(_fasterRCNN):
  """Faster R-CNN variant built from torchvision's VGG-16."""

  def __init__(self, classes, pretrained=False, class_agnostic=False):
    # Assigned before the base constructor runs, because
    # _fasterRCNN.__init__ reads self.dout_base_model.
    self.class_agnostic = class_agnostic
    self.pretrained = pretrained
    self.dout_base_model = 512
    self.model_path = 'data/pretrained_model/vgg16_caffe.pth'

    _fasterRCNN.__init__(self, classes, class_agnostic)

  def _init_modules(self):
    """Create RCNN_base, RCNN_top, RCNN_cls_score and RCNN_bbox_pred."""
    backbone = models.vgg16()
    if self.pretrained:
        print("Loading pretrained weights from %s" %(self.model_path))
        checkpoint = torch.load(self.model_path)
        # keep only the entries the torchvision model knows about
        expected = backbone.state_dict()
        backbone.load_state_dict({key: value for key, value in checkpoint.items() if key in expected})

    # classifier without its last layer
    fc_layers = list(backbone.classifier._modules.values())[:-1]
    backbone.classifier = nn.Sequential(*fc_layers)

    # not using the last maxpool layer
    conv_layers = list(backbone.features._modules.values())[:-1]
    self.RCNN_base = nn.Sequential(*conv_layers)

    # Fix the layers before conv3 (the first ten modules of the base):
    for index in range(10):
      for param in self.RCNN_base[index].parameters():
        param.requires_grad = False

    # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model)

    self.RCNN_top = backbone.classifier

    # classification head on the 4096-d features
    self.RCNN_cls_score = nn.Linear(4096, self.n_classes)

    # box regression head: 4 outputs in total, or 4 per class
    box_outputs = 4 if self.class_agnostic else 4 * self.n_classes
    self.RCNN_bbox_pred = nn.Linear(4096, box_outputs)

  def _head_to_tail(self, pool5):
    """Flatten the pooled features per roi and run them through RCNN_top."""
    flattened = pool5.view(pool5.size(0), -1)
    return self.RCNN_top(flattened)
62 | 
63 | 


--------------------------------------------------------------------------------
/lib/model/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/lib/model/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/nms/__init__.py


--------------------------------------------------------------------------------
/lib/model/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/nms/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/nms/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | #this_file = os.path.dirname(__file__)
 7 | 
# Inputs for create_extension; they stay empty unless CUDA is available.
sources = []
headers = []
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/nms_cuda.c']
    headers += ['src/nms_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

# Absolute path to the kernel object file, which must already exist
# (lib/make.sh compiles it with nvcc before running this script).
# NOTE: it is added to extra_objects even when CUDA is not available.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
extra_objects = ['src/nms_cuda_kernel.cu.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
print(extra_objects)

ffi = create_extension(
    '_ext.nms',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

# Only build when run as a script; importing the module just defines ffi.
if __name__ == '__main__':
    ffi.build()
38 | 


--------------------------------------------------------------------------------
/lib/model/nms/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling stnm kernels by nvcc..."
 7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_cpu.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | 
 3 | import numpy as np
 4 | import torch
 5 | 
 6 | def nms_cpu(dets, thresh):
 7 |     dets = dets.numpy()
 8 |     x1 = dets[:, 0]
 9 |     y1 = dets[:, 1]
10 |     x2 = dets[:, 2]
11 |     y2 = dets[:, 3]
12 |     scores = dets[:, 4]
13 | 
14 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
15 |     order = scores.argsort()[::-1]
16 | 
17 |     keep = []
18 |     while order.size > 0:
19 |         i = order.item(0)
20 |         keep.append(i)
21 |         xx1 = np.maximum(x1[i], x1[order[1:]])
22 |         yy1 = np.maximum(y1[i], y1[order[1:]])
23 |         xx2 = np.maximum(x2[i], x2[order[1:]])
24 |         yy2 = np.maximum(y2[i], y2[order[1:]])
25 | 
26 |         w = np.maximum(0.0, xx2 - xx1 + 1)
27 |         h = np.maximum(0.0, yy2 - yy1 + 1)
28 |         inter = w * h
29 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
30 | 
31 |         inds = np.where(ovr <= thresh)[0]
32 |         order = order[inds + 1]
33 | 
34 |     return torch.IntTensor(keep)
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_gpu.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import torch
 3 | import numpy as np
 4 | from ._ext import nms
 5 | import pdb
 6 | 
 7 | def nms_gpu(dets, thresh):
 8 | 	keep = dets.new(dets.size(0), 1).zero_().int()
 9 | 	num_out = dets.new(1).zero_().int()
10 | 	nms.nms_cuda(keep, dets, num_out, thresh)
11 | 	keep = keep[:num_out[0]]
12 | 	return keep
13 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
 12 | #define CUDA_CHECK(condition) \
 13 |   /* Code block avoids redefinition of cudaError_t error */ \
 14 |   do { \
 15 |     cudaError_t error = condition; \
 16 |     if (error != cudaSuccess) { \
 17 |       std::cout << cudaGetErrorString(error) << std::endl; \
 18 |     } \
 19 |   } while (0)
 20 | 
 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
 80 | void _set_device(int device_id) {
 81 |   int current_device;
 82 |   CUDA_CHECK(cudaGetDevice(&current_device));
 83 |   if (current_device == device_id) {
 84 |     return;
 85 |   }
 86 |   // The call to cudaSetDevice must come before any calls to Get, which
 87 |   // may perform initialization using the GPU.
 88 |   CUDA_CHECK(cudaSetDevice(device_id));
 89 | }
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | import torch
 8 | from model.utils.config import cfg
 9 | if torch.cuda.is_available():
10 |     from model.nms.nms_gpu import nms_gpu
11 | from model.nms.nms_cpu import nms_cpu
12 | 
def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    Args:
        dets: detections tensor; ``dets.shape[0]`` is the number of boxes.
        thresh: overlap threshold forwarded to the implementation.
        force_cpu: when true, always use the CPU implementation.

    Returns:
        An empty list when there are no detections, otherwise whatever the
        selected implementation returns.
    """
    if dets.shape[0] == 0:
        return []
    # ---numpy version---
    # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    # ---pytorch version---

    # ``nms_gpu`` is only imported when CUDA is available, so it must not be
    # referenced for tensors that are not on the GPU.
    if force_cpu or not dets.is_cuda:
        return nms_cpu(dets, thresh)
    return nms_gpu(dets, thresh)
22 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <stdio.h>
 3 | #include "nms_cuda_kernel.h"
 4 | 
 5 | // this symbol will be resolved automatically from PyTorch libs
 6 | extern THCState *state;
 7 | 
 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
 9 | 		     THCudaIntTensor *num_out, float nms_overlap_thresh) {
10 | 
11 | 	nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 
12 | 		         THCudaIntTensor_data(state, num_out), 
13 |       	                 THCudaTensor_data(state, boxes_host), 
14 | 		         boxes_host->size[0], 
15 | 		         boxes_host->size[1],
16 | 		         nms_overlap_thresh);
17 | 
18 | 	return 1;
19 | }
20 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
2 | //             THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);
3 | 
4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
5 |              THCudaIntTensor *num_out, float nms_overlap_thresh);
6 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include <stdbool.h>
  9 | #include <stdio.h>
 10 | #include <vector>
 11 | #include <iostream>
 12 | #include "nms_cuda_kernel.h"
 13 | 
 14 | #define CUDA_WARN(XXX) \
 15 |     do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \
 16 |         cudaGetErrorString(XXX) << ", at line " << __LINE__ \
 17 | << std::endl; cudaDeviceSynchronize(); } while (0)
 18 | 
 19 | #define CUDA_CHECK(condition) \
 20 |   /* Code block avoids redefinition of cudaError_t error */ \
 21 |   do { \
 22 |     cudaError_t error = condition; \
 23 |     if (error != cudaSuccess) { \
 24 |       std::cout << cudaGetErrorString(error) << std::endl; \
 25 |     } \
 26 |   } while (0)
 27 | 
 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 30 | 
 31 | __device__ inline float devIoU(float const * const a, float const * const b) {
 32 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 33 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 34 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 35 |   float interS = width * height;
 36 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 37 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 38 |   return interS / (Sa + Sb - interS);
 39 | }
 40 | 
 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
 42 |                            float *dev_boxes, unsigned long long *dev_mask) {
 43 |   const int row_start = blockIdx.y;
 44 |   const int col_start = blockIdx.x;
 45 | 
 46 |   // if (row_start > col_start) return;
 47 | 
 48 |   const int row_size =
 49 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 50 |   const int col_size =
 51 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 52 | 
 53 |   __shared__ float block_boxes[threadsPerBlock * 5];
 54 |   if (threadIdx.x < col_size) {
 55 |     block_boxes[threadIdx.x * 5 + 0] =
 56 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 57 |     block_boxes[threadIdx.x * 5 + 1] =
 58 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 59 |     block_boxes[threadIdx.x * 5 + 2] =
 60 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 61 |     block_boxes[threadIdx.x * 5 + 3] =
 62 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 63 |     block_boxes[threadIdx.x * 5 + 4] =
 64 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 65 |   }
 66 |   __syncthreads();
 67 | 
 68 |   if (threadIdx.x < row_size) {
 69 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 70 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 71 |     int i = 0;
 72 |     unsigned long long t = 0;
 73 |     int start = 0;
 74 |     if (row_start == col_start) {
 75 |       start = threadIdx.x + 1;
 76 |     }
 77 |     for (i = start; i < col_size; i++) {
 78 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 79 |         t |= 1ULL << i;
 80 |       }
 81 |     }
 82 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 83 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 84 |   }
 85 | }
 86 | 
 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
 88 |           int boxes_dim, float nms_overlap_thresh) {
 89 | 
 90 |   float* boxes_dev = NULL;
 91 |   unsigned long long* mask_dev = NULL;
 92 | 
 93 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 94 | 
 95 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
 96 |                         boxes_num * boxes_dim * sizeof(float)));
 97 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
 98 |                         boxes_host,
 99 |                         boxes_num * boxes_dim * sizeof(float),
100 |                         cudaMemcpyHostToDevice));
101 | 
102 |   CUDA_CHECK(cudaMalloc(&mask_dev,
103 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
104 | 
105 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
106 |               DIVUP(boxes_num, threadsPerBlock));
107 |   dim3 threads(threadsPerBlock);
108 | 
109 |   // printf("i am at line %d\n", boxes_num);
110 |   // printf("i am at line %d\n", boxes_dim);  
111 | 
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 |                                   nms_overlap_thresh,
114 |                                   boxes_dev,
115 |                                   mask_dev);
116 | 
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 |                         mask_dev,
120 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
121 |                         cudaMemcpyDeviceToHost));
122 | 
123 |   std::vector<unsigned long long> remv(col_blocks);
124 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 | 
126 |   // we need to create a memory for keep_out on cpu
127 |   // otherwise, the following code cannot run
128 | 
129 |   int* keep_out_cpu = new int[boxes_num];
130 | 
131 |   int num_to_keep = 0;
132 |   for (int i = 0; i < boxes_num; i++) {
133 |     int nblock = i / threadsPerBlock;
134 |     int inblock = i % threadsPerBlock;
135 | 
136 |     if (!(remv[nblock] & (1ULL << inblock))) {
137 |       // orignal: keep_out[num_to_keep++] = i;
138 |       keep_out_cpu[num_to_keep++] = i;
139 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
140 |       for (int j = nblock; j < col_blocks; j++) {
141 |         remv[j] |= p[j];
142 |       }
143 |     }
144 |   }
145 | 
146 |   // copy keep_out_cpu to keep_out on gpu
147 |   CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice));  
148 | 
149 |   // *num_out = num_to_keep;
150 | 
151 |   // original: *num_out = num_to_keep;
152 |   // copy num_to_keep to num_out on gpu
153 | 
154 |   CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));  
155 | 
156 |   // release cuda memory
157 |   CUDA_CHECK(cudaFree(boxes_dev));
158 |   CUDA_CHECK(cudaFree(mask_dev));
159 |   // release cpu memory
160 |   delete []keep_out_cpu;
161 | }
162 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | extern "C" {
 3 | #endif
 4 | 
 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
 6 |           int boxes_dim, float nms_overlap_thresh);
 7 | 
 8 | #ifdef __cplusplus
 9 | }
10 | #endif
11 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/__init__.py


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/_ext/psroi_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._psroi_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | sources = []
 6 | headers = []
 7 | defines = []
 8 | with_cuda = False
 9 | 
10 | if torch.cuda.is_available():
11 |     print('Including CUDA code.')
12 |     sources += ['src/psroi_pooling_cuda.c']
13 |     headers += ['src/psroi_pooling_cuda.h']
14 |     defines += [('WITH_CUDA', None)]
15 |     with_cuda = True
16 | 
17 | this_file = os.path.dirname(os.path.realpath(__file__))
18 | print(this_file)
19 | extra_objects = ['src/cuda/psroi_pooling.cu.o']
20 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
21 | 
22 | ffi = create_extension(
23 |     '_ext.psroi_pooling',
24 |     headers=headers,
25 |     sources=sources,
26 |     define_macros=defines,
27 |     relative_to=__file__,
28 |     with_cuda=with_cuda,
29 |     extra_objects=extra_objects
30 | )
31 | 
32 | if __name__ == '__main__':
33 |     ffi.build()
34 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/functions/__init__.py


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/functions/psroi_pooling.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import psroi_pooling 
 4 | 
 5 | 
 6 | class PSRoIPoolingFunction(Function):
 7 |     def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim):
 8 |         self.pooled_width = int(pooled_width)
 9 |         self.pooled_height = int(pooled_height)
10 |         self.spatial_scale = float(spatial_scale)
11 |         self.group_size = int(group_size)
12 |         self.output_dim = int(output_dim)
13 |         self.output = None
14 |         self.mappingchannel = None
15 |         self.rois = None
16 |         self.feature_size = None
17 | 
18 |     def forward(self, features, rois):
19 |         batch_size, num_channels, data_height, data_width = features.size()
20 |         num_rois = rois.size()[0]
21 |         output = torch.zeros(num_rois, self.output_dim, self.pooled_height, self.pooled_width)
22 |         mappingchannel = torch.IntTensor(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_()
23 |         output = output.cuda()
24 |         mappingchannel = mappingchannel.cuda()
25 |         psroi_pooling.psroi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, self.output_dim, \
26 |         features, rois, output, mappingchannel)
27 |         self.output = output
28 |         self.mappingchannel = mappingchannel
29 |         self.rois = rois
30 |         self.feature_size = features.size()
31 | 
32 |         return output
33 | 
34 |     def backward(self, grad_output):
35 |         assert(self.feature_size is not None and grad_output.is_cuda)
36 | 
37 |         batch_size, num_channels, data_height, data_width = self.feature_size
38 | 
39 |         grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda()
40 | 
41 |         psroi_pooling.psroi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, self.output_dim,  \
42 |         grad_output, self.rois, grad_input, self.mappingchannel)
43 |         return grad_input, None
44 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/modules/__init__.py


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/modules/psroi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | import sys
 3 | from model.psroi_pooling.functions.psroi_pooling import PSRoIPoolingFunction
 4 | 
 5 | 
 6 | class PSRoIPool(Module):
 7 |     def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim):
 8 |         super(PSRoIPool, self).__init__()
 9 | 
10 |         self.pooled_width = int(pooled_width)
11 |         self.pooled_height = int(pooled_height)
12 |         self.spatial_scale = float(spatial_scale)
13 |         self.group_size = int(group_size)
14 |         self.output_dim = int(output_dim)
15 | 
16 |     def forward(self, features, rois):
17 |         return PSRoIPoolingFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, self.output_dim)(features, rois)
18 | 
if __name__ == '__main__':
    # Manual smoke test: run PS-RoI pooling and plain RoI pooling on the same
    # random features and print both results. Requires a CUDA device.
    import torch
    import numpy as np
    from torch.autograd import Variable
    from model.roi_pooling.modules.roi_pool import _RoIPooling

    features = torch.randn(2, 21*7*7, 50, 72)
    # Each row is passed as one RoI of five values; two RoIs per batch index.
    roi_rows = np.array([
        [0.0000, 350.6689, 211.0240, 779.0886, 777.7496],
        [0.0000, 744.0627, 277.4919, 988.4307, 602.7589],
        [1.0000, 350.6689, 211.0240, 779.0886, 777.7496],
        [1.0000, 744.0627, 277.4919, 988.4307, 602.7589],
    ])
    rois = torch.from_numpy(roi_rows).float()

    pool = PSRoIPool(7, 7, 1/16.0, 7, 21)
    features = Variable(features.cuda())
    rois = Variable(rois.cuda())
    print(rois.size(), features.size())
    print(features)
    pooled = pool(features, rois)
    print(pooled)
    print(pooled.size())

    print('============================')
    roi_pool = _RoIPooling(7, 7, 1/16.0)
    pooled = roi_pool(features, rois.view(-1, 5))
    print(pooled)
    print(pooled.size())


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/cuda/psroi_pooling_kernel.cu:
--------------------------------------------------------------------------------
  1 | #ifdef __cplusplus
  2 | extern "C" {
  3 | #endif
  4 | 
  5 | #include <stdio.h>
  6 | #include <math.h>
  7 | #include <float.h>
  8 | #include "psroi_pooling_kernel.h"
  9 | 
 10 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 11 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 12 |        i += blockDim.x * gridDim.x)
 13 | 
 14 | __global__ void PSROIPoolForward(const int nthreads, const float* bottom_data,
 15 |     const float spatial_scale, const int height, const int width,
 16 |     const int channels, const int pooled_height, const int pooled_width,
 17 |     const int group_size, const int output_dim,
 18 |     const float* bottom_rois, float* top_data, int* mapping_channel)
 19 | {
 20 | 
 21 |     CUDA_1D_KERNEL_LOOP(index, nthreads)
 22 |     {
 23 |         // (n, c, ph, pw) is an element in the pooled output
 24 |         int pw = index % pooled_width;
 25 |       	int ph = (index / pooled_width) % pooled_height;
 26 |       	int ctop = (index / pooled_width / pooled_height) % output_dim;
 27 |       	int n = index / pooled_width / pooled_height / output_dim;
 28 | 
 29 |         bottom_rois += n * 5;
 30 |         int roi_batch_ind = bottom_rois[0];
 31 | 	    float roi_start_w =
 32 |         	static_cast<float>(round(bottom_rois[1])) * spatial_scale;
 33 |       	float roi_start_h =
 34 |         	static_cast<float>(round(bottom_rois[2])) * spatial_scale;
 35 |       	float roi_end_w =
 36 |         	static_cast<float>(round(bottom_rois[3]) + 1.) * spatial_scale;
 37 |       	float roi_end_h =
 38 |         	static_cast<float>(round(bottom_rois[4]) + 1.) * spatial_scale;
 39 | 
 40 |         // Force malformed ROIs to be 1x1
 41 |         float roi_width = max(roi_end_w - roi_start_w, 0.1);  // avoid 0
 42 |       	float roi_height = max(roi_end_h - roi_start_h, 0.1);
 43 | 
 44 |         float bin_size_h = (float)(roi_height) / (float)(pooled_height);
 45 |         float bin_size_w = (float)(roi_width) / (float)(pooled_width);
 46 | 
 47 |         int hstart = floor(static_cast<float>(ph) * bin_size_h
 48 |                           + roi_start_h);
 49 |       	int wstart = floor(static_cast<float>(pw)* bin_size_w
 50 |                           + roi_start_w);
 51 |       	int hend = ceil(static_cast<float>(ph + 1) * bin_size_h
 52 |                         + roi_start_h);
 53 |       	int wend = ceil(static_cast<float>(pw + 1) * bin_size_w
 54 |                         + roi_start_w);
 55 | 
 56 |         // Add roi offsets and clip to input boundaries
 57 |         hstart = min(max(hstart, 0), height);
 58 |       	hend = min(max(hend, 0), height);
 59 |       	wstart = min(max(wstart, 0), width);
 60 |       	wend = min(max(wend, 0), width);
 61 |         bool is_empty = (hend <= hstart) || (wend <= wstart);
 62 | 
 63 |         int gw = pw;
 64 |       	int gh = ph;
 65 |       	int c = (ctop*group_size + gh)*group_size + gw;
 66 | 
 67 |         bottom_data += (roi_batch_ind * channels + c) * height * width;
 68 |         float out_sum = 0;
 69 |       	for (int h = hstart; h < hend; ++h) {
 70 |       	  for (int w = wstart; w < wend; ++w) {
 71 |       	    int bottom_index = h*width + w;
 72 |       	    out_sum += bottom_data[bottom_index];
 73 |       	  }
 74 |       	}
 75 |       	float bin_area = (hend - hstart)*(wend - wstart);
 76 |       	//top_data[index] = nthreads;
 77 |       	top_data[index] = is_empty? 0. : out_sum/bin_area;
 78 |       	mapping_channel[index] = c;
 79 |     }
 80 | }
 81 | 
 82 | 
 83 | int PSROIPoolForwardLauncher(
 84 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
 85 |     const int width, const int channels, const int pooled_height,
 86 |     const int pooled_width, const float* bottom_rois,
 87 |     const int group_size, const int output_dim,
 88 |     float* top_data, int* mapping_channel, cudaStream_t stream)
 89 | {
 90 |     const int kThreadsPerBlock = 1024;
 91 |     const int output_size = output_dim * pooled_height * pooled_width * num_rois;
 92 |     cudaError_t err;
 93 | 
 94 |     PSROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
 95 |       output_size, bottom_data, spatial_scale, height, width, channels, pooled_height,
 96 |       pooled_width, group_size, output_dim, bottom_rois, top_data, mapping_channel);
 97 | 
 98 |     err = cudaGetLastError();
 99 |     if(cudaSuccess != err)
100 |     {
101 |         fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
102 |         exit( -1 );
103 |     }
104 | 
105 |     return 1;
106 | }
107 | 
108 | 
// Position-sensitive RoI average pooling, backward pass. Each loop iteration
// takes one output gradient element and spreads it evenly, with atomicAdd,
// over the input bin it was averaged from. The input channel is read from
// mapping_channel.
__global__ void PSROIPoolBackward(const int nthreads, const float* top_diff,
    const int* mapping_channel, const int num_rois, const float spatial_scale,
    const int height, const int width, const int channels,
    const int pooled_height, const int pooled_width, const int output_dim, float* bottom_diff,
    const float* bottom_rois) {
    CUDA_1D_KERNEL_LOOP(index, nthreads)
    {

      int pw = index % pooled_width;
      int ph = (index / pooled_width) % pooled_height;
      int n = index / pooled_width / pooled_height / output_dim;

      // [start, end) interval for spatial sampling
      // Use a local pointer: advancing the kernel parameter itself would
      // accumulate the offset if this thread runs more than one iteration.
      const float* offset_bottom_rois = bottom_rois + n * 5;
      int roi_batch_ind = offset_bottom_rois[0];
      float roi_start_w =
        static_cast<float>(round(offset_bottom_rois[1])) * spatial_scale;
      float roi_start_h =
        static_cast<float>(round(offset_bottom_rois[2])) * spatial_scale;
      float roi_end_w =
        static_cast<float>(round(offset_bottom_rois[3]) + 1.) * spatial_scale;
      float roi_end_h =
        static_cast<float>(round(offset_bottom_rois[4]) + 1.) * spatial_scale;

      // Clamp the RoI extent to at least 0.1 so the bin size is never zero.
      float roi_width = max(roi_end_w - roi_start_w, 0.1);
      float roi_height = max(roi_end_h - roi_start_h, 0.1);

      // Compute w and h at bottom
      float bin_size_h = roi_height / static_cast<float>(pooled_height);
      float bin_size_w = roi_width / static_cast<float>(pooled_width);

      int hstart = floor(static_cast<float>(ph)* bin_size_h
        + roi_start_h);
      int wstart = floor(static_cast<float>(pw)* bin_size_w
        + roi_start_w);
      int hend = ceil(static_cast<float>(ph + 1) * bin_size_h
        + roi_start_h);
      int wend = ceil(static_cast<float>(pw + 1) * bin_size_w
        + roi_start_w);
      // Add roi offsets and clip to input boundaries
      hstart = min(max(hstart, 0), height);
      hend = min(max(hend, 0), height);
      wstart = min(max(wstart, 0), width);
      wend = min(max(wend, 0), width);
      bool is_empty = (hend <= hstart) || (wend <= wstart);

      // Compute c at bottom
      int c = mapping_channel[index];
      float* offset_bottom_diff = bottom_diff +
        (roi_batch_ind * channels + c) * height * width;
      float bin_area = (hend - hstart)*(wend - wstart);
      float diff_val = is_empty ? 0. : top_diff[index] / bin_area;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          int bottom_index = h*width + w;
          // Several iterations can add into the same input cell, hence atomic.
          atomicAdd(offset_bottom_diff + bottom_index, diff_val);
        }
      }
  }
}
171 | 
// Launch PSROIPoolBackward on `stream` with one thread per output-gradient
// element. Returns 1 on success; on a launch error the message is printed to
// stderr and the process exits with status -1. `batch_size` is not used here.
// NOTE(review): this signature lists pooled_width before pooled_height, the
// reverse of the forward launcher -- confirm the caller passes them in this
// order.
int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels,
    const int height, const int width, const int pooled_width,
    const int pooled_height, const int output_dim,
    float* bottom_diff, const float* bottom_rois, cudaStream_t stream)
{
    const int kThreadsPerBlock = 1024;
    //const int output_size = output_dim * height * width * channels;
    const int output_size = output_dim * pooled_height * pooled_width * num_rois;
    cudaError_t err;

    // With no gradient elements the grid would have zero blocks, which is an
    // invalid launch configuration and would end in exit(-1) below.
    if (output_size <= 0)
    {
        return 1;
    }

    PSROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
      output_size, top_diff, mapping_channel, num_rois, spatial_scale, height, width, channels, pooled_height,
      pooled_width, output_dim, bottom_diff, bottom_rois);

    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
195 | 
196 | 
197 | #ifdef __cplusplus
198 | }
199 | #endif
200 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/cuda/psroi_pooling_kernel.h:
--------------------------------------------------------------------------------
#ifndef PS_ROI_POOLING_KERNEL
#define PS_ROI_POOLING_KERNEL

// Declarations of the PS-ROI pooling launchers. They are given C linkage so
// that the C wrapper (psroi_pooling_cuda.c) can call them.
#ifdef __cplusplus
extern "C" {
#endif

// Forward launcher. The wrapper passes raw device pointers for the feature
// map, the ROI table, the output and the per-output channel mapping, plus the
// stream to run on.
// NOTE(review): the definition is not part of this header; return-value
// semantics should be confirmed against the .cu source.
int PSROIPoolForwardLauncher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int pooled_height, const int pooled_width,
    const float* bottom_rois, const int group_size, const int output_dim, float* top_data, int* mapping_channel, cudaStream_t stream);


// Backward launcher. Note the argument order: `channels` comes before
// `height`/`width`, and `pooled_width` comes before `pooled_height`.
int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels, const int height, const int width, const int pooled_width, const int pooled_height, const int output_dim, float* bottom_diff, const float* bottom_rois, cudaStream_t stream);

#ifdef __cplusplus
}

#endif

#endif
22 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/psroi_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include <stdio.h>
 4 | #include "cuda/psroi_pooling_kernel.h"
 5 | 
 6 | 
 7 | 
 8 | extern THCState* state;
 9 | 
10 | int psroi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, int group_size, int output_dim,THCudaTensor *features, THCudaTensor* rois, THCudaTensor* output, THCudaIntTensor* mappingchannel){
11 | 
12 | 	float* data_in = THCudaTensor_data(state, features);
13 | 	float* rois_in = THCudaTensor_data(state, rois);
14 | 	float* output_out = THCudaTensor_data(state, output);
15 | 	int* mappingchannel_out = THCudaIntTensor_data(state, mappingchannel);
16 | 	//Get # of Rois
17 | 	int num_rois = THCudaTensor_size(state, rois, 0);
18 | 	int size_rois = THCudaTensor_size(state, rois, 1);
19 | 	if (size_rois!=5)
20 | 	{
21 | 		return 0;
22 | 	}
23 | 
24 | 	//Get # of batch_size
25 | 	//int batch_size = THCudaTensor_size(state, features, 0);
26 | 	//if (batch_size!=1)
27 | 	//{
28 | 	//	return 0;
29 | 	//}
30 | 
31 | 	int data_height = THCudaTensor_size(state, features, 2);
32 | 	int data_width = THCudaTensor_size(state, features, 3);
33 | 	int num_channels = THCudaTensor_size(state, features, 1);
34 | 
35 | 	cudaStream_t stream = THCState_getCurrentStream(state);
36 | 
37 | 	// call the gpu kernel for psroi_pooling
38 | 	PSROIPoolForwardLauncher(data_in, spatial_scale, num_rois, data_height, data_width, num_channels, pooled_height, pooled_width,rois_in, group_size, 
39 | 	output_dim, output_out, mappingchannel_out,stream);
40 | 	return 1;
41 | }
42 | 
43 | 
44 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 
45 | THCudaTensor* top_grad, THCudaTensor* rois, THCudaTensor* bottom_grad, THCudaIntTensor* mappingchannel)
46 | {
47 |     float *top_grad_flat = THCudaTensor_data(state, top_grad);
48 | 	float *rois_flat = THCudaTensor_data(state, rois);
49 | 
50 | 	float *bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
51 |     	int *mappingchannel_flat = THCudaIntTensor_data(state, mappingchannel);
52 | 
53 |     	// Number of ROIs
54 |     	int num_rois = THCudaTensor_size(state, rois, 0);
55 |     	int size_rois = THCudaTensor_size(state, rois, 1);
56 |     	if (size_rois != 5)
57 |     	{
58 |         	return 0;
59 |     	}
60 |     	// batch size
61 |     	int batch_size = THCudaTensor_size(state, bottom_grad, 0);
62 |     	//if (batch_size != 1)
63 |     	//{
64 |         //	return 0;
65 |     	//}
66 |     	// data height
67 |     	int data_height = THCudaTensor_size(state, bottom_grad, 2);
68 |     	// data width
69 |     	int data_width = THCudaTensor_size(state, bottom_grad, 3);
70 |     	// Number of channels
71 |     	int num_channels = THCudaTensor_size(state, bottom_grad, 1);
72 | 
73 |     	cudaStream_t stream = THCState_getCurrentStream(state);
74 | 
75 |     	PSROIPoolBackwardLauncher(top_grad_flat, mappingchannel_flat, batch_size, num_rois, spatial_scale, num_channels, data_height, data_width, pooled_width,	      pooled_height, output_dim, bottom_grad_flat, rois_flat, stream);
76 |         return 1;
77 | }
78 | 


--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/psroi_pooling_cuda.h:
--------------------------------------------------------------------------------
/* GPU forward entry point; the implementation returns 0 when `rois` does not
 * have 5 columns and 1 otherwise. */
int psroi_pooling_forward_cuda( int pooled_height, int pooled_width, float spatial_scale,int group_size, int output_dim,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * mappingchannel);

/* GPU backward entry point; same return convention as the forward call.
 * Takes no group_size argument. */
int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * mappingchannel);
6 | 


--------------------------------------------------------------------------------
/lib/model/rfcn/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet_atrous import resnet


--------------------------------------------------------------------------------
/lib/model/rfcn/rfcn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | 
  7 | from model.psroi_pooling.modules.psroi_pool import PSRoIPool
  8 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
  9 | from model.rpn.rpn import _RPN
 10 | from model.utils.config import cfg
 11 | from model.utils.net_utils import _smooth_l1_loss
 12 | 
 13 | class _RFCN(nn.Module):
 14 |     """ R-FCN """
 15 |     def __init__(self, classes, class_agnostic):
 16 |         super(_RFCN, self).__init__()
 17 |         self.classes = classes
 18 |         self.n_classes = len(classes)
 19 |         self.class_agnostic = class_agnostic
 20 |         # loss
 21 |         self.RCNN_loss_cls = 0
 22 |         self.RCNN_loss_bbox = 0
 23 | 
 24 |         self.box_num_classes = 1 if class_agnostic else self.n_classes
 25 | 
 26 |         # define rpn
 27 |         self.RCNN_rpn = _RPN(self.dout_base_model)
 28 |         self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
 29 |         self.RCNN_psroi_pool_cls = PSRoIPool(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
 30 |                                           spatial_scale=1/16.0, group_size=cfg.POOLING_SIZE,
 31 |                                           output_dim=self.n_classes)
 32 |         self.RCNN_psroi_pool_loc = PSRoIPool(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
 33 |                                           spatial_scale=1/16.0, group_size=cfg.POOLING_SIZE,
 34 |                                           output_dim=self.box_num_classes * 4)
 35 |         self.pooling = nn.AvgPool2d(kernel_size=cfg.POOLING_SIZE, stride=cfg.POOLING_SIZE)
 36 |         self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
 37 | 
 38 |     def detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
 39 |         # classification loss
 40 |         RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
 41 | 
 42 |         # bounding box regression L1 loss
 43 |         RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
 44 | 
 45 |         return RCNN_loss_cls, RCNN_loss_bbox
 46 | 
 47 |     def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
 48 | 
 49 |         def log_sum_exp(x):
 50 |             x_max = x.data.max()
 51 |             return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max
 52 | 
 53 |         num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
 54 |         pos_idx = rois_label > 0
 55 |         num_pos = pos_idx.int().sum()
 56 | 
 57 |         # classification loss
 58 |         num_classes = cls_score.size(1)
 59 |         weight = cls_score.data.new(num_classes).fill_(1.)
 60 |         weight[0] = num_pos.data[0] / num_hard
 61 | 
 62 |         conf_p = cls_score.detach()
 63 |         conf_t = rois_label.detach()
 64 | 
 65 |         # rank on cross_entropy loss
 66 |         loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1,1))
 67 |         loss_c[pos_idx] = 100. # include all positive samples
 68 |         _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
 69 |         loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)
 70 | 
 71 |         # bounding box regression L1 loss
 72 |         pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
 73 |         loc_p = bbox_pred[pos_idx].view(-1, 4)
 74 |         loc_t = rois_target[pos_idx].view(-1, 4)
 75 |         loss_box = F.smooth_l1_loss(loc_p, loc_t)
 76 | 
 77 |         return loss_cls, loss_box
 78 | 
 79 |     def forward(self, im_data, im_info, gt_boxes, num_boxes):
 80 |         batch_size = im_data.size(0)
 81 | 
 82 |         im_info = im_info.data
 83 |         gt_boxes = gt_boxes.data
 84 |         num_boxes = num_boxes.data
 85 |         self.batch_size = im_data.size(0)
 86 | 
 87 |         # feed image data to base model to obtain base feature map
 88 |         base_feat = self.RCNN_base(im_data)
 89 | 
 90 |         # feed base feature map tp RPN to obtain rois
 91 |         rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
 92 | 
 93 |         # if it is training phrase, then use ground trubut bboxes for refining
 94 |         if self.training:
 95 |             roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
 96 |             rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
 97 | 
 98 |             rois_label = Variable(rois_label.view(-1).long())
 99 |             rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
100 |             rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
101 |             rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
102 |         else:
103 |             rois_label = None
104 |             rois_target = None
105 |             rois_inside_ws = None
106 |             rois_outside_ws = None
107 |             rpn_loss_cls = 0
108 |             rpn_loss_bbox = 0
109 | 
110 |         rois = Variable(rois)
111 |         base_feat = self.RCNN_conv_new(base_feat)
112 | 
113 |         # do roi pooling based on predicted rois
114 |         cls_feat = self.RCNN_cls_base(base_feat)
115 |         pooled_feat_cls = self.RCNN_psroi_pool_cls(cls_feat, rois.view(-1, 5))
116 |         cls_score = self.pooling(pooled_feat_cls)
117 |         cls_score = cls_score.squeeze()
118 | 
119 |         bbox_base = self.RCNN_bbox_base(base_feat)
120 |         pooled_feat_loc = self.RCNN_psroi_pool_loc(bbox_base, rois.view(-1, 5))
121 |         pooled_feat_loc = self.pooling(pooled_feat_loc)
122 |         bbox_pred = pooled_feat_loc.squeeze()
123 | 
124 |         if self.training and not self.class_agnostic:
125 |             # select the corresponding columns according to roi labels
126 |             bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
127 |             bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
128 |             bbox_pred = bbox_pred_select.squeeze(1)
129 | 
130 |         cls_prob = F.softmax(cls_score, dim=1)
131 | 
132 |         RCNN_loss_cls = 0
133 |         RCNN_loss_bbox = 0
134 | 
135 |         if self.training:
136 |             loss_func = self.ohem_detect_loss if cfg.TRAIN.OHEM else self.detect_loss
137 |             RCNN_loss_cls, RCNN_loss_bbox = loss_func(cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
138 | 
139 |         cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
140 |         bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
141 | 
142 |         return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
143 | 
144 |     def _init_weights(self):
145 |         def normal_init(m, mean, stddev, truncated=False):
146 |             """
147 |             weight initalizer: truncated normal and random normal.
148 |             """
149 |             # x is a parameter
150 |             if truncated:
151 |                 m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
152 |             else:
153 |                 m.weight.data.normal_(mean, stddev)
154 |                 if m.bias is not None:
155 |                     m.bias.data.zero_()
156 | 
157 |         normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
158 |         normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
159 |         normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
160 |         normal_init(self.RCNN_conv_1x1, 0, 0.01, cfg.TRAIN.TRUNCATED)
161 |         normal_init(self.RCNN_cls_base, 0, 0.01, cfg.TRAIN.TRUNCATED)
162 |         normal_init(self.RCNN_bbox_base, 0, 0.001, cfg.TRAIN.TRUNCATED)
163 | 
164 |     def create_architecture(self):
165 |         self._init_modules()
166 |         self._init_weights()
167 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_align import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
# CPU sources are always built; the CUDA wrapper and the precompiled kernel
# object are added below when a GPU is visible to torch.
sources = ['src/roi_align.c']
headers = ['src/roi_align.h']
extra_objects = []
#sources = []
#headers = []
defines = []
with_cuda = False

# Directory containing this script; used to build absolute paths for the
# extra object files.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/roi_align_cuda.c']
    headers += ['src/roi_align_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

    # The kernel object is not compiled here; it must already exist on disk
    # (make.sh produces it with nvcc before calling this script).
    extra_objects = ['src/roi_align_kernel.cu.o']
    extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

# Describe the extension; nothing is compiled until ffi.build() is called.
ffi = create_extension(
    '_ext.roi_align',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
39 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/functions/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_align
 4 | 
 5 | 
 6 | # TODO use save_for_backward instead
 7 | class RoIAlignFunction(Function):
 8 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 9 |         self.aligned_width = int(aligned_width)
10 |         self.aligned_height = int(aligned_height)
11 |         self.spatial_scale = float(spatial_scale)
12 |         self.rois = None
13 |         self.feature_size = None
14 | 
15 |     def forward(self, features, rois):
16 |         self.rois = rois
17 |         self.feature_size = features.size()
18 | 
19 |         batch_size, num_channels, data_height, data_width = features.size()
20 |         num_rois = rois.size(0)
21 | 
22 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
23 |         if features.is_cuda:
24 |             roi_align.roi_align_forward_cuda(self.aligned_height,
25 |                                              self.aligned_width,
26 |                                              self.spatial_scale, features,
27 |                                              rois, output)
28 |         else:
29 |             roi_align.roi_align_forward(self.aligned_height,
30 |                                         self.aligned_width,
31 |                                         self.spatial_scale, features,
32 |                                         rois, output)
33 | #            raise NotImplementedError
34 | 
35 |         return output
36 | 
37 |     def backward(self, grad_output):
38 |         assert(self.feature_size is not None and grad_output.is_cuda)
39 | 
40 |         batch_size, num_channels, data_height, data_width = self.feature_size
41 | 
42 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
43 |                                   data_width).zero_()
44 |         roi_align.roi_align_backward_cuda(self.aligned_height,
45 |                                           self.aligned_width,
46 |                                           self.spatial_scale, grad_output,
47 |                                           self.rois, grad_input)
48 | 
49 |         # print grad_input
50 | 
51 |         return grad_input, None
52 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/modules/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from torch.nn.functional import avg_pool2d, max_pool2d
 3 | from ..functions.roi_align import RoIAlignFunction
 4 | 
 5 | 
 6 | class RoIAlign(Module):
 7 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 8 |         super(RoIAlign, self).__init__()
 9 | 
10 |         self.aligned_width = int(aligned_width)
11 |         self.aligned_height = int(aligned_height)
12 |         self.spatial_scale = float(spatial_scale)
13 | 
14 |     def forward(self, features, rois):
15 |         return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 |                                 self.spatial_scale)(features, rois)
17 | 
class RoIAlignAvg(Module):
    """RoIAlign sampled on a grid one larger per side, then 2x2 average-pooled."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Sample an (H+1) x (W+1) grid; the stride-1 2x2 pooling brings it
        # back to H x W.
        align = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale
        )
        sampled = align(features, rois)
        return avg_pool2d(sampled, kernel_size=2, stride=1)
30 | 
class RoIAlignMax(Module):
    """RoIAlign sampled on a grid one larger per side, then 2x2 max-pooled."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Sample an (H+1) x (W+1) grid; the stride-1 2x2 pooling brings it
        # back to H x W.
        align = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale
        )
        sampled = align(features, rois)
        return max_pool2d(sampled, kernel_size=2, stride=1)
43 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | #include <omp.h>
  4 | 
  5 | 
/* Forward declarations of the CPU kernels defined at the bottom of this file,
 * so the TH wrappers below can call them. */
void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data);

/* NOTE: the last parameter is named top_data here but bottom_diff in the
 * definition; the caller passes the input-gradient buffer for it. */
void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data);
 15 | 
 16 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
 17 |                      THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
 18 | {
 19 |     //Grab the input tensor
 20 |     float * data_flat = THFloatTensor_data(features);
 21 |     float * rois_flat = THFloatTensor_data(rois);
 22 | 
 23 |     float * output_flat = THFloatTensor_data(output);
 24 | 
 25 |     // Number of ROIs
 26 |     int num_rois = THFloatTensor_size(rois, 0);
 27 |     int size_rois = THFloatTensor_size(rois, 1);
 28 |     if (size_rois != 5)
 29 |     {
 30 |         return 0;
 31 |     }
 32 | 
 33 |     // data height
 34 |     int data_height = THFloatTensor_size(features, 2);
 35 |     // data width
 36 |     int data_width = THFloatTensor_size(features, 3);
 37 |     // Number of channels
 38 |     int num_channels = THFloatTensor_size(features, 1);
 39 | 
 40 |     // do ROIAlignForward
 41 |     ROIAlignForwardCpu(data_flat, spatial_scale, num_rois, data_height, data_width, num_channels,
 42 |             aligned_height, aligned_width, rois_flat, output_flat);
 43 | 
 44 |     return 1;
 45 | }
 46 | 
 47 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
 48 |                        THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad)
 49 | {
 50 |     //Grab the input tensor
 51 |     float * top_grad_flat = THFloatTensor_data(top_grad);
 52 |     float * rois_flat = THFloatTensor_data(rois);
 53 | 
 54 |     float * bottom_grad_flat = THFloatTensor_data(bottom_grad);
 55 | 
 56 |     // Number of ROIs
 57 |     int num_rois = THFloatTensor_size(rois, 0);
 58 |     int size_rois = THFloatTensor_size(rois, 1);
 59 |     if (size_rois != 5)
 60 |     {
 61 |         return 0;
 62 |     }
 63 | 
 64 |     // batch size
 65 |     int batch_size = THFloatTensor_size(bottom_grad, 0);
 66 |     // data height
 67 |     int data_height = THFloatTensor_size(bottom_grad, 2);
 68 |     // data width
 69 |     int data_width = THFloatTensor_size(bottom_grad, 3);
 70 |     // Number of channels
 71 |     int num_channels = THFloatTensor_size(bottom_grad, 1);
 72 | 
 73 |     // do ROIAlignBackward
 74 |     ROIAlignBackwardCpu(top_grad_flat, spatial_scale, num_rois, data_height,
 75 |             data_width, num_channels, aligned_height, aligned_width, rois_flat, bottom_grad_flat);
 76 | 
 77 |     return 1;
 78 | }
 79 | 
/*
 * CPU RoIAlign forward kernel.
 *
 * For every output element (n, c, ph, pw) it computes one sample point inside
 * ROI n and writes the bilinear interpolation of the four surrounding feature
 * values to top_data. Sample points outside the feature map produce 0.
 * bottom_rois is read as num_rois rows of 5 floats:
 * (batch index, start_w, start_h, end_w, end_h), scaled by spatial_scale.
 */
void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data)
{
    const int output_size = num_rois * aligned_height * aligned_width * channels;

    // Each iteration writes only top_data[idx], so iterations are independent.
    #pragma omp parallel for 
    for (int idx = 0; idx < output_size; ++idx)
    {
        // (n, c, ph, pw) is an element in the aligned output
        int pw = idx % aligned_width;
        int ph = (idx / aligned_width) % aligned_height;
        int c = (idx / aligned_width / aligned_height) % channels;
        int n = idx / aligned_width / aligned_height / channels;

        float roi_batch_ind = bottom_rois[n * 5 + 0];
        float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
        float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
        float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
        float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

        // Clamp negative extents (malformed ROIs) to zero
        float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
        float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
        // NOTE(review): the divisor is 0 when aligned_height or aligned_width is 1.
        float bin_size_h = roi_height / (aligned_height - 1.);
        float bin_size_w = roi_width / (aligned_width - 1.);

        // Sample position in feature-map coordinates
        float h = (float)(ph) * bin_size_h + roi_start_h;
        float w = (float)(pw) * bin_size_w + roi_start_w;

        // Clamp the top-left neighbour so that hstart + 1 and wstart + 1 stay
        // in range. NOTE(review): this goes negative when height or width < 2.
        int hstart = fminf(floor(h), height - 2);
        int wstart = fminf(floor(w), width - 2);

        // Offset of image roi_batch_ind within bottom_data
        int img_start = roi_batch_ind * channels * height * width;

        // bilinear interpolation
        if (h < 0 || h >= height || w < 0 || w >= width)
        {
            top_data[idx] = 0.;
        }
        else
        {
            float h_ratio = h - (float)(hstart);
            float w_ratio = w - (float)(wstart);
            int upleft = img_start + (c * height + hstart) * width + wstart;
            int upright = upleft + 1;
            int downleft = upleft + width;
            int downright = downleft + 1;

            top_data[idx] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                + bottom_data[upright] * (1. - h_ratio) * w_ratio
                + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                + bottom_data[downright] * h_ratio * w_ratio;
        }
    }
}
137 | 
138 | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
139 |                      const int height, const int width, const int channels,
140 |                      const int aligned_height, const int aligned_width, const float * bottom_rois,
141 |                      float* bottom_diff)
142 | {
143 |     const int output_size = num_rois * aligned_height * aligned_width * channels;
144 | 
145 |     #pragma omp parallel for 
146 |     for (int idx = 0; idx < output_size; ++idx)
147 |     {
148 |         // (n, c, ph, pw) is an element in the aligned output
149 |         int pw = idx % aligned_width;
150 |         int ph = (idx / aligned_width) % aligned_height;
151 |         int c = (idx / aligned_width / aligned_height) % channels;
152 |         int n = idx / aligned_width / aligned_height / channels;
153 | 
154 |         float roi_batch_ind = bottom_rois[n * 5 + 0];
155 |         float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
156 |         float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
157 |         float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
158 |         float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
159 | 
160 |         // Force malformed ROI to be 1x1
161 |         float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
162 |         float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
163 |         float bin_size_h = roi_height / (aligned_height - 1.);
164 |         float bin_size_w = roi_width / (aligned_width - 1.);
165 | 
166 |         float h = (float)(ph) * bin_size_h + roi_start_h;
167 |         float w = (float)(pw) * bin_size_w + roi_start_w;
168 | 
169 |         int hstart = fminf(floor(h), height - 2);
170 |         int wstart = fminf(floor(w), width - 2);
171 | 
172 |         int img_start = roi_batch_ind * channels * height * width;
173 | 
174 |         // bilinear interpolation
175 |         if (h < 0 || h >= height || w < 0 || w >= width)
176 |         {
177 |             float h_ratio = h - (float)(hstart);
178 |             float w_ratio = w - (float)(wstart);
179 |             int upleft = img_start + (c * height + hstart) * width + wstart;
180 |             int upright = upleft + 1;
181 |             int downleft = upleft + width;
182 |             int downright = downleft + 1;
183 | 
184 |             bottom_diff[upleft] += top_diff[idx] * (1. - h_ratio) * (1. - w_ratio);
185 |             bottom_diff[upright] += top_diff[idx] * (1. - h_ratio) *  w_ratio;
186 |             bottom_diff[downleft] += top_diff[idx] * h_ratio * (1. - w_ratio);
187 |             bottom_diff[downright] += top_diff[idx] * h_ratio * w_ratio;
188 |         }
189 |     }
190 | }
191 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.h:
--------------------------------------------------------------------------------
/*
 * Float (THFloatTensor) entry points for RoIAlign.
 *
 * Both take the pooled output size (aligned_height x aligned_width), the
 * factor applied to ROI coordinates (spatial_scale), and the ROI tensor.
 * NOTE(review): the definitions are not in this header; the return-value
 * convention presumably matches the CUDA variants (1 on success, 0 when the
 * ROI tensor is not N x 5) -- confirm against roi_align.c.
 */

/* Forward pass: reads `features` and `rois`, writes `output`. */
int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
                      THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);

/* Backward pass: reads `top_grad` and `rois`, accumulates into `bottom_grad`. */
int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
                      THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad);
6 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_align_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 9 | {
10 |     // Grab the input tensor
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 | 
16 |     // Number of ROIs
17 |     int num_rois = THCudaTensor_size(state, rois, 0);
18 |     int size_rois = THCudaTensor_size(state, rois, 1);
19 |     if (size_rois != 5)
20 |     {
21 |         return 0;
22 |     }
23 | 
24 |     // data height
25 |     int data_height = THCudaTensor_size(state, features, 2);
26 |     // data width
27 |     int data_width = THCudaTensor_size(state, features, 3);
28 |     // Number of channels
29 |     int num_channels = THCudaTensor_size(state, features, 1);
30 | 
31 |     cudaStream_t stream = THCState_getCurrentStream(state);
32 | 
33 |     ROIAlignForwardLaucher(
34 |         data_flat, spatial_scale, num_rois, data_height,
35 |         data_width, num_channels, aligned_height,
36 |         aligned_width, rois_flat,
37 |         output_flat, stream);
38 | 
39 |     return 1;
40 | }
41 | 
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 |     // Grab the input tensor
46 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 |     float * rois_flat = THCudaTensor_data(state, rois);
48 | 
49 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 | 
51 |     // Number of ROIs
52 |     int num_rois = THCudaTensor_size(state, rois, 0);
53 |     int size_rois = THCudaTensor_size(state, rois, 1);
54 |     if (size_rois != 5)
55 |     {
56 |         return 0;
57 |     }
58 | 
59 |     // batch size
60 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 |     // data height
62 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 |     // data width
64 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 |     // Number of channels
66 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 | 
68 |     cudaStream_t stream = THCState_getCurrentStream(state);
69 |     ROIAlignBackwardLaucher(
70 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 |         data_width, num_channels, aligned_height,
72 |         aligned_width, rois_flat,
73 |         bottom_grad_flat, stream);
74 | 
75 |     return 1;
76 | }
77 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
/*
 * CUDA (THCudaTensor) entry points for RoIAlign, defined in roi_align_cuda.c.
 *
 * aligned_height / aligned_width: size of the pooled output per ROI.
 * spatial_scale: factor applied to the ROI coordinates.
 * rois: must have exactly 5 columns per row.
 *
 * Both functions return 1 after launching the kernel and 0 when `rois`
 * does not have 5 columns.
 */

/* Forward pass: reads `features` and `rois`, writes `output`. */
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);

/* Backward pass: reads `top_grad` and `rois`, accumulates into `bottom_grad`. */
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
  1 | #ifdef __cplusplus
  2 | extern "C" {
  3 | #endif
  4 | 
  5 | #include <stdio.h>
  6 | #include <math.h>
  7 | #include <float.h>
  8 | #include "roi_align_kernel.h"
  9 | 
 10 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 11 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 12 |             i += blockDim.x * gridDim.x)
 13 | 
 14 | 
    // RoIAlign forward kernel.
    //
    // Each loop iteration produces one element of the pooled output, which is
    // indexed as (n, c, ph, pw) with pw fastest: n is the ROI, c the channel,
    // and (ph, pw) the position inside the aligned_height x aligned_width grid.
    // The value is a bilinear interpolation of the four feature-map cells
    // around the sample point, or 0 when the sample point lies outside the map.
    //
    // bottom_data is indexed as (batch, channels, height, width);
    // bottom_rois holds 5 floats per ROI: batch index, then start_w, start_h,
    // end_w, end_h, all multiplied by spatial_scale before use.
    __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
                                    const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
        CUDA_1D_KERNEL_LOOP(index, nthreads) {
            // (n, c, ph, pw) is an element in the aligned output
            int pw = index % aligned_width;
            int ph = (index / aligned_width) % aligned_height;
            int c  = (index / aligned_width / aligned_height) % channels;
            int n  = index / aligned_width / aligned_height / channels;

            // ROI n occupies bottom_rois[n * 5 .. n * 5 + 4]
            float roi_batch_ind = bottom_rois[n * 5 + 0];
            float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
            float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
            float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
            float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

            // Clamp malformed ROIs (end < start - 1) to zero extent.
            float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
            float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
            // Spacing between sample points: the first sample sits on roi_start
            // and the last on roi_start + extent.
            // NOTE(review): aligned_height == 1 or aligned_width == 1 divides by zero here.
            float bin_size_h = roi_height / (aligned_height - 1.);
            float bin_size_w = roi_width / (aligned_width - 1.);

            // Sample point for this output element, in feature-map coordinates
            float h = (float)(ph) * bin_size_h + roi_start_h;
            float w = (float)(pw) * bin_size_w + roi_start_w;

            // Top-left cell of the 2x2 neighbourhood, capped so that hstart + 1
            // and wstart + 1 stay inside the map.
            // NOTE(review): assumes height >= 2 and width >= 2.
            int hstart = fminf(floor(h), height - 2);
            int wstart = fminf(floor(w), width - 2);

            // Offset of the first element of this ROI's image in bottom_data
            int img_start = roi_batch_ind * channels * height * width;

            // bilinear interpolation
            if (h < 0 || h >= height || w < 0 || w >= width) {
                // Sample point outside the feature map contributes nothing
                top_data[index] = 0.;
            } else {
                float h_ratio = h - (float)(hstart);
                float w_ratio = w - (float)(wstart);
                int upleft = img_start + (c * height + hstart) * width + wstart;
                int upright = upleft + 1;
                int downleft = upleft + width;
                int downright = downleft + 1;

                // The `1.` literals are doubles, so the weighted sum is evaluated
                // in double precision and narrowed to float on the store.
                top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                    + bottom_data[upright] * (1. - h_ratio) * w_ratio
                    + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                    + bottom_data[downright] * h_ratio * w_ratio;
            }
        }
    }
 71 | 
 72 | 
 73 |     int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
 74 |                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
 75 |         const int kThreadsPerBlock = 1024;
 76 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
 77 |         cudaError_t err;
 78 | 
 79 | 
 80 |         ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
 81 |           output_size, bottom_data, spatial_scale, height, width, channels,
 82 |           aligned_height, aligned_width, bottom_rois, top_data);
 83 | 
 84 |         err = cudaGetLastError();
 85 |         if(cudaSuccess != err) {
 86 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
 87 |             exit( -1 );
 88 |         }
 89 | 
 90 |         return 1;
 91 |     }
 92 | 
 93 | 
    // RoIAlign backward kernel.
    //
    // Each loop iteration takes one element of the pooled-output gradient,
    // indexed as (n, c, ph, pw) with pw fastest, recomputes the sample point
    // used by the forward pass, and scatters the gradient onto the four
    // surrounding feature-map cells with the bilinear weights. Elements whose
    // sample point lies outside the feature map contribute nothing.
    //
    // bottom_diff is indexed as (batch, channels, height, width) and is only
    // added to, never cleared, by this kernel.
    // bottom_rois holds 5 floats per ROI: batch index, then start_w, start_h,
    // end_w, end_h, all multiplied by spatial_scale before use.
    __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
                                     const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
        CUDA_1D_KERNEL_LOOP(index, nthreads) {

            // (n, c, ph, pw) is an element in the aligned output
            int pw = index % aligned_width;
            int ph = (index / aligned_width) % aligned_height;
            int c  = (index / aligned_width / aligned_height) % channels;
            int n  = index / aligned_width / aligned_height / channels;

            // ROI n occupies bottom_rois[n * 5 .. n * 5 + 4]; coordinates are
            // kept as floats (no rounding).
            float roi_batch_ind = bottom_rois[n * 5 + 0];
            float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
            float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
            float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
            float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

            // Clamp malformed ROIs (end < start - 1) to zero extent.
            float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
            float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
            // Spacing between sample points: the first sample sits on roi_start
            // and the last on roi_start + extent.
            // NOTE(review): aligned_height == 1 or aligned_width == 1 divides by zero here.
            float bin_size_h = roi_height / (aligned_height - 1.);
            float bin_size_w = roi_width / (aligned_width - 1.);

            // Sample point for this output element, in feature-map coordinates
            float h = (float)(ph) * bin_size_h + roi_start_h;
            float w = (float)(pw) * bin_size_w + roi_start_w;

            // Top-left cell of the 2x2 neighbourhood, capped so that hstart + 1
            // and wstart + 1 stay inside the map.
            // NOTE(review): assumes height >= 2 and width >= 2.
            int hstart = fminf(floor(h), height - 2);
            int wstart = fminf(floor(w), width - 2);

            // Offset of the first element of this ROI's image in bottom_diff
            int img_start = roi_batch_ind * channels * height * width;

            // bilinear interpolation
            if (!(h < 0 || h >= height || w < 0 || w >= width)) {
                float h_ratio = h - (float)(hstart);
                float w_ratio = w - (float)(wstart);
                int upleft = img_start + (c * height + hstart) * width + wstart;
                int upright = upleft + 1;
                int downleft = upleft + width;
                int downright = downleft + 1;

                // atomicAdd because several output elements can map onto the
                // same feature-map cell.
                atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
                atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
                atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
                atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
            }
        }
    }
144 | 
145 |     int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
146 |                                 const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
147 |         const int kThreadsPerBlock = 1024;
148 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
149 |         cudaError_t err;
150 | 
151 |         ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
152 |           output_size, top_diff, spatial_scale, height, width, channels,
153 |           aligned_height, aligned_width, bottom_diff, bottom_rois);
154 | 
155 |         err = cudaGetLastError();
156 |         if(cudaSuccess != err) {
157 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
158 |             exit( -1 );
159 |         }
160 | 
161 |         return 1;
162 |     }
163 | 
164 | 
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_ALIGN_KERNEL
 2 | #define _ROI_ALIGN_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
/*
 * Forward kernel: one loop iteration per element of the pooled output.
 *   nthreads      total number of output elements to process
 *   bottom_data   input features, indexed as (batch, channels, height, width)
 *   spatial_scale factor applied to the ROI coordinates
 *   bottom_rois   5 floats per ROI: batch index, start_w, start_h, end_w, end_h
 *   top_data      pooled output, aligned_height x aligned_width per ROI and channel
 */
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

/*
 * Host-side launcher for ROIAlignForward on `stream`.
 * Returns 1; a CUDA launch error is reported on stderr and exits the process.
 */
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

/*
 * Backward kernel: one loop iteration per element of the pooled-output
 * gradient `top_diff`; contributions are added atomically into `bottom_diff`.
 * Note that `bottom_diff` precedes `bottom_rois` here, unlike the launcher.
 */
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

/*
 * Host-side launcher for ROIAlignBackward on `stream`.
 * `batch_size` is part of the signature but is not used by the launch.
 * Returns 1; a CUDA launch error is reported on stderr and exits the process.
 */
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);
28 | 
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 | 
33 | #endif
34 | 
35 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._crop_resize import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_crop import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | #this_file = os.path.dirname(__file__)
 7 | 
 8 | sources = ['src/roi_crop.c']
 9 | headers = ['src/roi_crop.h']
10 | defines = []
11 | with_cuda = False
12 | 
13 | if torch.cuda.is_available():
14 |     print('Including CUDA code.')
15 |     sources += ['src/roi_crop_cuda.c']
16 |     headers += ['src/roi_crop_cuda.h']
17 |     defines += [('WITH_CUDA', None)]
18 |     with_cuda = True
19 | 
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | 
25 | ffi = create_extension(
26 |     '_ext.roi_crop',
27 |     headers=headers,
28 |     sources=sources,
29 |     define_macros=defines,
30 |     relative_to=__file__,
31 |     with_cuda=with_cuda,
32 |     extra_objects=extra_objects
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/functions/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/crop_resize.py:
--------------------------------------------------------------------------------
# functions/crop_resize.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | from .._ext import roi_crop
 5 | from cffi import FFI
 6 | ffi = FFI()
 7 | 
 8 | class RoICropFunction(Function):
 9 |     def forward(self, input1, input2):
10 |         self.input1 = input1
11 |         self.input2 = input2
12 |         self.device_c = ffi.new("int *")
13 |         output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2])
14 |         #print('decice %d' % torch.cuda.current_device())
15 |         if input1.is_cuda:
16 |             self.device = torch.cuda.current_device()
17 |         else:
18 |             self.device = -1
19 |         self.device_c[0] = self.device
20 |         if not input1.is_cuda:
21 |             roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
22 |         else:
23 |             output = output.cuda(self.device)
24 |             roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
25 |         return output
26 | 
27 |     def backward(self, grad_output):
28 |         grad_input1 = torch.zeros(self.input1.size())
29 |         grad_input2 = torch.zeros(self.input2.size())
30 |         #print('backward decice %d' % self.device)
31 |         if not grad_output.is_cuda:
32 |             roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output)
33 |         else:
34 |             grad_input1 = grad_input1.cuda(self.device)
35 |             grad_input2 = grad_input2.cuda(self.device)
36 |             roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
37 |         return grad_input1, grad_input2
38 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/gridgen.py:
--------------------------------------------------------------------------------
# functions/gridgen.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | import numpy as np
 5 | 
 6 | 
 7 | class AffineGridGenFunction(Function):
 8 |     def __init__(self, height, width,lr=1):
 9 |         super(AffineGridGenFunction, self).__init__()
10 |         self.lr = lr
11 |         self.height, self.width = height, width
12 |         self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32)
13 |         self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0)
14 |         self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0)
15 |         # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0)
16 |         # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0)
17 |         self.grid[:,:,2] = np.ones([self.height, width])
18 |         self.grid = torch.from_numpy(self.grid.astype(np.float32))
19 |         #print(self.grid)
20 | 
21 |     def forward(self, input1):
22 |         self.input1 = input1
23 |         output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
24 |         self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
25 |         for i in range(input1.size(0)):
26 |             self.batchgrid[i] = self.grid.astype(self.batchgrid[i])
27 | 
28 |         # if input1.is_cuda:
29 |         #    self.batchgrid = self.batchgrid.cuda()
30 |         #    output = output.cuda()
31 | 
32 |         for i in range(input1.size(0)):
33 |             output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)
34 | 
35 |         return output
36 | 
37 |     def backward(self, grad_output):
38 | 
39 |         grad_input1 = self.input1.new(self.input1.size()).zero_()
40 | 
41 |         # if grad_output.is_cuda:
42 |         #    self.batchgrid = self.batchgrid.cuda()
43 |         #    grad_input1 = grad_input1.cuda()
44 | 
45 |         grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3))
46 |         return grad_input1
47 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/roi_crop.py:
--------------------------------------------------------------------------------
# functions/roi_crop.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | from .._ext import roi_crop
 5 | import pdb
 6 | 
 7 | class RoICropFunction(Function):
 8 |     def forward(self, input1, input2):
 9 |         self.input1 = input1.clone()
10 |         self.input2 = input2.clone()
11 |         output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_()
12 |         assert output.get_device() == input1.get_device(), "output and input1 must on the same device"
13 |         assert output.get_device() == input2.get_device(), "output and input2 must on the same device"
14 |         roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
15 |         return output
16 | 
17 |     def backward(self, grad_output):
18 |         grad_input1 = self.input1.new(self.input1.size()).zero_()
19 |         grad_input2 = self.input2.new(self.input2.size()).zero_()
20 |         roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
21 |         return grad_input1, grad_input2
22 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/modules/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/roi_crop.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_crop import RoICropFunction
3 | 
class _RoICrop(Module):
    """Module wrapper that applies a fresh ``RoICropFunction`` on each call."""

    def __init__(self, layout = 'BHWD'):
        # `layout` is accepted but neither stored nor used.
        super(_RoICrop, self).__init__()

    def forward(self, input1, input2):
        """Return ``RoICropFunction()(input1, input2)``."""
        crop_fn = RoICropFunction()
        return crop_fn(input1, input2)
9 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop.h:
--------------------------------------------------------------------------------
/*
 * Float (THFloatTensor) bilinear sampler entry points.
 *
 * Each updateOutput function reads `inputImages` and `grids` and writes
 * `output`; each updateGradInput function additionally takes `gradOutput`
 * and writes `gradInputImages` and `gradGrids`.
 * NOTE(review): the BHWD / BCHW suffixes presumably name the memory layout
 * each variant expects -- confirm against the definitions in roi_crop.c.
 */

/* Forward pass, BHWD variant. */
int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);

/* Backward pass, BHWD variant. */
int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput);



/* Forward pass, BCHW variant. */
int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);

/* Backward pass, BCHW variant. */
int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput);
12 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | #include <stdbool.h>
  3 | #include <stdio.h>
  4 | #include "roi_crop_cuda_kernel.h"
  5 | 
  6 | #define real float
  7 | 
  8 | // this symbol will be resolved automatically from PyTorch libs
  9 | extern THCState *state;
 10 | 
 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
 12 | // we assume BHWD format in inputImages
 13 | // we assume BHW(YX) format on grids
 14 | 
/*
 * CUDA forward pass of the bilinear sampler.
 *
 * Gathers sizes, strides and data pointers of the three tensors and passes
 * them, with the current THC stream, to
 * BilinearSamplerBHWD_updateOutput_cuda_kernel.
 *
 * Returns 1. If the kernel wrapper reports failure (returns 0), THError is
 * raised with the message "aborting".
 */
int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){
  // (Leftover Lua/cutorch argument-unpacking code was removed from here; the
  // tensors now arrive directly as parameters.)

  int success = 0;
  // Argument order matters. Output sizes are passed as dims 1, 3, 2, 0 and
  // input sizes as dims 1, 2, 3, 0. Strides for inputImages and output are
  // passed in natural order 0, 1, 2, 3, while grid strides are passed as
  // dims 0, 3, 1, 2.
  // NOTE(review): the output size order (1, 3, 2, 0) differs from the input
  // size order (1, 2, 3, 0) -- confirm against the kernel wrapper's parameters.
  success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1],
                                               output->size[3],
                                               output->size[2],
                                               output->size[0],
                                               THCudaTensor_size(state, inputImages, 1),
                                               THCudaTensor_size(state, inputImages, 2),
                                               THCudaTensor_size(state, inputImages, 3),
                                               THCudaTensor_size(state, inputImages, 0),
                                               THCudaTensor_data(state, inputImages),
                                               THCudaTensor_stride(state, inputImages, 0),
                                               THCudaTensor_stride(state, inputImages, 1),
                                               THCudaTensor_stride(state, inputImages, 2),
                                               THCudaTensor_stride(state, inputImages, 3),
                                               THCudaTensor_data(state, grids),
                                               THCudaTensor_stride(state, grids, 0),
                                               THCudaTensor_stride(state, grids, 3),
                                               THCudaTensor_stride(state, grids, 1),
                                               THCudaTensor_stride(state, grids, 2),
                                               THCudaTensor_data(state, output),
                                               THCudaTensor_stride(state, output, 0),
                                               THCudaTensor_stride(state, output, 1),
                                               THCudaTensor_stride(state, output, 2),
                                               THCudaTensor_stride(state, output, 3),
                                               THCState_getCurrentStream(state));

  //check for errors
  if (!success) {
    THError("aborting");
  }
  return 1;
}
 53 | 
/*
 * Backward pass of the bilinear sampler.
 *
 * Thin C wrapper: unpacks sizes, strides and data pointers of the tensors and
 * forwards them, positionally, to
 * BilinearSamplerBHWD_updateGradInput_cuda_kernel on the current THC stream.
 * The trailing comment on each argument names the launcher parameter it binds
 * to (see roi_crop_cuda_kernel.h); the order must not be changed.
 *
 * NOTE(review): the dimension order used here (sizes 1,3,2,0 for gradOutput,
 * 1,2,3,0 for inputImages, grid strides 0,3,1,2) looks like B x C x H x W
 * image tensors and B x H x W x 2 grids, which disagrees with the "BHWD"
 * wording in the headers -- confirm against the kernel.
 *
 * `state` is not declared in this function; presumably it is the file-level
 * THCState pointer -- TODO confirm.
 *
 * Returns 1. If the launcher returns 0, THError("aborting") is raised instead.
 */
int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
                                        THCudaTensor *gradGrids, THCudaTensor *gradOutput)
{
  // (The original Lua binding fetched these five tensors from the Lua stack;
  //  here they arrive directly as function arguments.)

  int success = 0;
  success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1],   // -> goc
                                                  gradOutput->size[3],   // -> gow
                                                  gradOutput->size[2],   // -> goh
                                                  gradOutput->size[0],   // -> gob
                                                  THCudaTensor_size(state, inputImages, 1),   // -> ic
                                                  THCudaTensor_size(state, inputImages, 2),   // -> ih
                                                  THCudaTensor_size(state, inputImages, 3),   // -> iw
                                                  THCudaTensor_size(state, inputImages, 0),   // -> ib
                                                  THCudaTensor_data(state, inputImages),      // -> inputImages
                                                  THCudaTensor_stride(state, inputImages, 0), // -> isb
                                                  THCudaTensor_stride(state, inputImages, 1), // -> isc
                                                  THCudaTensor_stride(state, inputImages, 2), // -> ish
                                                  THCudaTensor_stride(state, inputImages, 3), // -> isw
                                                  THCudaTensor_data(state, grids),            // -> grids
                                                  THCudaTensor_stride(state, grids, 0),       // -> gsb
                                                  THCudaTensor_stride(state, grids, 3),       // -> gsc (last dim of grids)
                                                  THCudaTensor_stride(state, grids, 1),       // -> gsh
                                                  THCudaTensor_stride(state, grids, 2),       // -> gsw
                                                  THCudaTensor_data(state, gradInputImages),      // -> gradInputImages
                                                  THCudaTensor_stride(state, gradInputImages, 0), // -> gisb
                                                  THCudaTensor_stride(state, gradInputImages, 1), // -> gisc
                                                  THCudaTensor_stride(state, gradInputImages, 2), // -> gish
                                                  THCudaTensor_stride(state, gradInputImages, 3), // -> gisw
                                                  THCudaTensor_data(state, gradGrids),            // -> gradGrids
                                                  THCudaTensor_stride(state, gradGrids, 0),       // -> ggsb
                                                  THCudaTensor_stride(state, gradGrids, 3),       // -> ggsc (last dim of gradGrids)
                                                  THCudaTensor_stride(state, gradGrids, 1),       // -> ggsh
                                                  THCudaTensor_stride(state, gradGrids, 2),       // -> ggsw
                                                  THCudaTensor_data(state, gradOutput),           // -> gradOutput
                                                  THCudaTensor_stride(state, gradOutput, 0),      // -> gosb
                                                  THCudaTensor_stride(state, gradOutput, 1),      // -> gosc
                                                  THCudaTensor_stride(state, gradOutput, 2),      // -> gosh
                                                  THCudaTensor_stride(state, gradOutput, 3),      // -> gosw
                                                  THCState_getCurrentStream(state));              // -> stream

  // The launcher signals failure by returning 0; surface it as a Torch error.
  if (!success) {
    THError("aborting");
  }
  return 1;
}
106 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.h:
--------------------------------------------------------------------------------
// Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
// we assume BHWD format in inputImages
// we assume BHW(YX) format on grids
// NOTE(review): the wrappers in roi_crop_cuda.c pass the image sizes/strides
// in an order that looks like B x C x H x W rather than BHWD -- confirm which
// layout the kernels actually expect before relying on the lines above.

// Forward pass: samples `inputImages` at the locations given by `grids` and
// writes the result into the caller-allocated `output`.
// Returns an int status (the gradInput wrapper returns 1 on success and
// raises a Torch error on failure; presumably this one does the same).
int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output);

// Backward pass: given `gradOutput`, fills the caller-allocated
// `gradInputImages` and `gradGrids`.
// Returns 1 on success; raises THError("aborting") if the kernel launcher
// reports failure.
int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
                                        THCudaTensor *gradGrids, THCudaTensor *gradOutput);
9 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.h:
--------------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Host-side launchers for the bilinear sampler CUDA kernels. They are declared
 * with C linkage so the C wrappers in roi_crop_cuda.c can call them.
 * Both return an int status; the C wrappers treat 0 as failure.
 *
 * Naming: o* = output, i* = inputImages, g* = grids, gi* = gradInputImages,
 * gg* = gradGrids, go* = gradOutput; the suffixes b/c/h/w name a dimension and
 * a leading "s" after the tensor prefix marks a stride (e.g. isb).
 */

/*
 * NOTE(review): the inline comments below name the tensor dimension each
 * parameter is supposed to receive. The matching comments on the gradInput
 * launcher were out of date with respect to its caller, so these may be stale
 * as well -- verify against BilinearSamplerBHWD_updateOutput_cuda.
 */
int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc,
                                                 /*output->size[2]*/int ow,
                                                 /*output->size[1]*/int oh,
                                                 /*output->size[0]*/int ob,
                                                 /*THCudaTensor_size(state, inputImages, 3)*/int ic,
                                                 /*THCudaTensor_size(state, inputImages, 1)*/int ih,
                                                 /*THCudaTensor_size(state, inputImages, 2)*/int iw,
                                                 /*THCudaTensor_size(state, inputImages, 0)*/int ib,
                                                 /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
                                                 /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
                                                 /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw,
                                                 /*THCState_getCurrentStream(state)*/cudaStream_t stream);

/*
 * The inline comments below record what BilinearSamplerBHWD_updateGradInput_cuda
 * (roi_crop_cuda.c) actually passes for each parameter.
 */
int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[1]*/int goc,
                                                    /*gradOutput->size[3]*/int gow,
                                                    /*gradOutput->size[2]*/int goh,
                                                    /*gradOutput->size[0]*/int gob,
                                                    /*THCudaTensor_size(state, inputImages, 1)*/int ic,
                                                    /*THCudaTensor_size(state, inputImages, 2)*/int ih,
                                                    /*THCudaTensor_size(state, inputImages, 3)*/int iw,
                                                    /*THCudaTensor_size(state, inputImages, 0)*/int ib,
                                                    /*inputImages data; strides of dims 0,1,2,3*/float *inputImages, int isb, int isc, int ish, int isw,
                                                    /*grids data; strides of dims 0,3,1,2*/float *grids, int gsb, int gsc, int gsh, int gsw,
                                                    /*gradInputImages data; strides of dims 0,1,2,3*/float *gradInputImages, int gisb, int gisc, int gish, int gisw,
                                                    /*gradGrids data; strides of dims 0,3,1,2*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw,
                                                    /*gradOutput data; strides of dims 0,1,2,3*/float *gradOutput, int gosb, int gosc, int gosh, int gosw,
                                                    /*THCState_getCurrentStream(state)*/cudaStream_t stream);


#ifdef __cplusplus
}
#endif
38 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | 
 7 | sources = ['src/roi_pooling.c']
 8 | headers = ['src/roi_pooling.h']
 9 | extra_objects = []
10 | defines = []
11 | with_cuda = False
12 | 
13 | this_file = os.path.dirname(os.path.realpath(__file__))
14 | print(this_file)
15 | 
16 | if torch.cuda.is_available():
17 |     print('Including CUDA code.')
18 |     sources += ['src/roi_pooling_cuda.c']
19 |     headers += ['src/roi_pooling_cuda.h']
20 |     defines += [('WITH_CUDA', None)]
21 |     with_cuda = True
22 |     extra_objects = ['src/roi_pooling.cu.o']
23 |     extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | 
25 | ffi = create_extension(
26 |     '_ext.roi_pooling',
27 |     headers=headers,
28 |     sources=sources,
29 |     define_macros=defines,
30 |     relative_to=__file__,
31 |     with_cuda=with_cuda,
32 |     extra_objects=extra_objects
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/functions/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_pooling
 4 | import pdb
 5 | 
 6 | class RoIPoolFunction(Function):
 7 |     def __init__(ctx, pooled_height, pooled_width, spatial_scale):
 8 |         ctx.pooled_width = pooled_width
 9 |         ctx.pooled_height = pooled_height
10 |         ctx.spatial_scale = spatial_scale
11 |         ctx.feature_size = None
12 | 
13 |     def forward(ctx, features, rois): 
14 |         ctx.feature_size = features.size()           
15 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
16 |         num_rois = rois.size(0)
17 |         output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 |         ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 |         ctx.rois = rois
20 |         if not features.is_cuda:
21 |             _features = features.permute(0, 2, 3, 1)
22 |             roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 |                                             _features, rois, output)
24 |         else:
25 |             roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
26 |                                                  features, rois, output, ctx.argmax)
27 | 
28 |         return output
29 | 
30 |     def backward(ctx, grad_output):
31 |         assert(ctx.feature_size is not None and grad_output.is_cuda)
32 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
33 |         grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
34 | 
35 |         roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
36 |                                               grad_output, ctx.rois, grad_input, ctx.argmax)
37 | 
38 |         return grad_input, None
39 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/modules/__init__.py


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.roi_pool import RoIPoolFunction
 3 | 
 4 | 
 5 | class _RoIPooling(Module):
 6 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 7 |         super(_RoIPooling, self).__init__()
 8 | 
 9 |         self.pooled_width = int(pooled_width)
10 |         self.pooled_height = int(pooled_height)
11 |         self.spatial_scale = float(spatial_scale)
12 | 
13 |     def forward(self, features, rois):
14 |         return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
15 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | 
  4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
  5 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
  6 | {
  7 |     // Grab the input tensor
  8 |     float * data_flat = THFloatTensor_data(features);
  9 |     float * rois_flat = THFloatTensor_data(rois);
 10 | 
 11 |     float * output_flat = THFloatTensor_data(output);
 12 | 
 13 |     // Number of ROIs
 14 |     int num_rois = THFloatTensor_size(rois, 0);
 15 |     int size_rois = THFloatTensor_size(rois, 1);
 16 |     // batch size
 17 |     int batch_size = THFloatTensor_size(features, 0);
 18 |     if(batch_size != 1)
 19 |     {
 20 |         return 0;
 21 |     }
 22 |     // data height
 23 |     int data_height = THFloatTensor_size(features, 1);
 24 |     // data width
 25 |     int data_width = THFloatTensor_size(features, 2);
 26 |     // Number of channels
 27 |     int num_channels = THFloatTensor_size(features, 3);
 28 | 
 29 |     // Set all element of the output tensor to -inf.
 30 |     THFloatStorage_fill(THFloatTensor_storage(output), -1);
 31 | 
 32 |     // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
 33 |     int index_roi = 0;
 34 |     int index_output = 0;
 35 |     int n;
 36 |     for (n = 0; n < num_rois; ++n)
 37 |     {
 38 |         int roi_batch_ind = rois_flat[index_roi + 0];
 39 |         int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
 40 |         int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
 41 |         int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
 42 |         int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
 43 |         //      CHECK_GE(roi_batch_ind, 0);
 44 |         //      CHECK_LT(roi_batch_ind, batch_size);
 45 | 
 46 |         int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
 47 |         int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
 48 |         float bin_size_h = (float)(roi_height) / (float)(pooled_height);
 49 |         float bin_size_w = (float)(roi_width) / (float)(pooled_width);
 50 | 
 51 |         int index_data = roi_batch_ind * data_height * data_width * num_channels;
 52 |         const int output_area = pooled_width * pooled_height;
 53 | 
 54 |         int c, ph, pw;
 55 |         for (ph = 0; ph < pooled_height; ++ph)
 56 |         {
 57 |             for (pw = 0; pw < pooled_width; ++pw)
 58 |             {
 59 |                 int hstart = (floor((float)(ph) * bin_size_h));
 60 |                 int wstart = (floor((float)(pw) * bin_size_w));
 61 |                 int hend = (ceil((float)(ph + 1) * bin_size_h));
 62 |                 int wend = (ceil((float)(pw + 1) * bin_size_w));
 63 | 
 64 |                 hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
 65 |                 hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
 66 |                 wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
 67 |                 wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
 68 | 
 69 |                 const int pool_index = index_output + (ph * pooled_width + pw);
 70 |                 int is_empty = (hend <= hstart) || (wend <= wstart);
 71 |                 if (is_empty)
 72 |                 {
 73 |                     for (c = 0; c < num_channels * output_area; c += output_area)
 74 |                     {
 75 |                         output_flat[pool_index + c] = 0;
 76 |                     }
 77 |                 }
 78 |                 else
 79 |                 {
 80 |                     int h, w, c;
 81 |                     for (h = hstart; h < hend; ++h)
 82 |                     {
 83 |                         for (w = wstart; w < wend; ++w)
 84 |                         {
 85 |                             for (c = 0; c < num_channels; ++c)
 86 |                             {
 87 |                                 const int index = (h * data_width + w) * num_channels + c;
 88 |                                 if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
 89 |                                 {
 90 |                                     output_flat[pool_index + c * output_area] = data_flat[index_data + index];
 91 |                                 }
 92 |                             }
 93 |                         }
 94 |                     }
 95 |                 }
 96 |             }
 97 |         }
 98 | 
 99 |         // Increment ROI index
100 |         index_roi += size_rois;
101 |         index_output += pooled_height * pooled_width * num_channels;
102 |     }
103 |     return 1;
104 | }


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
// CPU RoI max pooling (implemented in roi_pooling.c).
// Pools every row of `rois` out of `features` into the caller-allocated
// `output`; `spatial_scale` maps ROI coordinates onto the feature map.
// Returns 1 on success and 0 when the input is rejected (e.g. batch size != 1).
int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
                        THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_pooling_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
 9 | {
10 |     // Grab the input tensor
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 | 
17 |     // Number of ROIs
18 |     int num_rois = THCudaTensor_size(state, rois, 0);
19 |     int size_rois = THCudaTensor_size(state, rois, 1);
20 |     if (size_rois != 5)
21 |     {
22 |         return 0;
23 |     }
24 | 
25 |     // batch size
26 |     // int batch_size = THCudaTensor_size(state, features, 0);
27 |     // if (batch_size != 1)
28 |     // {
29 |     //     return 0;
30 |     // }
31 |     // data height
32 |     int data_height = THCudaTensor_size(state, features, 2);
33 |     // data width
34 |     int data_width = THCudaTensor_size(state, features, 3);
35 |     // Number of channels
36 |     int num_channels = THCudaTensor_size(state, features, 1);
37 | 
38 |     cudaStream_t stream = THCState_getCurrentStream(state);
39 | 
40 |     ROIPoolForwardLaucher(
41 |         data_flat, spatial_scale, num_rois, data_height,
42 |         data_width, num_channels, pooled_height,
43 |         pooled_width, rois_flat,
44 |         output_flat, argmax_flat, stream);
45 | 
46 |     return 1;
47 | }
48 | 
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 |     // Grab the input tensor
53 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 |     float * rois_flat = THCudaTensor_data(state, rois);
55 | 
56 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 | 
59 |     // Number of ROIs
60 |     int num_rois = THCudaTensor_size(state, rois, 0);
61 |     int size_rois = THCudaTensor_size(state, rois, 1);
62 |     if (size_rois != 5)
63 |     {
64 |         return 0;
65 |     }
66 | 
67 |     // batch size
68 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 |     // if (batch_size != 1)
70 |     // {
71 |     //     return 0;
72 |     // }
73 |     // data height
74 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 |     // data width
76 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 |     // Number of channels
78 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 | 
80 |     cudaStream_t stream = THCState_getCurrentStream(state);
81 |     ROIPoolBackwardLaucher(
82 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 |         data_width, num_channels, pooled_height,
84 |         pooled_width, rois_flat,
85 |         bottom_grad_flat, argmax_flat, stream);
86 | 
87 |     return 1;
88 | }
89 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
// CUDA RoI pooling, forward pass (implemented in roi_pooling_cuda.c).
// Writes the pooled values into `output` and passes `argmax` to the kernel
// launcher as an int buffer. Returns 0 if `rois` does not have exactly
// 5 columns, 1 otherwise.
int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);

// CUDA RoI pooling, backward pass: given `top_grad` and the `argmax` buffer
// from the forward pass, fills `bottom_grad`. Returns 0 if `rois` does not
// have exactly 5 columns, 1 otherwise.
int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
#ifndef _ROI_POOLING_KERNEL
#define _ROI_POOLING_KERNEL

/*
 * Host-side launchers for the RoI pooling CUDA kernels. They are declared with
 * C linkage so the C wrappers in roi_pooling_cuda.c can call them. All
 * pointers are raw device buffers taken from the corresponding tensors.
 * NOTE(review): the callers ignore the int return value, and its meaning is
 * not visible from this header -- check the .cu implementation.
 */

#ifdef __cplusplus
extern "C" {
#endif

// Forward launcher: pools `num_rois` regions described by `bottom_rois` out of
// `bottom_data` (height x width x channels geometry given explicitly) into
// `top_data`; `argmax_data` is an int buffer written alongside it.
int ROIPoolForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* top_data, int* argmax_data, cudaStream_t stream);


// Backward launcher: propagates `top_diff` into `bottom_diff` using the
// read-only `argmax_data` buffer; additionally takes the batch size.
int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* bottom_diff, const int* argmax_data, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
25 | 
26 | 


--------------------------------------------------------------------------------
/lib/model/rpn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/rpn/__init__.py


--------------------------------------------------------------------------------
/lib/model/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | # --------------------------------------------------------
  3 | # Faster R-CNN
  4 | # Copyright (c) 2015 Microsoft
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Ross Girshick and Sean Bell
  7 | # --------------------------------------------------------
  8 | 
  9 | import numpy as np
 10 | import pdb
 11 | 
 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
 13 | #
 14 | #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
 15 | #    >> anchors
 16 | #
 17 | #    anchors =
 18 | #
 19 | #       -83   -39   100    56
 20 | #      -175   -87   192   104
 21 | #      -359  -183   376   200
 22 | #       -55   -55    72    72
 23 | #      -119  -119   136   136
 24 | #      -247  -247   264   264
 25 | #       -35   -79    52    96
 26 | #       -79  -167    96   184
 27 | #      -167  -343   184   360
 28 | 
 29 | #array([[ -83.,  -39.,  100.,   56.],
 30 | #       [-175.,  -87.,  192.,  104.],
 31 | #       [-359., -183.,  376.,  200.],
 32 | #       [ -55.,  -55.,   72.,   72.],
 33 | #       [-119., -119.,  136.,  136.],
 34 | #       [-247., -247.,  264.,  264.],
 35 | #       [ -35.,  -79.,   52.,   96.],
 36 | #       [ -79., -167.,   96.,  184.],
 37 | #       [-167., -343.,  184.,  360.]])
 38 | 
# Python 2/3 compatibility shim: make the name ``xrange`` usable under both
# interpreters by aliasing it to ``range`` where the builtin does not exist.
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3
 43 | 
 44 | 
 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
 46 |                      scales=2**np.arange(3, 6)):
 47 |     """
 48 |     Generate anchor (reference) windows by enumerating aspect ratios X
 49 |     scales wrt a reference (0, 0, 15, 15) window.
 50 |     """
 51 | 
 52 |     base_anchor = np.array([1, 1, base_size, base_size]) - 1
 53 |     ratio_anchors = _ratio_enum(base_anchor, ratios)
 54 |     anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
 55 |                          for i in xrange(ratio_anchors.shape[0])])
 56 |     return anchors
 57 | 
 58 | def _whctrs(anchor):
 59 |     """
 60 |     Return width, height, x center, and y center for an anchor (window).
 61 |     """
 62 | 
 63 |     w = anchor[2] - anchor[0] + 1
 64 |     h = anchor[3] - anchor[1] + 1
 65 |     x_ctr = anchor[0] + 0.5 * (w - 1)
 66 |     y_ctr = anchor[1] + 0.5 * (h - 1)
 67 |     return w, h, x_ctr, y_ctr
 68 | 
 69 | def _mkanchors(ws, hs, x_ctr, y_ctr):
 70 |     """
 71 |     Given a vector of widths (ws) and heights (hs) around a center
 72 |     (x_ctr, y_ctr), output a set of anchors (windows).
 73 |     """
 74 | 
 75 |     ws = ws[:, np.newaxis]
 76 |     hs = hs[:, np.newaxis]
 77 |     anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
 78 |                          y_ctr - 0.5 * (hs - 1),
 79 |                          x_ctr + 0.5 * (ws - 1),
 80 |                          y_ctr + 0.5 * (hs - 1)))
 81 |     return anchors
 82 | 
 83 | def _ratio_enum(anchor, ratios):
 84 |     """
 85 |     Enumerate a set of anchors for each aspect ratio wrt an anchor.
 86 |     """
 87 | 
 88 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
 89 |     size = w * h
 90 |     size_ratios = size / ratios
 91 |     ws = np.round(np.sqrt(size_ratios))
 92 |     hs = np.round(ws * ratios)
 93 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 94 |     return anchors
 95 | 
 96 | def _scale_enum(anchor, scales):
 97 |     """
 98 |     Enumerate a set of anchors for each scale wrt an anchor.
 99 |     """
100 | 
101 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
102 |     ws = w * scales
103 |     hs = h * scales
104 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
105 |     return anchors
106 | 
if __name__ == '__main__':
    # Manual smoke test: time one call with the default arguments, then print
    # the elapsed seconds followed by the generated anchors.
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    # Open an interactive IPython shell for inspection (requires IPython);
    # `a` and `t` stay available there.
    from IPython import embed; embed()
114 | 


--------------------------------------------------------------------------------
/lib/model/rpn/proposal_layer.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | # --------------------------------------------------------
  3 | # Faster R-CNN
  4 | # Copyright (c) 2015 Microsoft
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Ross Girshick and Sean Bell
  7 | # --------------------------------------------------------
  8 | # --------------------------------------------------------
  9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu
 10 | # --------------------------------------------------------
 11 | 
 12 | import torch
 13 | import torch.nn as nn
 14 | import numpy as np
 15 | import math
 16 | import yaml
 17 | from model.utils.config import cfg
 18 | from .generate_anchors import generate_anchors
 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch
 20 | from model.nms.nms_wrapper import nms
 21 | 
 22 | import pdb
 23 | 
# Module-level debug switch, off by default.
# NOTE(review): not referenced in the part of this file reviewed here.
DEBUG = False
 25 | 
 26 | class _ProposalLayer(nn.Module):
 27 |     """
 28 |     Outputs object detection proposals by applying estimated bounding-box
 29 |     transformations to a set of regular boxes (called "anchors").
 30 |     """
 31 | 
 32 |     def __init__(self, feat_stride, scales, ratios):
 33 |         super(_ProposalLayer, self).__init__()
 34 | 
 35 |         self._feat_stride = feat_stride
 36 |         self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 
 37 |             ratios=np.array(ratios))).float()
 38 |         self._num_anchors = self._anchors.size(0)
 39 | 
 40 |         # rois blob: holds R regions of interest, each is a 5-tuple
 41 |         # (n, x1, y1, x2, y2) specifying an image batch index n and a
 42 |         # rectangle (x1, y1, x2, y2)
 43 |         # top[0].reshape(1, 5)
 44 |         #
 45 |         # # scores blob: holds scores for R regions of interest
 46 |         # if len(top) > 1:
 47 |         #     top[1].reshape(1, 1, 1, 1)
 48 | 
 49 |     def forward(self, input):
 50 | 
 51 |         # Algorithm:
 52 |         #
 53 |         # for each (H, W) location i
 54 |         #   generate A anchor boxes centered on cell i
 55 |         #   apply predicted bbox deltas at cell i to each of the A anchors
 56 |         # clip predicted boxes to image
 57 |         # remove predicted boxes with either height or width < threshold
 58 |         # sort all (proposal, score) pairs by score from highest to lowest
 59 |         # take top pre_nms_topN proposals before NMS
 60 |         # apply NMS with threshold 0.7 to remaining proposals
 61 |         # take after_nms_topN proposals after NMS
 62 |         # return the top proposals (-> RoIs top, scores top)
 63 | 
 64 | 
 65 |         # the first set of _num_anchors channels are bg probs
 66 |         # the second set are the fg probs
 67 |         scores = input[0][:, self._num_anchors:, :, :]
 68 |         bbox_deltas = input[1]
 69 |         im_info = input[2]
 70 |         cfg_key = input[3]
 71 | 
 72 |         pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
 73 |         post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
 74 |         nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
 75 |         min_size      = cfg[cfg_key].RPN_MIN_SIZE
 76 | 
 77 |         batch_size = bbox_deltas.size(0)
 78 | 
 79 |         feat_height, feat_width = scores.size(2), scores.size(3)
 80 |         shift_x = np.arange(0, feat_width) * self._feat_stride
 81 |         shift_y = np.arange(0, feat_height) * self._feat_stride
 82 |         shift_x, shift_y = np.meshgrid(shift_x, shift_y)
 83 |         shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
 84 |                                   shift_x.ravel(), shift_y.ravel())).transpose())
 85 |         shifts = shifts.contiguous().type_as(scores).float()
 86 | 
 87 |         A = self._num_anchors
 88 |         K = shifts.size(0)
 89 | 
 90 |         self._anchors = self._anchors.type_as(scores)
 91 |         # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
 92 |         anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
 93 |         anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)
 94 | 
 95 |         # Transpose and reshape predicted bbox transformations to get them
 96 |         # into the same order as the anchors:
 97 | 
 98 |         bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
 99 |         bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
100 | 
101 |         # Same story for the scores:
102 |         scores = scores.permute(0, 2, 3, 1).contiguous()
103 |         scores = scores.view(batch_size, -1)
104 | 
105 |         # Convert anchors into proposals via bbox transformations
106 |         proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
107 | 
108 |         # 2. clip predicted boxes to image
109 |         proposals = clip_boxes(proposals, im_info, batch_size)
110 |         # proposals = clip_boxes_batch(proposals, im_info, batch_size)
111 | 
112 |         # assign the score to 0 if it's non keep.
113 |         # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])
114 | 
115 |         # trim keep index to make it equal over batch
116 |         # keep_idx = torch.cat(tuple(keep_idx), 0)
117 | 
118 |         # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
119 |         # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
120 |         
121 |         # _, order = torch.sort(scores_keep, 1, True)
122 |         
123 |         scores_keep = scores
124 |         proposals_keep = proposals
125 |         _, order = torch.sort(scores_keep, 1, True)
126 | 
127 |         output = scores.new(batch_size, post_nms_topN, 5).zero_()
128 |         for i in range(batch_size):
129 |             # # 3. remove predicted boxes with either height or width < threshold
130 |             # # (NOTE: convert min_size to input image scale stored in im_info[2])
131 |             proposals_single = proposals_keep[i]
132 |             scores_single = scores_keep[i]
133 | 
134 |             # # 4. sort all (proposal, score) pairs by score from highest to lowest
135 |             # # 5. take top pre_nms_topN (e.g. 6000)
136 |             order_single = order[i]
137 | 
138 |             if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
139 |                 order_single = order_single[:pre_nms_topN]
140 | 
141 |             proposals_single = proposals_single[order_single, :]
142 |             scores_single = scores_single[order_single].view(-1,1)
143 | 
144 |             # 6. apply nms (e.g. threshold = 0.7)
145 |             # 7. take after_nms_topN (e.g. 300)
146 |             # 8. return the top proposals (-> RoIs top)
147 | 
148 |             keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
149 |             keep_idx_i = keep_idx_i.long().view(-1)
150 | 
151 |             if post_nms_topN > 0:
152 |                 keep_idx_i = keep_idx_i[:post_nms_topN]
153 |             proposals_single = proposals_single[keep_idx_i, :]
154 |             scores_single = scores_single[keep_idx_i, :]
155 | 
156 |             # padding 0 at the end.
157 |             num_proposal = proposals_single.size(0)
158 |             output[i,:,0] = i
159 |             output[i,:num_proposal,1:] = proposals_single
160 | 
161 |         return output
162 | 
163 |     def backward(self, top, propagate_down, bottom):
164 |         """This layer does not propagate gradients."""
165 |         pass
166 | 
167 |     def reshape(self, bottom, top):
168 |         """Reshaping happens during the call to forward."""
169 |         pass
170 | 
171 |     def _filter_boxes(self, boxes, min_size):
172 |         """Remove all boxes with any side smaller than min_size."""
173 |         ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
174 |         hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
175 |         keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs)))
176 |         return keep
177 | 


--------------------------------------------------------------------------------
/lib/model/rpn/rpn.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | 
  7 | from model.utils.config import cfg
  8 | from .proposal_layer import _ProposalLayer
  9 | from .anchor_target_layer import _AnchorTargetLayer
 10 | from model.utils.net_utils import _smooth_l1_loss
 11 | 
 12 | import numpy as np
 13 | import math
 14 | import pdb
 15 | import time
 16 | 
 17 | class _RPN(nn.Module):
 18 |     """ region proposal network """
 19 |     def __init__(self, din):
 20 |         super(_RPN, self).__init__()
 21 |         
 22 |         self.din = din  # get depth of input feature map, e.g., 512
 23 |         self.anchor_scales = cfg.ANCHOR_SCALES
 24 |         self.anchor_ratios = cfg.ANCHOR_RATIOS
 25 |         self.feat_stride = cfg.FEAT_STRIDE[0]
 26 | 
 27 |         # define the convrelu layers processing input feature map
 28 |         self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)
 29 | 
 30 |         # define bg/fg classifcation score layer
 31 |         self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors)
 32 |         self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)
 33 | 
 34 |         # define anchor box offset prediction layer
 35 |         self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors)
 36 |         self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)
 37 | 
 38 |         # define proposal layer
 39 |         self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
 40 | 
 41 |         # define anchor target layer
 42 |         self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
 43 | 
 44 |         self.rpn_loss_cls = 0
 45 |         self.rpn_loss_box = 0
 46 | 
 47 |     @staticmethod
 48 |     def reshape(x, d):
 49 |         input_shape = x.size()
 50 |         x = x.view(
 51 |             input_shape[0],
 52 |             int(d),
 53 |             int(float(input_shape[1] * input_shape[2]) / float(d)),
 54 |             input_shape[3]
 55 |         )
 56 |         return x
 57 | 
 58 |     def forward(self, base_feat, im_info, gt_boxes, num_boxes):
 59 | 
 60 |         batch_size = base_feat.size(0)
 61 | 
 62 |         # return feature map after convrelu layer
 63 |         rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
 64 |         # get rpn classification score
 65 |         rpn_cls_score = self.RPN_cls_score(rpn_conv1)
 66 | 
 67 |         rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
 68 |         rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, dim=1)
 69 |         rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
 70 | 
 71 |         # get rpn offsets to the anchor boxes
 72 |         rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)
 73 | 
 74 |         # proposal layer
 75 |         cfg_key = 'TRAIN' if self.training else 'TEST'
 76 | 
 77 |         rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
 78 |                                  im_info, cfg_key))
 79 | 
 80 |         self.rpn_loss_cls = 0
 81 |         self.rpn_loss_box = 0
 82 | 
 83 |         # generating training labels and build the rpn loss
 84 |         if self.training:
 85 |             assert gt_boxes is not None
 86 | 
 87 |             rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
 88 | 
 89 |             # compute classification loss
 90 |             rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
 91 |             rpn_label = rpn_data[0].view(batch_size, -1)
 92 | 
 93 |             rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
 94 |             rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
 95 |             rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
 96 |             rpn_label = Variable(rpn_label.long())
 97 |             self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
 98 |             fg_cnt = torch.sum(rpn_label.data.ne(0))
 99 | 
100 |             rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
101 | 
102 |             # compute bbox regression loss
103 |             rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
104 |             rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
105 |             rpn_bbox_targets = Variable(rpn_bbox_targets)
106 | 
107 |             self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
108 |                                                             rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])
109 | 
110 |         return rois, self.rpn_loss_cls, self.rpn_loss_box
111 | 


--------------------------------------------------------------------------------
/lib/model/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/lib/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/utils/__init__.py


--------------------------------------------------------------------------------
/lib/model/utils/bbox.pyx:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Sergey Karayev
  6 | # --------------------------------------------------------
  7 | 
  8 | cimport cython
  9 | import numpy as np
 10 | cimport numpy as np
 11 | 
 12 | DTYPE = np.float
 13 | ctypedef np.float_t DTYPE_t
 14 | 
def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-callable wrapper that forwards to bbox_overlaps_c."""
    return bbox_overlaps_c(boxes, query_boxes)
 18 | 
cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the intersection-over-union of every box with every query box.

    Columns 0/2 and 1/3 of each row are used as the min/max coordinates along
    the two axes; extents are inclusive (max - min + 1).

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # entries stay 0 for pairs that do not intersect
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # area of the query box, computed once per column
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # extent of the intersection along the first axis
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # extent of the intersection along the second axis
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # union area = area(box) + area(query) - intersection
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
 60 | 
 61 | 
def bbox_intersections(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-callable wrapper that forwards to bbox_intersections_c."""
    return bbox_intersections_c(boxes, query_boxes)
 66 | 
 67 | 
cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    For each (box, query box) pair compute the intersection area divided by
    the area of the query box.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    intersec: (N, K) ndarray of intersection ratios between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # entries stay 0 for pairs that do not intersect
    cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    # NOTE(review): ua is declared but never used in this function
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # area of the query box (inclusive extents), used as the denominator
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # extent of the intersection along the first axis
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # extent of the intersection along the second axis
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    intersec[n, k] = iw * ih / box_area
    return intersec


--------------------------------------------------------------------------------
/lib/model/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | 
10 | import numpy as np
11 | # from scipy.misc import imread, imresize
12 | import cv2
13 | 
14 | try:
15 |     xrange          # Python 2
16 | except NameError:
17 |     xrange = range  # Python 3
18 | 
19 | 
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).

    Args:
        ims: non-empty list of ``(H, W, 3)`` arrays; heights and widths may
            differ between images.

    Returns:
        A float32 array of shape ``(len(ims), max_H, max_W, 3)``. Each image is
        copied into the top-left corner of its slot; the rest stays zero.
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    # enumerate() avoids relying on the Python 2 ``xrange`` compatibility shim
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im

    return blob
34 | 
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean-subtract an image and rescale it for use in a blob.

    The image is converted to float32, ``pixel_means`` is subtracted in place,
    and the result is resized with bilinear interpolation so that its shorter
    side becomes ``target_size``.

    ``max_size`` is accepted but currently unused: no cap is applied to the
    longer side.

    Note: ``astype(..., copy=False)`` may return the input array itself, in
    which case the in-place subtraction also modifies the caller's array.

    Returns:
        ``(resized_image, im_scale)``.
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means

    shorter_side = np.min(im.shape[0:2])
    im_scale = float(target_size) / float(shorter_side)

    resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    return resized, im_scale
53 | 


--------------------------------------------------------------------------------
/lib/model/utils/logger.py:
--------------------------------------------------------------------------------
 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
 2 | import tensorflow as tf
 3 | import numpy as np
 4 | import scipy.misc 
 5 | try:
 6 |     from StringIO import StringIO  # Python 2.7
 7 | except ImportError:
 8 |     from io import BytesIO         # Python 3.x
 9 | 
10 | 
class Logger(object):
    """Thin wrapper around a TensorFlow summary FileWriter."""

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Each image is PNG-encoded and written under the tag ``'<tag>/<index>'``.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to an in-memory buffer. On Python 3 the StringIO
            # import at the top of the module fails, so the name is undefined
            # and BytesIO is used instead. Only that specific failure is
            # caught; anything else is a real error and must propagate.
            try:
                s = StringIO()
            except NameError:
                s = BytesIO()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values."""

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
72 | 


--------------------------------------------------------------------------------
/lib/model/utils/net_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | import numpy as np
  6 | import torchvision.models as models
  7 | from model.utils.config import cfg
  8 | from model.roi_crop.functions.roi_crop import RoICropFunction
  9 | import cv2
 10 | import pdb
 11 | import random
 12 | 
 13 | def save_net(fname, net):
 14 |     import h5py
 15 |     h5f = h5py.File(fname, mode='w')
 16 |     for k, v in net.state_dict().items():
 17 |         h5f.create_dataset(k, data=v.cpu().numpy())
 18 | 
 19 | def load_net(fname, net):
 20 |     import h5py
 21 |     h5f = h5py.File(fname, mode='r')
 22 |     for k, v in net.state_dict().items():
 23 |         param = torch.from_numpy(np.asarray(h5f[k]))
 24 |         v.copy_(param)
 25 | 
 26 | def weights_normal_init(model, dev=0.01):
 27 |     if isinstance(model, list):
 28 |         for m in model:
 29 |             weights_normal_init(m, dev)
 30 |     else:
 31 |         for m in model.modules():
 32 |             if isinstance(m, nn.Conv2d):
 33 |                 m.weight.data.normal_(0.0, dev)
 34 |             elif isinstance(m, nn.Linear):
 35 |                 m.weight.data.normal_(0.0, dev)
 36 | 
 37 | 
 38 | def clip_gradient(model, clip_norm):
 39 |     """Computes a gradient clipping coefficient based on gradient norm."""
 40 |     totalnorm = 0
 41 |     for p in model.parameters():
 42 |         if p.requires_grad:
 43 |             modulenorm = p.grad.data.norm()
 44 |             totalnorm += modulenorm ** 2
 45 |     totalnorm = np.sqrt(totalnorm)
 46 | 
 47 |     norm = clip_norm / max(totalnorm, clip_norm)
 48 |     for p in model.parameters():
 49 |         if p.requires_grad:
 50 |             p.grad.mul_(norm)
 51 | 
 52 | def vis_detections(im, class_name, dets, thresh=0.8):
 53 |     """Visual debugging of detections."""
 54 |     for i in range(np.minimum(10, dets.shape[0])):
 55 |         bbox = tuple(int(np.round(x)) for x in dets[i, :4])
 56 |         score = dets[i, -1]
 57 |         if score > thresh:
 58 |             cv2.rectangle(im, bbox[0:2], bbox[2:4], (0, 204, 0), 2)
 59 |             cv2.putText(im, '%s: %.3f' % (class_name, score), (bbox[0], bbox[1] + 15), cv2.FONT_HERSHEY_PLAIN,
 60 |                         1.0, (0, 0, 255), thickness=1)
 61 |     return im
 62 | 
 63 | 
 64 | def adjust_learning_rate(optimizer, decay=0.1):
 65 |     """Sets the learning rate to the initial LR decayed by 0.5 every 20 epochs"""
 66 |     for param_group in optimizer.param_groups:
 67 |         param_group['lr'] = decay * param_group['lr']
 68 | 
 69 | 
 70 | def save_checkpoint(state, filename):
 71 |     torch.save(state, filename)
 72 | 
 73 | def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
 74 |     
 75 |     sigma_2 = sigma ** 2
 76 |     box_diff = bbox_pred - bbox_targets
 77 |     in_box_diff = bbox_inside_weights * box_diff
 78 |     abs_in_box_diff = torch.abs(in_box_diff)
 79 |     smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float()
 80 |     in_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
 81 |                   + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
 82 |     out_loss_box = bbox_outside_weights * in_loss_box
 83 |     loss_box = out_loss_box
 84 |     for i in sorted(dim, reverse=True):
 85 |       loss_box = loss_box.sum(i)
 86 |     loss_box = loss_box.mean()
 87 |     return loss_box
 88 | 
 89 | def _crop_pool_layer(bottom, rois, max_pool=True):
 90 |     # code modified from 
 91 |     # https://github.com/ruotianluo/pytorch-faster-rcnn
 92 |     # implement it using stn
 93 |     # box to affine
 94 |     # input (x1,y1,x2,y2)
 95 |     """
 96 |     [  x2-x1             x1 + x2 - W + 1  ]
 97 |     [  -----      0      ---------------  ]
 98 |     [  W - 1                  W - 1       ]
 99 |     [                                     ]
100 |     [           y2-y1    y1 + y2 - H + 1  ]
101 |     [    0      -----    ---------------  ]
102 |     [           H - 1         H - 1      ]
103 |     """
104 |     rois = rois.detach()
105 |     batch_size = bottom.size(0)
106 |     D = bottom.size(1)
107 |     H = bottom.size(2)
108 |     W = bottom.size(3)
109 |     roi_per_batch = rois.size(0) / batch_size
110 |     x1 = rois[:, 1::4] / 16.0
111 |     y1 = rois[:, 2::4] / 16.0
112 |     x2 = rois[:, 3::4] / 16.0
113 |     y2 = rois[:, 4::4] / 16.0
114 | 
115 |     height = bottom.size(2)
116 |     width = bottom.size(3)
117 | 
118 |     # affine theta
119 |     zero = Variable(rois.data.new(rois.size(0), 1).zero_())
120 |     theta = torch.cat([\
121 |       (x2 - x1) / (width - 1),
122 |       zero,
123 |       (x1 + x2 - width + 1) / (width - 1),
124 |       zero,
125 |       (y2 - y1) / (height - 1),
126 |       (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
127 | 
128 |     if max_pool:
129 |       pre_pool_size = cfg.POOLING_SIZE * 2
130 |       grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
131 |       bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
132 |                                                                 .contiguous().view(-1, D, H, W)
133 |       crops = F.grid_sample(bottom, grid)
134 |       crops = F.max_pool2d(crops, 2, 2)
135 |     else:
136 |       grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE)))
137 |       bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
138 |                                                                 .contiguous().view(-1, D, H, W)
139 |       crops = F.grid_sample(bottom, grid)
140 |     
141 |     return crops, grid
142 | 
143 | def _affine_grid_gen(rois, input_size, grid_size):
144 | 
145 |     rois = rois.detach()
146 |     x1 = rois[:, 1::4] / 16.0
147 |     y1 = rois[:, 2::4] / 16.0
148 |     x2 = rois[:, 3::4] / 16.0
149 |     y2 = rois[:, 4::4] / 16.0
150 | 
151 |     height = input_size[0]
152 |     width = input_size[1]
153 | 
154 |     zero = Variable(rois.data.new(rois.size(0), 1).zero_())
155 |     theta = torch.cat([\
156 |       (x2 - x1) / (width - 1),
157 |       zero,
158 |       (x1 + x2 - width + 1) / (width - 1),
159 |       zero,
160 |       (y2 - y1) / (height - 1),
161 |       (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
162 | 
163 |     grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size)))
164 | 
165 |     return grid
166 | 
167 | def _affine_theta(rois, input_size):
168 | 
169 |     rois = rois.detach()
170 |     x1 = rois[:, 1::4] / 16.0
171 |     y1 = rois[:, 2::4] / 16.0
172 |     x2 = rois[:, 3::4] / 16.0
173 |     y2 = rois[:, 4::4] / 16.0
174 | 
175 |     height = input_size[0]
176 |     width = input_size[1]
177 | 
178 |     zero = Variable(rois.data.new(rois.size(0), 1).zero_())
179 | 
180 |     # theta = torch.cat([\
181 |     #   (x2 - x1) / (width - 1),
182 |     #   zero,
183 |     #   (x1 + x2 - width + 1) / (width - 1),
184 |     #   zero,
185 |     #   (y2 - y1) / (height - 1),
186 |     #   (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
187 | 
188 |     theta = torch.cat([\
189 |       (y2 - y1) / (height - 1),
190 |       zero,
191 |       (y1 + y2 - height + 1) / (height - 1),
192 |       zero,
193 |       (x2 - x1) / (width - 1),
194 |       (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3)
195 | 
196 |     return theta
197 | 
def compare_grid_sample():
    """Debug helper comparing F.grid_sample with RoICropFunction gradients.

    Runs both implementations on the same random CUDA input and grid, obtains
    the gradient with respect to the input from each, and computes the summed
    difference. Requires CUDA. The function stops at a pdb breakpoint before
    the difference is computed and returns nothing.
    """
    # do gradcheck
    N = random.randint(1, 8)
    C = 2 # random.randint(1, 8)
    H = 5 # random.randint(1, 8)
    W = 4 # random.randint(1, 8)
    input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True)
    input_p = input.clone().data.contiguous()

    grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True)
    grid_clone = grid.clone().contiguous()

    # reference path: built-in grid_sample, differentiated through autograd
    out_offcial = F.grid_sample(input, grid)
    grad_outputs = Variable(torch.rand(out_offcial.size()).cuda())
    grad_outputs_clone = grad_outputs.clone().contiguous()
    grad_inputs = torch.autograd.grad(out_offcial, (input, grid), grad_outputs.contiguous())
    grad_input_off = grad_inputs[0]


    # custom path: RoICropFunction is fed the grid with its last-axis
    # components swapped (index 1 first, then index 0)
    crf = RoICropFunction()
    grid_yx = torch.stack([grid_clone.data[:,:,:,1], grid_clone.data[:,:,:,0]], 3).contiguous().cuda()
    out_stn = crf.forward(input_p, grid_yx)
    grad_inputs = crf.backward(grad_outputs_clone.data)
    grad_input_stn = grad_inputs[0]
    # interactive breakpoint for inspecting both gradients
    pdb.set_trace()

    # summed difference between the two input gradients (not returned)
    delta = (grad_input_off.data - grad_input_stn).sum()
225 | 


--------------------------------------------------------------------------------
/lib/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 | 


--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/lib/pycocotools/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met: 
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright notice, this
 8 |    list of conditions and the following disclaimer. 
 9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 |    this list of conditions and the following disclaimer in the documentation
11 |    and/or other materials provided with the distribution. 
12 | 
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 | 
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies, 
26 | either expressed or implied, of the FreeBSD Project.
27 | 


--------------------------------------------------------------------------------
/lib/pycocotools/mask.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'tsungyi'
 2 | 
 3 | from . import _mask
 4 | 
 5 | # Interface for manipulating masks stored in RLE format.
 6 | #
 7 | # RLE is a simple yet efficient format for storing binary masks. RLE
 8 | # first divides a vector (or vectorized image) into a series of piecewise
 9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | #  encode         - Encode binary masks using RLE.
32 | #  decode         - Decode binary masks encoded via RLE.
33 | #  merge          - Compute union or intersection of encoded masks.
34 | #  iou            - Compute intersection over union between masks.
35 | #  area           - Compute area of encoded masks.
36 | #  toBbox         - Get bounding boxes surrounding encoded masks.
37 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | #  Rs     = encode( masks )
41 | #  masks  = decode( Rs )
42 | #  R      = merge( Rs, intersect=false )
43 | #  o      = iou( dt, gt, iscrowd )
44 | #  a      = area( Rs )
45 | #  bbs    = toBbox( Rs )
46 | #  Rs     = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | #  Rs      - [dict] Run-length encoding of binary masks
50 | #  R       - dict Run-length encoding of binary mask
51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | #  dt,gt   - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
# For "crowd" regions, we use a modified criterion. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
# For crowd gt regions we use the modified criterion above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox.      version 2.0
72 | # Data, paper, and tutorials available at:  http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 | 
# Public API of this module: each name below is a direct alias for the
# function of the same name in the compiled `_mask` extension.
encode = _mask.encode
decode = _mask.decode
iou = _mask.iou
merge = _mask.merge
area = _mask.area
toBbox = _mask.toBbox
frPyObjects = _mask.frPyObjects


--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.c:
--------------------------------------------------------------------------------
  1 | /**************************************************************************
  2 | * Microsoft COCO Toolbox.      version 2.0
  3 | * Data, paper, and tutorials available at:  http://mscoco.org/
  4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  5 | * Licensed under the Simplified BSD License [see coco/license.txt]
  6 | **************************************************************************/
  7 | #include "maskApi.h"
  8 | #include <math.h>
  9 | #include <stdlib.h>
 10 | 
 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
 12 | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
 13 | 
 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
 15 |   R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
 16 |   if(cnts) for(siz j=0; j<m; j++) R->cnts[j]=cnts[j];
 17 | }
 18 | 
 19 | void rleFree( RLE *R ) {
 20 |   free(R->cnts); R->cnts=0;
 21 | }
 22 | 
 23 | void rlesInit( RLE **R, siz n ) {
 24 |   *R = (RLE*) malloc(sizeof(RLE)*n);
 25 |   for(siz i=0; i<n; i++) rleInit((*R)+i,0,0,0,0);
 26 | }
 27 | 
 28 | void rlesFree( RLE **R, siz n ) {
 29 |   for(siz i=0; i<n; i++) rleFree((*R)+i); free(*R); *R=0;
 30 | }
 31 | 
 32 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
 33 |   siz i, j, k, a=w*h; uint c, *cnts; byte p;
 34 |   cnts = malloc(sizeof(uint)*(a+1));
 35 |   for(i=0; i<n; i++) {
 36 |     const byte *T=M+a*i; k=0; p=0; c=0;
 37 |     for(j=0; j<a; j++) { if(T[j]!=p) { cnts[k++]=c; c=0; p=T[j]; } c++; }
 38 |     cnts[k++]=c; rleInit(R+i,h,w,k,cnts);
 39 |   }
 40 |   free(cnts);
 41 | }
 42 | 
 43 | void rleDecode( const RLE *R, byte *M, siz n ) {
 44 |   for( siz i=0; i<n; i++ ) {
 45 |     byte v=0; for( siz j=0; j<R[i].m; j++ ) {
 46 |       for( siz k=0; k<R[i].cnts[j]; k++ ) *(M++)=v; v=!v; }}
 47 | }
 48 | 
 49 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ) {
 50 |   uint *cnts, c, ca, cb, cc, ct; bool v, va, vb, vp;
 51 |   siz i, a, b, h=R[0].h, w=R[0].w, m=R[0].m; RLE A, B;
 52 |   if(n==0) { rleInit(M,0,0,0,0); return; }
 53 |   if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
 54 |   cnts = malloc(sizeof(uint)*(h*w+1));
 55 |   for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
 56 |   for( i=1; i<n; i++ ) {
 57 |     B=R[i]; if(B.h!=h||B.w!=w) { h=w=m=0; break; }
 58 |     rleInit(&A,h,w,m,cnts); ca=A.cnts[0]; cb=B.cnts[0];
 59 |     v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
 60 |     while( ct>0 ) {
 61 |       c=umin(ca,cb); cc+=c; ct=0;
 62 |       ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
 63 |       cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
 64 |       vp=v; if(intersect) v=va&&vb; else v=va||vb;
 65 |       if( v!=vp||ct==0 ) { cnts[m++]=cc; cc=0; }
 66 |     }
 67 |     rleFree(&A);
 68 |   }
 69 |   rleInit(M,h,w,m,cnts); free(cnts);
 70 | }
 71 | 
 72 | void rleArea( const RLE *R, siz n, uint *a ) {
 73 |   for( siz i=0; i<n; i++ ) {
 74 |     a[i]=0; for( siz j=1; j<R[i].m; j+=2 ) a[i]+=R[i].cnts[j]; }
 75 | }
 76 | 
/* Compute the intersection-over-union between every pair of m detection
 * encodings (dt) and n ground-truth encodings (gt).
 * The result for pair (g,d) is written to o[g*m+d]. iscrowd may be NULL;
 * otherwise iscrowd[g] != 0 selects the crowd variant for ground truth g,
 * where the denominator is the area of the detection instead of the union.
 * A pair whose heights or widths differ gets the value -1. */
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
  siz g, d; BB db, gb; bool crowd;
  // First fill o with the IoU of the masks' bounding boxes.
  db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
  gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
  bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
  // Only pairs with a positive bounding-box IoU are refined with the exact
  // mask computation; all other entries keep the value bbIou wrote.
  for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
    crowd=iscrowd!=NULL && iscrowd[g];
    if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
    siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb;
    // ca/cb: pixels left in the current run of dt/gt; va/vb: value of that run;
    // i/u: intersection and union pixel counts accumulated so far.
    ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
    cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
    // Walk both run lists in lock step until both are exhausted (ct==0).
    while( ct>0 ) {
      c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
      ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
      cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
    }
    // No overlap: force the denominator to 1 so the result is exactly 0.
    // Crowd ground truth: replace the union by the area of the detection.
    if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
    o[g*m+d] = (double)i/(double)u;
  }
}
 97 | 
 98 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
 99 |   double h, w, i, u, ga, da; siz g, d; bool crowd;
100 |   for( g=0; g<n; g++ ) {
101 |     BB G=gt+g*4; ga=G[2]*G[3]; crowd=iscrowd!=NULL && iscrowd[g];
102 |     for( d=0; d<m; d++ ) {
103 |       BB D=dt+d*4; da=D[2]*D[3]; o[g*m+d]=0;
104 |       w=fmin(D[2]+D[0],G[2]+G[0])-fmax(D[0],G[0]); if(w<=0) continue;
105 |       h=fmin(D[3]+D[1],G[3]+G[1])-fmax(D[1],G[1]); if(h<=0) continue;
106 |       i=w*h; u = crowd ? da : da+ga-i; o[g*m+d]=i/u;
107 |     }
108 |   }
109 | }
110 | 
111 | void rleToBbox( const RLE *R, BB bb, siz n ) {
112 |   for( siz i=0; i<n; i++ ) {
113 |     uint h, w, x, y, xs, ys, xe, ye, cc, t; siz j, m;
114 |     h=(uint)R[i].h; w=(uint)R[i].w; m=R[i].m;
115 |     m=((siz)(m/2))*2; xs=w; ys=h; xe=ye=0; cc=0;
116 |     if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
117 |     for( j=0; j<m; j++ ) {
118 |       cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
119 |       xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
120 |     }
121 |     bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
122 |     bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
123 |   }
124 | }
125 | 
126 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
127 |   for( siz i=0; i<n; i++ ) {
128 |     double xs=bb[4*i+0], xe=xs+bb[4*i+2];
129 |     double ys=bb[4*i+1], ye=ys+bb[4*i+3];
130 |     double xy[8] = {xs,ys,xs,ye,xe,ye,xe,ys};
131 |     rleFrPoly( R+i, xy, 4, h, w );
132 |   }
133 | }
134 | 
135 | int uintCompare(const void *a, const void *b) {
136 |   uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
137 | }
138 | 
/* Convert a polygon with k vertices, given as xy = [x0 y0 x1 y1 ...], into
 * an encoding R of an h-by-w image.
 * Works in three stages: (1) trace the closed boundary on a grid upsampled
 * by `scale`, (2) keep the points where the boundary moves to a new x and
 * map them back to image coordinates, (3) sort those points by linear
 * index x*h+y and turn the gaps between them into run lengths. */
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
  // upsample and get discrete points densely along entire boundary
  siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
  x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
  // Scaled, rounded vertices; entry k repeats vertex 0 to close the polygon.
  for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
  for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
  // Total number of boundary points: max(|dx|,|dy|)+1 per edge.
  for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]),abs(y[j]-y[j+1]))+1;
  u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
  for( j=0; j<k; j++ ) {
    int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t;
    bool flip; double s; dx=abs(xe-xs); dy=abs(ys-ye);
    // Step along the longer axis; `flip` swaps the endpoints so that axis is
    // walked in increasing order while points are still emitted in the
    // original start-to-end order (see t below).
    flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
    if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
    // NOTE(review): for a zero-length edge (dx==dy==0) this divides 0 by 0 and
    // s is then used in the d==0 iteration below -- confirm this is harmless.
    s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
    if(dx>=dy) for( int d=0; d<=dx; d++ ) {
      t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
    } else for( int d=0; d<=dy; d++ ) {
      t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
    }
  }
  // get points along y-boundary and downsample
  free(x); free(y); k=m; m=0; double xd, yd;
  x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
  // Only positions where the upsampled x changes are considered.
  for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
    xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
    // Keep the point only if it maps to an integer column inside the image.
    if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
    yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
    // Clamp y to [0,h] and round up to an integer row.
    if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
    x[m]=(int) xd; y[m]=(int) yd; m++;
  }
  // compute rle encoding given y-boundary points
  k=m; a=malloc(sizeof(uint)*(k+1));
  // Linear pixel index of every kept point, plus h*w as a final sentinel.
  for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
  a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
  qsort(a,k,sizeof(uint),uintCompare); uint p=0;
  // Replace each sorted index by its distance to the previous one (run length).
  for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
  // Copy the runs to b, removing zero-length runs: a zero run is dropped and
  // the run after it is added to the previously emitted run.
  b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
  while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
    j++; if(j<k) b[m-1]+=a[j++]; }
  rleInit(R,h,w,m,b); free(a); free(b);
}
180 | 
181 | char* rleToString( const RLE *R ) {
182 |   // Similar to LEB128 but using 6 bits/char and ascii chars 48-111.
183 |   siz i, m=R->m, p=0; long x; bool more;
184 |   char *s=malloc(sizeof(char)*m*6);
185 |   for( i=0; i<m; i++ ) {
186 |     x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
187 |     while( more ) {
188 |       char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
189 |       if(more) c |= 0x20; c+=48; s[p++]=c;
190 |     }
191 |   }
192 |   s[p]=0; return s;
193 | }
194 | 
195 | void rleFrString( RLE *R, char *s, siz h, siz w ) {
196 |   siz m=0, p=0, k; long x; bool more; uint *cnts;
197 |   while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
198 |   while( s[p] ) {
199 |     x=0; k=0; more=1;
200 |     while( more ) {
201 |       char c=s[p]-48; x |= (c & 0x1f) << 5*k;
202 |       more = c & 0x20; p++; k++;
203 |       if(!more && (c & 0x10)) x |= -1 << 5*k;
204 |     }
205 |     if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
206 |   }
207 |   rleInit(R,h,w,m,cnts); free(cnts);
208 | }
209 | 


--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
 1 | /**************************************************************************
 2 | * Microsoft COCO Toolbox.      version 2.0
 3 | * Data, paper, and tutorials available at:  http://mscoco.org/
 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 5 | * Licensed under the Simplified BSD License [see coco/license.txt]
 6 | **************************************************************************/
 7 | #pragma once
 8 | #include <stdbool.h>
 9 | 
// Basic types shared by the mask API.
typedef unsigned int uint;      // type of a single run-length count
typedef unsigned long siz;      // sizes, element counts and loop indices
typedef unsigned char byte;     // one mask pixel / one iscrowd flag
typedef double* BB;             // bounding boxes, 4 doubles per box
// Run-length encoded mask: h and w are the image height and width, m is the
// number of runs and cnts points to the m run lengths.
typedef struct { siz h, w, m; uint *cnts; } RLE;
15 | 
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
18 | void rleFree( RLE *R );
19 | 
20 | // Initialize/destroy RLE array.
21 | void rlesInit( RLE **R, siz n );
22 | void rlesFree( RLE **R, siz n );
23 | 
24 | // Encode binary masks using RLE.
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
26 | 
27 | // Decode binary masks encoded via RLE.
28 | void rleDecode( const RLE *R, byte *mask, siz n );
29 | 
30 | // Compute union or intersection of encoded masks.
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
32 | 
33 | // Compute area of encoded masks.
34 | void rleArea( const RLE *R, siz n, uint *a );
35 | 
36 | // Compute intersection over union between masks.
37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
38 | 
39 | // Compute intersection over union between bounding boxes.
40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
41 | 
42 | // Get bounding boxes surrounding encoded masks.
43 | void rleToBbox( const RLE *R, BB bb, siz n );
44 | 
45 | // Convert bounding boxes to encoded masks.
46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
47 | 
48 | // Convert polygon to encoded mask.
49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
50 | 
51 | // Get compressed string representation of encoded mask.
52 | char* rleToString( const RLE *R );
53 | 
54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w );
56 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick and Xinlei Chen
 6 | # --------------------------------------------------------
 7 | 
 8 | """Compute minibatch blobs for training a Fast R-CNN network."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | import numpy as np
14 | import numpy.random as npr
15 | from scipy.misc import imread
16 | from model.utils.config import cfg
17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
18 | import pdb
19 | 
def get_minibatch(roidb, num_classes, target_size):
  """Build the blob dictionary for one minibatch taken from `roidb`.

  Only a single image per batch is supported. The returned dict has the keys
  'data' (image blob), 'gt_boxes' (rows of x1, y1, x2, y2, cls in the scaled
  image), 'im_info' (one row: blob dims 1 and 2, then the image scale) and
  'img_id'. `num_classes` is accepted but not used here.
  """
  num_images = len(roidb)
  assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
    'num_images ({}) must divide BATCH_SIZE ({})'. \
    format(num_images, cfg.TRAIN.BATCH_SIZE)

  # Load, preprocess and stack the images.
  im_blob, im_scales = _get_image_blob(roidb, target_size)

  assert len(im_scales) == 1, "Single batch only"
  assert len(roidb) == 1, "Single batch only"

  entry = roidb[0]
  scale = im_scales[0]

  # Choose which ground-truth boxes to keep (class 0 is never kept).
  if cfg.TRAIN.USE_ALL_GT:
    keep = entry['gt_classes'] != 0
  else:
    # Drop COCO ''iscrowd'' boxes: those have an overlap entry <= -1.
    not_crowd = np.all(entry['gt_overlaps'].toarray() > -1.0, axis=1)
    keep = (entry['gt_classes'] != 0) & not_crowd
  gt_inds = np.where(keep)[0]

  # gt boxes: (x1, y1, x2, y2, cls), coordinates scaled like the image
  gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
  gt_boxes[:, 0:4] = entry['boxes'][gt_inds, :] * scale
  gt_boxes[:, 4] = entry['gt_classes'][gt_inds]

  im_info = np.array(
    [[im_blob.shape[1], im_blob.shape[2], scale]],
    dtype=np.float32)

  return {
    'data': im_blob,
    'gt_boxes': gt_boxes,
    'im_info': im_info,
    'img_id': entry['img_id'],
  }
54 | 
def _get_image_blob(roidb, target_size):
  """Load every image listed in `roidb` and pack them into one blob.

  Image i is resized using target_size[i]. Returns the blob together with
  the list of per-image scale factors.
  """
  processed_ims = []
  im_scales = []
  for i, entry in enumerate(roidb):
    im = imread(entry['image'])

    if im.ndim == 2:
      # grayscale image: replicate the single channel three times
      gray = im[:, :, np.newaxis]
      im = np.concatenate((gray, gray, gray), axis=2)

    # imread gives RGB; reverse the channel axis to get the BGR order
    # that the original cv2-based pipeline used
    im = im[:, :, ::-1]

    if entry['flipped']:
      # mirror horizontally
      im = im[:, ::-1, :]

    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size[i],
                                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Stack the preprocessed images into a single blob
  return im_list_to_blob(processed_ims), im_scales
85 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
  1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  2 | from __future__ import absolute_import
  3 | from __future__ import division
  4 | from __future__ import print_function
  5 | 
  6 | import datasets
  7 | import numpy as np
  8 | from model.utils.config import cfg
  9 | from datasets.factory import get_imdb
 10 | import PIL
 11 | import pdb
 12 | 
 13 | def prepare_roidb(imdb):
 14 |   """Enrich the imdb's roidb by adding some derived quantities that
 15 |   are useful for training. This function precomputes the maximum
 16 |   overlap, taken over ground-truth boxes, between each ROI and
 17 |   each ground-truth box. The class with maximum overlap is also
 18 |   recorded.
 19 |   """
 20 | 
 21 |   roidb = imdb.roidb
 22 |   if not (imdb.name.startswith('coco')):
 23 |     sizes = [PIL.Image.open(imdb.image_path_at(i)).size
 24 |          for i in range(imdb.num_images)]
 25 |          
 26 |   for i in range(len(imdb.image_index)):
 27 |     roidb[i]['img_id'] = imdb.image_id_at(i)
 28 |     roidb[i]['image'] = imdb.image_path_at(i)
 29 |     if not (imdb.name.startswith('coco')):
 30 |       roidb[i]['width'] = sizes[i][0]
 31 |       roidb[i]['height'] = sizes[i][1]
 32 |     # need gt_overlaps as a dense array for argmax
 33 |     gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 34 |     # max overlap with gt over classes (columns)
 35 |     max_overlaps = gt_overlaps.max(axis=1)
 36 |     # gt class that had the max overlap
 37 |     max_classes = gt_overlaps.argmax(axis=1)
 38 |     roidb[i]['max_classes'] = max_classes
 39 |     roidb[i]['max_overlaps'] = max_overlaps
 40 |     # sanity checks
 41 |     # max overlap of 0 => class should be zero (background)
 42 |     zero_inds = np.where(max_overlaps == 0)[0]
 43 |     assert all(max_classes[zero_inds] == 0)
 44 |     # max overlap > 0 => class should not be zero (must be a fg class)
 45 |     nonzero_inds = np.where(max_overlaps > 0)[0]
 46 |     assert all(max_classes[nonzero_inds] != 0)
 47 | 
 48 | 
 49 | def rank_roidb_ratio(roidb):
 50 |     # rank roidb based on the ratio between width and height.
 51 |     ratio_large = 2 # largest ratio to preserve.
 52 |     ratio_small = 0.5 # smallest ratio to preserve.    
 53 |     
 54 |     ratio_list = []
 55 |     for i in range(len(roidb)):
 56 |       width = roidb[i]['width']
 57 |       height = roidb[i]['height']
 58 |       ratio = width / float(height)
 59 | 
 60 |       if ratio > ratio_large:
 61 |         roidb[i]['need_crop'] = 1
 62 |         ratio = ratio_large
 63 |       elif ratio < ratio_small:
 64 |         roidb[i]['need_crop'] = 1
 65 |         ratio = ratio_small        
 66 |       else:
 67 |         roidb[i]['need_crop'] = 0
 68 | 
 69 |       ratio_list.append(ratio)
 70 | 
 71 |     ratio_list = np.array(ratio_list)
 72 |     ratio_index = np.argsort(ratio_list)
 73 |     return ratio_list[ratio_index], ratio_index
 74 | 
 75 | def filter_roidb(roidb):
 76 |     # filter the image without bounding box.
 77 |     print('before filtering, there are %d images...' % (len(roidb)))
 78 |     i = 0
 79 |     while i < len(roidb):
 80 |       if len(roidb[i]['boxes']) == 0:
 81 |         del roidb[i]
 82 |         i -= 1
 83 |       i += 1
 84 | 
 85 |     print('after filtering, there are %d images...' % (len(roidb)))
 86 |     return roidb
 87 | 
 88 | def combined_roidb(imdb_names, training=True):
 89 |   """
 90 |   Combine multiple roidbs
 91 |   """
 92 | 
 93 |   def get_training_roidb(imdb):
 94 |     """Returns a roidb (Region of Interest database) for use in training."""
 95 |     if cfg.TRAIN.USE_FLIPPED:
 96 |       print('Appending horizontally-flipped training examples...')
 97 |       imdb.append_flipped_images()
 98 |       print('done')
 99 | 
100 |     print('Preparing training data...')
101 | 
102 |     prepare_roidb(imdb)
103 |     #ratio_index = rank_roidb_ratio(imdb)
104 |     print('done')
105 | 
106 |     return imdb.roidb
107 |   
108 |   def get_roidb(imdb_name):
109 |     imdb = get_imdb(imdb_name)
110 |     print('Loaded dataset `{:s}` for training'.format(imdb.name))
111 |     imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
112 |     print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
113 |     roidb = get_training_roidb(imdb)
114 |     return roidb
115 | 
116 |   roidbs = [get_roidb(s) for s in imdb_names.split('+')]
117 |   roidb = roidbs[0]
118 | 
119 |   if len(roidbs) > 1:
120 |     for r in roidbs[1:]:
121 |       roidb.extend(r)
122 |     tmp = get_imdb(imdb_names.split('+')[1])
123 |     imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
124 |   else:
125 |     imdb = get_imdb(imdb_names)
126 | 
127 |   if training:
128 |     roidb = filter_roidb(roidb)
129 | 
130 |   ratio_list, ratio_index = rank_roidb_ratio(roidb)
131 | 
132 |   return imdb, roidb, ratio_list, ratio_index
133 | 


--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | # --------------------------------------------------------
  3 | # Fast R-CNN
  4 | # Copyright (c) 2015 Microsoft
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Ross Girshick
  7 | # --------------------------------------------------------
  8 | 
  9 | import os
 10 | from os.path import join as pjoin
 11 | import numpy as np
 12 | from distutils.core import setup
 13 | from distutils.extension import Extension
 14 | from Cython.Distutils import build_ext
 15 | 
 16 | 
 17 | def find_in_path(name, path):
 18 |     "Find a file in a search path"
 19 |     # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
 20 |     for dir in path.split(os.pathsep):
 21 |         binpath = pjoin(dir, name)
 22 |         if os.path.exists(binpath):
 23 |             return os.path.abspath(binpath)
 24 |     return None
 25 | 
 26 | 
 27 | # def locate_cuda():
 28 | #     """Locate the CUDA environment on the system
 29 | #
 30 | #     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
 31 | #     and values giving the absolute path to each directory.
 32 | #
 33 | #     Starts by looking for the CUDAHOME env variable. If not found, everything
 34 | #     is based on finding 'nvcc' in the PATH.
 35 | #     """
 36 | # 
 37 | #     # first check if the CUDAHOME env variable is in use
 38 | #     if 'CUDAHOME' in os.environ:
 39 | #         home = os.environ['CUDAHOME']
 40 | #         nvcc = pjoin(home, 'bin', 'nvcc')
 41 | #     else:
 42 | #         # otherwise, search the PATH for NVCC
 43 | #         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
 44 | #         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
 45 | #         if nvcc is None:
 46 | #             raise EnvironmentError('The nvcc binary could not be '
 47 | #                                    'located in your $PATH. Either add it to your path, or set $CUDAHOME')
 48 | #         home = os.path.dirname(os.path.dirname(nvcc))
 49 | #
 50 | #     cudaconfig = {'home': home, 'nvcc': nvcc,
 51 | #                   'include': pjoin(home, 'include'),
 52 | #                   'lib64': pjoin(home, 'lib64')}
 53 | #     for k, v in cudaconfig.iteritems():
 54 | #         if not os.path.exists(v):
 55 | #             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
 56 | #
 57 | #     return cudaconfig
 58 | 
 59 | 
 60 | # CUDA = locate_cuda()
 61 | 
# Obtain the numpy include directory; it is passed as an include dir to the
# extensions defined below.  This logic works across numpy versions:
# np.get_include() is tried first and np.get_numpy_include() is the fallback
# when numpy has no get_include attribute.
try:
    numpy_include = np.get_include()
except AttributeError:
    numpy_include = np.get_numpy_include()
 67 | 
 68 | 
 69 | def customize_compiler_for_nvcc(self):
 70 |     """inject deep into distutils to customize how the dispatch
 71 |     to gcc/nvcc works.
 72 | 
 73 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
 74 |     injected in, and still have the right customizations (i.e.
 75 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
 76 |     the OO route, I have this. Note, it's kindof like a wierd functional
 77 |     subclassing going on."""
 78 | 
 79 |     # tell the compiler it can processes .cu
 80 |     self.src_extensions.append('.cu')
 81 | 
 82 |     # save references to the default compiler_so and _comple methods
 83 |     default_compiler_so = self.compiler_so
 84 |     super = self._compile
 85 | 
 86 |     # now redefine the _compile method. This gets executed for each
 87 |     # object but distutils doesn't have the ability to change compilers
 88 |     # based on source extension: we add it.
 89 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
 90 |         print(extra_postargs)
 91 |         if os.path.splitext(src)[1] == '.cu':
 92 |             # use the cuda for .cu files
 93 |             self.set_executable('compiler_so', CUDA['nvcc'])
 94 |             # use only a subset of the extra_postargs, which are 1-1 translated
 95 |             # from the extra_compile_args in the Extension class
 96 |             postargs = extra_postargs['nvcc']
 97 |         else:
 98 |             postargs = extra_postargs['gcc']
 99 | 
100 |         super(obj, src, ext, cc_args, postargs, pp_opts)
101 |         # reset the default compiler_so, which we might have changed for cuda
102 |         self.compiler_so = default_compiler_so
103 | 
104 |     # inject our redefined _compile method into the class
105 |     self._compile = _compile
106 | 
107 | 
# run the customize_compiler
class custom_build_ext(build_ext):
    """build_ext command that patches the compiler with
    customize_compiler_for_nvcc before building the extensions."""

    def build_extensions(self):
        # Patch first so every extension is compiled through the wrapped
        # _compile, then run the regular build.
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)
113 | 
114 | 
# Extension modules to build. extra_compile_args is a dict keyed by compiler
# name rather than a plain list: the patched _compile installed by
# customize_compiler_for_nvcc looks up the 'gcc' (or 'nvcc') entry.
ext_modules = [
    # bounding-box utilities compiled from model/utils/bbox.pyx
    Extension(
        "model.utils.cython_bbox",
        ["model/utils/bbox.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    # COCO mask API: the C implementation plus its Cython wrapper
    Extension(
        'pycocotools._mask',
        sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
        include_dirs=[numpy_include, 'pycocotools'],
        extra_compile_args={
            'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
    ),
]

setup(
    name='faster_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
137 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | cffi
3 | opencv-python
4 | scipy
5 | easydict
6 | matplotlib
7 | pyyaml
8 | 


--------------------------------------------------------------------------------