├── .gitignore
├── LICENSE
├── README.md
├── _init_paths.py
├── cfgs
│   ├── res101.yml
│   ├── res101_ls.yml
│   ├── res50.yml
│   └── vgg16.yml
├── demo.py
├── images
│   ├── img1.jpg
│   ├── img1_det.jpg
│   ├── img1_det_res101.jpg
│   ├── img2.jpg
│   ├── img2_det.jpg
│   ├── img2_det_res101.jpg
│   ├── img3.jpg
│   ├── img3_det.jpg
│   ├── img3_det_res101.jpg
│   ├── img4.jpg
│   ├── img4_det.jpg
│   └── img4_det_res101.jpg
├── lib
│   ├── Makefile
│   ├── datasets
│   │   ├── VOCdevkit-matlab-wrapper
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imagenet.py
│   │   ├── imdb.py
│   │   ├── pascal_voc.py
│   │   ├── pascal_voc_rbg.py
│   │   ├── tools
│   │   │   └── mcg_munge.py
│   │   ├── vg.py
│   │   ├── vg_eval.py
│   │   └── voc_eval.py
│   ├── make.sh
│   ├── model
│   │   ├── __init__.py
│   │   ├── couplenet
│   │   │   ├── __init__.py
│   │   │   ├── couplenet.py
│   │   │   └── resnet_atrous.py
│   │   ├── faster_rcnn
│   │   │   ├── __init__.py
│   │   │   ├── faster_rcnn.py
│   │   │   ├── resnet.py
│   │   │   └── vgg16.py
│   │   ├── nms
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   └── nms
│   │   │   │   │   └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── make.sh
│   │   │   ├── nms_cpu.py
│   │   │   ├── nms_gpu.py
│   │   │   ├── nms_kernel.cu
│   │   │   ├── nms_wrapper.py
│   │   │   └── src
│   │   │   │   ├── nms_cuda.c
│   │   │   │   ├── nms_cuda.h
│   │   │   │   ├── nms_cuda_kernel.cu
│   │   │   │   └── nms_cuda_kernel.h
│   │   ├── psroi_pooling
│   │   │   ├── __init__.py
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   └── psroi_pooling
│   │   │   │   │   └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── psroi_pooling.py
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   └── psroi_pool.py
│   │   │   └── src
│   │   │   │   ├── cuda
│   │   │   │   │   ├── psroi_pooling_kernel.cu
│   │   │   │   │   └── psroi_pooling_kernel.h
│   │   │   │   ├── psroi_pooling_cuda.c
│   │   │   │   └── psroi_pooling_cuda.h
│   │   ├── rfcn
│   │   │   ├── __init__.py
│   │   │   ├── resnet_atrous.py
│   │   │   └── rfcn.py
│   │   ├── roi_align
│   │   │   ├── __init__.py
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align
│   │   │   │   │   └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align.py
│   │   │   ├── make.sh
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align.py
│   │   │   └── src
│   │   │   │   ├── roi_align.c
│   │   │   │   ├── roi_align.h
│   │   │   │   ├── roi_align_cuda.c
│   │   │   │   ├── roi_align_cuda.h
│   │   │   │   ├── roi_align_kernel.cu
│   │   │   │   └── roi_align_kernel.h
│   │   ├── roi_crop
│   │   │   ├── __init__.py
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   ├── crop_resize
│   │   │   │   │   └── __init__.py
│   │   │   │   └── roi_crop
│   │   │   │   │   └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   ├── crop_resize.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   ├── make.sh
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   └── src
│   │   │   │   ├── roi_crop.c
│   │   │   │   ├── roi_crop.h
│   │   │   │   ├── roi_crop_cuda.c
│   │   │   │   ├── roi_crop_cuda.h
│   │   │   │   ├── roi_crop_cuda_kernel.cu
│   │   │   │   └── roi_crop_cuda_kernel.h
│   │   ├── roi_pooling
│   │   │   ├── __init__.py
│   │   │   ├── _ext
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pooling
│   │   │   │   │   └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pool.py
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pool.py
│   │   │   └── src
│   │   │   │   ├── roi_pooling.c
│   │   │   │   ├── roi_pooling.h
│   │   │   │   ├── roi_pooling_cuda.c
│   │   │   │   ├── roi_pooling_cuda.h
│   │   │   │   ├── roi_pooling_kernel.cu
│   │   │   │   └── roi_pooling_kernel.h
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── anchor_target_layer.py
│   │   │   ├── bbox_transform.py
│   │   │   ├── generate_anchors.py
│   │   │   ├── proposal_layer.py
│   │   │   ├── proposal_target_layer_cascade.py
│   │   │   └── rpn.py
│   │   └── utils
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── bbox.c
│   │   │   ├── bbox.pyx
│   │   │   ├── blob.py
│   │   │   ├── config.py
│   │   │   ├── logger.py
│   │   │   └── net_utils.py
│   ├── pycocotools
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.c
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer
│   │   ├── __init__.py
│   │   ├── minibatch.py
│   │   ├── roibatchLoader.py
│   │   └── roidb.py
│   └── setup.py
├── requirements.txt
├── test_net.py
└── trainval_net.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | .idea/
3 | *.pyc
4 | *~
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Jianwei Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # A Pytorch Implementation of R-FCN/CoupleNet
2 |
3 | This repo has moved to [princewang1994/RFCN_CoupleNet.pytorch](https://github.com/princewang1994/RFCN_CoupleNet.pytorch) and will no longer be updated here.
4 |
5 | ## Introduction
6 |
7 | This project is a PyTorch implementation of R-FCN and CoupleNet. A large part of the code is adapted from [jwyang/faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch). The R-FCN structure follows [Caffe R-FCN](https://github.com/daijifeng001/R-FCN) and [Py-R-FCN](https://github.com/YuwenXiong/py-R-FCN).
8 |
9 | - For R-FCN, mAP@0.5 reaches 73.2 on PASCAL VOC 2007 (trained on 07 trainval, tested on 07 test)
10 | - For CoupleNet, mAP@0.5 reaches 75.2 under the same setting
11 |
12 | ## R-FCN
13 |
14 | arXiv:1605.06409: [R-FCN: Object Detection via Region-based Fully Convolutional Networks](https://arxiv.org/abs/1605.06409)
15 |
16 | 
17 |
18 | This repo has the following modifications compared to [jwyang/faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch):
19 |
20 | - **R-FCN architecture**: We follow the original [Caffe R-FCN](https://github.com/daijifeng001/R-FCN); the main structure of R-FCN is shown in the figure below.
21 | - **PS-RoIPooling with CUDA**: adapted from another PyTorch R-FCN implementation (pytorch_RFCN) and modified to support multi-image training (batch sizes larger than 1 are supported).
22 | - **Multi-scale training**: As in the original paper, each image is randomly resized to one of several resolutions (400, 500, 600, 700, 800) during training, while a fixed input size (600) is used at test time. This gives a 1.2 mAP gain in our experiments (see the sketch below).
23 | - **OHEM**: We implement the Online Hard Example Mining (OHEM) method from the paper; set `OHEM: True` in `cfgs/res101.yml` to use it. Unfortunately, it causes a slight performance drop in my experiments.
24 |
25 | 
26 |
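The multi-scale training described above boils down to drawing a random shorter-side target per image at train time and using a fixed one at test time. A minimal sketch (illustrative only; the repo's real resizing lives in the `roi_data_layer` code, and OpenCV plus the `MAX_SIZE = 1000` cap are assumptions here):

```python
import random
import cv2  # assumed here just for the resize call

TRAIN_SCALES = (400, 500, 600, 700, 800)  # shorter-side targets used during training
TEST_SCALE = 600                          # fixed shorter side at test time
MAX_SIZE = 1000                           # assumed cap on the longer side

def resize_image(im, training=True):
    """Resize `im` so that its shorter side matches the chosen scale."""
    target = random.choice(TRAIN_SCALES) if training else TEST_SCALE
    h, w = im.shape[:2]
    scale = target / min(h, w)
    if round(scale * max(h, w)) > MAX_SIZE:  # keep the longer side bounded
        scale = MAX_SIZE / max(h, w)
    return cv2.resize(im, None, fx=scale, fy=scale), scale
```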
27 | ## CoupleNet
28 |
29 | arXiv:1708.02863:[CoupleNet: Coupling Global Structure with Local Parts for Object Detection](https://arxiv.org/abs/1708.02863)
30 |
31 | 
32 |
33 | - Making changes based on R-FCN
34 | - Implement local/global FCN in CoupleNet
35 |
36 | ## Tutorial
37 |
38 | * [R-FCN blog](http://blog.prince2015.club/2018/07/13/R-FCN/)
39 |
40 | ## Benchmarking
41 |
42 | We benchmark our code on PASCAL VOC with two different architectures: R-FCN and CoupleNet. Results are shown below:
43 |
44 | 1). PASCAL VOC 2007 (Train: 07_trainval - Test: 07_test, scale=400, 500, 600, 700, 800)
45 |
46 | model | #GPUs | batch size | lr | lr_decay | max_epoch | time/epoch | mem/GPU | mAP
47 | ---------|--------|-----|--------|-----|-----|-------|--------|-----
48 | [R-FCN](https://drive.google.com/file/d/1JMh0gguOozEEIRijQxkQnMKLTAp2_iu5/view?usp=sharing) | 1 | 2 | 4e-3 | 8 | 20 | 0.88 hr | 3000 MB | 73.8
49 | CoupleNet | 1 | 2 | 4e-3 | 8 | 20 | 0.60 hr | 8900 MB | 75.2
50 |
51 | - The pretrained R-FCN model (VOC2007) has been released; see the `Test` section below.
52 |
53 |
54 | ## Preparation
55 |
56 |
57 | First of all, clone the code
58 | ```
59 | $ git clone https://github.com/princewang1994/R-FCN.pytorch.git
60 | ```
61 |
62 | Then create a data folder and symlink your VOC devkit into it:
63 | ```
64 | $ cd R-FCN.pytorch && mkdir data
65 | $ cd data
66 | $ ln -s $VOC_DEVKIT_ROOT .
67 | ```
68 |
69 | ### Prerequisites
70 |
71 | * Python 3.6
72 | * PyTorch 0.3.0 (**0.4.0 is NOT supported because of some errors**)
73 | * CUDA 8.0 or higher
74 |
75 | ### Data Preparation
76 |
77 | * **PASCAL_VOC 07+12**: Please follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare the VOC datasets (any equivalent source also works). After downloading the data, create symlinks in the folder `data/`.
78 | * **Pretrained ResNet**: download it from [here](https://drive.google.com/file/d/1I4Jmh2bU6BJVnwqfg5EDe8KGGdec2UE8/view?usp=sharing) and put it at `$RFCN_ROOT/data/pretrained_model/resnet101_caffe.pth`. A quick sanity check of the expected layout is sketched after this list.
79 |
80 |
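As a quick sanity check before training, a small (hypothetical) helper can confirm that the symlinked devkit and the pretrained weights sit where the code expects them; the exact paths below are assumptions, so adjust them to your layout:

```python
import os

# assumed layout (VOCdevkit symlinked as VOCdevkit2007); adjust if yours differs
EXPECTED = [
    "data/VOCdevkit2007/VOC2007/JPEGImages",
    "data/VOCdevkit2007/VOC2007/Annotations",
    "data/pretrained_model/resnet101_caffe.pth",
]

def check_data_layout(root="."):
    for rel in EXPECTED:
        path = os.path.join(root, rel)
        print(("ok     " if os.path.exists(path) else "MISSING"), path)

if __name__ == "__main__":
    check_data_layout()
```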
81 | ### Compilation
82 |
83 | As pointed out by [ruotianluo/pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn), choose the right `-arch` option in the `make.sh` file to compile the CUDA code:
84 |
85 | | GPU model | Architecture |
86 | | ------------- | ------------- |
87 | | TitanX (Maxwell/Pascal) | sm_52 |
88 | | GTX 960M | sm_50 |
89 | | GTX 1080 (Ti) | sm_61 |
90 | | Grid K520 (AWS g2.2xlarge) | sm_30 |
91 | | Tesla K80 (AWS p2.xlarge) | sm_37 |
92 |
93 | More details about setting the architecture can be found [here](https://developer.nvidia.com/cuda-gpus) or [here](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
94 |
95 | Install all the python dependencies using pip:
96 | ```
97 | $ pip install -r requirements.txt
98 | ```
99 |
100 | Compile the CUDA dependencies using the following simple commands:
101 |
102 | ```
103 | $ cd lib
104 | $ sh make.sh
105 | ```
106 |
107 | This will compile all the modules you need, including NMS, ROI_Pooling, ROI_Align and ROI_Crop. The default version is compiled with Python 2.7; please recompile it yourself if you are using a different Python version.
108 |
109 | ## Train
110 |
111 | To train an R-FCN model with ResNet-101 on pascal_voc, simply run:
112 | ```
113 | $ CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net.py \
114 | --arch rfcn \
115 | --dataset pascal_voc --net res101 \
116 | --bs $BATCH_SIZE --nw $WORKER_NUMBER \
117 | --lr $LEARNING_RATE --lr_decay_step $DECAY_STEP \
118 | --cuda
119 | ```
120 |
121 | - Set `--s` to identify different experiments.
122 | - For CoupleNet training, replace `--arch rfcn` with `--arch couplenet`; other arguments should be adjusted to your machine (e.g., a larger learning rate for a bigger batch size).
123 | - Models are saved to `$RFCN_ROOT/save`
124 |
125 | ## Test
126 |
127 | If you want to evaluate the detection performance of a pre-trained model on the pascal_voc test set, simply run
128 | ```
129 | $ python test_net.py --dataset pascal_voc --arch rfcn \
130 | --net res101 \
131 | --checksession $SESSION \
132 | --checkepoch $EPOCH \
133 | --checkpoint $CHECKPOINT \
134 | --cuda
135 | ```
136 | - Specify the model session (`--s` in the training phase), checkepoch and checkpoint, e.g., SESSION=1, EPOCH=6, CHECKPOINT=5010.
137 |
138 | ### Pretrained Model
139 |
140 | - R-FCN VOC2007: [faster_rcnn_2_12_5010.pth](https://drive.google.com/file/d/1JMh0gguOozEEIRijQxkQnMKLTAp2_iu5/view?usp=sharing)
141 |
142 | Download it from the link above and put it at `save/rfcn/res101/pascal_voc/faster_rcnn_2_12_5010.pth`. Then set `$SESSION=2, $EPOCH=12, $CHECKPOINT=5010` in the test command. It should get 73.2 mAP.
143 |
144 | ## Demo
145 |
146 | Below are some detection results:
147 |
148 |
149 | 
150 |
151 |
152 | ## Going to do
153 |
154 | - Keep updating the structure to reach the state of the art
155 | - More benchmarking in VOC0712/COCO
156 | - ~~RFCN Pretrained model for VOC07~~
157 | - CoupleNet pretrained model for VOC07
158 | - Adapt to fit PyTorch 0.4.0
159 |
160 | ## Acknowledgement
161 |
162 | This project is written by [Prince Wang](https://github.com/princewang1994). Thanks to [jwyang](https://github.com/jwyang), who provided the faster-rcnn.pytorch code.
163 |
--------------------------------------------------------------------------------
/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 | def add_path(path):
5 | if path not in sys.path:
6 | sys.path.insert(0, path)
7 |
8 | this_dir = osp.dirname(__file__)
9 |
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 |
14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')
15 | add_path(coco_path)
16 |
--------------------------------------------------------------------------------
/cfgs/res101.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res101
2 | TRAIN:
3 | HAS_RPN: True
4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 | RPN_POSITIVE_OVERLAP: 0.7
6 | RPN_BATCHSIZE: 256
7 | PROPOSAL_METHOD: gt
8 | BG_THRESH_LO: 0.0
9 | DISPLAY: 20
10 | BATCH_SIZE: 128
11 | WEIGHT_DECAY: 0.0001
12 | DOUBLE_BIAS: False
13 | LEARNING_RATE: 0.001
14 | OHEM: False
15 | TEST:
16 | HAS_RPN: True
17 | POOLING_SIZE: 7
18 | POOLING_MODE: align
19 | CROP_RESIZE_WITH_MAX_POOL: False
20 |
--------------------------------------------------------------------------------
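For reference, a config like the one above is merged over the defaults defined in `lib/model/utils/config.py` when the training/testing scripts start (via a `cfg_from_file`-style helper). Below is a minimal sketch of that merge using plain PyYAML; the `DEFAULTS` shown here are an illustrative subset, not the repo's full default config:

```python
import yaml

# illustrative subset of defaults that cfgs/res101.yml overrides
DEFAULTS = {
    "EXP_DIR": "default",
    "TRAIN": {"HAS_RPN": False, "BATCH_SIZE": 256, "LEARNING_RATE": 0.001, "OHEM": False},
    "TEST": {"HAS_RPN": False},
    "POOLING_MODE": "crop",
    "POOLING_SIZE": 7,
}

def merge_into(src, dst):
    """Recursively copy keys from the loaded YAML onto the defaults."""
    for k, v in src.items():
        if isinstance(v, dict) and isinstance(dst.get(k), dict):
            merge_into(v, dst[k])
        else:
            dst[k] = v

with open("cfgs/res101.yml") as f:
    merge_into(yaml.safe_load(f), DEFAULTS)

print(DEFAULTS["TRAIN"]["BATCH_SIZE"], DEFAULTS["POOLING_MODE"])  # -> 128 align
```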
/cfgs/res101_ls.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res101
2 | TRAIN:
3 | HAS_RPN: True
4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 | RPN_POSITIVE_OVERLAP: 0.7
6 | RPN_BATCHSIZE: 256
7 | PROPOSAL_METHOD: gt
8 | BG_THRESH_LO: 0.0
9 | DISPLAY: 20
10 | BATCH_SIZE: 128
11 | WEIGHT_DECAY: 0.0001
12 | SCALES: [800]
13 | DOUBLE_BIAS: False
14 | LEARNING_RATE: 0.001
15 | TEST:
16 | HAS_RPN: True
17 | SCALES: [800]
18 | MAX_SIZE: 1200
19 | RPN_POST_NMS_TOP_N: 1000
20 | POOLING_SIZE: 7
21 | POOLING_MODE: align
22 | CROP_RESIZE_WITH_MAX_POOL: False
23 |
--------------------------------------------------------------------------------
/cfgs/res50.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res50
2 | TRAIN:
3 | HAS_RPN: True
4 | # IMS_PER_BATCH: 1
5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
6 | RPN_POSITIVE_OVERLAP: 0.7
7 | RPN_BATCHSIZE: 256
8 | PROPOSAL_METHOD: gt
9 | BG_THRESH_LO: 0.0
10 | DISPLAY: 20
11 | BATCH_SIZE: 256
12 | WEIGHT_DECAY: 0.0001
13 | DOUBLE_BIAS: False
14 | SNAPSHOT_PREFIX: res50_faster_rcnn
15 | TEST:
16 | HAS_RPN: True
17 | POOLING_MODE: crop
18 |
--------------------------------------------------------------------------------
/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: vgg16
2 | TRAIN:
3 | HAS_RPN: True
4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 | RPN_POSITIVE_OVERLAP: 0.7
6 | RPN_BATCHSIZE: 256
7 | PROPOSAL_METHOD: gt
8 | BG_THRESH_LO: 0.0
9 | BATCH_SIZE: 256
10 | LEARNING_RATE: 0.01
11 | TEST:
12 | HAS_RPN: True
13 | POOLING_MODE: align
14 | CROP_RESIZE_WITH_MAX_POOL: False
15 |
--------------------------------------------------------------------------------
/images/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img1.jpg
--------------------------------------------------------------------------------
/images/img1_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img1_det.jpg
--------------------------------------------------------------------------------
/images/img1_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img1_det_res101.jpg
--------------------------------------------------------------------------------
/images/img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img2.jpg
--------------------------------------------------------------------------------
/images/img2_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img2_det.jpg
--------------------------------------------------------------------------------
/images/img2_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img2_det_res101.jpg
--------------------------------------------------------------------------------
/images/img3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img3.jpg
--------------------------------------------------------------------------------
/images/img3_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img3_det.jpg
--------------------------------------------------------------------------------
/images/img3_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img3_det_res101.jpg
--------------------------------------------------------------------------------
/images/img4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img4.jpg
--------------------------------------------------------------------------------
/images/img4_det.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img4_det.jpg
--------------------------------------------------------------------------------
/images/img4_det_res101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/images/img4_det_res101.jpg
--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | CUDA_PATH="/usr/local/cuda/"
2 | CUDA_ARCH="-gencode arch=compute_61,code=sm_61"
3 | ROOT=`pwd`
4 |
5 | all: build_ext nms roi_pooling roi_align roi_crop psroi_pooling
6 | ls
7 |
8 | build_ext:
9 | python setup.py build_ext --inplace
10 | rm -rf build
11 |
12 | nms:
13 | # compile NMS
14 | cd model/nms/src; \
15 | echo "Compiling nms kernels by nvcc..."; \
16 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
17 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
18 |
19 |
20 | #cd ../
21 | #python build.py
22 | #
23 | ## compile roi_pooling
24 | #cd ../../
25 | #cd model/roi_pooling/src
26 | #echo "Compiling roi pooling kernels by nvcc..."
27 | #nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
28 | # -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
29 | #cd ../
30 | #python build.py
31 | #
32 | ## compile roi_align
33 | #cd ../../
34 | #cd model/roi_align/src
35 | #echo "Compiling roi align kernels by nvcc..."
36 | #nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
37 | # -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
38 | #cd ../
39 | #python build.py
40 | #
41 | ## compile roi_crop
42 | #cd ../../
43 | #cd model/roi_crop/src
44 | #echo "Compiling roi crop kernels by nvcc..."
45 | #nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
46 | # -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
47 | #cd ../
48 | #python build.py
49 | #
50 | ## compile roi_crop
51 | #cd ../../
52 | #cd model/psroi_pooling/src/cuda
53 | #echo "Compiling psroi pooling kernels by nvcc..."
54 | #nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu \
55 | # -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
56 | #cd ../../
57 | #python build.py
58 |
59 | #clean:
60 | # rm model/nms/src/*.o
61 | # rm model/roi_pooling/src/*.o
62 | # rm model/roi_align/src/*.o
63 | # rm model/roi_crop/src/*.o
64 | # rm model/psroi_pooling/src/cuda/*.o
65 |
66 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
1 | function VOCopts = get_voc_opts(path)
2 |
3 | tmp = pwd;
4 | cd(path);
5 | try
6 | addpath('VOCcode');
7 | VOCinit;
8 | catch
9 | rmpath('VOCcode');
10 | cd(tmp);
11 | error(sprintf('VOCcode directory not found under %s', path));
12 | end
13 | rmpath('VOCcode');
14 | cd(tmp);
15 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
1 | function res = voc_eval(path, comp_id, test_set, output_dir)
2 |
3 | VOCopts = get_voc_opts(path);
4 | VOCopts.testset = test_set;
5 |
6 | for i = 1:length(VOCopts.classes)
7 | cls = VOCopts.classes{i};
8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
9 | end
10 |
11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
12 | fprintf('Results:\n');
13 | aps = [res(:).ap]';
14 | fprintf('%.1f\n', aps * 100);
15 | fprintf('%.1f\n', mean(aps) * 100);
16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 |
18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
19 |
20 | test_set = VOCopts.testset;
21 | year = VOCopts.dataset(4:end);
22 |
23 | addpath(fullfile(VOCopts.datadir, 'VOCcode'));
24 |
25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls);
26 |
27 | recall = [];
28 | prec = [];
29 | ap = 0;
30 | ap_auc = 0;
31 |
32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
33 | if do_eval
34 | % Bug in VOCevaldet requires that tic has been called first
35 | tic;
36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
37 | ap_auc = xVOCap(recall, prec);
38 |
39 | % force plot limits
40 | ylim([0 1]);
41 | xlim([0 1]);
42 |
43 | print(gcf, '-djpeg', '-r0', ...
44 | [output_dir '/' cls '_pr.jpg']);
45 | end
46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
47 |
48 | res.recall = recall;
49 | res.prec = prec;
50 | res.ap = ap;
51 | res.ap_auc = ap_auc;
52 |
53 | save([output_dir '/' cls '_pr.mat'], ...
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc');
55 |
56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
1 | function ap = xVOCap(rec,prec)
2 | % From the PASCAL VOC 2011 devkit
3 |
4 | mrec=[0 ; rec ; 1];
5 | mpre=[0 ; prec ; 0];
6 | for i=numel(mpre)-1:-1:1
7 | mpre(i)=max(mpre(i),mpre(i+1));
8 | end
9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 |
--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Ross Girshick
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import numpy as np
11 |
12 |
13 | def unique_boxes(boxes, scale=1.0):
14 | """Return indices of unique boxes."""
15 | v = np.array([1, 1e3, 1e6, 1e9])
16 | hashes = np.round(boxes * scale).dot(v)
17 | _, index = np.unique(hashes, return_index=True)
18 | return np.sort(index)
19 |
20 |
21 | def xywh_to_xyxy(boxes):
22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))
24 |
25 |
26 | def xyxy_to_xywh(boxes):
27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))
29 |
30 |
31 | def validate_boxes(boxes, width=0, height=0):
32 | """Check that a set of boxes are valid."""
33 | x1 = boxes[:, 0]
34 | y1 = boxes[:, 1]
35 | x2 = boxes[:, 2]
36 | y2 = boxes[:, 3]
37 | assert (x1 >= 0).all()
38 | assert (y1 >= 0).all()
39 | assert (x2 >= x1).all()
40 | assert (y2 >= y1).all()
41 | assert (x2 < width).all()
42 | assert (y2 < height).all()
43 |
44 |
45 | def filter_small_boxes(boxes, min_size):
46 | w = boxes[:, 2] - boxes[:, 0]
47 | h = boxes[:, 3] - boxes[:, 1]
48 | keep = np.where((w >= min_size) & (h > min_size))[0]
49 | return keep
50 |
--------------------------------------------------------------------------------
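A tiny usage example for the box helpers above (run from the repo root with `lib/` on `sys.path`, e.g. via `_init_paths`):

```python
import numpy as np
from datasets.ds_utils import xywh_to_xyxy, xyxy_to_xywh, filter_small_boxes

boxes_xywh = np.array([[10, 10, 20, 20], [5, 5, 2, 2]], dtype=np.float32)
boxes_xyxy = xywh_to_xyxy(boxes_xywh)                      # [[10, 10, 29, 29], [5, 5, 6, 6]]
assert np.allclose(xyxy_to_xywh(boxes_xyxy), boxes_xywh)   # the two conversions round-trip
print(filter_small_boxes(boxes_xyxy, min_size=10))         # -> [0]; the 2x2 box is dropped
```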
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting imdbs by name."""
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | __sets = {}
14 | from datasets.pascal_voc import pascal_voc
15 | from datasets.coco import coco
16 | from datasets.imagenet import imagenet
17 | from datasets.vg import vg
18 |
19 | import numpy as np
20 |
21 | # Set up voc__
22 | for year in ['2007', '2012']:
23 | for split in ['train', 'val', 'trainval', 'test']:
24 | name = 'voc_{}_{}'.format(year, split)
25 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
26 |
27 | # Set up coco_2014_
28 | for year in ['2014']:
29 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
30 | name = 'coco_{}_{}'.format(year, split)
31 | __sets[name] = (lambda split=split, year=year: coco(split, year))
32 |
33 | # Set up coco_2014_cap_
34 | for year in ['2014']:
35 | for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']:
36 | name = 'coco_{}_{}'.format(year, split)
37 | __sets[name] = (lambda split=split, year=year: coco(split, year))
38 |
39 | # Set up coco_2015_
40 | for year in ['2015']:
41 | for split in ['test', 'test-dev']:
42 | name = 'coco_{}_{}'.format(year, split)
43 | __sets[name] = (lambda split=split, year=year: coco(split, year))
44 |
45 | # Set up vg_
46 | # for version in ['1600-400-20']:
47 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']:
48 | # name = 'vg_{}_{}'.format(version,split)
49 | # __sets[name] = (lambda split=split, version=version: vg(version, split))
50 | for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
51 | for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
52 | name = 'vg_{}_{}'.format(version,split)
53 | __sets[name] = (lambda split=split, version=version: vg(version, split))
54 |
55 | # set up image net.
56 | for split in ['train', 'val', 'val1', 'val2', 'test']:
57 | name = 'imagenet_{}'.format(split)
58 | devkit_path = 'data/imagenet/ILSVRC/devkit'
59 | data_path = 'data/imagenet/ILSVRC'
60 | __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))
61 |
62 | def get_imdb(name):
63 | """Get an imdb (image database) by name."""
64 | if name not in __sets:
65 | raise KeyError('Unknown dataset: {}'.format(name))
66 | return __sets[name]()
67 |
68 |
69 | def list_imdbs():
70 | """List all registered imdbs."""
71 | return list(__sets.keys())
72 |
--------------------------------------------------------------------------------
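For reference, this factory is how the training and testing scripts resolve a dataset name; a minimal interactive use (run from the repo root, with the VOC data prepared under `data/`) might look like:

```python
import _init_paths  # puts lib/ on sys.path when run from the repo root
from datasets.factory import get_imdb, list_imdbs

print(list_imdbs()[:5])               # registered names, e.g. 'voc_2007_trainval'
imdb = get_imdb("voc_2007_trainval")  # requires the VOC devkit under data/
print(imdb.name, imdb.num_classes)
```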
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import sys
4 |
5 | """Hacky tool to convert file system layout of MCG boxes downloaded from
6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
7 | so that it's consistent with those computed by Jan Hosang (see:
8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
9 | computing/research/object-recognition-and-scene-understanding/how-
10 | good-are-detection-proposals-really/)
11 |
12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
14 | """
15 |
16 | def munge(src_dir):
17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
19 |
20 | files = os.listdir(src_dir)
21 | for fn in files:
22 | base, ext = os.path.splitext(fn)
23 | # first 14 chars / first 22 chars / all chars + .mat
24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
25 | first = base[:14]
26 | second = base[:22]
27 | dst_dir = os.path.join('MCG', 'mat', first, second)
28 | if not os.path.exists(dst_dir):
29 | os.makedirs(dst_dir)
30 | src = os.path.join(src_dir, fn)
31 | dst = os.path.join(dst_dir, fn)
32 | print('MV: {} -> {}'.format(src, dst))
33 | os.rename(src, dst)
34 |
35 | if __name__ == '__main__':
36 | # src_dir should look something like:
37 | # src_dir = 'MCG-COCO-val2014-boxes'
38 | src_dir = sys.argv[1]
39 | munge(src_dir)
40 |
--------------------------------------------------------------------------------
/lib/datasets/vg_eval.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | # --------------------------------------------------------
3 | # Fast/er R-CNN
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Bharath Hariharan
6 | # --------------------------------------------------------
7 |
8 | import xml.etree.ElementTree as ET
9 | import os
10 | import numpy as np
11 | from .voc_eval import voc_ap
12 |
13 | def vg_eval( detpath,
14 | gt_roidb,
15 | image_index,
16 | classindex,
17 | ovthresh=0.5,
18 | use_07_metric=False,
19 | eval_attributes=False):
20 | """rec, prec, ap, sorted_scores, npos = voc_eval(
21 | detpath,
22 | gt_roidb,
23 | image_index,
24 | classindex,
25 | [ovthresh],
26 | [use_07_metric])
27 |
28 | Top level function that does the Visual Genome evaluation.
29 |
30 | detpath: Path to detections
31 | gt_roidb: List of ground truth structs.
32 | image_index: List of image ids.
33 | classindex: Category index
34 | [ovthresh]: Overlap threshold (default = 0.5)
35 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
36 | (default False)
37 | """
38 | # extract gt objects for this class
39 | class_recs = {}
40 | npos = 0
41 | for item,imagename in zip(gt_roidb,image_index):
42 | if eval_attributes:
43 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :]
44 | else:
45 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :]
46 | difficult = np.zeros((bbox.shape[0],)).astype(np.bool)
47 | det = [False] * bbox.shape[0]
48 | npos = npos + sum(~difficult)
49 | class_recs[str(imagename)] = {'bbox': bbox,
50 | 'difficult': difficult,
51 | 'det': det}
52 | if npos == 0:
53 | # No ground truth examples
54 | return 0,0,0,0,npos
55 |
56 | # read dets
57 | with open(detpath, 'r') as f:
58 | lines = f.readlines()
59 | if len(lines) == 0:
60 | # No detection examples
61 | return 0,0,0,0,npos
62 |
63 | splitlines = [x.strip().split(' ') for x in lines]
64 | image_ids = [x[0] for x in splitlines]
65 | confidence = np.array([float(x[1]) for x in splitlines])
66 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
67 |
68 | # sort by confidence
69 | sorted_ind = np.argsort(-confidence)
70 | sorted_scores = -np.sort(-confidence)
71 | BB = BB[sorted_ind, :]
72 | image_ids = [image_ids[x] for x in sorted_ind]
73 |
74 | # go down dets and mark TPs and FPs
75 | nd = len(image_ids)
76 | tp = np.zeros(nd)
77 | fp = np.zeros(nd)
78 | for d in range(nd):
79 | R = class_recs[image_ids[d]]
80 | bb = BB[d, :].astype(float)
81 | ovmax = -np.inf
82 | BBGT = R['bbox'].astype(float)
83 |
84 | if BBGT.size > 0:
85 | # compute overlaps
86 | # intersection
87 | ixmin = np.maximum(BBGT[:, 0], bb[0])
88 | iymin = np.maximum(BBGT[:, 1], bb[1])
89 | ixmax = np.minimum(BBGT[:, 2], bb[2])
90 | iymax = np.minimum(BBGT[:, 3], bb[3])
91 | iw = np.maximum(ixmax - ixmin + 1., 0.)
92 | ih = np.maximum(iymax - iymin + 1., 0.)
93 | inters = iw * ih
94 |
95 | # union
96 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
97 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
98 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
99 |
100 | overlaps = inters / uni
101 | ovmax = np.max(overlaps)
102 | jmax = np.argmax(overlaps)
103 |
104 | if ovmax > ovthresh:
105 | if not R['difficult'][jmax]:
106 | if not R['det'][jmax]:
107 | tp[d] = 1.
108 | R['det'][jmax] = 1
109 | else:
110 | fp[d] = 1.
111 | else:
112 | fp[d] = 1.
113 |
114 | # compute precision recall
115 | fp = np.cumsum(fp)
116 | tp = np.cumsum(tp)
117 | rec = tp / float(npos)
118 | # avoid divide by zero in case the first detection matches a difficult
119 | # ground truth
120 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
121 | ap = voc_ap(rec, prec, use_07_metric)
122 |
123 | return rec, prec, ap, sorted_scores, npos
124 |
--------------------------------------------------------------------------------
/lib/datasets/voc_eval.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Bharath Hariharan
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import xml.etree.ElementTree as ET
11 | import os
12 | import pickle
13 | import numpy as np
14 |
15 | def parse_rec(filename):
16 | """ Parse a PASCAL VOC xml file """
17 | tree = ET.parse(filename)
18 | objects = []
19 | for obj in tree.findall('object'):
20 | obj_struct = {}
21 | obj_struct['name'] = obj.find('name').text
22 | obj_struct['pose'] = obj.find('pose').text
23 | obj_struct['truncated'] = int(obj.find('truncated').text)
24 | obj_struct['difficult'] = int(obj.find('difficult').text)
25 | bbox = obj.find('bndbox')
26 | obj_struct['bbox'] = [int(bbox.find('xmin').text),
27 | int(bbox.find('ymin').text),
28 | int(bbox.find('xmax').text),
29 | int(bbox.find('ymax').text)]
30 | objects.append(obj_struct)
31 |
32 | return objects
33 |
34 |
35 | def voc_ap(rec, prec, use_07_metric=False):
36 | """ ap = voc_ap(rec, prec, [use_07_metric])
37 | Compute VOC AP given precision and recall.
38 | If use_07_metric is true, uses the
39 | VOC 07 11 point method (default:False).
40 | """
41 | if use_07_metric:
42 | # 11 point metric
43 | ap = 0.
44 | for t in np.arange(0., 1.1, 0.1):
45 | if np.sum(rec >= t) == 0:
46 | p = 0
47 | else:
48 | p = np.max(prec[rec >= t])
49 | ap = ap + p / 11.
50 | else:
51 | # correct AP calculation
52 | # first append sentinel values at the end
53 | mrec = np.concatenate(([0.], rec, [1.]))
54 | mpre = np.concatenate(([0.], prec, [0.]))
55 |
56 | # compute the precision envelope
57 | for i in range(mpre.size - 1, 0, -1):
58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
59 |
60 | # to calculate area under PR curve, look for points
61 | # where X axis (recall) changes value
62 | i = np.where(mrec[1:] != mrec[:-1])[0]
63 |
64 | # and sum (\Delta recall) * prec
65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
66 | return ap
67 |
68 |
69 | def voc_eval(detpath,
70 | annopath,
71 | imagesetfile,
72 | classname,
73 | cachedir,
74 | ovthresh=0.5,
75 | use_07_metric=False):
76 | """rec, prec, ap = voc_eval(detpath,
77 | annopath,
78 | imagesetfile,
79 | classname,
80 | [ovthresh],
81 | [use_07_metric])
82 |
83 | Top level function that does the PASCAL VOC evaluation.
84 |
85 | detpath: Path to detections
86 | detpath.format(classname) should produce the detection results file.
87 | annopath: Path to annotations
88 | annopath.format(imagename) should be the xml annotations file.
89 | imagesetfile: Text file containing the list of images, one image per line.
90 | classname: Category name (duh)
91 | cachedir: Directory for caching the annotations
92 | [ovthresh]: Overlap threshold (default = 0.5)
93 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
94 | (default False)
95 | """
96 | # assumes detections are in detpath.format(classname)
97 | # assumes annotations are in annopath.format(imagename)
98 | # assumes imagesetfile is a text file with each line an image name
99 | # cachedir caches the annotations in a pickle file
100 |
101 | # first load gt
102 | if not os.path.isdir(cachedir):
103 | os.mkdir(cachedir)
104 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
105 | # read list of images
106 | with open(imagesetfile, 'r') as f:
107 | lines = f.readlines()
108 | imagenames = [x.strip() for x in lines]
109 |
110 | if not os.path.isfile(cachefile):
111 | # load annotations
112 | recs = {}
113 | for i, imagename in enumerate(imagenames):
114 | recs[imagename] = parse_rec(annopath.format(imagename))
115 | if i % 100 == 0:
116 | print('Reading annotation for {:d}/{:d}'.format(
117 | i + 1, len(imagenames)))
118 | # save
119 | print('Saving cached annotations to {:s}'.format(cachefile))
120 | with open(cachefile, 'wb') as f:
121 | pickle.dump(recs, f)
122 | else:
123 | # load
124 | with open(cachefile, 'rb') as f:
125 | try:
126 | recs = pickle.load(f)
127 | except:
128 | recs = pickle.load(f, encoding='bytes')
129 |
130 | # extract gt objects for this class
131 | class_recs = {}
132 | npos = 0
133 | for imagename in imagenames:
134 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
135 | bbox = np.array([x['bbox'] for x in R])
136 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
137 | det = [False] * len(R)
138 | npos = npos + sum(~difficult)
139 | class_recs[imagename] = {'bbox': bbox,
140 | 'difficult': difficult,
141 | 'det': det}
142 |
143 | # read dets
144 | detfile = detpath.format(classname)
145 | with open(detfile, 'r') as f:
146 | lines = f.readlines()
147 |
148 | splitlines = [x.strip().split(' ') for x in lines]
149 | image_ids = [x[0] for x in splitlines]
150 | confidence = np.array([float(x[1]) for x in splitlines])
151 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
152 |
153 | nd = len(image_ids)
154 | tp = np.zeros(nd)
155 | fp = np.zeros(nd)
156 |
157 | if BB.shape[0] > 0:
158 | # sort by confidence
159 | sorted_ind = np.argsort(-confidence)
160 | sorted_scores = np.sort(-confidence)
161 | BB = BB[sorted_ind, :]
162 | image_ids = [image_ids[x] for x in sorted_ind]
163 |
164 | # go down dets and mark TPs and FPs
165 | for d in range(nd):
166 | R = class_recs[image_ids[d]]
167 | bb = BB[d, :].astype(float)
168 | ovmax = -np.inf
169 | BBGT = R['bbox'].astype(float)
170 |
171 | if BBGT.size > 0:
172 | # compute overlaps
173 | # intersection
174 | ixmin = np.maximum(BBGT[:, 0], bb[0])
175 | iymin = np.maximum(BBGT[:, 1], bb[1])
176 | ixmax = np.minimum(BBGT[:, 2], bb[2])
177 | iymax = np.minimum(BBGT[:, 3], bb[3])
178 | iw = np.maximum(ixmax - ixmin + 1., 0.)
179 | ih = np.maximum(iymax - iymin + 1., 0.)
180 | inters = iw * ih
181 |
182 | # union
183 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
184 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
185 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
186 |
187 | overlaps = inters / uni
188 | ovmax = np.max(overlaps)
189 | jmax = np.argmax(overlaps)
190 |
191 | if ovmax > ovthresh:
192 | if not R['difficult'][jmax]:
193 | if not R['det'][jmax]:
194 | tp[d] = 1.
195 | R['det'][jmax] = 1
196 | else:
197 | fp[d] = 1.
198 | else:
199 | fp[d] = 1.
200 |
201 | # compute precision recall
202 | fp = np.cumsum(fp)
203 | tp = np.cumsum(tp)
204 | rec = tp / float(npos)
205 | # avoid divide by zero in case the first detection matches a difficult
206 | # ground truth
207 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
208 | ap = voc_ap(rec, prec, use_07_metric)
209 |
210 | return rec, prec, ap
211 |
--------------------------------------------------------------------------------
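To make the two AP modes in `voc_ap` concrete, here is a small worked example on a toy precision/recall curve (4 detections against 2 ground-truth boxes; assumes `lib/` is on `sys.path`):

```python
import numpy as np
from datasets.voc_eval import voc_ap

# cumulative recall/precision of 4 detections with npos = 2 ground truths
rec = np.array([0.5, 0.5, 1.0, 1.0])
prec = np.array([1.0, 0.5, 2.0 / 3.0, 0.5])

print(voc_ap(rec, prec, use_07_metric=False))  # area under the interpolated curve, ~0.833
print(voc_ap(rec, prec, use_07_metric=True))   # VOC07 11-point approximation, ~0.848
```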
/lib/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # CUDA_PATH=/usr/local/cuda/
4 |
5 | export CUDA_PATH=/usr/local/cuda/
6 |
7 | python setup.py build_ext --inplace
8 | rm -rf build
9 |
10 | CUDA_ARCH="-gencode arch=compute_61,code=sm_61"
11 |
12 | # clean build file
13 | rm model/nms/src/*.o
14 | rm model/roi_pooling/src/*.o
15 | rm model/roi_align/src/*.o
16 | rm model/roi_crop/src/*.o
17 | rm model/psroi_pooling/src/cuda/*.o
18 |
19 |
20 | # compile NMS
21 | cd model/nms/src
22 | echo "Compiling nms kernels by nvcc..."
23 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
24 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
25 |
26 | cd ../
27 | python build.py
28 |
29 | # compile roi_pooling
30 | cd ../../
31 | cd model/roi_pooling/src
32 | echo "Compiling roi pooling kernels by nvcc..."
33 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
34 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
35 | cd ../
36 | python build.py
37 |
38 | # compile roi_align
39 | cd ../../
40 | cd model/roi_align/src
41 | echo "Compiling roi align kernels by nvcc..."
42 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
43 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
44 | cd ../
45 | python build.py
46 |
47 | # compile roi_crop
48 | cd ../../
49 | cd model/roi_crop/src
50 | echo "Compiling roi crop kernels by nvcc..."
51 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
52 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
53 | cd ../
54 | python build.py
55 |
56 | # compile roi_crop
57 | cd ../../
58 | cd model/psroi_pooling/src/cuda
59 | echo "Compiling psroi pooling kernels by nvcc..."
60 | nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu \
61 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
62 | cd ../../
63 | python build.py
64 |
--------------------------------------------------------------------------------
/lib/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/__init__.py
--------------------------------------------------------------------------------
/lib/model/couplenet/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet_atrous import resnet
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import resnet
2 | from .vgg16 import vgg16
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | import torchvision.models as models
7 | from torch.autograd import Variable
8 | import numpy as np
9 | from model.utils.config import cfg
10 | from model.rpn.rpn import _RPN
11 | from model.roi_pooling.modules.roi_pool import _RoIPooling
12 | from model.roi_crop.modules.roi_crop import _RoICrop
13 | from model.roi_align.modules.roi_align import RoIAlignAvg
14 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
15 | import time
16 | import pdb
17 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta
18 |
19 | class _fasterRCNN(nn.Module):
20 | """ faster RCNN """
21 | def __init__(self, classes, class_agnostic):
22 | super(_fasterRCNN, self).__init__()
23 | self.classes = classes
24 | self.n_classes = len(classes)
25 | self.class_agnostic = class_agnostic
26 | # loss
27 | self.RCNN_loss_cls = 0
28 | self.RCNN_loss_bbox = 0
29 |
30 | # define rpn
31 | self.RCNN_rpn = _RPN(self.dout_base_model)
32 | self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
33 | self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
34 | self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
35 |
36 | self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
37 | self.RCNN_roi_crop = _RoICrop()
38 |
39 | def forward(self, im_data, im_info, gt_boxes, num_boxes):
40 | batch_size = im_data.size(0)
41 |
42 | im_info = im_info.data
43 | gt_boxes = gt_boxes.data
44 | num_boxes = num_boxes.data
45 |
46 | # feed image data to base model to obtain base feature map
47 | base_feat = self.RCNN_base(im_data)
48 |
49 |         # feed base feature map to RPN to obtain rois
50 | rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
51 |
52 |         # if it is the training phase, then use ground-truth bboxes for refining
53 | if self.training:
54 | roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
55 | rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
56 |
57 | rois_label = Variable(rois_label.view(-1).long())
58 | rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
59 | rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
60 | rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
61 | else:
62 | rois_label = None
63 | rois_target = None
64 | rois_inside_ws = None
65 | rois_outside_ws = None
66 | rpn_loss_cls = 0
67 | rpn_loss_bbox = 0
68 |
69 | rois = Variable(rois)
70 | # do roi pooling based on predicted rois
71 |
72 | if cfg.POOLING_MODE == 'crop':
73 | # pdb.set_trace()
74 | # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
75 | grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
76 | grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
77 | pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
78 | if cfg.CROP_RESIZE_WITH_MAX_POOL:
79 | pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
80 | elif cfg.POOLING_MODE == 'align':
81 | pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
82 | elif cfg.POOLING_MODE == 'pool':
83 | pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
84 |
85 | # feed pooled features to top model
86 | pooled_feat = self._head_to_tail(pooled_feat)
87 |
88 | # compute bbox offset
89 | bbox_pred = self.RCNN_bbox_pred(pooled_feat)
90 | if self.training and not self.class_agnostic:
91 | # select the corresponding columns according to roi labels
92 | bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
93 | bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
94 | bbox_pred = bbox_pred_select.squeeze(1)
95 |
96 | # compute object classification probability
97 | cls_score = self.RCNN_cls_score(pooled_feat)
98 | cls_prob = F.softmax(cls_score)
99 |
100 | RCNN_loss_cls = 0
101 | RCNN_loss_bbox = 0
102 |
103 | if self.training:
104 | # classification loss
105 | RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
106 |
107 | # bounding box regression L1 loss
108 | RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
109 |
110 |
111 | cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
112 | bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
113 |
114 | return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
115 |
116 | def _init_weights(self):
117 | def normal_init(m, mean, stddev, truncated=False):
118 | """
119 | weight initalizer: truncated normal and random normal.
120 | """
121 | # x is a parameter
122 | if truncated:
123 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
124 | else:
125 | m.weight.data.normal_(mean, stddev)
126 | m.bias.data.zero_()
127 |
128 | normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
129 | normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
130 | normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
131 | normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
132 | normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)
133 |
134 | def create_architecture(self):
135 | self._init_modules()
136 | self._init_weights()
137 |
--------------------------------------------------------------------------------
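The `torch.gather` step in `forward` above, which picks out the 4 box deltas belonging to each RoI's assigned class when the head is not class-agnostic, is easier to see on a small standalone tensor:

```python
import torch

num_rois, num_classes = 3, 5
bbox_pred = torch.arange(num_rois * num_classes * 4).float().view(num_rois, num_classes * 4)
rois_label = torch.LongTensor([2, 0, 4])  # assigned class for each RoI

# same selection as in _fasterRCNN.forward: view as (N, C, 4), gather along the class dim
bbox_pred_view = bbox_pred.view(num_rois, num_classes, 4)
index = rois_label.view(num_rois, 1, 1).expand(num_rois, 1, 4)
bbox_pred_select = torch.gather(bbox_pred_view, 1, index).squeeze(1)
print(bbox_pred_select)  # row i holds the 4 deltas for class rois_label[i]
```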
/lib/model/faster_rcnn/vgg16.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Xinlei Chen
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN
17 | import pdb
18 |
19 | class vgg16(_fasterRCNN):
20 | def __init__(self, classes, pretrained=False, class_agnostic=False):
21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth'
22 | self.dout_base_model = 512
23 | self.pretrained = pretrained
24 | self.class_agnostic = class_agnostic
25 |
26 | _fasterRCNN.__init__(self, classes, class_agnostic)
27 |
28 | def _init_modules(self):
29 | vgg = models.vgg16()
30 | if self.pretrained:
31 | print("Loading pretrained weights from %s" %(self.model_path))
32 | state_dict = torch.load(self.model_path)
33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()})
34 |
35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])
36 |
37 | # not using the last maxpool layer
38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1])
39 |
40 | # Fix the layers before conv3:
41 | for layer in range(10):
42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False
43 |
44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model)
45 |
46 | self.RCNN_top = vgg.classifier
47 |
48 | # not using the last maxpool layer
49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes)
50 |
51 | if self.class_agnostic:
52 | self.RCNN_bbox_pred = nn.Linear(4096, 4)
53 | else:
54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes)
55 |
56 | def _head_to_tail(self, pool5):
57 |
58 | pool5_flat = pool5.view(pool5.size(0), -1)
59 | fc7 = self.RCNN_top(pool5_flat)
60 |
61 | return fc7
62 |
63 |
--------------------------------------------------------------------------------
/lib/model/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/model/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/nms/__init__.py
--------------------------------------------------------------------------------
/lib/model/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/nms/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._nms import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/nms/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | #this_file = os.path.dirname(__file__)
7 |
8 | sources = []
9 | headers = []
10 | defines = []
11 | with_cuda = False
12 |
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/nms_cuda.c']
16 | headers += ['src/nms_cuda.h']
17 | defines += [('WITH_CUDA', None)]
18 | with_cuda = True
19 |
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/nms_cuda_kernel.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | print(extra_objects)
25 |
26 | ffi = create_extension(
27 | '_ext.nms',
28 | headers=headers,
29 | sources=sources,
30 | define_macros=defines,
31 | relative_to=__file__,
32 | with_cuda=with_cuda,
33 | extra_objects=extra_objects
34 | )
35 |
36 | if __name__ == '__main__':
37 | ffi.build()
38 |
--------------------------------------------------------------------------------
/lib/model/nms/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling stnm kernels by nvcc..."
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_cpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import numpy as np
4 | import torch
5 |
6 | def nms_cpu(dets, thresh):
7 | dets = dets.numpy()
8 | x1 = dets[:, 0]
9 | y1 = dets[:, 1]
10 | x2 = dets[:, 2]
11 | y2 = dets[:, 3]
12 | scores = dets[:, 4]
13 |
14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
15 | order = scores.argsort()[::-1]
16 |
17 | keep = []
18 | while order.size > 0:
19 | i = order.item(0)
20 | keep.append(i)
21 | xx1 = np.maximum(x1[i], x1[order[1:]])
22 | yy1 = np.maximum(y1[i], y1[order[1:]])
23 |         xx2 = np.minimum(x2[i], x2[order[1:]])
24 |         yy2 = np.minimum(y2[i], y2[order[1:]])
25 |
26 | w = np.maximum(0.0, xx2 - xx1 + 1)
27 | h = np.maximum(0.0, yy2 - yy1 + 1)
28 | inter = w * h
29 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
30 |
31 | inds = np.where(ovr <= thresh)[0]
32 | order = order[inds + 1]
33 |
34 | return torch.IntTensor(keep)
35 |
36 |
37 |
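
A minimal usage sketch for nms_cpu (only NumPy and PyTorch are needed; the box values below are made up for illustration, and the import assumes lib/ is on PYTHONPATH). Each row of dets is [x1, y1, x2, y2, score]; the call returns the indices of the boxes kept at the given IoU threshold:

    import torch
    from model.nms.nms_cpu import nms_cpu  # assumes lib/ is on sys.path

    dets = torch.FloatTensor([
        [10., 10., 60., 60., 0.9],      # kept: highest score
        [12., 12., 62., 62., 0.8],      # suppressed: IoU with the first box > 0.5
        [100., 100., 150., 150., 0.7],  # kept: no overlap with the first box
    ])
    keep = nms_cpu(dets, 0.5)
    print(keep)  # expected: tensor([0, 2], dtype=torch.int32)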
--------------------------------------------------------------------------------
/lib/model/nms/nms_gpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import torch
3 | import numpy as np
4 | from ._ext import nms
5 | import pdb
6 |
7 | def nms_gpu(dets, thresh):
8 | keep = dets.new(dets.size(0), 1).zero_().int()
9 | num_out = dets.new(1).zero_().int()
10 | nms.nms_cuda(keep, dets, num_out, thresh)
11 | keep = keep[:num_out[0]]
12 | return keep
13 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | import torch
8 | from model.utils.config import cfg
9 | if torch.cuda.is_available():
10 | from model.nms.nms_gpu import nms_gpu
11 | from model.nms.nms_cpu import nms_cpu
12 |
13 | def nms(dets, thresh, force_cpu=False):
14 | """Dispatch to either CPU or GPU NMS implementations."""
15 | if dets.shape[0] == 0:
16 | return []
17 | # ---numpy version---
18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
19 | # ---pytorch version---
20 |
21 |     return nms_cpu(dets, thresh) if force_cpu or not torch.cuda.is_available() else nms_gpu(dets, thresh)
22 |
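
A hedged usage sketch for the dispatcher (it assumes lib/ is on PYTHONPATH; the GPU path additionally requires the _ext.nms extension built via make.sh, while force_cpu=True only needs nms_cpu). The box values are illustrative:

    import torch
    from model.nms.nms_wrapper import nms

    dets = torch.FloatTensor([
        [10., 10., 60., 60., 0.9],
        [12., 12., 62., 62., 0.8],
    ])
    keep = nms(dets, 0.3, force_cpu=True)   # pure-Python CPU path
    # keep = nms(dets.cuda(), 0.3)          # CUDA path, needs the compiled extension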
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <stdio.h>
3 | #include "nms_cuda_kernel.h"
4 |
5 | // this symbol will be resolved automatically from PyTorch libs
6 | extern THCState *state;
7 |
8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
9 | THCudaIntTensor *num_out, float nms_overlap_thresh) {
10 |
11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out),
12 | THCudaIntTensor_data(state, num_out),
13 | THCudaTensor_data(state, boxes_host),
14 | boxes_host->size[0],
15 | boxes_host->size[1],
16 | nms_overlap_thresh);
17 |
18 | return 1;
19 | }
20 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);
3 |
4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
5 | THCudaIntTensor *num_out, float nms_overlap_thresh);
6 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include <stdbool.h>
9 | #include <stdio.h>
10 | #include <vector>
11 | #include <iostream>
12 | #include "nms_cuda_kernel.h"
13 |
14 | #define CUDA_WARN(XXX) \
15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \
16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \
17 | << std::endl; cudaDeviceSynchronize(); } while (0)
18 |
19 | #define CUDA_CHECK(condition) \
20 | /* Code block avoids redefinition of cudaError_t error */ \
21 | do { \
22 | cudaError_t error = condition; \
23 | if (error != cudaSuccess) { \
24 | std::cout << cudaGetErrorString(error) << std::endl; \
25 | } \
26 | } while (0)
27 |
28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
29 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
30 |
31 | __device__ inline float devIoU(float const * const a, float const * const b) {
32 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
35 | float interS = width * height;
36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
38 | return interS / (Sa + Sb - interS);
39 | }
40 |
41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
42 | float *dev_boxes, unsigned long long *dev_mask) {
43 | const int row_start = blockIdx.y;
44 | const int col_start = blockIdx.x;
45 |
46 | // if (row_start > col_start) return;
47 |
48 | const int row_size =
49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
50 | const int col_size =
51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
52 |
53 | __shared__ float block_boxes[threadsPerBlock * 5];
54 | if (threadIdx.x < col_size) {
55 | block_boxes[threadIdx.x * 5 + 0] =
56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
57 | block_boxes[threadIdx.x * 5 + 1] =
58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
59 | block_boxes[threadIdx.x * 5 + 2] =
60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
61 | block_boxes[threadIdx.x * 5 + 3] =
62 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
63 | block_boxes[threadIdx.x * 5 + 4] =
64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
65 | }
66 | __syncthreads();
67 |
68 | if (threadIdx.x < row_size) {
69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
70 | const float *cur_box = dev_boxes + cur_box_idx * 5;
71 | int i = 0;
72 | unsigned long long t = 0;
73 | int start = 0;
74 | if (row_start == col_start) {
75 | start = threadIdx.x + 1;
76 | }
77 | for (i = start; i < col_size; i++) {
78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
79 | t |= 1ULL << i;
80 | }
81 | }
82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
83 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
84 | }
85 | }
86 |
87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
88 | int boxes_dim, float nms_overlap_thresh) {
89 |
90 | float* boxes_dev = NULL;
91 | unsigned long long* mask_dev = NULL;
92 |
93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
94 |
95 | CUDA_CHECK(cudaMalloc(&boxes_dev,
96 | boxes_num * boxes_dim * sizeof(float)));
97 | CUDA_CHECK(cudaMemcpy(boxes_dev,
98 | boxes_host,
99 | boxes_num * boxes_dim * sizeof(float),
100 | cudaMemcpyHostToDevice));
101 |
102 | CUDA_CHECK(cudaMalloc(&mask_dev,
103 | boxes_num * col_blocks * sizeof(unsigned long long)));
104 |
105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
106 | DIVUP(boxes_num, threadsPerBlock));
107 | dim3 threads(threadsPerBlock);
108 |
109 | // printf("i am at line %d\n", boxes_num);
110 | // printf("i am at line %d\n", boxes_dim);
111 |
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 | nms_overlap_thresh,
114 | boxes_dev,
115 | mask_dev);
116 |
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 | mask_dev,
120 | sizeof(unsigned long long) * boxes_num * col_blocks,
121 | cudaMemcpyDeviceToHost));
122 |
123 |   std::vector<unsigned long long> remv(col_blocks);
124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 |
126 | // we need to create a memory for keep_out on cpu
127 | // otherwise, the following code cannot run
128 |
129 | int* keep_out_cpu = new int[boxes_num];
130 |
131 | int num_to_keep = 0;
132 | for (int i = 0; i < boxes_num; i++) {
133 | int nblock = i / threadsPerBlock;
134 | int inblock = i % threadsPerBlock;
135 |
136 | if (!(remv[nblock] & (1ULL << inblock))) {
137 |       // original: keep_out[num_to_keep++] = i;
138 | keep_out_cpu[num_to_keep++] = i;
139 | unsigned long long *p = &mask_host[0] + i * col_blocks;
140 | for (int j = nblock; j < col_blocks; j++) {
141 | remv[j] |= p[j];
142 | }
143 | }
144 | }
145 |
146 | // copy keep_out_cpu to keep_out on gpu
147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice));
148 |
149 | // *num_out = num_to_keep;
150 |
151 | // original: *num_out = num_to_keep;
152 | // copy num_to_keep to num_out on gpu
153 |
154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));
155 |
156 | // release cuda memory
157 | CUDA_CHECK(cudaFree(boxes_dev));
158 | CUDA_CHECK(cudaFree(mask_dev));
159 | // release cpu memory
160 | delete []keep_out_cpu;
161 | }
162 |
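
To clarify the host-side loop above: the kernel fills dev_mask so that, for box i, bit j of word (i * col_blocks + b) is set when box i overlaps box (b * 64 + j) beyond the threshold; the host then walks the boxes in score order and keeps a box only if no previously kept box has marked it. A rough NumPy sketch of that greedy walk, written only to illustrate the logic (the function name and toy mask layout are assumptions, not part of this repo):

    import numpy as np

    def greedy_suppress(mask_host, boxes_num, threads_per_block=64):
        # mask_host: (boxes_num, col_blocks) array of uint64 bitmasks produced by the kernel
        col_blocks = (boxes_num + threads_per_block - 1) // threads_per_block
        remv = np.zeros(col_blocks, dtype=np.uint64)
        keep = []
        for i in range(boxes_num):
            nblock, inblock = divmod(i, threads_per_block)
            if not (int(remv[nblock]) >> inblock) & 1:    # box i not suppressed yet
                keep.append(i)
                remv[nblock:] |= mask_host[i, nblock:]    # suppress everything box i overlaps
        return keep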
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
6 | int boxes_dim, float nms_overlap_thresh);
7 |
8 | #ifdef __cplusplus
9 | }
10 | #endif
11 |
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/__init__.py
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/_ext/psroi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._psroi_pooling import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | locals[symbol] = _wrap_function(fn, _ffi)
10 | __all__.append(symbol)
11 |
12 | _import_symbols(locals())
13 |
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 | sources = []
6 | headers = []
7 | defines = []
8 | with_cuda = False
9 |
10 | if torch.cuda.is_available():
11 | print('Including CUDA code.')
12 | sources += ['src/psroi_pooling_cuda.c']
13 | headers += ['src/psroi_pooling_cuda.h']
14 | defines += [('WITH_CUDA', None)]
15 | with_cuda = True
16 |
17 | this_file = os.path.dirname(os.path.realpath(__file__))
18 | print(this_file)
19 | extra_objects = ['src/cuda/psroi_pooling.cu.o']
20 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
21 |
22 | ffi = create_extension(
23 | '_ext.psroi_pooling',
24 | headers=headers,
25 | sources=sources,
26 | define_macros=defines,
27 | relative_to=__file__,
28 | with_cuda=with_cuda,
29 | extra_objects=extra_objects
30 | )
31 |
32 | if __name__ == '__main__':
33 | ffi.build()
34 |
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/functions/psroi_pooling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import psroi_pooling
4 |
5 |
6 | class PSRoIPoolingFunction(Function):
7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim):
8 | self.pooled_width = int(pooled_width)
9 | self.pooled_height = int(pooled_height)
10 | self.spatial_scale = float(spatial_scale)
11 | self.group_size = int(group_size)
12 | self.output_dim = int(output_dim)
13 | self.output = None
14 | self.mappingchannel = None
15 | self.rois = None
16 | self.feature_size = None
17 |
18 | def forward(self, features, rois):
19 | batch_size, num_channels, data_height, data_width = features.size()
20 | num_rois = rois.size()[0]
21 | output = torch.zeros(num_rois, self.output_dim, self.pooled_height, self.pooled_width)
22 | mappingchannel = torch.IntTensor(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_()
23 | output = output.cuda()
24 | mappingchannel = mappingchannel.cuda()
25 | psroi_pooling.psroi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, self.output_dim, \
26 | features, rois, output, mappingchannel)
27 | self.output = output
28 | self.mappingchannel = mappingchannel
29 | self.rois = rois
30 | self.feature_size = features.size()
31 |
32 | return output
33 |
34 | def backward(self, grad_output):
35 | assert(self.feature_size is not None and grad_output.is_cuda)
36 |
37 | batch_size, num_channels, data_height, data_width = self.feature_size
38 |
39 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda()
40 |
41 | psroi_pooling.psroi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, self.output_dim, \
42 | grad_output, self.rois, grad_input, self.mappingchannel)
43 | return grad_input, None
44 |
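
One constraint worth making explicit: position-sensitive pooling reads a different group of input channels for each output bin, so the features tensor passed to PSRoIPoolingFunction must have exactly output_dim * group_size * group_size channels. A tiny sanity-check sketch (pure Python, no CUDA needed; 21 classes and a 7x7 grid are the typical R-FCN settings used elsewhere in this repo):

    def psroi_input_channels(output_dim, group_size):
        # number of channels the `features` argument must carry
        return output_dim * group_size * group_size

    # classification head: 21 Pascal VOC classes (incl. background) on a 7x7 grid
    assert psroi_input_channels(output_dim=21, group_size=7) == 1029

    # class-agnostic localisation head: 4 box offsets per position
    assert psroi_input_channels(output_dim=4, group_size=7) == 196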
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/psroi_pooling/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/modules/psroi_pool.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | import sys
3 | from model.psroi_pooling.functions.psroi_pooling import PSRoIPoolingFunction
4 |
5 |
6 | class PSRoIPool(Module):
7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim):
8 | super(PSRoIPool, self).__init__()
9 |
10 | self.pooled_width = int(pooled_width)
11 | self.pooled_height = int(pooled_height)
12 | self.spatial_scale = float(spatial_scale)
13 | self.group_size = int(group_size)
14 | self.output_dim = int(output_dim)
15 |
16 | def forward(self, features, rois):
17 | return PSRoIPoolingFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, self.output_dim)(features, rois)
18 |
19 | if __name__ == '__main__':
20 | import torch
21 | import numpy as np
22 | from torch.autograd import Variable
23 | from model.roi_pooling.modules.roi_pool import _RoIPooling
24 |
25 | input = torch.randn(2, 21*7*7, 50, 72)
26 | rois = torch.from_numpy(
27 | np.array([
28 | [0.0000, 350.6689, 211.0240, 779.0886, 777.7496],
29 | [0.0000, 744.0627, 277.4919, 988.4307, 602.7589],
30 | [1.0000, 350.6689, 211.0240, 779.0886, 777.7496],
31 | [1.0000, 744.0627, 277.4919, 988.4307, 602.7589],
32 | ])
33 | ).float()
34 |
35 | pool = PSRoIPool(7, 7, 1/16.0, 7, 21)
36 | input = Variable(input.cuda())
37 | rois = Variable(rois.cuda())
38 | print(rois.size(), input.size())
39 | print(input)
40 | out = pool(input, rois)
41 | print(out)
42 | print(out.size())
43 |
44 | print('============================')
45 | roi_pool = _RoIPooling(7, 7, 1/16.0)
46 | out = roi_pool(input, rois.view(-1, 5))
47 | print(out)
48 | print(out.size())
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/cuda/psroi_pooling_kernel.cu:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | #include <stdio.h>
6 | #include <math.h>
7 | #include <float.h>
8 | #include "psroi_pooling_kernel.h"
9 |
10 | #define CUDA_1D_KERNEL_LOOP(i, n) \
11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
12 | i += blockDim.x * gridDim.x)
13 |
14 | __global__ void PSROIPoolForward(const int nthreads, const float* bottom_data,
15 | const float spatial_scale, const int height, const int width,
16 | const int channels, const int pooled_height, const int pooled_width,
17 | const int group_size, const int output_dim,
18 | const float* bottom_rois, float* top_data, int* mapping_channel)
19 | {
20 |
21 | CUDA_1D_KERNEL_LOOP(index, nthreads)
22 | {
23 | // (n, c, ph, pw) is an element in the pooled output
24 | int pw = index % pooled_width;
25 | int ph = (index / pooled_width) % pooled_height;
26 | int ctop = (index / pooled_width / pooled_height) % output_dim;
27 | int n = index / pooled_width / pooled_height / output_dim;
28 |
29 | bottom_rois += n * 5;
30 | int roi_batch_ind = bottom_rois[0];
31 |     float roi_start_w =
32 |         static_cast<float>(round(bottom_rois[1])) * spatial_scale;
33 |     float roi_start_h =
34 |         static_cast<float>(round(bottom_rois[2])) * spatial_scale;
35 |     float roi_end_w =
36 |         static_cast<float>(round(bottom_rois[3]) + 1.) * spatial_scale;
37 |     float roi_end_h =
38 |         static_cast<float>(round(bottom_rois[4]) + 1.) * spatial_scale;
39 |
40 | // Force malformed ROIs to be 1x1
41 | float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0
42 | float roi_height = max(roi_end_h - roi_start_h, 0.1);
43 |
44 | float bin_size_h = (float)(roi_height) / (float)(pooled_height);
45 | float bin_size_w = (float)(roi_width) / (float)(pooled_width);
46 |
47 |     int hstart = floor(static_cast<float>(ph) * bin_size_h
48 |         + roi_start_h);
49 |     int wstart = floor(static_cast<float>(pw) * bin_size_w
50 |         + roi_start_w);
51 |     int hend = ceil(static_cast<float>(ph + 1) * bin_size_h
52 |         + roi_start_h);
53 |     int wend = ceil(static_cast<float>(pw + 1) * bin_size_w
54 |         + roi_start_w);
55 |
56 | // Add roi offsets and clip to input boundaries
57 | hstart = min(max(hstart, 0), height);
58 | hend = min(max(hend, 0), height);
59 | wstart = min(max(wstart, 0), width);
60 | wend = min(max(wend, 0), width);
61 | bool is_empty = (hend <= hstart) || (wend <= wstart);
62 |
63 | int gw = pw;
64 | int gh = ph;
65 | int c = (ctop*group_size + gh)*group_size + gw;
66 |
67 | bottom_data += (roi_batch_ind * channels + c) * height * width;
68 | float out_sum = 0;
69 | for (int h = hstart; h < hend; ++h) {
70 | for (int w = wstart; w < wend; ++w) {
71 | int bottom_index = h*width + w;
72 | out_sum += bottom_data[bottom_index];
73 | }
74 | }
75 | float bin_area = (hend - hstart)*(wend - wstart);
76 | //top_data[index] = nthreads;
77 | top_data[index] = is_empty? 0. : out_sum/bin_area;
78 | mapping_channel[index] = c;
79 | }
80 | }
81 |
82 |
83 | int PSROIPoolForwardLauncher(
84 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
85 | const int width, const int channels, const int pooled_height,
86 | const int pooled_width, const float* bottom_rois,
87 | const int group_size, const int output_dim,
88 | float* top_data, int* mapping_channel, cudaStream_t stream)
89 | {
90 | const int kThreadsPerBlock = 1024;
91 | const int output_size = output_dim * pooled_height * pooled_width * num_rois;
92 | cudaError_t err;
93 |
94 | PSROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
95 | output_size, bottom_data, spatial_scale, height, width, channels, pooled_height,
96 | pooled_width, group_size, output_dim, bottom_rois, top_data, mapping_channel);
97 |
98 | err = cudaGetLastError();
99 | if(cudaSuccess != err)
100 | {
101 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
102 | exit( -1 );
103 | }
104 |
105 | return 1;
106 | }
107 |
108 |
109 | __global__ void PSROIPoolBackward(const int nthreads, const float* top_diff,
110 | const int* mapping_channel, const int num_rois, const float spatial_scale,
111 | const int height, const int width, const int channels,
112 | const int pooled_height, const int pooled_width, const int output_dim, float* bottom_diff,
113 | const float* bottom_rois) {
114 | CUDA_1D_KERNEL_LOOP(index, nthreads)
115 | {
116 |
117 | int pw = index % pooled_width;
118 | int ph = (index / pooled_width) % pooled_height;
119 | int n = index / pooled_width / pooled_height / output_dim;
120 |
121 | // [start, end) interval for spatial sampling
122 | bottom_rois += n * 5;
123 | int roi_batch_ind = bottom_rois[0];
124 |     float roi_start_w =
125 |         static_cast<float>(round(bottom_rois[1])) * spatial_scale;
126 |     float roi_start_h =
127 |         static_cast<float>(round(bottom_rois[2])) * spatial_scale;
128 |     float roi_end_w =
129 |         static_cast<float>(round(bottom_rois[3]) + 1.) * spatial_scale;
130 |     float roi_end_h =
131 |         static_cast<float>(round(bottom_rois[4]) + 1.) * spatial_scale;
132 |
133 | // Force too small ROIs to be 1x1
134 | float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0
135 | float roi_height = max(roi_end_h - roi_start_h, 0.1);
136 |
137 | // Compute w and h at bottom
138 |     float bin_size_h = roi_height / static_cast<float>(pooled_height);
139 |     float bin_size_w = roi_width / static_cast<float>(pooled_width);
140 |
141 |     int hstart = floor(static_cast<float>(ph) * bin_size_h
142 |         + roi_start_h);
143 |     int wstart = floor(static_cast<float>(pw) * bin_size_w
144 |         + roi_start_w);
145 |     int hend = ceil(static_cast<float>(ph + 1) * bin_size_h
146 |         + roi_start_h);
147 |     int wend = ceil(static_cast<float>(pw + 1) * bin_size_w
148 |         + roi_start_w);
149 | // Add roi offsets and clip to input boundaries
150 | hstart = min(max(hstart, 0), height);
151 | hend = min(max(hend, 0), height);
152 | wstart = min(max(wstart, 0), width);
153 | wend = min(max(wend, 0), width);
154 | bool is_empty = (hend <= hstart) || (wend <= wstart);
155 |
156 | // Compute c at bottom
157 | int c = mapping_channel[index];
158 | float* offset_bottom_diff = bottom_diff +
159 | (roi_batch_ind * channels + c) * height * width;
160 | float bin_area = (hend - hstart)*(wend - wstart);
161 | float diff_val = is_empty ? 0. : top_diff[index] / bin_area;
162 | for (int h = hstart; h < hend; ++h) {
163 | for (int w = wstart; w < wend; ++w) {
164 | int bottom_index = h*width + w;
165 | //caffe_gpu_atomic_add(diff_val, offset_bottom_diff + bottom_index);
166 | atomicAdd(offset_bottom_diff + bottom_index, diff_val);
167 | }
168 | }
169 | }
170 | }
171 |
172 | int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels,
173 | const int height, const int width, const int pooled_width,
174 | const int pooled_height, const int output_dim,
175 | float* bottom_diff, const float* bottom_rois, cudaStream_t stream)
176 | {
177 | const int kThreadsPerBlock = 1024;
178 | //const int output_size = output_dim * height * width * channels;
179 | const int output_size = output_dim * pooled_height * pooled_width * num_rois;
180 | cudaError_t err;
181 |
182 | PSROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
183 | output_size, top_diff, mapping_channel, num_rois, spatial_scale, height, width, channels, pooled_height,
184 | pooled_width, output_dim, bottom_diff, bottom_rois);
185 |
186 | err = cudaGetLastError();
187 | if(cudaSuccess != err)
188 | {
189 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
190 | exit( -1 );
191 | }
192 |
193 | return 1;
194 | }
195 |
196 |
197 | #ifdef __cplusplus
198 | }
199 | #endif
200 |
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/cuda/psroi_pooling_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef PS_ROI_POOLING_KERNEL
2 | #define PS_ROI_POOLING_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int PSROIPoolForwardLauncher(
9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 | const int width, const int channels, const int pooled_height, const int pooled_width,
11 | const float* bottom_rois, const int group_size, const int output_dim, float* top_data, int* mapping_channel, cudaStream_t stream);
12 |
13 |
14 | int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels, const int height, const int width, const int pooled_width, const int pooled_height, const int output_dim, float* bottom_diff, const float* bottom_rois, cudaStream_t stream);
15 |
16 | #ifdef __cplusplus
17 | }
18 |
19 | #endif
20 |
21 | #endif
22 |
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/psroi_pooling_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include <stdio.h>
4 | #include "cuda/psroi_pooling_kernel.h"
5 |
6 |
7 |
8 | extern THCState* state;
9 |
10 | int psroi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, int group_size, int output_dim,THCudaTensor *features, THCudaTensor* rois, THCudaTensor* output, THCudaIntTensor* mappingchannel){
11 |
12 | float* data_in = THCudaTensor_data(state, features);
13 | float* rois_in = THCudaTensor_data(state, rois);
14 | float* output_out = THCudaTensor_data(state, output);
15 | int* mappingchannel_out = THCudaIntTensor_data(state, mappingchannel);
16 | //Get # of Rois
17 | int num_rois = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois!=5)
20 | {
21 | return 0;
22 | }
23 |
24 | //Get # of batch_size
25 | //int batch_size = THCudaTensor_size(state, features, 0);
26 | //if (batch_size!=1)
27 | //{
28 | // return 0;
29 | //}
30 |
31 | int data_height = THCudaTensor_size(state, features, 2);
32 | int data_width = THCudaTensor_size(state, features, 3);
33 | int num_channels = THCudaTensor_size(state, features, 1);
34 |
35 | cudaStream_t stream = THCState_getCurrentStream(state);
36 |
37 | // call the gpu kernel for psroi_pooling
38 | PSROIPoolForwardLauncher(data_in, spatial_scale, num_rois, data_height, data_width, num_channels, pooled_height, pooled_width,rois_in, group_size,
39 | output_dim, output_out, mappingchannel_out,stream);
40 | return 1;
41 | }
42 |
43 |
44 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim,
45 | THCudaTensor* top_grad, THCudaTensor* rois, THCudaTensor* bottom_grad, THCudaIntTensor* mappingchannel)
46 | {
47 | float *top_grad_flat = THCudaTensor_data(state, top_grad);
48 | float *rois_flat = THCudaTensor_data(state, rois);
49 |
50 | float *bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
51 | int *mappingchannel_flat = THCudaIntTensor_data(state, mappingchannel);
52 |
53 | // Number of ROIs
54 | int num_rois = THCudaTensor_size(state, rois, 0);
55 | int size_rois = THCudaTensor_size(state, rois, 1);
56 | if (size_rois != 5)
57 | {
58 | return 0;
59 | }
60 | // batch size
61 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
62 | //if (batch_size != 1)
63 | //{
64 | // return 0;
65 | //}
66 | // data height
67 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
68 | // data width
69 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
70 | // Number of channels
71 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
72 |
73 | cudaStream_t stream = THCState_getCurrentStream(state);
74 |
75 | PSROIPoolBackwardLauncher(top_grad_flat, mappingchannel_flat, batch_size, num_rois, spatial_scale, num_channels, data_height, data_width, pooled_width, pooled_height, output_dim, bottom_grad_flat, rois_flat, stream);
76 | return 1;
77 | }
78 |
--------------------------------------------------------------------------------
/lib/model/psroi_pooling/src/psroi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int psroi_pooling_forward_cuda( int pooled_height, int pooled_width, float spatial_scale,int group_size, int output_dim,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * mappingchannel);
3 |
4 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * mappingchannel);
6 |
--------------------------------------------------------------------------------
/lib/model/rfcn/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet_atrous import resnet
--------------------------------------------------------------------------------
/lib/model/rfcn/rfcn.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 |
7 | from model.psroi_pooling.modules.psroi_pool import PSRoIPool
8 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
9 | from model.rpn.rpn import _RPN
10 | from model.utils.config import cfg
11 | from model.utils.net_utils import _smooth_l1_loss
12 |
13 | class _RFCN(nn.Module):
14 | """ R-FCN """
15 | def __init__(self, classes, class_agnostic):
16 | super(_RFCN, self).__init__()
17 | self.classes = classes
18 | self.n_classes = len(classes)
19 | self.class_agnostic = class_agnostic
20 | # loss
21 | self.RCNN_loss_cls = 0
22 | self.RCNN_loss_bbox = 0
23 |
24 | self.box_num_classes = 1 if class_agnostic else self.n_classes
25 |
26 | # define rpn
27 | self.RCNN_rpn = _RPN(self.dout_base_model)
28 | self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
29 | self.RCNN_psroi_pool_cls = PSRoIPool(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
30 | spatial_scale=1/16.0, group_size=cfg.POOLING_SIZE,
31 | output_dim=self.n_classes)
32 | self.RCNN_psroi_pool_loc = PSRoIPool(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
33 | spatial_scale=1/16.0, group_size=cfg.POOLING_SIZE,
34 | output_dim=self.box_num_classes * 4)
35 | self.pooling = nn.AvgPool2d(kernel_size=cfg.POOLING_SIZE, stride=cfg.POOLING_SIZE)
36 | self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
37 |
38 | def detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
39 | # classification loss
40 | RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
41 |
42 | # bounding box regression L1 loss
43 | RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
44 |
45 | return RCNN_loss_cls, RCNN_loss_bbox
46 |
47 | def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
48 |
49 | def log_sum_exp(x):
50 | x_max = x.data.max()
51 | return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max
52 |
53 | num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
54 | pos_idx = rois_label > 0
55 | num_pos = pos_idx.int().sum()
56 |
57 | # classification loss
58 | num_classes = cls_score.size(1)
59 | weight = cls_score.data.new(num_classes).fill_(1.)
60 | weight[0] = num_pos.data[0] / num_hard
61 |
62 | conf_p = cls_score.detach()
63 | conf_t = rois_label.detach()
64 |
65 | # rank on cross_entropy loss
66 | loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1,1))
67 | loss_c[pos_idx] = 100. # include all positive samples
68 | _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
69 | loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)
70 |
71 | # bounding box regression L1 loss
72 | pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
73 | loc_p = bbox_pred[pos_idx].view(-1, 4)
74 | loc_t = rois_target[pos_idx].view(-1, 4)
75 | loss_box = F.smooth_l1_loss(loc_p, loc_t)
76 |
77 | return loss_cls, loss_box
78 |
79 | def forward(self, im_data, im_info, gt_boxes, num_boxes):
80 | batch_size = im_data.size(0)
81 |
82 | im_info = im_info.data
83 | gt_boxes = gt_boxes.data
84 | num_boxes = num_boxes.data
85 | self.batch_size = im_data.size(0)
86 |
87 | # feed image data to base model to obtain base feature map
88 | base_feat = self.RCNN_base(im_data)
89 |
90 |         # feed base feature map to RPN to obtain rois
91 | rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
92 |
93 |         # if it is the training phase, then use ground truth bboxes for refining
94 | if self.training:
95 | roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
96 | rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
97 |
98 | rois_label = Variable(rois_label.view(-1).long())
99 | rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
100 | rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
101 | rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
102 | else:
103 | rois_label = None
104 | rois_target = None
105 | rois_inside_ws = None
106 | rois_outside_ws = None
107 | rpn_loss_cls = 0
108 | rpn_loss_bbox = 0
109 |
110 | rois = Variable(rois)
111 | base_feat = self.RCNN_conv_new(base_feat)
112 |
113 | # do roi pooling based on predicted rois
114 | cls_feat = self.RCNN_cls_base(base_feat)
115 | pooled_feat_cls = self.RCNN_psroi_pool_cls(cls_feat, rois.view(-1, 5))
116 | cls_score = self.pooling(pooled_feat_cls)
117 | cls_score = cls_score.squeeze()
118 |
119 | bbox_base = self.RCNN_bbox_base(base_feat)
120 | pooled_feat_loc = self.RCNN_psroi_pool_loc(bbox_base, rois.view(-1, 5))
121 | pooled_feat_loc = self.pooling(pooled_feat_loc)
122 | bbox_pred = pooled_feat_loc.squeeze()
123 |
124 | if self.training and not self.class_agnostic:
125 | # select the corresponding columns according to roi labels
126 | bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
127 | bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
128 | bbox_pred = bbox_pred_select.squeeze(1)
129 |
130 | cls_prob = F.softmax(cls_score, dim=1)
131 |
132 | RCNN_loss_cls = 0
133 | RCNN_loss_bbox = 0
134 |
135 | if self.training:
136 | loss_func = self.ohem_detect_loss if cfg.TRAIN.OHEM else self.detect_loss
137 | RCNN_loss_cls, RCNN_loss_bbox = loss_func(cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
138 |
139 | cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
140 | bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
141 |
142 | return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
143 |
144 | def _init_weights(self):
145 | def normal_init(m, mean, stddev, truncated=False):
146 | """
147 |             weight initializer: truncated normal and random normal.
148 | """
149 | # x is a parameter
150 | if truncated:
151 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
152 | else:
153 | m.weight.data.normal_(mean, stddev)
154 | if m.bias is not None:
155 | m.bias.data.zero_()
156 |
157 | normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
158 | normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
159 | normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
160 | normal_init(self.RCNN_conv_1x1, 0, 0.01, cfg.TRAIN.TRUNCATED)
161 | normal_init(self.RCNN_cls_base, 0, 0.01, cfg.TRAIN.TRUNCATED)
162 | normal_init(self.RCNN_bbox_base, 0, 0.001, cfg.TRAIN.TRUNCATED)
163 |
164 | def create_architecture(self):
165 | self._init_modules()
166 | self._init_weights()
167 |
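
The two PSRoIPool heads above pin down the channel widths that RCNN_cls_base and RCNN_bbox_base (defined in the backbone file, e.g. resnet_atrous.py) must produce: n_classes * POOLING_SIZE^2 maps for classification and box_num_classes * 4 * POOLING_SIZE^2 maps for localisation. A small sketch of that arithmetic, with POOLING_SIZE = 7 and 21 classes assumed only for illustration:

    POOLING_SIZE = 7            # cfg.POOLING_SIZE in the default configs
    n_classes = 21              # Pascal VOC, including background
    class_agnostic = False

    box_num_classes = 1 if class_agnostic else n_classes
    cls_head_channels = n_classes * POOLING_SIZE * POOLING_SIZE            # 1029
    loc_head_channels = box_num_classes * 4 * POOLING_SIZE * POOLING_SIZE  # 4116
    print(cls_head_channels, loc_head_channels)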
--------------------------------------------------------------------------------
/lib/model/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_align import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_align/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | sources = ['src/roi_align.c']
7 | headers = ['src/roi_align.h']
8 | extra_objects = []
9 | #sources = []
10 | #headers = []
11 | defines = []
12 | with_cuda = False
13 |
14 | this_file = os.path.dirname(os.path.realpath(__file__))
15 | print(this_file)
16 |
17 | if torch.cuda.is_available():
18 | print('Including CUDA code.')
19 | sources += ['src/roi_align_cuda.c']
20 | headers += ['src/roi_align_cuda.h']
21 | defines += [('WITH_CUDA', None)]
22 | with_cuda = True
23 |
24 | extra_objects = ['src/roi_align_kernel.cu.o']
25 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
26 |
27 | ffi = create_extension(
28 | '_ext.roi_align',
29 | headers=headers,
30 | sources=sources,
31 | define_macros=defines,
32 | relative_to=__file__,
33 | with_cuda=with_cuda,
34 | extra_objects=extra_objects
35 | )
36 |
37 | if __name__ == '__main__':
38 | ffi.build()
39 |
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_align
4 |
5 |
6 | # TODO use save_for_backward instead
7 | class RoIAlignFunction(Function):
8 | def __init__(self, aligned_height, aligned_width, spatial_scale):
9 | self.aligned_width = int(aligned_width)
10 | self.aligned_height = int(aligned_height)
11 | self.spatial_scale = float(spatial_scale)
12 | self.rois = None
13 | self.feature_size = None
14 |
15 | def forward(self, features, rois):
16 | self.rois = rois
17 | self.feature_size = features.size()
18 |
19 | batch_size, num_channels, data_height, data_width = features.size()
20 | num_rois = rois.size(0)
21 |
22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
23 | if features.is_cuda:
24 | roi_align.roi_align_forward_cuda(self.aligned_height,
25 | self.aligned_width,
26 | self.spatial_scale, features,
27 | rois, output)
28 | else:
29 | roi_align.roi_align_forward(self.aligned_height,
30 | self.aligned_width,
31 | self.spatial_scale, features,
32 | rois, output)
33 | # raise NotImplementedError
34 |
35 | return output
36 |
37 | def backward(self, grad_output):
38 | assert(self.feature_size is not None and grad_output.is_cuda)
39 |
40 | batch_size, num_channels, data_height, data_width = self.feature_size
41 |
42 | grad_input = self.rois.new(batch_size, num_channels, data_height,
43 | data_width).zero_()
44 | roi_align.roi_align_backward_cuda(self.aligned_height,
45 | self.aligned_width,
46 | self.spatial_scale, grad_output,
47 | self.rois, grad_input)
48 |
49 | # print grad_input
50 |
51 | return grad_input, None
52 |
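
A hedged usage sketch for RoIAlignFunction (it assumes the extension has been built with make.sh/build.py and that lib/ is on PYTHONPATH; the tensor shapes and box values are illustrative). Each RoI row is [batch_index, x1, y1, x2, y2] in input-image coordinates, mapped onto the feature map by spatial_scale:

    import torch
    from torch.autograd import Variable
    from model.roi_align.functions.roi_align import RoIAlignFunction

    features = Variable(torch.randn(1, 256, 50, 72).cuda())
    rois = Variable(torch.FloatTensor([[0, 16., 16., 160., 160.]]).cuda())

    align = RoIAlignFunction(aligned_height=7, aligned_width=7, spatial_scale=1 / 16.0)
    pooled = align(features, rois)
    print(pooled.size())  # torch.Size([1, 256, 7, 7])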
--------------------------------------------------------------------------------
/lib/model/roi_align/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling my_lib kernels by nvcc..."
7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_align/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.nn.functional import avg_pool2d, max_pool2d
3 | from ..functions.roi_align import RoIAlignFunction
4 |
5 |
6 | class RoIAlign(Module):
7 | def __init__(self, aligned_height, aligned_width, spatial_scale):
8 | super(RoIAlign, self).__init__()
9 |
10 | self.aligned_width = int(aligned_width)
11 | self.aligned_height = int(aligned_height)
12 | self.spatial_scale = float(spatial_scale)
13 |
14 | def forward(self, features, rois):
15 | return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 | self.spatial_scale)(features, rois)
17 |
18 | class RoIAlignAvg(Module):
19 | def __init__(self, aligned_height, aligned_width, spatial_scale):
20 | super(RoIAlignAvg, self).__init__()
21 |
22 | self.aligned_width = int(aligned_width)
23 | self.aligned_height = int(aligned_height)
24 | self.spatial_scale = float(spatial_scale)
25 |
26 | def forward(self, features, rois):
27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
28 | self.spatial_scale)(features, rois)
29 | return avg_pool2d(x, kernel_size=2, stride=1)
30 |
31 | class RoIAlignMax(Module):
32 | def __init__(self, aligned_height, aligned_width, spatial_scale):
33 | super(RoIAlignMax, self).__init__()
34 |
35 | self.aligned_width = int(aligned_width)
36 | self.aligned_height = int(aligned_height)
37 | self.spatial_scale = float(spatial_scale)
38 |
39 | def forward(self, features, rois):
40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
41 | self.spatial_scale)(features, rois)
42 | return max_pool2d(x, kernel_size=2, stride=1)
43 |
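
RoIAlignAvg asks the underlying function for an (aligned_height + 1) x (aligned_width + 1) grid and then averages 2x2 windows with stride 1, which brings the result back to aligned_height x aligned_width. A tiny pure-PyTorch check of that shape arithmetic (no compiled extension needed; the tensor is random and only the shapes matter):

    import torch
    from torch.nn.functional import avg_pool2d

    x = torch.randn(4, 256, 8, 8)           # what RoIAlignFunction returns for aligned size 7+1
    y = avg_pool2d(x, kernel_size=2, stride=1)
    print(y.shape)                          # torch.Size([4, 256, 7, 7])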
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <math.h>
3 | #include <omp.h>
4 |
5 |
6 | void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
7 | const int height, const int width, const int channels,
8 | const int aligned_height, const int aligned_width, const float * bottom_rois,
9 | float* top_data);
10 |
11 | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
12 | const int height, const int width, const int channels,
13 | const int aligned_height, const int aligned_width, const float * bottom_rois,
14 | float* top_data);
15 |
16 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
17 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
18 | {
19 | //Grab the input tensor
20 | float * data_flat = THFloatTensor_data(features);
21 | float * rois_flat = THFloatTensor_data(rois);
22 |
23 | float * output_flat = THFloatTensor_data(output);
24 |
25 | // Number of ROIs
26 | int num_rois = THFloatTensor_size(rois, 0);
27 | int size_rois = THFloatTensor_size(rois, 1);
28 | if (size_rois != 5)
29 | {
30 | return 0;
31 | }
32 |
33 | // data height
34 | int data_height = THFloatTensor_size(features, 2);
35 | // data width
36 | int data_width = THFloatTensor_size(features, 3);
37 | // Number of channels
38 | int num_channels = THFloatTensor_size(features, 1);
39 |
40 | // do ROIAlignForward
41 | ROIAlignForwardCpu(data_flat, spatial_scale, num_rois, data_height, data_width, num_channels,
42 | aligned_height, aligned_width, rois_flat, output_flat);
43 |
44 | return 1;
45 | }
46 |
47 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
48 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad)
49 | {
50 | //Grab the input tensor
51 | float * top_grad_flat = THFloatTensor_data(top_grad);
52 | float * rois_flat = THFloatTensor_data(rois);
53 |
54 | float * bottom_grad_flat = THFloatTensor_data(bottom_grad);
55 |
56 | // Number of ROIs
57 | int num_rois = THFloatTensor_size(rois, 0);
58 | int size_rois = THFloatTensor_size(rois, 1);
59 | if (size_rois != 5)
60 | {
61 | return 0;
62 | }
63 |
64 | // batch size
65 | int batch_size = THFloatTensor_size(bottom_grad, 0);
66 | // data height
67 | int data_height = THFloatTensor_size(bottom_grad, 2);
68 | // data width
69 | int data_width = THFloatTensor_size(bottom_grad, 3);
70 | // Number of channels
71 | int num_channels = THFloatTensor_size(bottom_grad, 1);
72 |
73 | // do ROIAlignBackward
74 | ROIAlignBackwardCpu(top_grad_flat, spatial_scale, num_rois, data_height,
75 | data_width, num_channels, aligned_height, aligned_width, rois_flat, bottom_grad_flat);
76 |
77 | return 1;
78 | }
79 |
80 | void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
81 | const int height, const int width, const int channels,
82 | const int aligned_height, const int aligned_width, const float * bottom_rois,
83 | float* top_data)
84 | {
85 | const int output_size = num_rois * aligned_height * aligned_width * channels;
86 |
87 | #pragma omp parallel for
88 | for (int idx = 0; idx < output_size; ++idx)
89 | {
90 | // (n, c, ph, pw) is an element in the aligned output
91 | int pw = idx % aligned_width;
92 | int ph = (idx / aligned_width) % aligned_height;
93 | int c = (idx / aligned_width / aligned_height) % channels;
94 | int n = idx / aligned_width / aligned_height / channels;
95 |
96 | float roi_batch_ind = bottom_rois[n * 5 + 0];
97 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
98 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
99 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
100 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
101 |
102 | // Force malformed ROI to be 1x1
103 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
104 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
105 | float bin_size_h = roi_height / (aligned_height - 1.);
106 | float bin_size_w = roi_width / (aligned_width - 1.);
107 |
108 | float h = (float)(ph) * bin_size_h + roi_start_h;
109 | float w = (float)(pw) * bin_size_w + roi_start_w;
110 |
111 | int hstart = fminf(floor(h), height - 2);
112 | int wstart = fminf(floor(w), width - 2);
113 |
114 | int img_start = roi_batch_ind * channels * height * width;
115 |
116 | // bilinear interpolation
117 | if (h < 0 || h >= height || w < 0 || w >= width)
118 | {
119 | top_data[idx] = 0.;
120 | }
121 | else
122 | {
123 | float h_ratio = h - (float)(hstart);
124 | float w_ratio = w - (float)(wstart);
125 | int upleft = img_start + (c * height + hstart) * width + wstart;
126 | int upright = upleft + 1;
127 | int downleft = upleft + width;
128 | int downright = downleft + 1;
129 |
130 | top_data[idx] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
131 | + bottom_data[upright] * (1. - h_ratio) * w_ratio
132 | + bottom_data[downleft] * h_ratio * (1. - w_ratio)
133 | + bottom_data[downright] * h_ratio * w_ratio;
134 | }
135 | }
136 | }
137 |
138 | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
139 | const int height, const int width, const int channels,
140 | const int aligned_height, const int aligned_width, const float * bottom_rois,
141 | float* bottom_diff)
142 | {
143 | const int output_size = num_rois * aligned_height * aligned_width * channels;
144 |
145 | #pragma omp parallel for
146 | for (int idx = 0; idx < output_size; ++idx)
147 | {
148 | // (n, c, ph, pw) is an element in the aligned output
149 | int pw = idx % aligned_width;
150 | int ph = (idx / aligned_width) % aligned_height;
151 | int c = (idx / aligned_width / aligned_height) % channels;
152 | int n = idx / aligned_width / aligned_height / channels;
153 |
154 | float roi_batch_ind = bottom_rois[n * 5 + 0];
155 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
156 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
157 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
158 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
159 |
160 | // Force malformed ROI to be 1x1
161 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
162 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
163 | float bin_size_h = roi_height / (aligned_height - 1.);
164 | float bin_size_w = roi_width / (aligned_width - 1.);
165 |
166 | float h = (float)(ph) * bin_size_h + roi_start_h;
167 | float w = (float)(pw) * bin_size_w + roi_start_w;
168 |
169 | int hstart = fminf(floor(h), height - 2);
170 | int wstart = fminf(floor(w), width - 2);
171 |
172 | int img_start = roi_batch_ind * channels * height * width;
173 |
174 | // bilinear interpolation
175 |         if (!(h < 0 || h >= height || w < 0 || w >= width))  // accumulate gradients only for in-bounds samples
176 | {
177 | float h_ratio = h - (float)(hstart);
178 | float w_ratio = w - (float)(wstart);
179 | int upleft = img_start + (c * height + hstart) * width + wstart;
180 | int upright = upleft + 1;
181 | int downleft = upleft + width;
182 | int downright = downleft + 1;
183 |
184 | bottom_diff[upleft] += top_diff[idx] * (1. - h_ratio) * (1. - w_ratio);
185 | bottom_diff[upright] += top_diff[idx] * (1. - h_ratio) * w_ratio;
186 | bottom_diff[downleft] += top_diff[idx] * h_ratio * (1. - w_ratio);
187 | bottom_diff[downright] += top_diff[idx] * h_ratio * w_ratio;
188 | }
189 | }
190 | }
191 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
3 |
4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include "roi_align_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 |
16 | // Number of ROIs
17 | int num_rois = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois != 5)
20 | {
21 | return 0;
22 | }
23 |
24 | // data height
25 | int data_height = THCudaTensor_size(state, features, 2);
26 | // data width
27 | int data_width = THCudaTensor_size(state, features, 3);
28 | // Number of channels
29 | int num_channels = THCudaTensor_size(state, features, 1);
30 |
31 | cudaStream_t stream = THCState_getCurrentStream(state);
32 |
33 | ROIAlignForwardLaucher(
34 | data_flat, spatial_scale, num_rois, data_height,
35 | data_width, num_channels, aligned_height,
36 | aligned_width, rois_flat,
37 | output_flat, stream);
38 |
39 | return 1;
40 | }
41 |
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 | // Grab the input tensor
46 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 | float * rois_flat = THCudaTensor_data(state, rois);
48 |
49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 |
51 | // Number of ROIs
52 | int num_rois = THCudaTensor_size(state, rois, 0);
53 | int size_rois = THCudaTensor_size(state, rois, 1);
54 | if (size_rois != 5)
55 | {
56 | return 0;
57 | }
58 |
59 | // batch size
60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 | // data height
62 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 | // data width
64 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 | // Number of channels
66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 |
68 | cudaStream_t stream = THCState_getCurrentStream(state);
69 | ROIAlignBackwardLaucher(
70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 | data_width, num_channels, aligned_height,
72 | aligned_width, rois_flat,
73 | bottom_grad_flat, stream);
74 |
75 | return 1;
76 | }
77 |
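Both entry points return 0 unless the rois tensor has exactly five columns per row, i.e. each ROI is (batch_index, x1, y1, x2, y2) in input-image coordinates. A small Python-side guard in the same spirit (a hypothetical helper, not part of the extension) can fail earlier and with a clearer message:

import torch

def check_rois(rois):
    # Mirrors the size_rois != 5 check in roi_align_forward_cuda / roi_align_backward_cuda.
    if rois.dim() != 2 or rois.size(1) != 5:
        raise ValueError("rois must be (num_rois, 5): batch_index, x1, y1, x2, y2")
    return rois

check_rois(torch.tensor([[0., 10., 10., 100., 120.]]))   # one ROI in image 0 of the batch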
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 |
4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | #include <stdio.h>
6 | #include <math.h>
7 | #include <float.h>
8 | #include "roi_align_kernel.h"
9 |
10 | #define CUDA_1D_KERNEL_LOOP(i, n) \
11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
12 | i += blockDim.x * gridDim.x)
13 |
14 |
15 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
16 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
17 | CUDA_1D_KERNEL_LOOP(index, nthreads) {
18 | // (n, c, ph, pw) is an element in the aligned output
19 | // int n = index;
20 | // int pw = n % aligned_width;
21 | // n /= aligned_width;
22 | // int ph = n % aligned_height;
23 | // n /= aligned_height;
24 | // int c = n % channels;
25 | // n /= channels;
26 |
27 | int pw = index % aligned_width;
28 | int ph = (index / aligned_width) % aligned_height;
29 | int c = (index / aligned_width / aligned_height) % channels;
30 | int n = index / aligned_width / aligned_height / channels;
31 |
32 | // bottom_rois += n * 5;
33 | float roi_batch_ind = bottom_rois[n * 5 + 0];
34 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
35 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
36 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
37 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
38 |
39 | // Force malformed ROIs to be 1x1
40 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
41 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
42 | float bin_size_h = roi_height / (aligned_height - 1.);
43 | float bin_size_w = roi_width / (aligned_width - 1.);
44 |
45 | float h = (float)(ph) * bin_size_h + roi_start_h;
46 | float w = (float)(pw) * bin_size_w + roi_start_w;
47 |
48 | int hstart = fminf(floor(h), height - 2);
49 | int wstart = fminf(floor(w), width - 2);
50 |
51 | int img_start = roi_batch_ind * channels * height * width;
52 |
53 | // bilinear interpolation
54 | if (h < 0 || h >= height || w < 0 || w >= width) {
55 | top_data[index] = 0.;
56 | } else {
57 | float h_ratio = h - (float)(hstart);
58 | float w_ratio = w - (float)(wstart);
59 | int upleft = img_start + (c * height + hstart) * width + wstart;
60 | int upright = upleft + 1;
61 | int downleft = upleft + width;
62 | int downright = downleft + 1;
63 |
64 | top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
65 | + bottom_data[upright] * (1. - h_ratio) * w_ratio
66 | + bottom_data[downleft] * h_ratio * (1. - w_ratio)
67 | + bottom_data[downright] * h_ratio * w_ratio;
68 | }
69 | }
70 | }
71 |
72 |
73 | int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
74 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
75 | const int kThreadsPerBlock = 1024;
76 | const int output_size = num_rois * aligned_height * aligned_width * channels;
77 | cudaError_t err;
78 |
79 |
80 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
81 | output_size, bottom_data, spatial_scale, height, width, channels,
82 | aligned_height, aligned_width, bottom_rois, top_data);
83 |
84 | err = cudaGetLastError();
85 | if(cudaSuccess != err) {
86 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
87 | exit( -1 );
88 | }
89 |
90 | return 1;
91 | }
92 |
93 |
94 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
95 | const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
96 | CUDA_1D_KERNEL_LOOP(index, nthreads) {
97 |
98 | // (n, c, ph, pw) is an element in the aligned output
99 | int pw = index % aligned_width;
100 | int ph = (index / aligned_width) % aligned_height;
101 | int c = (index / aligned_width / aligned_height) % channels;
102 | int n = index / aligned_width / aligned_height / channels;
103 |
104 | float roi_batch_ind = bottom_rois[n * 5 + 0];
105 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
106 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
107 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
108 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
109 | /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */
110 | /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */
111 | /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */
112 | /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */
113 |
114 | // Force malformed ROIs to be 1x1
115 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
116 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
117 | float bin_size_h = roi_height / (aligned_height - 1.);
118 | float bin_size_w = roi_width / (aligned_width - 1.);
119 |
120 | float h = (float)(ph) * bin_size_h + roi_start_h;
121 | float w = (float)(pw) * bin_size_w + roi_start_w;
122 |
123 | int hstart = fminf(floor(h), height - 2);
124 | int wstart = fminf(floor(w), width - 2);
125 |
126 | int img_start = roi_batch_ind * channels * height * width;
127 |
128 | // bilinear interpolation
129 | if (!(h < 0 || h >= height || w < 0 || w >= width)) {
130 | float h_ratio = h - (float)(hstart);
131 | float w_ratio = w - (float)(wstart);
132 | int upleft = img_start + (c * height + hstart) * width + wstart;
133 | int upright = upleft + 1;
134 | int downleft = upleft + width;
135 | int downright = downleft + 1;
136 |
137 | atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
138 | atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
139 | atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
140 | atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
141 | }
142 | }
143 | }
144 |
145 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
146 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
147 | const int kThreadsPerBlock = 1024;
148 | const int output_size = num_rois * aligned_height * aligned_width * channels;
149 | cudaError_t err;
150 |
151 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
152 | output_size, top_diff, spatial_scale, height, width, channels,
153 | aligned_height, aligned_width, bottom_diff, bottom_rois);
154 |
155 | err = cudaGetLastError();
156 | if(cudaSuccess != err) {
157 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
158 | exit( -1 );
159 | }
160 |
161 | return 1;
162 | }
163 |
164 |
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 |
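CUDA_1D_KERNEL_LOOP strides a flat index over num_rois * channels * aligned_height * aligned_width elements, and each kernel recovers (n, c, ph, pw) with the modulo/division chain at the top of the loop. A self-contained check in plain Python (illustration only) that this decomposition inverts the row-major layout:

def decompose(idx, channels, aligned_height, aligned_width):
    # Same arithmetic as the kernels, using integer division.
    pw = idx % aligned_width
    ph = (idx // aligned_width) % aligned_height
    c = (idx // aligned_width // aligned_height) % channels
    n = idx // aligned_width // aligned_height // channels
    return n, c, ph, pw

N, C, PH, PW = 2, 3, 4, 5
for idx in range(N * C * PH * PW):
    n, c, ph, pw = decompose(idx, C, PH, PW)
    assert idx == ((n * C + c) * PH + ph) * PW + pw   # row-major (n, c, ph, pw) layout
print("flat index decomposition round-trips")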
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_ALIGN_KERNEL
2 | #define _ROI_ALIGN_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
9 | const float spatial_scale, const int height, const int width,
10 | const int channels, const int aligned_height, const int aligned_width,
11 | const float* bottom_rois, float* top_data);
12 |
13 | int ROIAlignForwardLaucher(
14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
15 | const int width, const int channels, const int aligned_height,
16 | const int aligned_width, const float* bottom_rois,
17 | float* top_data, cudaStream_t stream);
18 |
19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
20 | const float spatial_scale, const int height, const int width,
21 | const int channels, const int aligned_height, const int aligned_width,
22 | float* bottom_diff, const float* bottom_rois);
23 |
24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 | const int height, const int width, const int channels, const int aligned_height,
26 | const int aligned_width, const float* bottom_rois,
27 | float* bottom_diff, cudaStream_t stream);
28 |
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 |
33 | #endif
34 |
35 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._crop_resize import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | locals[symbol] = _wrap_function(fn, _ffi)
10 | __all__.append(symbol)
11 |
12 | _import_symbols(locals())
13 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_crop import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | #this_file = os.path.dirname(__file__)
7 |
8 | sources = ['src/roi_crop.c']
9 | headers = ['src/roi_crop.h']
10 | defines = []
11 | with_cuda = False
12 |
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/roi_crop_cuda.c']
16 | headers += ['src/roi_crop_cuda.h']
17 | defines += [('WITH_CUDA', None)]
18 | with_cuda = True
19 |
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 |
25 | ffi = create_extension(
26 | '_ext.roi_crop',
27 | headers=headers,
28 | sources=sources,
29 | define_macros=defines,
30 | relative_to=__file__,
31 | with_cuda=with_cuda,
32 | extra_objects=extra_objects
33 | )
34 |
35 | if __name__ == '__main__':
36 | ffi.build()
37 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/crop_resize.py:
--------------------------------------------------------------------------------
1 | # functions/add.py
2 | import torch
3 | from torch.autograd import Function
4 | from .._ext import roi_crop
5 | from cffi import FFI
6 | ffi = FFI()
7 |
8 | class RoICropFunction(Function):
9 | def forward(self, input1, input2):
10 | self.input1 = input1
11 | self.input2 = input2
12 | self.device_c = ffi.new("int *")
13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2])
14 | #print('device %d' % torch.cuda.current_device())
15 | if input1.is_cuda:
16 | self.device = torch.cuda.current_device()
17 | else:
18 | self.device = -1
19 | self.device_c[0] = self.device
20 | if not input1.is_cuda:
21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
22 | else:
23 | output = output.cuda(self.device)
24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
25 | return output
26 |
27 | def backward(self, grad_output):
28 | grad_input1 = torch.zeros(self.input1.size())
29 | grad_input2 = torch.zeros(self.input2.size())
30 | #print('backward device %d' % self.device)
31 | if not grad_output.is_cuda:
32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output)
33 | else:
34 | grad_input1 = grad_input1.cuda(self.device)
35 | grad_input2 = grad_input2.cuda(self.device)
36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
37 | return grad_input1, grad_input2
38 |
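RoICropFunction is a bilinear sampler: input2 is a (B, H, W, 2) grid of normalized coordinates and the output is (B, C, H, W). For orientation only, recent PyTorch exposes the same idea as F.grid_sample; coordinate ordering ((y, x) here versus (x, y) there) and border handling differ, so the sketch below illustrates the operation rather than replacing the extension:

import torch
import torch.nn.functional as F

images = torch.randn(2, 3, 8, 8)                          # (B, C, H, W) feature maps
theta = torch.eye(2, 3).unsqueeze(0).repeat(2, 1, 1)      # identity affine per sample
grid = F.affine_grid(theta, size=(2, 3, 4, 4), align_corners=False)   # (B, 4, 4, 2)
crops = F.grid_sample(images, grid, align_corners=False)  # bilinear sampling -> (2, 3, 4, 4)
print(crops.shape)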
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/gridgen.py:
--------------------------------------------------------------------------------
1 | # functions/add.py
2 | import torch
3 | from torch.autograd import Function
4 | import numpy as np
5 |
6 |
7 | class AffineGridGenFunction(Function):
8 | def __init__(self, height, width,lr=1):
9 | super(AffineGridGenFunction, self).__init__()
10 | self.lr = lr
11 | self.height, self.width = height, width
12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32)
13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0)
14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0)
15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0)
16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0)
17 | self.grid[:,:,2] = np.ones([self.height, width])
18 | self.grid = torch.from_numpy(self.grid.astype(np.float32))
19 | #print(self.grid)
20 |
21 | def forward(self, input1):
22 | self.input1 = input1
23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
25 | for i in range(input1.size(0)):
26 | self.batchgrid[i] = self.grid.type_as(self.batchgrid[i])
27 |
28 | # if input1.is_cuda:
29 | # self.batchgrid = self.batchgrid.cuda()
30 | # output = output.cuda()
31 |
32 | for i in range(input1.size(0)):
33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)
34 |
35 | return output
36 |
37 | def backward(self, grad_output):
38 |
39 | grad_input1 = self.input1.new(self.input1.size()).zero_()
40 |
41 | # if grad_output.is_cuda:
42 | # self.batchgrid = self.batchgrid.cuda()
43 | # grad_input1 = grad_input1.cuda()
44 |
45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3))
46 | return grad_input1
47 |
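AffineGridGenFunction precomputes a fixed (H, W, 3) grid of homogeneous coordinates (y, x, 1) in [-1, 1] and, in forward, multiplies it by each sample's 2x3 affine matrix. The core computation, stripped of the batching bookkeeping (a sketch with made-up sizes; linspace endpoints differ slightly from the arange spacing used above):

import torch

H, W = 4, 5
ys = torch.linspace(-1, 1, H).view(H, 1).expand(H, W)
xs = torch.linspace(-1, 1, W).view(1, W).expand(H, W)
grid = torch.stack((ys, xs, torch.ones(H, W)), dim=2)      # (H, W, 3) homogeneous coords

theta = torch.tensor([[1.0, 0.0, 0.2],                     # a 2x3 affine: identity plus
                      [0.0, 1.0, -0.1]])                   # a small translation
# (H*W, 3) @ (3, 2) -> (H, W, 2): a source (y, x) coordinate for every output pixel,
# which is what the per-batch bmm in forward() computes.
sample_coords = grid.view(-1, 3).matmul(theta.t()).view(H, W, 2)
print(sample_coords.shape)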
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/roi_crop.py:
--------------------------------------------------------------------------------
1 | # functions/add.py
2 | import torch
3 | from torch.autograd import Function
4 | from .._ext import roi_crop
5 | import pdb
6 |
7 | class RoICropFunction(Function):
8 | def forward(self, input1, input2):
9 | self.input1 = input1.clone()
10 | self.input2 = input2.clone()
11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_()
12 | assert output.get_device() == input1.get_device(), "output and input1 must be on the same device"
13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device"
14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
15 | return output
16 |
17 | def backward(self, grad_output):
18 | grad_input1 = self.input1.new(self.input1.size()).zero_()
19 | grad_input2 = self.input2.new(self.input2.size()).zero_()
20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
21 | return grad_input1, grad_input2
22 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling roi_crop kernels with nvcc..."
7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_crop/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/roi_crop.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_crop import RoICropFunction
3 |
4 | class _RoICrop(Module):
5 | def __init__(self, layout = 'BHWD'):
6 | super(_RoICrop, self).__init__()
7 | def forward(self, input1, input2):
8 | return RoICropFunction()(input1, input2)
9 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop.h:
--------------------------------------------------------------------------------
1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);
2 |
3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput);
5 |
6 |
7 |
8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);
9 |
10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput);
12 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <stdbool.h>
3 | #include <stdio.h>
4 | #include "roi_crop_cuda_kernel.h"
5 |
6 | #define real float
7 |
8 | // this symbol will be resolved automatically from PyTorch libs
9 | extern THCState *state;
10 |
11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
12 | // we assume BHWD format in inputImages
13 | // we assume BHW(YX) format on grids
14 |
15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){
16 | // THCState *state = getCutorchState(L);
17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
20 |
21 | int success = 0;
22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1],
23 | output->size[3],
24 | output->size[2],
25 | output->size[0],
26 | THCudaTensor_size(state, inputImages, 1),
27 | THCudaTensor_size(state, inputImages, 2),
28 | THCudaTensor_size(state, inputImages, 3),
29 | THCudaTensor_size(state, inputImages, 0),
30 | THCudaTensor_data(state, inputImages),
31 | THCudaTensor_stride(state, inputImages, 0),
32 | THCudaTensor_stride(state, inputImages, 1),
33 | THCudaTensor_stride(state, inputImages, 2),
34 | THCudaTensor_stride(state, inputImages, 3),
35 | THCudaTensor_data(state, grids),
36 | THCudaTensor_stride(state, grids, 0),
37 | THCudaTensor_stride(state, grids, 3),
38 | THCudaTensor_stride(state, grids, 1),
39 | THCudaTensor_stride(state, grids, 2),
40 | THCudaTensor_data(state, output),
41 | THCudaTensor_stride(state, output, 0),
42 | THCudaTensor_stride(state, output, 1),
43 | THCudaTensor_stride(state, output, 2),
44 | THCudaTensor_stride(state, output, 3),
45 | THCState_getCurrentStream(state));
46 |
47 | //check for errors
48 | if (!success) {
49 | THError("aborting");
50 | }
51 | return 1;
52 | }
53 |
54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput)
56 | {
57 | // THCState *state = getCutorchState(L);
58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor");
62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor");
63 |
64 | int success = 0;
65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1],
66 | gradOutput->size[3],
67 | gradOutput->size[2],
68 | gradOutput->size[0],
69 | THCudaTensor_size(state, inputImages, 1),
70 | THCudaTensor_size(state, inputImages, 2),
71 | THCudaTensor_size(state, inputImages, 3),
72 | THCudaTensor_size(state, inputImages, 0),
73 | THCudaTensor_data(state, inputImages),
74 | THCudaTensor_stride(state, inputImages, 0),
75 | THCudaTensor_stride(state, inputImages, 1),
76 | THCudaTensor_stride(state, inputImages, 2),
77 | THCudaTensor_stride(state, inputImages, 3),
78 | THCudaTensor_data(state, grids),
79 | THCudaTensor_stride(state, grids, 0),
80 | THCudaTensor_stride(state, grids, 3),
81 | THCudaTensor_stride(state, grids, 1),
82 | THCudaTensor_stride(state, grids, 2),
83 | THCudaTensor_data(state, gradInputImages),
84 | THCudaTensor_stride(state, gradInputImages, 0),
85 | THCudaTensor_stride(state, gradInputImages, 1),
86 | THCudaTensor_stride(state, gradInputImages, 2),
87 | THCudaTensor_stride(state, gradInputImages, 3),
88 | THCudaTensor_data(state, gradGrids),
89 | THCudaTensor_stride(state, gradGrids, 0),
90 | THCudaTensor_stride(state, gradGrids, 3),
91 | THCudaTensor_stride(state, gradGrids, 1),
92 | THCudaTensor_stride(state, gradGrids, 2),
93 | THCudaTensor_data(state, gradOutput),
94 | THCudaTensor_stride(state, gradOutput, 0),
95 | THCudaTensor_stride(state, gradOutput, 1),
96 | THCudaTensor_stride(state, gradOutput, 2),
97 | THCudaTensor_stride(state, gradOutput, 3),
98 | THCState_getCurrentStream(state));
99 |
100 | //check for errors
101 | if (!success) {
102 | THError("aborting");
103 | }
104 | return 1;
105 | }
106 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.h:
--------------------------------------------------------------------------------
1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
2 | // we assume BHWD format in inputImages
3 | // we assume BHW(YX) format on grids
4 |
5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output);
6 |
7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput);
9 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 |
6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc,
7 | /*output->size[2]*/int ow,
8 | /*output->size[1]*/int oh,
9 | /*output->size[0]*/int ob,
10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic,
11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih,
12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw,
13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib,
14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw,
17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream);
18 |
19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc,
20 | /*gradOutput->size[2]*/int gow,
21 | /*gradOutput->size[1]*/int goh,
22 | /*gradOutput->size[0]*/int gob,
23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic,
24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih,
25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw,
26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib,
27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw,
30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw,
31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw,
32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream);
33 |
34 |
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_pooling import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 |
7 | sources = ['src/roi_pooling.c']
8 | headers = ['src/roi_pooling.h']
9 | extra_objects = []
10 | defines = []
11 | with_cuda = False
12 |
13 | this_file = os.path.dirname(os.path.realpath(__file__))
14 | print(this_file)
15 |
16 | if torch.cuda.is_available():
17 | print('Including CUDA code.')
18 | sources += ['src/roi_pooling_cuda.c']
19 | headers += ['src/roi_pooling_cuda.h']
20 | defines += [('WITH_CUDA', None)]
21 | with_cuda = True
22 | extra_objects = ['src/roi_pooling.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 |
25 | ffi = create_extension(
26 | '_ext.roi_pooling',
27 | headers=headers,
28 | sources=sources,
29 | define_macros=defines,
30 | relative_to=__file__,
31 | with_cuda=with_cuda,
32 | extra_objects=extra_objects
33 | )
34 |
35 | if __name__ == '__main__':
36 | ffi.build()
37 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_pooling
4 | import pdb
5 |
6 | class RoIPoolFunction(Function):
7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale):
8 | ctx.pooled_width = pooled_width
9 | ctx.pooled_height = pooled_height
10 | ctx.spatial_scale = spatial_scale
11 | ctx.feature_size = None
12 |
13 | def forward(ctx, features, rois):
14 | ctx.feature_size = features.size()
15 | batch_size, num_channels, data_height, data_width = ctx.feature_size
16 | num_rois = rois.size(0)
17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 | ctx.rois = rois
20 | if not features.is_cuda:
21 | _features = features.permute(0, 2, 3, 1)
22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 | _features, rois, output)
24 | else:
25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
26 | features, rois, output, ctx.argmax)
27 |
28 | return output
29 |
30 | def backward(ctx, grad_output):
31 | assert(ctx.feature_size is not None and grad_output.is_cuda)
32 | batch_size, num_channels, data_height, data_width = ctx.feature_size
33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
34 |
35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
36 | grad_output, ctx.rois, grad_input, ctx.argmax)
37 |
38 | return grad_input, None
39 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/roi_pooling/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_pool import RoIPoolFunction
3 |
4 |
5 | class _RoIPooling(Module):
6 | def __init__(self, pooled_height, pooled_width, spatial_scale):
7 | super(_RoIPooling, self).__init__()
8 |
9 | self.pooled_width = int(pooled_width)
10 | self.pooled_height = int(pooled_height)
11 | self.spatial_scale = float(spatial_scale)
12 |
13 | def forward(self, features, rois):
14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
15 |
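A hypothetical usage sketch for the module above, assuming lib/ is on the Python path, the _ext.roi_pooling extension has been built (python build.py after compiling the CUDA kernel), and a GPU is available. Each ROI row is (batch_index, x1, y1, x2, y2) in input-image coordinates, and spatial_scale (commonly 1/16 for a stride-16 backbone) maps those boxes onto the feature map:

import torch
from model.roi_pooling.modules.roi_pool import _RoIPooling

pool = _RoIPooling(pooled_height=7, pooled_width=7, spatial_scale=1.0 / 16)
features = torch.randn(1, 512, 38, 50).cuda()               # feature map of one image
rois = torch.tensor([[0.,  0.,  0., 320., 240.],            # two ROIs, both in image 0
                     [0., 64., 48., 400., 300.]]).cuda()
pooled = pool(features, rois)                               # -> torch.Size([2, 512, 7, 7])
print(pooled.shape)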
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <math.h>
3 |
4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
6 | {
7 | // Grab the input tensor
8 | float * data_flat = THFloatTensor_data(features);
9 | float * rois_flat = THFloatTensor_data(rois);
10 |
11 | float * output_flat = THFloatTensor_data(output);
12 |
13 | // Number of ROIs
14 | int num_rois = THFloatTensor_size(rois, 0);
15 | int size_rois = THFloatTensor_size(rois, 1);
16 | // batch size
17 | int batch_size = THFloatTensor_size(features, 0);
18 | if(batch_size != 1)
19 | {
20 | return 0;
21 | }
22 | // data height
23 | int data_height = THFloatTensor_size(features, 1);
24 | // data width
25 | int data_width = THFloatTensor_size(features, 2);
26 | // Number of channels
27 | int num_channels = THFloatTensor_size(features, 3);
28 |
29 | // Fill every element of the output tensor with -1 before max pooling.
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1);
31 |
32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
33 | int index_roi = 0;
34 | int index_output = 0;
35 | int n;
36 | for (n = 0; n < num_rois; ++n)
37 | {
38 | int roi_batch_ind = rois_flat[index_roi + 0];
39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
43 | // CHECK_GE(roi_batch_ind, 0);
44 | // CHECK_LT(roi_batch_ind, batch_size);
45 |
46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height);
49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width);
50 |
51 | int index_data = roi_batch_ind * data_height * data_width * num_channels;
52 | const int output_area = pooled_width * pooled_height;
53 |
54 | int c, ph, pw;
55 | for (ph = 0; ph < pooled_height; ++ph)
56 | {
57 | for (pw = 0; pw < pooled_width; ++pw)
58 | {
59 | int hstart = (floor((float)(ph) * bin_size_h));
60 | int wstart = (floor((float)(pw) * bin_size_w));
61 | int hend = (ceil((float)(ph + 1) * bin_size_h));
62 | int wend = (ceil((float)(pw + 1) * bin_size_w));
63 |
64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
68 |
69 | const int pool_index = index_output + (ph * pooled_width + pw);
70 | int is_empty = (hend <= hstart) || (wend <= wstart);
71 | if (is_empty)
72 | {
73 | for (c = 0; c < num_channels * output_area; c += output_area)
74 | {
75 | output_flat[pool_index + c] = 0;
76 | }
77 | }
78 | else
79 | {
80 | int h, w, c;
81 | for (h = hstart; h < hend; ++h)
82 | {
83 | for (w = wstart; w < wend; ++w)
84 | {
85 | for (c = 0; c < num_channels; ++c)
86 | {
87 | const int index = (h * data_width + w) * num_channels + c;
88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
89 | {
90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index];
91 | }
92 | }
93 | }
94 | }
95 | }
96 | }
97 | }
98 |
99 | // Increment ROI index
100 | index_roi += size_rois;
101 | index_output += pooled_height * pooled_width * num_channels;
102 | }
103 | return 1;
104 | }
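
Worked example of the bin arithmetic in the inner loop: pooling a 15-row ROI (in feature-map units) to pooled_height = 7. Because hstart uses floor while hend uses ceil, neighbouring bins overlap by up to one row, exactly as in the C code (plain Python, illustration only):

import math

roi_height, pooled_height = 15, 7
bin_size_h = roi_height / float(pooled_height)
for ph in range(pooled_height):
    hstart = int(math.floor(ph * bin_size_h))
    hend = int(math.ceil((ph + 1) * bin_size_h))
    print(ph, hstart, hend)    # bin 0 covers rows [0, 3), bin 1 covers [2, 5), ...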
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include "roi_pooling_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 |
17 | // Number of ROIs
18 | int num_rois = THCudaTensor_size(state, rois, 0);
19 | int size_rois = THCudaTensor_size(state, rois, 1);
20 | if (size_rois != 5)
21 | {
22 | return 0;
23 | }
24 |
25 | // batch size
26 | // int batch_size = THCudaTensor_size(state, features, 0);
27 | // if (batch_size != 1)
28 | // {
29 | // return 0;
30 | // }
31 | // data height
32 | int data_height = THCudaTensor_size(state, features, 2);
33 | // data width
34 | int data_width = THCudaTensor_size(state, features, 3);
35 | // Number of channels
36 | int num_channels = THCudaTensor_size(state, features, 1);
37 |
38 | cudaStream_t stream = THCState_getCurrentStream(state);
39 |
40 | ROIPoolForwardLaucher(
41 | data_flat, spatial_scale, num_rois, data_height,
42 | data_width, num_channels, pooled_height,
43 | pooled_width, rois_flat,
44 | output_flat, argmax_flat, stream);
45 |
46 | return 1;
47 | }
48 |
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 | // Grab the input tensor
53 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 | float * rois_flat = THCudaTensor_data(state, rois);
55 |
56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 |
59 | // Number of ROIs
60 | int num_rois = THCudaTensor_size(state, rois, 0);
61 | int size_rois = THCudaTensor_size(state, rois, 1);
62 | if (size_rois != 5)
63 | {
64 | return 0;
65 | }
66 |
67 | // batch size
68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 | // if (batch_size != 1)
70 | // {
71 | // return 0;
72 | // }
73 | // data height
74 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 | // data width
76 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 | // Number of channels
78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 |
80 | cudaStream_t stream = THCState_getCurrentStream(state);
81 | ROIPoolBackwardLaucher(
82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 | data_width, num_channels, pooled_height,
84 | pooled_width, rois_flat,
85 | bottom_grad_flat, argmax_flat, stream);
86 |
87 | return 1;
88 | }
89 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 |
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_POOLING_KERNEL
2 | #define _ROI_POOLING_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int ROIPoolForwardLaucher(
9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 | const int width, const int channels, const int pooled_height,
11 | const int pooled_width, const float* bottom_rois,
12 | float* top_data, int* argmax_data, cudaStream_t stream);
13 |
14 |
15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 | const int height, const int width, const int channels, const int pooled_height,
17 | const int pooled_width, const float* bottom_rois,
18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
25 |
26 |
--------------------------------------------------------------------------------
/lib/model/rpn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/rpn/__init__.py
--------------------------------------------------------------------------------
/lib/model/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | # --------------------------------------------------------
3 | # Faster R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick and Sean Bell
7 | # --------------------------------------------------------
8 |
9 | import numpy as np
10 | import pdb
11 |
12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
13 | #
14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
15 | # >> anchors
16 | #
17 | # anchors =
18 | #
19 | # -83 -39 100 56
20 | # -175 -87 192 104
21 | # -359 -183 376 200
22 | # -55 -55 72 72
23 | # -119 -119 136 136
24 | # -247 -247 264 264
25 | # -35 -79 52 96
26 | # -79 -167 96 184
27 | # -167 -343 184 360
28 |
29 | #array([[ -83., -39., 100., 56.],
30 | # [-175., -87., 192., 104.],
31 | # [-359., -183., 376., 200.],
32 | # [ -55., -55., 72., 72.],
33 | # [-119., -119., 136., 136.],
34 | # [-247., -247., 264., 264.],
35 | # [ -35., -79., 52., 96.],
36 | # [ -79., -167., 96., 184.],
37 | # [-167., -343., 184., 360.]])
38 |
39 | try:
40 | xrange # Python 2
41 | except NameError:
42 | xrange = range # Python 3
43 |
44 |
45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
46 | scales=2**np.arange(3, 6)):
47 | """
48 | Generate anchor (reference) windows by enumerating aspect ratios X
49 | scales wrt a reference (0, 0, 15, 15) window.
50 | """
51 |
52 | base_anchor = np.array([1, 1, base_size, base_size]) - 1
53 | ratio_anchors = _ratio_enum(base_anchor, ratios)
54 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
55 | for i in xrange(ratio_anchors.shape[0])])
56 | return anchors
57 |
58 | def _whctrs(anchor):
59 | """
60 | Return width, height, x center, and y center for an anchor (window).
61 | """
62 |
63 | w = anchor[2] - anchor[0] + 1
64 | h = anchor[3] - anchor[1] + 1
65 | x_ctr = anchor[0] + 0.5 * (w - 1)
66 | y_ctr = anchor[1] + 0.5 * (h - 1)
67 | return w, h, x_ctr, y_ctr
68 |
69 | def _mkanchors(ws, hs, x_ctr, y_ctr):
70 | """
71 | Given a vector of widths (ws) and heights (hs) around a center
72 | (x_ctr, y_ctr), output a set of anchors (windows).
73 | """
74 |
75 | ws = ws[:, np.newaxis]
76 | hs = hs[:, np.newaxis]
77 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
78 | y_ctr - 0.5 * (hs - 1),
79 | x_ctr + 0.5 * (ws - 1),
80 | y_ctr + 0.5 * (hs - 1)))
81 | return anchors
82 |
83 | def _ratio_enum(anchor, ratios):
84 | """
85 | Enumerate a set of anchors for each aspect ratio wrt an anchor.
86 | """
87 |
88 | w, h, x_ctr, y_ctr = _whctrs(anchor)
89 | size = w * h
90 | size_ratios = size / ratios
91 | ws = np.round(np.sqrt(size_ratios))
92 | hs = np.round(ws * ratios)
93 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
94 | return anchors
95 |
96 | def _scale_enum(anchor, scales):
97 | """
98 | Enumerate a set of anchors for each scale wrt an anchor.
99 | """
100 |
101 | w, h, x_ctr, y_ctr = _whctrs(anchor)
102 | ws = w * scales
103 | hs = h * scales
104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
105 | return anchors
106 |
107 | if __name__ == '__main__':
108 | import time
109 | t = time.time()
110 | a = generate_anchors()
111 | print(time.time() - t)
112 | print(a)
113 | from IPython import embed; embed()
114 |
--------------------------------------------------------------------------------
/lib/model/rpn/proposal_layer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | # --------------------------------------------------------
3 | # Faster R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick and Sean Bell
7 | # --------------------------------------------------------
8 | # --------------------------------------------------------
9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu
10 | # --------------------------------------------------------
11 |
12 | import torch
13 | import torch.nn as nn
14 | import numpy as np
15 | import math
16 | import yaml
17 | from model.utils.config import cfg
18 | from .generate_anchors import generate_anchors
19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch
20 | from model.nms.nms_wrapper import nms
21 |
22 | import pdb
23 |
24 | DEBUG = False
25 |
26 | class _ProposalLayer(nn.Module):
27 | """
28 | Outputs object detection proposals by applying estimated bounding-box
29 | transformations to a set of regular boxes (called "anchors").
30 | """
31 |
32 | def __init__(self, feat_stride, scales, ratios):
33 | super(_ProposalLayer, self).__init__()
34 |
35 | self._feat_stride = feat_stride
36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales),
37 | ratios=np.array(ratios))).float()
38 | self._num_anchors = self._anchors.size(0)
39 |
40 | # rois blob: holds R regions of interest, each is a 5-tuple
41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a
42 | # rectangle (x1, y1, x2, y2)
43 | # top[0].reshape(1, 5)
44 | #
45 | # # scores blob: holds scores for R regions of interest
46 | # if len(top) > 1:
47 | # top[1].reshape(1, 1, 1, 1)
48 |
49 | def forward(self, input):
50 |
51 | # Algorithm:
52 | #
53 | # for each (H, W) location i
54 | # generate A anchor boxes centered on cell i
55 | # apply predicted bbox deltas at cell i to each of the A anchors
56 | # clip predicted boxes to image
57 | # remove predicted boxes with either height or width < threshold
58 | # sort all (proposal, score) pairs by score from highest to lowest
59 | # take top pre_nms_topN proposals before NMS
60 | # apply NMS with threshold 0.7 to remaining proposals
61 | # take after_nms_topN proposals after NMS
62 | # return the top proposals (-> RoIs top, scores top)
63 |
64 |
65 | # the first set of _num_anchors channels are bg probs
66 | # the second set are the fg probs
67 | scores = input[0][:, self._num_anchors:, :, :]
68 | bbox_deltas = input[1]
69 | im_info = input[2]
70 | cfg_key = input[3]
71 |
72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
75 | min_size = cfg[cfg_key].RPN_MIN_SIZE
76 |
77 | batch_size = bbox_deltas.size(0)
78 |
79 | feat_height, feat_width = scores.size(2), scores.size(3)
80 | shift_x = np.arange(0, feat_width) * self._feat_stride
81 | shift_y = np.arange(0, feat_height) * self._feat_stride
82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
84 | shift_x.ravel(), shift_y.ravel())).transpose())
85 | shifts = shifts.contiguous().type_as(scores).float()
86 |
87 | A = self._num_anchors
88 | K = shifts.size(0)
89 |
90 | self._anchors = self._anchors.type_as(scores)
91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)
94 |
95 | # Transpose and reshape predicted bbox transformations to get them
96 | # into the same order as the anchors:
97 |
98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
100 |
101 | # Same story for the scores:
102 | scores = scores.permute(0, 2, 3, 1).contiguous()
103 | scores = scores.view(batch_size, -1)
104 |
105 | # Convert anchors into proposals via bbox transformations
106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
107 |
108 | # 2. clip predicted boxes to image
109 | proposals = clip_boxes(proposals, im_info, batch_size)
110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size)
111 |
112 | # assign the score to 0 if it is not kept.
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])
114 |
115 | # trim the keep index so it is equal over the batch
116 | # keep_idx = torch.cat(tuple(keep_idx), 0)
117 |
118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
120 |
121 | # _, order = torch.sort(scores_keep, 1, True)
122 |
123 | scores_keep = scores
124 | proposals_keep = proposals
125 | _, order = torch.sort(scores_keep, 1, True)
126 |
127 | output = scores.new(batch_size, post_nms_topN, 5).zero_()
128 | for i in range(batch_size):
129 | # # 3. remove predicted boxes with either height or width < threshold
130 | # # (NOTE: convert min_size to input image scale stored in im_info[2])
131 | proposals_single = proposals_keep[i]
132 | scores_single = scores_keep[i]
133 |
134 | # # 4. sort all (proposal, score) pairs by score from highest to lowest
135 | # # 5. take top pre_nms_topN (e.g. 6000)
136 | order_single = order[i]
137 |
138 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
139 | order_single = order_single[:pre_nms_topN]
140 |
141 | proposals_single = proposals_single[order_single, :]
142 | scores_single = scores_single[order_single].view(-1,1)
143 |
144 | # 6. apply nms (e.g. threshold = 0.7)
145 | # 7. take after_nms_topN (e.g. 300)
146 | # 8. return the top proposals (-> RoIs top)
147 |
148 | keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
149 | keep_idx_i = keep_idx_i.long().view(-1)
150 |
151 | if post_nms_topN > 0:
152 | keep_idx_i = keep_idx_i[:post_nms_topN]
153 | proposals_single = proposals_single[keep_idx_i, :]
154 | scores_single = scores_single[keep_idx_i, :]
155 |
156 | # padding 0 at the end.
157 | num_proposal = proposals_single.size(0)
158 | output[i,:,0] = i
159 | output[i,:num_proposal,1:] = proposals_single
160 |
161 | return output
162 |
163 | def backward(self, top, propagate_down, bottom):
164 | """This layer does not propagate gradients."""
165 | pass
166 |
167 | def reshape(self, bottom, top):
168 | """Reshaping happens during the call to forward."""
169 | pass
170 |
171 | def _filter_boxes(self, boxes, min_size):
172 | """Remove all boxes with any side smaller than min_size."""
173 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
174 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
175 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs)))
176 | return keep
177 |
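How the proposal layer tiles anchors over the feature map: K cell offsets (one per feature-map location, scaled to image pixels by feat_stride) are broadcast against the A base anchors, giving K*A candidate boxes before the predicted deltas are applied. A standalone sketch with small, made-up feature-map sizes (assumes lib/ is on the Python path so generate_anchors imports):

import numpy as np
import torch
from model.rpn.generate_anchors import generate_anchors

feat_stride, feat_height, feat_width = 16, 3, 4
anchors = torch.from_numpy(generate_anchors()).float()      # (A, 4) base anchors
shift_x = np.arange(0, feat_width) * feat_stride
shift_y = np.arange(0, feat_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(
    np.vstack((shift_x.ravel(), shift_y.ravel(),
               shift_x.ravel(), shift_y.ravel())).transpose()).float()   # (K, 4)
A, K = anchors.size(0), shifts.size(0)
all_anchors = (anchors.view(1, A, 4) + shifts.view(K, 1, 4)).view(K * A, 4)
print(all_anchors.shape)    # torch.Size([108, 4]) = 12 locations x 9 anchors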
--------------------------------------------------------------------------------
/lib/model/rpn/rpn.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 |
7 | from model.utils.config import cfg
8 | from .proposal_layer import _ProposalLayer
9 | from .anchor_target_layer import _AnchorTargetLayer
10 | from model.utils.net_utils import _smooth_l1_loss
11 |
12 | import numpy as np
13 | import math
14 | import pdb
15 | import time
16 |
17 | class _RPN(nn.Module):
18 | """ region proposal network """
19 | def __init__(self, din):
20 | super(_RPN, self).__init__()
21 |
22 | self.din = din # get depth of input feature map, e.g., 512
23 | self.anchor_scales = cfg.ANCHOR_SCALES
24 | self.anchor_ratios = cfg.ANCHOR_RATIOS
25 | self.feat_stride = cfg.FEAT_STRIDE[0]
26 |
27 | # define the convrelu layers processing input feature map
28 | self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)
29 |
30 | # define bg/fg classification score layer
31 | self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors)
32 | self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)
33 |
34 | # define anchor box offset prediction layer
35 | self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors)
36 | self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)
37 |
38 | # define proposal layer
39 | self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
40 |
41 | # define anchor target layer
42 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
43 |
44 | self.rpn_loss_cls = 0
45 | self.rpn_loss_box = 0
46 |
47 | @staticmethod
48 | def reshape(x, d):
49 | input_shape = x.size()
50 | x = x.view(
51 | input_shape[0],
52 | int(d),
53 | int(float(input_shape[1] * input_shape[2]) / float(d)),
54 | input_shape[3]
55 | )
56 | return x
57 |
58 | def forward(self, base_feat, im_info, gt_boxes, num_boxes):
59 |
60 | batch_size = base_feat.size(0)
61 |
62 | # return feature map after convrelu layer
63 | rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
64 | # get rpn classification score
65 | rpn_cls_score = self.RPN_cls_score(rpn_conv1)
66 |
67 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
68 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, dim=1)
69 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
70 |
71 | # get rpn offsets to the anchor boxes
72 | rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)
73 |
74 | # proposal layer
75 | cfg_key = 'TRAIN' if self.training else 'TEST'
76 |
77 | rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
78 | im_info, cfg_key))
79 |
80 | self.rpn_loss_cls = 0
81 | self.rpn_loss_box = 0
82 |
83 |         # generate training labels and build the RPN loss
84 | if self.training:
85 | assert gt_boxes is not None
86 |
87 | rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
88 |
89 | # compute classification loss
90 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
91 | rpn_label = rpn_data[0].view(batch_size, -1)
92 |
93 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
94 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
95 | rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
96 | rpn_label = Variable(rpn_label.long())
97 | self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
98 | fg_cnt = torch.sum(rpn_label.data.ne(0))
99 |
100 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
101 |
102 | # compute bbox regression loss
103 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
104 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
105 | rpn_bbox_targets = Variable(rpn_bbox_targets)
106 |
107 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
108 | rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])
109 |
110 | return rois, self.rpn_loss_cls, self.rpn_loss_box
111 |
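
A small sketch (toy shapes, 9 anchors assumed) of the reshape trick used in _RPN above: the 2*A-channel score map is folded to 2 channels so softmax over dim=1 yields a bg/fg distribution per anchor, then unfolded back.

import torch
import torch.nn.functional as F

A = 9                                   # anchors per location (scales x ratios)
x = torch.randn(1, 2 * A, 4, 6)         # rpn_cls_score: (N, 2A, H, W)

def reshape(x, d):
    n, c, h, w = x.size()
    return x.view(n, d, (c * h) // d, w)

score_2ch = reshape(x, 2)               # (N, 2, A*H, W): dim 1 holds the bg/fg pair
prob_2ch = F.softmax(score_2ch, dim=1)  # softmax over the 2 classes per anchor
prob = reshape(prob_2ch, 2 * A)         # back to (N, 2A, H, W)

# Each (bg, fg) pair now sums to 1.
print(torch.allclose(prob_2ch.sum(1), torch.ones(1, A * 4, 6)))  # True
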
--------------------------------------------------------------------------------
/lib/model/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/princewang1994/R-FCN.pytorch/0c8da30bfd23e61f4c7fd1299626b9d82cf8a164/lib/model/utils/__init__.py
--------------------------------------------------------------------------------
/lib/model/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes,
16 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
17 | return bbox_overlaps_c(boxes, query_boxes)
18 |
19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
20 | np.ndarray[DTYPE_t, ndim=2] boxes,
21 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
22 | """
23 | Parameters
24 | ----------
25 | boxes: (N, 4) ndarray of float
26 | query_boxes: (K, 4) ndarray of float
27 | Returns
28 | -------
29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
30 | """
31 | cdef unsigned int N = boxes.shape[0]
32 | cdef unsigned int K = query_boxes.shape[0]
33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
34 | cdef DTYPE_t iw, ih, box_area
35 | cdef DTYPE_t ua
36 | cdef unsigned int k, n
37 | for k in range(K):
38 | box_area = (
39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
41 | )
42 | for n in range(N):
43 | iw = (
44 | min(boxes[n, 2], query_boxes[k, 2]) -
45 | max(boxes[n, 0], query_boxes[k, 0]) + 1
46 | )
47 | if iw > 0:
48 | ih = (
49 | min(boxes[n, 3], query_boxes[k, 3]) -
50 | max(boxes[n, 1], query_boxes[k, 1]) + 1
51 | )
52 | if ih > 0:
53 | ua = float(
54 | (boxes[n, 2] - boxes[n, 0] + 1) *
55 | (boxes[n, 3] - boxes[n, 1] + 1) +
56 | box_area - iw * ih
57 | )
58 | overlaps[n, k] = iw * ih / ua
59 | return overlaps
60 |
61 |
62 | def bbox_intersections(
63 | np.ndarray[DTYPE_t, ndim=2] boxes,
64 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
65 | return bbox_intersections_c(boxes, query_boxes)
66 |
67 |
68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
69 | np.ndarray[DTYPE_t, ndim=2] boxes,
70 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
71 | """
72 | For each query box compute the intersection ratio covered by boxes
73 | ----------
74 | Parameters
75 | ----------
76 | boxes: (N, 4) ndarray of float
77 | query_boxes: (K, 4) ndarray of float
78 | Returns
79 | -------
80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes
81 | """
82 | cdef unsigned int N = boxes.shape[0]
83 | cdef unsigned int K = query_boxes.shape[0]
84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
85 | cdef DTYPE_t iw, ih, box_area
86 | cdef DTYPE_t ua
87 | cdef unsigned int k, n
88 | for k in range(K):
89 | box_area = (
90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
92 | )
93 | for n in range(N):
94 | iw = (
95 | min(boxes[n, 2], query_boxes[k, 2]) -
96 | max(boxes[n, 0], query_boxes[k, 0]) + 1
97 | )
98 | if iw > 0:
99 | ih = (
100 | min(boxes[n, 3], query_boxes[k, 3]) -
101 | max(boxes[n, 1], query_boxes[k, 1]) + 1
102 | )
103 | if ih > 0:
104 | intersec[n, k] = iw * ih / box_area
105 | return intersec
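
A vectorized NumPy sketch equivalent to bbox_overlaps above (same +1 pixel width/height convention), useful as a reference when checking the compiled Cython extension; the function name is mine, not part of the module.

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """IoU matrix between (N, 4) and (K, 4) boxes in (x1, y1, x2, y2) format."""
    area_q = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
             (query_boxes[:, 3] - query_boxes[:, 1] + 1)          # (K,)
    area_b = (boxes[:, 2] - boxes[:, 0] + 1) * \
             (boxes[:, 3] - boxes[:, 1] + 1)                      # (N,)
    iw = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) - \
         np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1
    ih = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) - \
         np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)           # (N, K)
    union = area_b[:, None] + area_q[None, :] - inter
    return inter / union

boxes = np.array([[0., 0., 9., 9.]])
query = np.array([[5., 5., 14., 14.]])
print(bbox_overlaps_np(boxes, query))  # [[0.1428...]]  (25 / (100 + 100 - 25))
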
--------------------------------------------------------------------------------
/lib/model/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | # from scipy.misc import imread, imresize
12 | import cv2
13 |
14 | try:
15 | xrange # Python 2
16 | except NameError:
17 | xrange = range # Python 3
18 |
19 |
20 | def im_list_to_blob(ims):
21 | """Convert a list of images into a network input.
22 |
23 | Assumes images are already prepared (means subtracted, BGR order, ...).
24 | """
25 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
26 | num_images = len(ims)
27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
28 | dtype=np.float32)
29 | for i in xrange(num_images):
30 | im = ims[i]
31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
32 |
33 | return blob
34 |
35 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
36 | """Mean subtract and scale an image for use in a blob."""
37 |
38 | im = im.astype(np.float32, copy=False)
39 | im -= pixel_means
40 | # im = im[:, :, ::-1]
41 | im_shape = im.shape
42 | im_size_min = np.min(im_shape[0:2])
43 | im_size_max = np.max(im_shape[0:2])
44 | im_scale = float(target_size) / float(im_size_min)
45 | # Prevent the biggest axis from being more than MAX_SIZE
46 | # if np.round(im_scale * im_size_max) > max_size:
47 | # im_scale = float(max_size) / float(im_size_max)
48 | # im = imresize(im, im_scale)
49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
50 | interpolation=cv2.INTER_LINEAR)
51 |
52 | return im, im_scale
53 |
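
A short usage sketch of the two helpers above, assuming lib/ is on the Python path; the image sizes and BGR pixel means are illustrative, not the project's cfg.PIXEL_MEANS. It shows how differently sized inputs are rescaled and then zero-padded into one blob.

import numpy as np
from model.utils.blob import prep_im_for_blob, im_list_to_blob

pixel_means = np.array([[[102.9801, 115.9465, 122.7717]]])  # illustrative BGR means

im_small = np.random.randint(0, 255, (300, 400, 3)).astype(np.uint8)
im_large = np.random.randint(0, 255, (375, 500, 3)).astype(np.uint8)

ims, scales = [], []
for im in (im_small, im_large):
    im, sc = prep_im_for_blob(im, pixel_means, target_size=600, max_size=1000)
    ims.append(im)
    scales.append(sc)

blob = im_list_to_blob(ims)
# Both images are scaled so the shorter side becomes 600, then padded to the
# largest height/width in the list.
print(blob.shape, scales)  # (2, 600, 800, 3) [2.0, 1.6]
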
--------------------------------------------------------------------------------
/lib/model/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
2 | import tensorflow as tf
3 | import numpy as np
4 | import scipy.misc
5 | try:
6 | from StringIO import StringIO # Python 2.7
7 | except ImportError:
8 | from io import BytesIO # Python 3.x
9 |
10 |
11 | class Logger(object):
12 |
13 | def __init__(self, log_dir):
14 | """Create a summary writer logging to log_dir."""
15 | self.writer = tf.summary.FileWriter(log_dir)
16 |
17 | def scalar_summary(self, tag, value, step):
18 | """Log a scalar variable."""
19 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
20 | self.writer.add_summary(summary, step)
21 |
22 | def image_summary(self, tag, images, step):
23 | """Log a list of images."""
24 |
25 | img_summaries = []
26 | for i, img in enumerate(images):
27 | # Write the image to a string
28 | try:
29 | s = StringIO()
30 | except:
31 | s = BytesIO()
32 | scipy.misc.toimage(img).save(s, format="png")
33 |
34 | # Create an Image object
35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
36 | height=img.shape[0],
37 | width=img.shape[1])
38 | # Create a Summary value
39 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
40 |
41 | # Create and write Summary
42 | summary = tf.Summary(value=img_summaries)
43 | self.writer.add_summary(summary, step)
44 |
45 | def histo_summary(self, tag, values, step, bins=1000):
46 | """Log a histogram of the tensor of values."""
47 |
48 | # Create a histogram using numpy
49 | counts, bin_edges = np.histogram(values, bins=bins)
50 |
51 | # Fill the fields of the histogram proto
52 | hist = tf.HistogramProto()
53 | hist.min = float(np.min(values))
54 | hist.max = float(np.max(values))
55 | hist.num = int(np.prod(values.shape))
56 | hist.sum = float(np.sum(values))
57 | hist.sum_squares = float(np.sum(values**2))
58 |
59 | # Drop the start of the first bin
60 | bin_edges = bin_edges[1:]
61 |
62 | # Add bin edges and counts
63 | for edge in bin_edges:
64 | hist.bucket_limit.append(edge)
65 | for c in counts:
66 | hist.bucket.append(c)
67 |
68 | # Create and write Summary
69 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
70 | self.writer.add_summary(summary, step)
71 | self.writer.flush()
72 |
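
A minimal usage sketch of the Logger above; it assumes a TensorFlow 1.x environment (tf.summary.FileWriter) and that lib/ is on the Python path. The log directory and tag names are arbitrary.

import numpy as np
from model.utils.logger import Logger  # the class defined above

logger = Logger('./logs/rfcn_run')      # any writable directory

for step in range(100):
    fake_loss = 1.0 / (step + 1)
    logger.scalar_summary('train/total_loss', fake_loss, step)
    if step % 50 == 0:
        logger.histo_summary('rpn/cls_score_weights',
                             np.random.randn(512).astype(np.float32), step)
# View with: tensorboard --logdir=./logs
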
--------------------------------------------------------------------------------
/lib/model/utils/net_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | import numpy as np
6 | import torchvision.models as models
7 | from model.utils.config import cfg
8 | from model.roi_crop.functions.roi_crop import RoICropFunction
9 | import cv2
10 | import pdb
11 | import random
12 |
13 | def save_net(fname, net):
14 | import h5py
15 | h5f = h5py.File(fname, mode='w')
16 | for k, v in net.state_dict().items():
17 | h5f.create_dataset(k, data=v.cpu().numpy())
18 |
19 | def load_net(fname, net):
20 | import h5py
21 | h5f = h5py.File(fname, mode='r')
22 | for k, v in net.state_dict().items():
23 | param = torch.from_numpy(np.asarray(h5f[k]))
24 | v.copy_(param)
25 |
26 | def weights_normal_init(model, dev=0.01):
27 | if isinstance(model, list):
28 | for m in model:
29 | weights_normal_init(m, dev)
30 | else:
31 | for m in model.modules():
32 | if isinstance(m, nn.Conv2d):
33 | m.weight.data.normal_(0.0, dev)
34 | elif isinstance(m, nn.Linear):
35 | m.weight.data.normal_(0.0, dev)
36 |
37 |
38 | def clip_gradient(model, clip_norm):
39 | """Computes a gradient clipping coefficient based on gradient norm."""
40 | totalnorm = 0
41 | for p in model.parameters():
42 | if p.requires_grad:
43 | modulenorm = p.grad.data.norm()
44 | totalnorm += modulenorm ** 2
45 | totalnorm = np.sqrt(totalnorm)
46 |
47 | norm = clip_norm / max(totalnorm, clip_norm)
48 | for p in model.parameters():
49 | if p.requires_grad:
50 | p.grad.mul_(norm)
51 |
52 | def vis_detections(im, class_name, dets, thresh=0.8):
53 | """Visual debugging of detections."""
54 | for i in range(np.minimum(10, dets.shape[0])):
55 | bbox = tuple(int(np.round(x)) for x in dets[i, :4])
56 | score = dets[i, -1]
57 | if score > thresh:
58 | cv2.rectangle(im, bbox[0:2], bbox[2:4], (0, 204, 0), 2)
59 | cv2.putText(im, '%s: %.3f' % (class_name, score), (bbox[0], bbox[1] + 15), cv2.FONT_HERSHEY_PLAIN,
60 | 1.0, (0, 0, 255), thickness=1)
61 | return im
62 |
63 |
64 | def adjust_learning_rate(optimizer, decay=0.1):
65 | """Sets the learning rate to the initial LR decayed by 0.5 every 20 epochs"""
66 | for param_group in optimizer.param_groups:
67 | param_group['lr'] = decay * param_group['lr']
68 |
69 |
70 | def save_checkpoint(state, filename):
71 | torch.save(state, filename)
72 |
73 | def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
74 |
75 | sigma_2 = sigma ** 2
76 | box_diff = bbox_pred - bbox_targets
77 | in_box_diff = bbox_inside_weights * box_diff
78 | abs_in_box_diff = torch.abs(in_box_diff)
79 | smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float()
80 | in_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
81 | + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
82 | out_loss_box = bbox_outside_weights * in_loss_box
83 | loss_box = out_loss_box
84 | for i in sorted(dim, reverse=True):
85 | loss_box = loss_box.sum(i)
86 | loss_box = loss_box.mean()
87 | return loss_box
88 |
89 | def _crop_pool_layer(bottom, rois, max_pool=True):
90 | # code modified from
91 | # https://github.com/ruotianluo/pytorch-faster-rcnn
92 | # implement it using stn
93 | # box to affine
94 | # input (x1,y1,x2,y2)
95 | """
96 | [ x2-x1 x1 + x2 - W + 1 ]
97 | [ ----- 0 --------------- ]
98 | [ W - 1 W - 1 ]
99 | [ ]
100 | [ y2-y1 y1 + y2 - H + 1 ]
101 | [ 0 ----- --------------- ]
102 | [ H - 1 H - 1 ]
103 | """
104 | rois = rois.detach()
105 | batch_size = bottom.size(0)
106 | D = bottom.size(1)
107 | H = bottom.size(2)
108 | W = bottom.size(3)
109 |     roi_per_batch = rois.size(0) // batch_size  # integer division so expand() below receives an int
110 | x1 = rois[:, 1::4] / 16.0
111 | y1 = rois[:, 2::4] / 16.0
112 | x2 = rois[:, 3::4] / 16.0
113 | y2 = rois[:, 4::4] / 16.0
114 |
115 | height = bottom.size(2)
116 | width = bottom.size(3)
117 |
118 | # affine theta
119 | zero = Variable(rois.data.new(rois.size(0), 1).zero_())
120 | theta = torch.cat([\
121 | (x2 - x1) / (width - 1),
122 | zero,
123 | (x1 + x2 - width + 1) / (width - 1),
124 | zero,
125 | (y2 - y1) / (height - 1),
126 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
127 |
128 | if max_pool:
129 | pre_pool_size = cfg.POOLING_SIZE * 2
130 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
131 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
132 | .contiguous().view(-1, D, H, W)
133 | crops = F.grid_sample(bottom, grid)
134 | crops = F.max_pool2d(crops, 2, 2)
135 | else:
136 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE)))
137 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
138 | .contiguous().view(-1, D, H, W)
139 | crops = F.grid_sample(bottom, grid)
140 |
141 | return crops, grid
142 |
143 | def _affine_grid_gen(rois, input_size, grid_size):
144 |
145 | rois = rois.detach()
146 | x1 = rois[:, 1::4] / 16.0
147 | y1 = rois[:, 2::4] / 16.0
148 | x2 = rois[:, 3::4] / 16.0
149 | y2 = rois[:, 4::4] / 16.0
150 |
151 | height = input_size[0]
152 | width = input_size[1]
153 |
154 | zero = Variable(rois.data.new(rois.size(0), 1).zero_())
155 | theta = torch.cat([\
156 | (x2 - x1) / (width - 1),
157 | zero,
158 | (x1 + x2 - width + 1) / (width - 1),
159 | zero,
160 | (y2 - y1) / (height - 1),
161 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
162 |
163 | grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size)))
164 |
165 | return grid
166 |
167 | def _affine_theta(rois, input_size):
168 |
169 | rois = rois.detach()
170 | x1 = rois[:, 1::4] / 16.0
171 | y1 = rois[:, 2::4] / 16.0
172 | x2 = rois[:, 3::4] / 16.0
173 | y2 = rois[:, 4::4] / 16.0
174 |
175 | height = input_size[0]
176 | width = input_size[1]
177 |
178 | zero = Variable(rois.data.new(rois.size(0), 1).zero_())
179 |
180 | # theta = torch.cat([\
181 | # (x2 - x1) / (width - 1),
182 | # zero,
183 | # (x1 + x2 - width + 1) / (width - 1),
184 | # zero,
185 | # (y2 - y1) / (height - 1),
186 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
187 |
188 | theta = torch.cat([\
189 | (y2 - y1) / (height - 1),
190 | zero,
191 | (y1 + y2 - height + 1) / (height - 1),
192 | zero,
193 | (x2 - x1) / (width - 1),
194 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3)
195 |
196 | return theta
197 |
198 | def compare_grid_sample():
199 | # do gradcheck
200 | N = random.randint(1, 8)
201 | C = 2 # random.randint(1, 8)
202 | H = 5 # random.randint(1, 8)
203 | W = 4 # random.randint(1, 8)
204 | input = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True)
205 | input_p = input.clone().data.contiguous()
206 |
207 | grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True)
208 | grid_clone = grid.clone().contiguous()
209 |
210 | out_offcial = F.grid_sample(input, grid)
211 | grad_outputs = Variable(torch.rand(out_offcial.size()).cuda())
212 | grad_outputs_clone = grad_outputs.clone().contiguous()
213 | grad_inputs = torch.autograd.grad(out_offcial, (input, grid), grad_outputs.contiguous())
214 | grad_input_off = grad_inputs[0]
215 |
216 |
217 | crf = RoICropFunction()
218 | grid_yx = torch.stack([grid_clone.data[:,:,:,1], grid_clone.data[:,:,:,0]], 3).contiguous().cuda()
219 | out_stn = crf.forward(input_p, grid_yx)
220 | grad_inputs = crf.backward(grad_outputs_clone.data)
221 | grad_input_stn = grad_inputs[0]
222 | pdb.set_trace()
223 |
224 | delta = (grad_input_off.data - grad_input_stn).sum()
225 |
--------------------------------------------------------------------------------
/lib/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
27 |
--------------------------------------------------------------------------------
/lib/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | from . import _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | encode = _mask.encode
77 | decode = _mask.decode
78 | iou = _mask.iou
79 | merge = _mask.merge
80 | area = _mask.area
81 | toBbox = _mask.toBbox
82 | frPyObjects = _mask.frPyObjects
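
A tiny pure-Python sketch of the counts idea described in the header comment above (not the library's LEB128-compressed encoding): run lengths are recorded starting with the number of leading zeros.

def rle_counts(m):
    """Run lengths of a flat binary vector; counts[0] is the leading-zero run."""
    counts, prev, run = [], 0, 0
    for v in m:
        if v == prev:
            run += 1
        else:
            counts.append(run)
            prev, run = v, 1
    counts.append(run)
    return counts

print(rle_counts([0, 0, 1, 1, 1, 0, 1]))  # [2, 3, 1, 1]
print(rle_counts([1, 1, 1, 1, 1, 1, 0]))  # [0, 6, 1]
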
--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.c:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include <math.h>
9 | #include <stdlib.h>
10 |
11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
12 | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
13 |
14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
16 |   if(cnts) for(siz j=0; j<m; j++) R->cnts[j]=cnts[j];
17 | }
18 |
19 | void rleFree( RLE *R ) {
20 | free(R->cnts); R->cnts=0;
21 | }
22 |
23 | void rlesInit( RLE **R, siz n ) {
24 | *R = (RLE*) malloc(sizeof(RLE)*n);
25 | for(siz i=0; i0 ) {
61 | c=umin(ca,cb); cc+=c; ct=0;
62 | ca-=c; if(!ca && a0) {
83 | crowd=iscrowd!=NULL && iscrowd[g];
84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb;
86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
88 | while( ct>0 ) {
89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
90 | ca-=c; if(!ca && ad?1:c=dy && xs>xe) || (dxye);
151 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
152 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
153 | if(dx>=dy) for( int d=0; d<=dx; d++ ) {
154 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
155 | } else for( int d=0; d<=dy; d++ ) {
156 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
157 | }
158 | }
159 | // get points along y-boundary and downsample
160 | free(x); free(y); k=m; m=0; double xd, yd;
161 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
162 | for( j=1; jw-1 ) continue;
165 | yd=(double)(v[j]h) yd=h; yd=ceil(yd);
167 | x[m]=(int) xd; y[m]=(int) yd; m++;
168 | }
169 | // compute rle encoding given y-boundary points
170 | k=m; a=malloc(sizeof(uint)*(k+1));
171 | for( j=0; j0) b[m++]=a[j++]; else {
177 | j++; if(jm, p=0; long x; bool more;
184 | char *s=malloc(sizeof(char)*m*6);
185 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
187 | while( more ) {
188 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
189 | if(more) c |= 0x20; c+=48; s[p++]=c;
190 | }
191 | }
192 | s[p]=0; return s;
193 | }
194 |
195 | void rleFrString( RLE *R, char *s, siz h, siz w ) {
196 | siz m=0, p=0, k; long x; bool more; uint *cnts;
197 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
198 | while( s[p] ) {
199 | x=0; k=0; more=1;
200 | while( more ) {
201 | char c=s[p]-48; x |= (c & 0x1f) << 5*k;
202 | more = c & 0x20; p++; k++;
203 | if(!more && (c & 0x10)) x |= -1 << 5*k;
204 | }
205 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
206 | }
207 | rleInit(R,h,w,m,cnts); free(cnts);
208 | }
209 |
--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 | #include <stdbool.h>
9 |
10 | typedef unsigned int uint;
11 | typedef unsigned long siz;
12 | typedef unsigned char byte;
13 | typedef double* BB;
14 | typedef struct { siz h, w, m; uint *cnts; } RLE;
15 |
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
18 | void rleFree( RLE *R );
19 |
20 | // Initialize/destroy RLE array.
21 | void rlesInit( RLE **R, siz n );
22 | void rlesFree( RLE **R, siz n );
23 |
24 | // Encode binary masks using RLE.
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
26 |
27 | // Decode binary masks encoded via RLE.
28 | void rleDecode( const RLE *R, byte *mask, siz n );
29 |
30 | // Compute union or intersection of encoded masks.
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
32 |
33 | // Compute area of encoded masks.
34 | void rleArea( const RLE *R, siz n, uint *a );
35 |
36 | // Compute intersection over union between masks.
37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
38 |
39 | // Compute intersection over union between bounding boxes.
40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
41 |
42 | // Get bounding boxes surrounding encoded masks.
43 | void rleToBbox( const RLE *R, BB bb, siz n );
44 |
45 | // Convert bounding boxes to encoded masks.
46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
47 |
48 | // Convert polygon to encoded mask.
49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
50 |
51 | // Get compressed string representation of encoded mask.
52 | char* rleToString( const RLE *R );
53 |
54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w );
56 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Xinlei Chen
6 | # --------------------------------------------------------
7 |
8 | """Compute minibatch blobs for training a Fast R-CNN network."""
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | import numpy as np
14 | import numpy.random as npr
15 | from scipy.misc import imread
16 | from model.utils.config import cfg
17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
18 | import pdb
19 |
20 | def get_minibatch(roidb, num_classes, target_size):
21 | """Given a roidb, construct a minibatch sampled from it."""
22 | num_images = len(roidb)
23 | # Sample random scales to use for each image in this batch
24 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
25 | 'num_images ({}) must divide BATCH_SIZE ({})'. \
26 | format(num_images, cfg.TRAIN.BATCH_SIZE)
27 |
28 | # Get the input image blob, formatted for caffe
29 | im_blob, im_scales = _get_image_blob(roidb, target_size)
30 |
31 | blobs = {'data': im_blob}
32 |
33 | assert len(im_scales) == 1, "Single batch only"
34 | assert len(roidb) == 1, "Single batch only"
35 |
36 | # gt boxes: (x1, y1, x2, y2, cls)
37 | if cfg.TRAIN.USE_ALL_GT:
38 | # Include all ground truth boxes
39 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
40 | else:
41 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd''
42 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0]
43 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
44 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
45 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
46 | blobs['gt_boxes'] = gt_boxes
47 | blobs['im_info'] = np.array(
48 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
49 | dtype=np.float32)
50 |
51 | blobs['img_id'] = roidb[0]['img_id']
52 |
53 | return blobs
54 |
55 | def _get_image_blob(roidb, target_size):
56 | """Builds an input blob from the images in the roidb at the specified
57 | scales.
58 | """
59 | num_images = len(roidb)
60 |
61 | processed_ims = []
62 | im_scales = []
63 | for i in range(num_images):
64 | #im = cv2.imread(roidb[i]['image'])
65 | im = imread(roidb[i]['image'])
66 |
67 | if len(im.shape) == 2:
68 | im = im[:,:,np.newaxis]
69 | im = np.concatenate((im,im,im), axis=2)
70 | # flip the channel, since the original one using cv2
71 | # rgb -> bgr
72 | im = im[:,:,::-1]
73 |
74 | if roidb[i]['flipped']:
75 | im = im[:, ::-1, :]
76 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size[i],
77 | cfg.TRAIN.MAX_SIZE)
78 | im_scales.append(im_scale)
79 | processed_ims.append(im)
80 |
81 | # Create a blob to hold the input images
82 | blob = im_list_to_blob(processed_ims)
83 |
84 | return blob, im_scales
85 |
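
A sketch of the blob dictionary get_minibatch returns; the shapes and values are illustrative only. im_info packs the padded blob height, width, and the resize scale, and gt_boxes are already scaled into blob coordinates.

import numpy as np

# Illustrative output of get_minibatch for one image resized with im_scale 1.6:
blobs = {
    'data':     np.zeros((1, 600, 800, 3), dtype=np.float32),   # (N, H, W, 3), BGR, mean-subtracted
    'gt_boxes': np.array([[80., 160., 400., 560., 12.]],        # (x1, y1, x2, y2, class) in blob coords
                         dtype=np.float32),
    'im_info':  np.array([[600., 800., 1.6]], dtype=np.float32),  # (height, width, im_scale)
    'img_id':   42,
}
print(blobs['gt_boxes'].shape, blobs['im_info'])
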
--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import datasets
7 | import numpy as np
8 | from model.utils.config import cfg
9 | from datasets.factory import get_imdb
10 | import PIL
11 | import pdb
12 |
13 | def prepare_roidb(imdb):
14 | """Enrich the imdb's roidb by adding some derived quantities that
15 | are useful for training. This function precomputes the maximum
16 | overlap, taken over ground-truth boxes, between each ROI and
17 | each ground-truth box. The class with maximum overlap is also
18 | recorded.
19 | """
20 |
21 | roidb = imdb.roidb
22 | if not (imdb.name.startswith('coco')):
23 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size
24 | for i in range(imdb.num_images)]
25 |
26 | for i in range(len(imdb.image_index)):
27 | roidb[i]['img_id'] = imdb.image_id_at(i)
28 | roidb[i]['image'] = imdb.image_path_at(i)
29 | if not (imdb.name.startswith('coco')):
30 | roidb[i]['width'] = sizes[i][0]
31 | roidb[i]['height'] = sizes[i][1]
32 | # need gt_overlaps as a dense array for argmax
33 | gt_overlaps = roidb[i]['gt_overlaps'].toarray()
34 | # max overlap with gt over classes (columns)
35 | max_overlaps = gt_overlaps.max(axis=1)
36 | # gt class that had the max overlap
37 | max_classes = gt_overlaps.argmax(axis=1)
38 | roidb[i]['max_classes'] = max_classes
39 | roidb[i]['max_overlaps'] = max_overlaps
40 | # sanity checks
41 | # max overlap of 0 => class should be zero (background)
42 | zero_inds = np.where(max_overlaps == 0)[0]
43 | assert all(max_classes[zero_inds] == 0)
44 | # max overlap > 0 => class should not be zero (must be a fg class)
45 | nonzero_inds = np.where(max_overlaps > 0)[0]
46 | assert all(max_classes[nonzero_inds] != 0)
47 |
48 |
49 | def rank_roidb_ratio(roidb):
50 | # rank roidb based on the ratio between width and height.
51 | ratio_large = 2 # largest ratio to preserve.
52 | ratio_small = 0.5 # smallest ratio to preserve.
53 |
54 | ratio_list = []
55 | for i in range(len(roidb)):
56 | width = roidb[i]['width']
57 | height = roidb[i]['height']
58 | ratio = width / float(height)
59 |
60 | if ratio > ratio_large:
61 | roidb[i]['need_crop'] = 1
62 | ratio = ratio_large
63 | elif ratio < ratio_small:
64 | roidb[i]['need_crop'] = 1
65 | ratio = ratio_small
66 | else:
67 | roidb[i]['need_crop'] = 0
68 |
69 | ratio_list.append(ratio)
70 |
71 | ratio_list = np.array(ratio_list)
72 | ratio_index = np.argsort(ratio_list)
73 | return ratio_list[ratio_index], ratio_index
74 |
75 | def filter_roidb(roidb):
76 |     # filter out images that have no bounding boxes.
77 | print('before filtering, there are %d images...' % (len(roidb)))
78 | i = 0
79 | while i < len(roidb):
80 | if len(roidb[i]['boxes']) == 0:
81 | del roidb[i]
82 | i -= 1
83 | i += 1
84 |
85 | print('after filtering, there are %d images...' % (len(roidb)))
86 | return roidb
87 |
88 | def combined_roidb(imdb_names, training=True):
89 | """
90 | Combine multiple roidbs
91 | """
92 |
93 | def get_training_roidb(imdb):
94 | """Returns a roidb (Region of Interest database) for use in training."""
95 | if cfg.TRAIN.USE_FLIPPED:
96 | print('Appending horizontally-flipped training examples...')
97 | imdb.append_flipped_images()
98 | print('done')
99 |
100 | print('Preparing training data...')
101 |
102 | prepare_roidb(imdb)
103 | #ratio_index = rank_roidb_ratio(imdb)
104 | print('done')
105 |
106 | return imdb.roidb
107 |
108 | def get_roidb(imdb_name):
109 | imdb = get_imdb(imdb_name)
110 | print('Loaded dataset `{:s}` for training'.format(imdb.name))
111 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
112 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
113 | roidb = get_training_roidb(imdb)
114 | return roidb
115 |
116 | roidbs = [get_roidb(s) for s in imdb_names.split('+')]
117 | roidb = roidbs[0]
118 |
119 | if len(roidbs) > 1:
120 | for r in roidbs[1:]:
121 | roidb.extend(r)
122 | tmp = get_imdb(imdb_names.split('+')[1])
123 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
124 | else:
125 | imdb = get_imdb(imdb_names)
126 |
127 | if training:
128 | roidb = filter_roidb(roidb)
129 |
130 | ratio_list, ratio_index = rank_roidb_ratio(roidb)
131 |
132 | return imdb, roidb, ratio_list, ratio_index
133 |
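
A quick sketch (toy roidb entries, assuming lib/ is on the Python path) of what rank_roidb_ratio above does: aspect ratios are clamped to [0.5, 2] and entries outside that range are flagged with need_crop.

from roi_data_layer.roidb import rank_roidb_ratio

# Toy roidb with three aspect ratios: wide, square, tall.
toy_roidb = [
    {'width': 1000, 'height': 300},   # ratio 3.33 -> clamped to 2,   need_crop = 1
    {'width': 600,  'height': 600},   # ratio 1.0  -> kept,           need_crop = 0
    {'width': 200,  'height': 800},   # ratio 0.25 -> clamped to 0.5, need_crop = 1
]
ratio_list, ratio_index = rank_roidb_ratio(toy_roidb)
print(ratio_list)                                         # [0.5 1. 2.]
print([toy_roidb[i]['need_crop'] for i in ratio_index])   # [1, 0, 1]
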
--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | # --------------------------------------------------------
3 | # Fast R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick
7 | # --------------------------------------------------------
8 |
9 | import os
10 | from os.path import join as pjoin
11 | import numpy as np
12 | from distutils.core import setup
13 | from distutils.extension import Extension
14 | from Cython.Distutils import build_ext
15 |
16 |
17 | def find_in_path(name, path):
18 | "Find a file in a search path"
19 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
20 | for dir in path.split(os.pathsep):
21 | binpath = pjoin(dir, name)
22 | if os.path.exists(binpath):
23 | return os.path.abspath(binpath)
24 | return None
25 |
26 |
27 | # def locate_cuda():
28 | # """Locate the CUDA environment on the system
29 | #
30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
31 | # and values giving the absolute path to each directory.
32 | #
33 | # Starts by looking for the CUDAHOME env variable. If not found, everything
34 | # is based on finding 'nvcc' in the PATH.
35 | # """
36 | #
37 | # # first check if the CUDAHOME env variable is in use
38 | # if 'CUDAHOME' in os.environ:
39 | # home = os.environ['CUDAHOME']
40 | # nvcc = pjoin(home, 'bin', 'nvcc')
41 | # else:
42 | # # otherwise, search the PATH for NVCC
43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
45 | # if nvcc is None:
46 | # raise EnvironmentError('The nvcc binary could not be '
47 | # 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
48 | # home = os.path.dirname(os.path.dirname(nvcc))
49 | #
50 | # cudaconfig = {'home': home, 'nvcc': nvcc,
51 | # 'include': pjoin(home, 'include'),
52 | # 'lib64': pjoin(home, 'lib64')}
53 | # for k, v in cudaconfig.iteritems():
54 | # if not os.path.exists(v):
55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
56 | #
57 | # return cudaconfig
58 |
59 |
60 | # CUDA = locate_cuda()
61 |
62 | # Obtain the numpy include directory. This logic works across numpy versions.
63 | try:
64 | numpy_include = np.get_include()
65 | except AttributeError:
66 | numpy_include = np.get_numpy_include()
67 |
68 |
69 | def customize_compiler_for_nvcc(self):
70 | """inject deep into distutils to customize how the dispatch
71 | to gcc/nvcc works.
72 |
73 | If you subclass UnixCCompiler, it's not trivial to get your subclass
74 | injected in, and still have the right customizations (i.e.
75 | distutils.sysconfig.customize_compiler) run on it. So instead of going
76 |     the OO route, I have this. Note, it's kind of like a weird functional
77 | subclassing going on."""
78 |
79 |     # tell the compiler it can process .cu files
80 | self.src_extensions.append('.cu')
81 |
82 |     # save references to the default compiler_so and _compile methods
83 | default_compiler_so = self.compiler_so
84 | super = self._compile
85 |
86 | # now redefine the _compile method. This gets executed for each
87 | # object but distutils doesn't have the ability to change compilers
88 | # based on source extension: we add it.
89 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
90 | print(extra_postargs)
91 | if os.path.splitext(src)[1] == '.cu':
92 | # use the cuda for .cu files
93 | self.set_executable('compiler_so', CUDA['nvcc'])
94 | # use only a subset of the extra_postargs, which are 1-1 translated
95 | # from the extra_compile_args in the Extension class
96 | postargs = extra_postargs['nvcc']
97 | else:
98 | postargs = extra_postargs['gcc']
99 |
100 | super(obj, src, ext, cc_args, postargs, pp_opts)
101 | # reset the default compiler_so, which we might have changed for cuda
102 | self.compiler_so = default_compiler_so
103 |
104 | # inject our redefined _compile method into the class
105 | self._compile = _compile
106 |
107 |
108 | # run the customize_compiler
109 | class custom_build_ext(build_ext):
110 | def build_extensions(self):
111 | customize_compiler_for_nvcc(self.compiler)
112 | build_ext.build_extensions(self)
113 |
114 |
115 | ext_modules = [
116 | Extension(
117 | "model.utils.cython_bbox",
118 | ["model/utils/bbox.pyx"],
119 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
120 | include_dirs=[numpy_include]
121 | ),
122 | Extension(
123 | 'pycocotools._mask',
124 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
125 | include_dirs=[numpy_include, 'pycocotools'],
126 | extra_compile_args={
127 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
128 | ),
129 | ]
130 |
131 | setup(
132 | name='faster_rcnn',
133 | ext_modules=ext_modules,
134 | # inject our custom trigger
135 | cmdclass={'build_ext': custom_build_ext},
136 | )
137 |
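
A hedged sketch of how a CUDA extension could be declared so the nvcc-aware _compile() above picks the right flags: extra_compile_args is a dict keyed by compiler, with 'nvcc' used for *.cu sources and 'gcc' for everything else. The module and file names and the flags here are hypothetical, and the commented-out locate_cuda()/CUDA block would need to be re-enabled to supply real include and library paths.

from distutils.extension import Extension
import numpy as np

cuda_ext = Extension(
    'model.example._cuda_op',                        # hypothetical module name
    sources=['model/example/op.c', 'model/example/op_kernel.cu'],
    include_dirs=[np.get_include()],                 # plus CUDA['include'] once enabled
    libraries=['cudart'],
    extra_compile_args={'gcc': ['-Wno-unused-function'],
                        'nvcc': ['-arch=sm_52', '--compiler-options', "'-fPIC'"]},
)
# Build from lib/ (as noted in pycocotools/mask.py): python setup.py build_ext --inplace
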
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | cffi
3 | opencv-python
4 | scipy
5 | easydict
6 | matplotlib
7 | pyyaml
8 |
--------------------------------------------------------------------------------