├── .gitignore
├── LICENSE
├── README.md
├── _init_paths.py
├── cfgs
├── res101.yml
├── res101_ls.yml
├── res50.yml
└── vgg16.yml
├── demo_global.py
├── docs
└── swda.png
├── lib
├── datasets
│ ├── VOCdevkit-matlab-wrapper
│ │ ├── get_voc_opts.m
│ │ ├── voc_eval.m
│ │ └── xVOCap.m
│ ├── __init__.py
│ ├── cityscape.py
│ ├── cityscape_car.py
│ ├── clipart.py
│ ├── coco.py
│ ├── config_dataset.py
│ ├── ds_utils.py
│ ├── factory.py
│ ├── foggy_cityscape.py
│ ├── imdb.py
│ ├── pascal_voc.py
│ ├── pascal_voc_cycleclipart.py
│ ├── pascal_voc_cyclewater.py
│ ├── pascal_voc_water.py
│ ├── sim10k.py
│ ├── sim10k_cycle.py
│ ├── tools
│ │ ├── list_all_images.py
│ │ ├── mcg_munge.py
│ │ └── multilabel_list.py
│ ├── voc_eval.py
│ └── water.py
├── make.sh
├── model
│ ├── __init__.py
│ ├── faster_rcnn
│ │ ├── __init__.py
│ │ ├── faster_rcnn.py
│ │ ├── faster_rcnn_global.py
│ │ ├── faster_rcnn_global_local.py
│ │ ├── faster_rcnn_local.py
│ │ ├── resnet.py
│ │ ├── resnet_dafrcnn.py
│ │ ├── resnet_global.py
│ │ ├── resnet_global_local.py
│ │ ├── resnet_local.py
│ │ ├── vgg16.py
│ │ ├── vgg16_global.py
│ │ ├── vgg16_global_local.py
│ │ └── vgg16_local.py
│ ├── nms
│ │ ├── .gitignore
│ │ ├── __init__.py
│ │ ├── _ext
│ │ │ ├── __init__.py
│ │ │ └── nms
│ │ │ │ └── __init__.py
│ │ ├── build.py
│ │ ├── make.sh
│ │ ├── nms_cpu.py
│ │ ├── nms_gpu.py
│ │ ├── nms_kernel.cu
│ │ ├── nms_wrapper.py
│ │ └── src
│ │ │ ├── nms_cuda.c
│ │ │ ├── nms_cuda.h
│ │ │ ├── nms_cuda_kernel.cu
│ │ │ └── nms_cuda_kernel.h
│ ├── roi_align
│ │ ├── __init__.py
│ │ ├── _ext
│ │ │ ├── __init__.py
│ │ │ └── roi_align
│ │ │ │ └── __init__.py
│ │ ├── build.py
│ │ ├── functions
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── make.sh
│ │ ├── modules
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ └── src
│ │ │ ├── roi_align.c
│ │ │ ├── roi_align.h
│ │ │ ├── roi_align_cuda.c
│ │ │ ├── roi_align_cuda.h
│ │ │ ├── roi_align_kernel.cu
│ │ │ └── roi_align_kernel.h
│ ├── roi_crop
│ │ ├── __init__.py
│ │ ├── _ext
│ │ │ ├── __init__.py
│ │ │ ├── crop_resize
│ │ │ │ └── __init__.py
│ │ │ └── roi_crop
│ │ │ │ └── __init__.py
│ │ ├── build.py
│ │ ├── functions
│ │ │ ├── __init__.py
│ │ │ ├── crop_resize.py
│ │ │ ├── gridgen.py
│ │ │ └── roi_crop.py
│ │ ├── make.sh
│ │ ├── modules
│ │ │ ├── __init__.py
│ │ │ ├── gridgen.py
│ │ │ └── roi_crop.py
│ │ └── src
│ │ │ ├── roi_crop.c
│ │ │ ├── roi_crop.h
│ │ │ ├── roi_crop_cuda.c
│ │ │ ├── roi_crop_cuda.h
│ │ │ ├── roi_crop_cuda_kernel.cu
│ │ │ └── roi_crop_cuda_kernel.h
│ ├── roi_pooling
│ │ ├── __init__.py
│ │ ├── _ext
│ │ │ ├── __init__.py
│ │ │ └── roi_pooling
│ │ │ │ └── __init__.py
│ │ ├── build.py
│ │ ├── functions
│ │ │ ├── __init__.py
│ │ │ └── roi_pool.py
│ │ ├── modules
│ │ │ ├── __init__.py
│ │ │ └── roi_pool.py
│ │ └── src
│ │ │ ├── roi_pooling.c
│ │ │ ├── roi_pooling.h
│ │ │ ├── roi_pooling_cuda.c
│ │ │ ├── roi_pooling_cuda.h
│ │ │ ├── roi_pooling_kernel.cu
│ │ │ └── roi_pooling_kernel.h
│ ├── rpn
│ │ ├── __init__.py
│ │ ├── anchor_target_layer.py
│ │ ├── bbox_transform.py
│ │ ├── generate_anchors.py
│ │ ├── proposal_layer.py
│ │ ├── proposal_target_layer_cascade.py
│ │ └── rpn.py
│ └── utils
│ │ ├── .gitignore
│ │ ├── __init__.py
│ │ ├── bbox.pyx
│ │ ├── blob.py
│ │ ├── config.py
│ │ ├── net_utils.py
│ │ └── parser_func.py
├── pycocotools
│ ├── UPSTREAM_REV
│ ├── __init__.py
│ ├── _mask.pyx
│ ├── coco.py
│ ├── cocoeval.py
│ ├── license.txt
│ ├── mask.py
│ ├── maskApi.c
│ └── maskApi.h
├── roi_data_layer
│ ├── __init__.py
│ ├── minibatch.py
│ ├── roibatchLoader.py
│ └── roidb.py
└── setup.py
├── requirements.txt
├── test_net.py
├── test_net_global.py
├── test_net_global_local.py
├── test_net_local.py
├── test_net_so.py
├── test_scripts
├── city2foggycity_sample.sh
├── clipart_sample.sh
├── sim10k2cityscape_sample.sh
└── watercolor_sample.sh
├── train_scripts
├── city2foggycity_sample.sh
├── clipart_sample.sh
├── sim10k2cityscape_sample.sh
└── watercolor_sample.sh
├── trainval_net_global.py
├── trainval_net_global_local.py
├── trainval_net_local.py
└── trainval_net_so.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 |
3 | # READ THIS BEFORE YOU REFACTOR ME
4 | #
5 | # setup.py uses the list of patterns in this file to decide
6 | # what to delete, but it's not 100% sound. So, for example,
7 | # if you delete aten/build/ because it's redundant with build/,
8 | # aten/build/ will stop being cleaned. So be careful when
9 | # refactoring this file!
10 |
11 | ## PyTorch
12 |
13 | .mypy_cache
14 | *.pyc
15 | */*.pyc
16 | */*.so*
17 | */**/__pycache__
18 | */**/*.dylib*
19 | */**/*.pyc
20 | */**/*.pyd
21 | */**/*.so*
22 | */**/**/*.pyc
23 | */**/**/**/*.pyc
24 | */**/**/**/**/*.pyc
25 | aten/build/
26 | aten/src/ATen/Config.h
27 | aten/src/ATen/cuda/CUDAConfig.h
28 | build/
29 | dist/
30 | docs/src/**/*
31 | test/.coverage
32 | test/cpp/api/mnist
33 | test/data/gpu_tensors.pt
34 | test/data/legacy_modules.t7
35 | test/data/legacy_serialized.pt
36 | test/data/linear.pt
37 | test/htmlcov
38 | third_party/build/
39 | tools/shared/_utils_internal.py
40 | torch.egg-info/
41 | torch/csrc/autograd/generated/*
42 | torch/csrc/cudnn/cuDNN.cpp
43 | torch/csrc/generated
44 | torch/csrc/generic/TensorMethods.cpp
45 | torch/csrc/jit/generated/*
46 | torch/csrc/nn/THCUNN.cpp
47 | torch/csrc/nn/THCUNN.cwrap
48 | torch/csrc/nn/THNN_generic.cpp
49 | torch/csrc/nn/THNN_generic.cwrap
50 | torch/csrc/nn/THNN_generic.h
51 | torch/csrc/nn/THNN.cpp
52 | torch/csrc/nn/THNN.cwrap
53 | torch/lib/*.a*
54 | torch/lib/*.dll*
55 | torch/lib/*.dylib*
56 | torch/lib/*.h
57 | torch/lib/*.lib
58 | torch/lib/*.so*
59 | torch/lib/build
60 | torch/lib/cmake
61 | torch/lib/include
62 | torch/lib/pkgconfig
63 | torch/lib/protoc
64 | torch/lib/tmp_install
65 | torch/lib/torch_shm_manager
66 | torch/version.py
67 |
68 | # IPython notebook checkpoints
69 | .ipynb_checkpoints
70 |
71 | # Editor temporaries
72 | *.swn
73 | *.swo
74 | *.swp
75 | *.swm
76 | *~
77 |
78 | # macOS dir files
79 | .DS_Store
80 |
81 | # Symbolic files
82 | tools/shared/cwrap_common.py
83 |
84 | # Ninja files
85 | .ninja_deps
86 | .ninja_log
87 | compile_commands.json
88 | *.egg-info/
89 | docs/source/scripts/activation_images/
90 |
91 | ## General
92 |
93 | # Compiled Object files
94 | *.slo
95 | *.lo
96 | *.o
97 | *.cuo
98 | *.obj
99 |
100 | # Compiled Dynamic libraries
101 | *.so
102 | *.dylib
103 | *.dll
104 |
105 | # Compiled Static libraries
106 | *.lai
107 | *.la
108 | *.a
109 | *.lib
110 |
111 | # Compiled protocol buffers
112 | *.pb.h
113 | *.pb.cc
114 | *_pb2.py
115 |
116 | # Compiled python
117 | *.pyc
118 | *.pyd
119 |
120 | # Compiled MATLAB
121 | *.mex*
122 |
123 | # IPython notebook checkpoints
124 | .ipynb_checkpoints
125 |
126 | # Editor temporaries
127 | *.swn
128 | *.swo
129 | *.swp
130 | *~
131 |
132 | # Sublime Text settings
133 | *.sublime-workspace
134 | *.sublime-project
135 |
136 | # Eclipse Project settings
137 | *.*project
138 | .settings
139 |
140 | # QtCreator files
141 | *.user
142 |
143 | # PyCharm files
144 | .idea
145 |
146 | # Visual Studio Code files
147 | .vscode
148 | .vs
149 |
150 | # OSX dir files
151 | .DS_Store
152 |
153 | ## Caffe2
154 |
155 | # build, distribute, and bins (+ python proto bindings)
156 | build
157 | build_host_protoc
158 | build_android
159 | build_ios
160 | /build_*
161 | .build_debug/*
162 | .build_release/*
163 | distribute/*
164 | *.testbin
165 | *.bin
166 | cmake_build
167 | .cmake_build
168 | gen
169 | .setuptools-cmake-build
170 | .pytest_cache
171 | aten/build/*
172 |
173 | # Bram
174 | plsdontbreak
175 |
176 | # Generated documentation
177 | docs/_site
178 | docs/gathered
179 | _site
180 | doxygen
181 | docs/dev
182 |
183 | # LevelDB files
184 | *.sst
185 | *.ldb
186 | LOCK
187 | LOG*
188 | CURRENT
189 | MANIFEST-*
190 |
191 | # generated version file
192 | caffe2/version.py
193 |
194 | # setup.py intermediates
195 | .eggs
196 | caffe2.egg-info
197 |
198 | # Atom/Watchman required file
199 | .watchmanconfig
200 |
201 | # cython generated files
202 | lib/model/utils/bbox.c
203 | lib/pycocotools/_mask.c
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Jianwei Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # A Pytorch Implementation of [Strong-Weak Distribution Alignment for Adaptive Object Detection](https://arxiv.org/pdf/1812.04798.pdf) (CVPR 2019)
2 |
3 |
4 |
5 | ## Introduction
6 | Follow the [faster-rcnn repository](https://github.com/jwyang/faster-rcnn.pytorch)
7 | to set up the environment. When installing pytorch-faster-rcnn, you may encounter some issues;
8 | many environment setup problems have already been reported there. We used PyTorch 0.4.0 for this project.
9 | Other versions of PyTorch may cause errors, which have to be handled on a per-environment basis.
10 |
11 | ### Data Preparation
12 |
13 | * **PASCAL_VOC 07+12**: Please follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare VOC datasets.
14 | * **Clipart, WaterColor**: See the dataset preparation instructions in [Cross Domain Detection](https://github.com/naoto0804/cross-domain-detection/tree/master/datasets). Images translated by CycleGAN are also available on that website.
15 | * **Sim10k**: Website [Sim10k](https://fcav.engin.umich.edu/sim-dataset/)
16 | * **Cityscape-Translated Sim10k**: TBA
17 | * **Cityscape, FoggyCityscape**: Download from the [Cityscape](https://www.cityscapes-dataset.com/) website; see the dataset preparation code in [DA-Faster RCNN](https://github.com/yuhuayc/da-faster-rcnn/tree/master/prepare_data)
18 |
19 | All code is written to work with datasets in the PASCAL_VOC format.
20 | For example, the dataset [Sim10k](https://fcav.engin.umich.edu/sim-dataset/) is stored as follows.
21 |
22 | ```
23 | $ cd Sim10k/VOC2012/
24 | $ ls
25 | Annotations ImageSets JPEGImages
26 | $ cat ImageSets/Main/val.txt
27 | 3384827.jpg
28 | 3384828.jpg
29 | 3384829.jpg
30 | .
31 | .
32 | .
33 | ```
34 | If you want to test the code on your own dataset, arrange the dataset
35 | in the PASCAL VOC format, create a dataset class in lib/datasets/, and register
36 | it in lib/datasets/factory.py and lib/datasets/config_dataset.py. Then, add the dataset option to lib/model/utils/parser_func.py.
37 | ### Data Path
38 | Write your dataset directories' paths in lib/datasets/config_dataset.py.
39 |
40 | ### Pretrained Model
41 |
42 | We used two models pre-trained on ImageNet in our experiments, VGG and ResNet101. You can download these two models from:
43 |
44 | * VGG16: [Dropbox](https://www.dropbox.com/s/s3brpk0bdq60nyb/vgg16_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/vgg16_caffe.pth)
45 |
46 | * ResNet101: [Dropbox](https://www.dropbox.com/s/iev3tkbz5wyyuz9/resnet101_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/resnet101_caffe.pth)
47 |
48 | Download them and write the path in __C.VGG_PATH and __C.RESNET_PATH at lib/model/utils/config.py.
49 |
50 | #### sample model
51 | Global-local alignment model for watercolor dataset.
52 |
53 | * ResNet101 (adapted to water color) [GoogleDrive](https://drive.google.com/file/d/1pzj2jKFwtGzwjZTeEyeDSnNlPU1MZ4t9/view?usp=sharing)
54 |
55 | ## Train
56 | * Sample training scripts are in the train_scripts folder.
57 | * With only local alignment loss,
58 | ```
59 | CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net_local.py \
60 | --dataset source_dataset --dataset_t target_dataset --net vgg16 \
61 | --cuda
62 | ```
63 | Add --lc when using context-vector based regularization loss.
64 |
65 | * With only global alignment loss,
66 | ```
67 | CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net_global.py \
68 | --dataset source_dataset --dataset_t target_dataset --net vgg16 \
69 | --cuda
70 | ```
71 | Add --gc when using context-vector based regularization loss.
72 | * With global and local alignment loss,
73 | ```
74 | CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net_global_local.py \
75 | --dataset source_dataset --dataset_t target_dataset --net vgg16 \
76 | --cuda
77 | ```
78 | Add --lc and --gc when using context-vector based regularization loss.
79 | ## Test
80 | * Sample test scripts are in the test_scripts folder.
81 |
82 | ```
83 | CUDA_VISIBLE_DEVICES=$GPU_ID python test_net_global_local.py \
84 | --dataset target_dataset --net vgg16 \
85 | --cuda --lc --gc --load_name path_to_model
86 | ```
87 |
88 | ## Citation
89 | Please cite the following reference if you utilize this repository for your project.
90 |
91 | ```
92 | @article{saito2018strong,
93 | title={Strong-Weak Distribution Alignment for Adaptive Object Detection},
94 | author={Saito, Kuniaki and Ushiku, Yoshitaka and Harada, Tatsuya and Saenko, Kate},
95 | journal={arXiv},
96 | year={2018}
97 | }
98 | ```
99 |
--------------------------------------------------------------------------------
/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
def add_path(path):
    """Put ``path`` at the front of ``sys.path`` unless it is already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)
7 |
# Everything is resolved relative to the directory that holds this file.
this_dir = osp.dirname(__file__)

# Directories that have to be importable: the project's lib/ tree and the
# COCO Python API expected under data/coco/PythonAPI.
lib_path = osp.join(this_dir, 'lib')
coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')

# lib is registered first, then coco (each one is pushed to the front of
# sys.path if it is not already there).
add_path(lib_path)
add_path(coco_path)
16 |
--------------------------------------------------------------------------------
/cfgs/res101.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res101
2 | TRAIN:
3 | HAS_RPN: True
4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 | RPN_POSITIVE_OVERLAP: 0.7
6 | RPN_BATCHSIZE: 256
7 | PROPOSAL_METHOD: gt
8 | BG_THRESH_LO: 0.0
9 | DISPLAY: 20
10 | BATCH_SIZE: 128
11 | RPN_POST_NMS_TOP_N_TARGET: 128
12 | WEIGHT_DECAY: 0.0001
13 | DOUBLE_BIAS: False
14 | LEARNING_RATE: 0.001
15 | TEST:
16 | HAS_RPN: True
17 | POOLING_SIZE: 7
18 | POOLING_MODE: align
19 | CROP_RESIZE_WITH_MAX_POOL: False
20 |
--------------------------------------------------------------------------------
/cfgs/res101_ls.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res101
2 | TRAIN:
3 | HAS_RPN: True
4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 | RPN_POSITIVE_OVERLAP: 0.7
6 | RPN_BATCHSIZE: 256
7 | PROPOSAL_METHOD: gt
8 | BG_THRESH_LO: 0.0
9 | DISPLAY: 20
10 | BATCH_SIZE: 128
11 | WEIGHT_DECAY: 0.0001
12 | SCALES: [800]
13 | DOUBLE_BIAS: False
14 | LEARNING_RATE: 0.001
15 | TEST:
16 | HAS_RPN: True
17 | SCALES: [800]
18 | MAX_SIZE: 1200
19 | RPN_POST_NMS_TOP_N: 1000
20 | POOLING_SIZE: 7
21 | POOLING_MODE: align
22 | CROP_RESIZE_WITH_MAX_POOL: False
23 |
--------------------------------------------------------------------------------
/cfgs/res50.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: res50
2 | TRAIN:
3 | HAS_RPN: True
4 | # IMS_PER_BATCH: 1
5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
6 | RPN_POSITIVE_OVERLAP: 0.7
7 | RPN_BATCHSIZE: 256
8 | PROPOSAL_METHOD: gt
9 | BG_THRESH_LO: 0.0
10 | DISPLAY: 20
11 | BATCH_SIZE: 256
12 | WEIGHT_DECAY: 0.0001
13 | DOUBLE_BIAS: False
14 | SNAPSHOT_PREFIX: res50_faster_rcnn
15 | TEST:
16 | HAS_RPN: True
17 | POOLING_MODE: crop
18 |
--------------------------------------------------------------------------------
/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
1 | EXP_DIR: vgg16
2 | TRAIN:
3 | HAS_RPN: True
4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
5 | RPN_POSITIVE_OVERLAP: 0.7
6 | RPN_BATCHSIZE: 256
7 | PROPOSAL_METHOD: gt
8 | BG_THRESH_LO: 0.0
9 | BATCH_SIZE: 256
10 | RPN_POST_NMS_TOP_N_TARGET: 256
11 | LEARNING_RATE: 0.001
12 | TEST:
13 | HAS_RPN: True
14 | POOLING_MODE: align
15 | CROP_RESIZE_WITH_MAX_POOL: False
16 |
--------------------------------------------------------------------------------
/docs/swda.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/docs/swda.png
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Run VOCinit from the devkit located at PATH and return VOCopts.
%
%   PATH is the directory expected to contain a 'VOCcode' folder.
%
%   VOCopts is never assigned explicitly in this function; presumably
%   VOCinit is a script that defines it in this workspace -- TODO confirm
%   against the VOC devkit.
%
%   Any error raised by addpath or VOCinit is reported as
%   'VOCcode directory not found under <PATH>'.  The working directory and
%   the 'VOCcode' path entry are restored on both the success and the
%   failure path.

tmp = pwd;  % remember the caller's working directory
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo the path / working-directory changes before reporting the failure.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);
15 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detections for every class and print the resulting APs.
%
%   PATH       directory handed to get_voc_opts to obtain VOCopts.
%   COMP_ID    identifier forwarded to the per-class evaluation.
%   TEST_SET   stored in VOCopts.testset before evaluating.
%   OUTPUT_DIR directory forwarded to the per-class evaluation for its
%              output files.
%
%   RES is a struct array with one element per entry of VOCopts.classes
%   (fields as returned by voc_eval_cls).

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

% Summary: one AP per class (as a percentage), followed by the mean AP.
fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 |
function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate one class and save its precision/recall results.
%
%   Returns a struct with fields recall, prec, ap and ap_auc.  When the
%   evaluation is skipped (see do_eval below) recall/prec stay empty and
%   both AP values stay 0.
%
%   Side effects: temporarily adds <VOCopts.datadir>/VOCcode to the MATLAB
%   path, writes <output_dir>/<cls>_pr.jpg (only when evaluating) and
%   <output_dir>/<cls>_pr.mat, and prints one '!!!' summary line.

test_set = VOCopts.testset;
% Characters from the 4th onwards; presumably VOCopts.dataset looks like
% 'VOC2007' so this is the year -- TODO confirm against VOCinit.
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is computed but not used anywhere else in this function.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

% Defaults reported when the evaluation is skipped.
recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate unless this is the 'test' set of a year after 2007.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  % Save the current figure as a JPEG next to the other outputs.
  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 |
--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
function ap = xVOCap(rec,prec)
% XVOCAP  Average precision computed from recall/precision column vectors.
%   Routine taken from the PASCAL VOC 2011 devkit.

% Pad both curves with end points: recall 0..1, precision 0 at both ends.
mrec = [0; rec; 1];
mpre = [0; prec; 0];

% Sweep right-to-left so every precision value becomes the maximum of
% itself and all values after it.
k = numel(mpre) - 1;
while k >= 1
  mpre(k) = max(mpre(k), mpre(k+1));
  k = k - 1;
end

% Positions whose recall differs from the preceding entry.
steps = find(mrec(2:end) ~= mrec(1:end-1)) + 1;

% Sum of (recall increment) x (precision at that position).
ap = sum((mrec(steps) - mrec(steps-1)) .* mpre(steps));
11 |
--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/datasets/config_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import os.path as osp
7 | import numpy as np
8 | # `pip install easydict` if you don't have it
9 | from easydict import EasyDict as edict
10 |
# Container for the dataset root paths defined below.
__D = edict()
# Consumers can get config by:
#   from datasets.config_dataset import cfg_d
cfg_d = __D
#
# Dataset locations (write the paths of your own dataset directories here).
# With regard to PASCAL, the directories under the path will be ./VOC2007, ./VOC2012.
__D.PASCAL = "/VOCdevkit"
__D.PASCALCLIP = ""
__D.PASCALWATER = "/VOCdevkit"

# For these datasets, the directories under the path will be Annotations, ImageSets, JPEGImages.
__D.CLIPART = "/clipart"
__D.WATER = "/watercolor"
__D.SIM10K = "Sim10k/VOC2012"
__D.CITYSCAPE_CAR = "/VOC2007"
__D.CITYSCAPE = "VOC2007"
__D.FOGGYCITY = "VOC2007"
29 |
30 |
def _merge_a_into_b(a, b):
    """Recursively overlay the options of ``a`` onto ``b``, in place.

    Values present in ``a`` replace the corresponding values in ``b``.
    Nothing happens when ``a`` is not an ``edict``.

    Raises:
        KeyError: ``a`` contains a key that ``b`` does not have.
        ValueError: a value's type differs from the existing one and the
            existing value is not a numpy array.
    """
    if type(a) is not edict:
        return

    for key, new_value in a.items():
        # Only options that already exist in b may be overridden.
        if key not in b:
            raise KeyError('{} is not a valid config key'.format(key))

        current = b[key]
        if type(current) is not type(new_value):
            # A differing type is accepted only for array-valued options,
            # where the incoming value is converted to the existing dtype.
            if not isinstance(current, np.ndarray):
                raise ValueError(('Type mismatch ({} vs. {}) '
                                  'for config key: {}').format(type(current),
                                                               type(new_value), key))
            new_value = np.array(new_value, dtype=current.dtype)

        if type(new_value) is not edict:
            b[key] = new_value
            continue

        # Nested section: descend, and name the offending key on the way
        # out if anything goes wrong (the error itself is re-raised).
        try:
            _merge_a_into_b(a[key], current)
        except BaseException:
            print(('Error under config key: {}'.format(key)))
            raise
62 |
63 |
def cfg_from_file(filename):
    """Load a YAML config file and merge it into the default options.

    Args:
        filename: Path of the YAML file to read.

    Raises:
        KeyError, ValueError: Propagated from ``_merge_a_into_b`` when the
            file names an unknown option or an option of the wrong type.
    """
    import yaml
    with open(filename, 'r') as f:
        # safe_load builds plain Python objects only.  yaml.load() without an
        # explicit Loader can construct arbitrary objects and is rejected
        # with a TypeError by PyYAML >= 6.
        yaml_cfg = edict(yaml.safe_load(f))

    _merge_a_into_b(yaml_cfg, __D)
71 |
72 |
def cfg_from_list(cfg_list):
    """Override config options from a flat ``[key, value, key, value, ...]`` list.

    Keys may be dotted (``A.B``) to reach nested options.  Each value is
    parsed with ``ast.literal_eval``; when parsing fails the raw value is
    used as-is.  The parsed value must have exactly the same type as the
    option it replaces.
    """
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    names = cfg_list[0::2]
    raw_values = cfg_list[1::2]
    for dotted_name, raw in zip(names, raw_values):
        path = dotted_name.split('.')
        # Walk down to the section that owns the final key.
        node = __D
        for part in path[:-1]:
            assert part in node
            node = node[part]
        leaf = path[-1]
        assert leaf in node
        try:
            value = literal_eval(raw)
        except BaseException:
            # Anything that cannot be parsed as a Python literal is kept
            # unchanged (e.g. a bare string).
            value = raw
        assert type(value) == type(node[leaf]), \
            'type {} does not match original type {}'.format(
                type(value), type(node[leaf]))
        node[leaf] = value
94 |
--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Ross Girshick
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import numpy as np
11 |
12 |
def unique_boxes(boxes, scale=1.0):
    """Return the sorted indices of the first occurrence of each distinct box.

    Every row is scaled, rounded and collapsed into a single number by
    weighting its four columns with 1, 1e3, 1e6 and 1e9; rows that produce
    the same number are treated as the same box.
    """
    weights = np.array([1, 1e3, 1e6, 1e9])
    keys = np.dot(np.round(boxes * scale), weights)
    first_seen = np.unique(keys, return_index=True)[1]
    return np.sort(first_seen)
19 |
20 |
def xywh_to_xyxy(boxes):
    """Turn rows of [x y w h] into rows of [x1 y1 x2 y2] (x2 = x + w - 1)."""
    top_left = boxes[:, 0:2]
    size = boxes[:, 2:4]
    bottom_right = top_left + size - 1
    return np.hstack((top_left, bottom_right))
24 |
25 |
def xyxy_to_xywh(boxes):
    """Turn rows of [x1 y1 x2 y2] into rows of [x y w h] (w = x2 - x1 + 1)."""
    top_left = boxes[:, 0:2]
    bottom_right = boxes[:, 2:4]
    size = bottom_right - top_left + 1
    return np.hstack((top_left, size))
29 |
30 |
def validate_boxes(boxes, width=0, height=0):
    """Assert that every [x1 y1 x2 y2] row lies inside a width x height image.

    Each row must satisfy 0 <= x1 <= x2 < width and 0 <= y1 <= y2 < height;
    an AssertionError is raised otherwise.  Note that with the default
    width=0 / height=0 no non-empty set of boxes can pass.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]
    # Coordinates start at the origin ...
    assert (left >= 0).all()
    assert (top >= 0).all()
    # ... the second corner is not before the first ...
    assert (right >= left).all()
    assert (bottom >= top).all()
    # ... and the box stays inside the image.
    assert (right < width).all()
    assert (bottom < height).all()
43 |
44 |
def filter_small_boxes(boxes, min_size):
    """Return the indices of boxes that are at least ``min_size`` in both dimensions.

    Args:
        boxes: 2-D array whose first four columns are read as x1, y1, x2, y2.
        min_size: Smallest accepted value of ``x2 - x1`` and ``y2 - y1``.

    Returns:
        1-D array with the row indices of the boxes to keep.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    # Width and height use the same inclusive threshold; previously height
    # was compared with a strict '>' while width used '>=', so a box that
    # was exactly min_size tall was dropped but one exactly min_size wide
    # was kept.
    keep = np.where((w >= min_size) & (h >= min_size))[0]
    return keep
50 |
--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Factory method for easily getting imdbs by name."""
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | __sets = {}
14 | from datasets.pascal_voc import pascal_voc
15 | from datasets.pascal_voc_water import pascal_voc_water
16 | from datasets.pascal_voc_cyclewater import pascal_voc_cyclewater
17 | from datasets.pascal_voc_cycleclipart import pascal_voc_cycleclipart
18 | from datasets.sim10k import sim10k
19 | from datasets.water import water
20 | from datasets.clipart import clipart
21 | from datasets.sim10k_cycle import sim10k_cycle
22 | from datasets.cityscape import cityscape
23 | from datasets.cityscape_car import cityscape_car
24 | from datasets.foggy_cityscape import foggy_cityscape
25 |
26 | import numpy as np
# Register a zero-argument factory for every supported dataset/split
# combination under the name that get_imdb() looks up.  The loop variables
# are bound as lambda defaults so that each factory keeps the values of its
# own iteration instead of reading the globals when it is finally called.
for split in ['train', 'trainval', 'val', 'test']:
    name = 'cityscape_{}'.format(split)
    __sets[name] = (lambda split=split: cityscape(split))
for split in ['train', 'trainval', 'val', 'test']:
    name = 'cityscape_car_{}'.format(split)
    __sets[name] = (lambda split=split: cityscape_car(split))
for split in ['train', 'trainval', 'test']:
    name = 'foggy_cityscape_{}'.format(split)
    __sets[name] = (lambda split=split: foggy_cityscape(split))
for split in ['train', 'val']:
    name = 'sim10k_{}'.format(split)
    __sets[name] = (lambda split=split: sim10k(split))
for split in ['train', 'val']:
    name = 'sim10k_cycle_{}'.format(split)
    __sets[name] = (lambda split=split: sim10k_cycle(split))
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_water_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: pascal_voc_water(split, year))
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_cycleclipart_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: pascal_voc_cycleclipart(split, year))
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_cyclewater_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: pascal_voc_cyclewater(split, year))
# clipart and water are registered without a year in their name, but their
# constructors still receive one.  It is bound here as a default; the old
# lambdas looked up the global `year` at call time and only worked because
# the last loop over `year` happened to leave it at '2007'.
for year in ['2007']:
    for split in ['trainval', 'test']:
        name = 'clipart_{}'.format(split)
        __sets[name] = (lambda split=split, year=year: clipart(split, year))
for year in ['2007']:
    for split in ['train', 'test']:
        name = 'water_{}'.format(split)
        __sets[name] = (lambda split=split, year=year: water(split, year))
def get_imdb(name):
    """Build and return the image database registered under ``name``.

    Raises:
        KeyError: no dataset is registered under ``name``.
    """
    if name in __sets:
        factory = __sets[name]
        return factory()
    raise KeyError('Unknown dataset: {}'.format(name))
71 |
72 |
def list_imdbs():
    """Return the names of all registered image databases as a list."""
    return [registered_name for registered_name in __sets.keys()]
76 |
--------------------------------------------------------------------------------
/lib/datasets/tools/list_all_images.py:
--------------------------------------------------------------------------------
1 | import os
# Writes one "<image path> <class name>" line for every file found in each
# class sub-directory of p_path.
p_path = '/scratch4/keisaito/visda/train'
dir_list = os.listdir(p_path)
# The with-block guarantees the listing is flushed and closed even when
# reading a directory fails part-way (the handle used to be left open).
with open('/scratch4/keisaito/visda/all_images_train.txt', 'w') as write_name:
    for direc in dir_list:
        # Entries whose name contains '.txt' are not class directories.
        if '.txt' in direc:
            continue
        # The directory name is used as the class label.
        # NOTE: remapping to PASCAL names is currently disabled:
        #if class_name == 'motorcycle':
        #    class_name = 'motorbike'
        #if class_name == 'plant':
        #    class_name = 'pottedplant'
        class_name = direc
        for image_name in os.listdir(os.path.join(p_path, direc)):
            file_name = os.path.join(p_path, direc, image_name)
            write_name.write('%s %s\n' % (file_name, class_name))
16 |
17 |
--------------------------------------------------------------------------------
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import sys
4 |
5 | """Hacky tool to convert file system layout of MCG boxes downloaded from
6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
7 | so that it's consistent with those computed by Jan Hosang (see:
8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
9 | computing/research/object-recognition-and-scene-understanding/how-
10 | good-are-detection-proposals-really/)
11 |
12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
14 | """
15 |
def munge(src_dir):
    """Move every file of ``src_dir`` into a nested ``MCG/mat`` layout.

    A file stored as
        ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
    ends up as
        ./MCG/mat/COCO_val2014_0/COCO_val2014_000000193/COCO_val2014_000000193401.mat
    i.e. the two directory levels are the first 14 and the first 22
    characters of the file name without its extension.  The ``MCG`` tree is
    created relative to the current working directory.
    """
    for fn in os.listdir(src_dir):
        stem = os.path.splitext(fn)[0]
        dst_dir = os.path.join('MCG', 'mat', stem[:14], stem[:22])
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        src = os.path.join(src_dir, fn)
        dst = os.path.join(dst_dir, fn)
        print('MV: {} -> {}'.format(src, dst))
        os.rename(src, dst)
34 |
if __name__ == '__main__':
    # Usage: python mcg_munge.py <src_dir>
    # where <src_dir> looks something like 'MCG-COCO-val2014-boxes'.
    munge(sys.argv[1])
40 |
--------------------------------------------------------------------------------
/lib/datasets/tools/multilabel_list.py:
--------------------------------------------------------------------------------
1 | import os
2 | import xml.etree.ElementTree as ET
3 | import sys
4 | argvs = sys.argv
5 |
def load_image_set_index(ref):
    """Read an image set file and return its entries.

    ``ref`` is the path of a text file with one entry per line (for example
    .../VOC2007/ImageSets/Main/val.txt).  Surrounding whitespace is stripped
    from every line.
    """
    image_set_file = os.path.join(ref)
    assert os.path.exists(image_set_file), \
        'Path does not exist: {}'.format(image_set_file)
    with open(image_set_file) as listing:
        return [line.strip() for line in listing]
18 |
def load_pascal_annotation(ref_path, index):
    """Return the distinct object class names annotated for one image.

    Parses ``<ref_path>/Annotations/<index>.xml`` (PASCAL VOC format) and
    collects the ``<name>`` of every ``<object>`` element, lower-cased and
    stripped of surrounding whitespace.

    Each class appears exactly once, in order of first appearance in the
    XML file.  (Going through ``list(set(...))`` would yield an order that
    can differ from one interpreter run to the next, which makes the
    generated list files non-reproducible.)
    """
    filename = os.path.join(ref_path, 'Annotations', index + '.xml')
    tree = ET.parse(filename)
    seen = set()
    classes = []
    for obj in tree.findall('object'):
        cls = obj.find('name').text.lower().strip()
        if cls not in seen:
            seen.add(cls)
            classes.append(cls)
    return classes
32 |
# Command-line arguments:
#   argvs[1] - image set file listing one image index per line
#   argvs[2] - dataset root that contains the 'Annotations' directory
#   argvs[3] - output text file, one line per image
indexes = load_image_set_index(argvs[1])
# A context manager guarantees the output file is flushed and closed, even
# when an annotation fails to parse part-way through the loop.
with open(argvs[3], 'w') as images_list:
    for index in indexes:
        objs = load_pascal_annotation(argvs[2], index)
        # NOTE(review): the JPEG directory is hard-coded to an absolute path
        # on the original author's machine; adjust it for your dataset.
        image_path = os.path.join('/research/masaito/detection_dataset/VOCdevkit/VOC2007/JPEGImages', index + '.jpg' + ' ')
        # Line format: "<image path> <class> <class> ... " - every field,
        # including the last one, is followed by a single space.
        images_list.write(image_path + ''.join(name + ' ' for name in objs) + '\n')
41 |
42 |
--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Builds the Python extension modules and the CUDA kernels (NMS, RoI pooling,
# RoI align, RoI crop). All paths are relative to the directory the script is
# started from.

# CUDA_PATH=/usr/local/cuda/

export CUDA_PATH=/usr/local/cuda/
# You may also want to add the following:
# export C_INCLUDE_PATH=/opt/cuda/include

export CXXFLAGS="-std=c++11"
export CFLAGS="-std=c99"

python setup.py build_ext --inplace
rm -rf build

# GPU architectures handed to every nvcc invocation below.
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
           -gencode arch=compute_35,code=sm_35 \
           -gencode arch=compute_50,code=sm_50 \
           -gencode arch=compute_52,code=sm_52 \
           -gencode arch=compute_60,code=sm_60 \
           -gencode arch=compute_61,code=sm_61 "

# build_kernel DIR SOURCE OBJECT LABEL
#   Compiles model/DIR/src/SOURCE into OBJECT with nvcc, then runs build.py
#   from model/DIR. Leaves the working directory at model/DIR.
build_kernel() {
    cd "model/$1/src"
    echo "Compiling $4 kernels by nvcc..."
    # $CUDA_ARCH is intentionally unquoted so it expands to separate flags.
    nvcc -c -o "$3" "$2" \
         -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
    cd ../
    python build.py
}

# compile NMS
build_kernel nms nms_cuda_kernel.cu nms_cuda_kernel.cu.o "nms"

# compile roi_pooling
cd ../../
build_kernel roi_pooling roi_pooling_kernel.cu roi_pooling.cu.o "roi pooling"

# compile roi_align
cd ../../
build_kernel roi_align roi_align_kernel.cu roi_align_kernel.cu.o "roi align"

# compile roi_crop
cd ../../
build_kernel roi_crop roi_crop_cuda_kernel.cu roi_crop_cuda_kernel.cu.o "roi crop"
57 |
--------------------------------------------------------------------------------
/lib/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/__init__.py
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/faster_rcnn/__init__.py
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn.py:
--------------------------------------------------------------------------------
import pdb
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.autograd import Variable

from model.utils.config import cfg
from model.rpn.rpn import _RPN
from model.roi_pooling.modules.roi_pool import _RoIPooling
from model.roi_crop.modules.roi_crop import _RoICrop
from model.roi_align.modules.roi_align import RoIAlignAvg
from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta
17 |
class _fasterRCNN(nn.Module):
    """Faster R-CNN detector skeleton shared by backbone-specific subclasses.

    This class reads several attributes it does not define itself:
    ``self.dout_base_model`` (needed already inside ``__init__``),
    ``self.RCNN_base``, ``self.RCNN_cls_score``, ``self.RCNN_bbox_pred`` and
    the methods ``_init_modules`` / ``_head_to_tail``.  They have to be
    supplied by the subclass.  The global configuration object ``cfg`` must
    be importable at module level.
    """

    def __init__(self, classes, class_agnostic):
        """Create the RPN, the proposal-target layer and the RoI poolers.

        Args:
            classes: sequence of class names; its length becomes ``n_classes``.
            class_agnostic: stored as-is; ``forward`` uses it to decide whether
                the box regressor output is selected per class.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # NOTE(review): 1.0/16.0 is presumably the feature-map scale relative
        # to the input image - confirm against _RoIPooling / RoIAlignAvg.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        # The crop path samples a grid twice as large when it is followed by
        # a 2x2 max-pool (see forward()).
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        """Run the detector on a batch.

        Args:
            im_data: input handed to ``self.RCNN_base``; its first dimension
                is used as the batch size.
            im_info, gt_boxes, num_boxes: forwarded (through ``.data``) to the
                RPN and, in training mode, to the proposal-target layer.

        Returns:
            ``(rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label)``.  Outside training
            mode the four losses are 0 and ``rois_label`` is None.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is not 'crop', 'align' or
                'pool'.
        """
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # in the training phase, use the ground-truth boxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image targets so they line up with the pooled RoIs
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            # swap the last-axis order (x, y) -> (y, x) before cropping
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `pooled_feat` a few lines further down.
            raise ValueError(
                "Unsupported cfg.POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(cfg.POOLING_MODE))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # restore the (batch, rois-per-image, ...) layout for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label

    def _init_weights(self):
        """Initialise the RPN layers and the two detection heads."""
        def normal_init(m, mean, stddev, truncated=False):
            """
            weight initializer: truncated normal and random normal.
            """
            # m is a layer exposing .weight and .bias
            if truncated:
                m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
            else:
                m.weight.data.normal_(mean, stddev)
                # NOTE(review): the bias is reset only on this branch; the
                # truncated branch leaves it untouched - confirm intended.
                m.bias.data.zero_()

        normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)

    def create_architecture(self):
        """Build the backbone/head modules, then initialise their weights."""
        self._init_modules()
        self._init_weights()
136 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn_global.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | import torchvision.models as models
7 | from torch.autograd import Variable
8 | import numpy as np
9 | from model.utils.config import cfg
10 | from model.rpn.rpn import _RPN
11 | from model.roi_pooling.modules.roi_pool import _RoIPooling
12 | from model.roi_crop.modules.roi_crop import _RoICrop
13 | from model.roi_align.modules.roi_align import RoIAlignAvg
14 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
15 | import time
16 | import pdb
17 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta,grad_reverse
18 |
class _fasterRCNN(nn.Module):
    """Faster R-CNN skeleton with an extra network ``netD`` on the base features.

    ``netD`` receives the base feature map through ``grad_reverse`` and its
    output is returned as ``domain_p``.  When ``context`` is true, ``netD``
    returns a pair and its second element is concatenated to every pooled
    RoI feature.

    This class reads several attributes it does not define itself:
    ``self.dout_base_model`` (needed already inside ``__init__``),
    ``self.RCNN_base``, ``self.netD``, ``self.RCNN_cls_score``,
    ``self.RCNN_bbox_pred`` and the methods ``_init_modules`` /
    ``_head_to_tail``.  They have to be supplied by the subclass.
    """

    def __init__(self, classes, class_agnostic,context):
        """Create the RPN, the proposal-target layer and the RoI poolers.

        Args:
            classes: sequence of class names; its length becomes ``n_classes``.
            class_agnostic: stored as-is; ``forward`` uses it to decide whether
                the box regressor output is selected per class.
            context: when true, ``netD`` is expected to return
                ``(prediction, feature)`` and the feature is appended to the
                pooled RoI features.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        self.context = context
        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # NOTE(review): 1.0/16.0 is presumably the feature-map scale relative
        # to the input image - confirm against _RoIPooling / RoIAlignAvg.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        # The crop path samples a grid twice as large when it is followed by
        # a 2x2 max-pool (see forward()).
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

    def forward(self, im_data, im_info, gt_boxes, num_boxes,target=False,eta=1.0):
        """Run the detector on a batch.

        Args:
            im_data: input handed to ``self.RCNN_base``; its first dimension
                is used as the batch size.
            im_info, gt_boxes, num_boxes: forwarded (through ``.data``) to the
                RPN and, in training mode, to the proposal-target layer.
            target: when true, return only ``domain_p`` and skip detection.
            eta: passed to ``grad_reverse`` as ``lambd``.

        Returns:
            ``domain_p`` alone when ``target`` is true; otherwise
            ``(rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label, domain_p)``.  Outside
            training mode the four losses are 0 and ``rois_label`` is None.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is not 'crop', 'align' or
                'pool'.
        """
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        if self.context:
            # netD is evaluated exactly once through the gradient-reversal
            # path; a second identical call here would only repeat the work
            # (and any side effects inside netD) and discard the first result.
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return domain_p
            # context vector computed on detached features
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return domain_p

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # in the training phase, use the ground-truth boxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image targets so they line up with the pooled RoIs
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            # swap the last-axis order (x, y) -> (y, x) before cropping
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `pooled_feat` a few lines further down.
            raise ValueError(
                "Unsupported cfg.POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(cfg.POOLING_MODE))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        if self.context:
            # Flatten the context feature into one row and repeat it for every
            # RoI (assumes a single image per batch - TODO confirm).
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # restore the (batch, rois-per-image, ...) layout for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,domain_p

    def _init_weights(self):
        """Initialise the RPN layers and the two detection heads."""
        def normal_init(m, mean, stddev, truncated=False):
            """
            weight initializer: truncated normal and random normal.
            """
            # m is a layer exposing .weight and .bias
            if truncated:
                m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
            else:
                m.weight.data.normal_(mean, stddev)
                # NOTE(review): the bias is reset only on this branch; the
                # truncated branch leaves it untouched - confirm intended.
                m.bias.data.zero_()

        normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)

    def create_architecture(self):
        """Build the backbone/head modules, then initialise their weights."""
        self._init_modules()
        self._init_weights()
149 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn_global_local.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | import torchvision.models as models
7 | from torch.autograd import Variable
8 | import numpy as np
9 | from model.utils.config import cfg
10 | from model.rpn.rpn import _RPN
11 | from model.roi_pooling.modules.roi_pool import _RoIPooling
12 | from model.roi_crop.modules.roi_crop import _RoICrop
13 | from model.roi_align.modules.roi_align import RoIAlignAvg
14 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
15 | import time
16 | import pdb
17 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta,grad_reverse
18 |
class _fasterRCNN(nn.Module):
    """Faster R-CNN skeleton with two extra networks on the backbone features.

    ``netD_pixel`` runs on the output of ``RCNN_base1`` and ``netD`` on the
    output of ``RCNN_base2``; both receive their input through
    ``grad_reverse`` and their outputs are returned as ``d_pixel`` and
    ``domain_p``.  With ``lc`` / ``gc`` enabled the respective network
    returns a pair whose second element is concatenated to every pooled RoI
    feature.

    This class reads several attributes it does not define itself:
    ``self.dout_base_model`` (needed already inside ``__init__``),
    ``self.RCNN_base1``, ``self.RCNN_base2``, ``self.netD_pixel``,
    ``self.netD``, ``self.RCNN_cls_score``, ``self.RCNN_bbox_pred`` and the
    methods ``_init_modules`` / ``_head_to_tail``.  They have to be supplied
    by the subclass.
    """

    def __init__(self, classes, class_agnostic,lc,gc):
        """Create the RPN, the proposal-target layer and the RoI poolers.

        Args:
            classes: sequence of class names; its length becomes ``n_classes``.
            class_agnostic: stored as-is; ``forward`` uses it to decide whether
                the box regressor output is selected per class.
            lc: when true, the second output of ``netD_pixel`` is appended to
                the pooled RoI features.
            gc: when true, the second output of ``netD`` is appended to the
                pooled RoI features.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        self.lc = lc
        self.gc = gc
        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # NOTE(review): 1.0/16.0 is presumably the feature-map scale relative
        # to the input image - confirm against _RoIPooling / RoIAlignAvg.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        # The crop path samples a grid twice as large when it is followed by
        # a 2x2 max-pool (see forward()).
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

    def forward(self, im_data, im_info, gt_boxes, num_boxes,target=False,eta=1.0):
        """Run the detector on a batch.

        Args:
            im_data: input handed to ``self.RCNN_base1``; its first dimension
                is used as the batch size.
            im_info, gt_boxes, num_boxes: forwarded (through ``.data``) to the
                RPN and, in training mode, to the proposal-target layer.
            target: when true, return only ``(d_pixel, domain_p)`` and skip
                detection.
            eta: passed to ``grad_reverse`` as ``lambd``.

        Returns:
            ``(d_pixel, domain_p)`` when ``target`` is true; otherwise
            ``(rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p)``.
            Outside training mode the four losses are 0 and ``rois_label`` is
            None.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is not 'crop', 'align' or
                'pool'.
        """
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to the first backbone stage
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            if not target:
                # context vector computed on detached features; only needed
                # when detection continues below
                _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))

        # second backbone stage produces the base feature map
        base_feat = self.RCNN_base2(base_feat1)
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel,domain_p
            # context vector computed on detached features
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel,domain_p

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # in the training phase, use the ground-truth boxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image targets so they line up with the pooled RoIs
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            # swap the last-axis order (x, y) -> (y, x) before cropping
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `pooled_feat` a few lines further down.
            raise ValueError(
                "Unsupported cfg.POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(cfg.POOLING_MODE))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        # Each enabled context feature is flattened into one row, repeated for
        # every RoI and prepended to the pooled features (assumes a single
        # image per batch - TODO confirm).
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # restore the (batch, rois-per-image, ...) layout for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,d_pixel, domain_p

    def _init_weights(self):
        """Initialise the RPN layers and the two detection heads."""
        def normal_init(m, mean, stddev, truncated=False):
            """
            weight initializer: truncated normal and random normal.
            """
            # m is a layer exposing .weight and .bias
            if truncated:
                m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
            else:
                m.weight.data.normal_(mean, stddev)
                # NOTE(review): the bias is reset only on this branch; the
                # truncated branch leaves it untouched - confirm intended.
                m.bias.data.zero_()

        normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)

    def create_architecture(self):
        """Build the backbone/head modules, then initialise their weights."""
        self._init_modules()
        self._init_weights()
161 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/faster_rcnn_local.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | import torchvision.models as models
7 | from torch.autograd import Variable
8 | import numpy as np
9 | from model.utils.config import cfg
10 | from model.rpn.rpn import _RPN
11 | from model.roi_pooling.modules.roi_pool import _RoIPooling
12 | from model.roi_crop.modules.roi_crop import _RoICrop
13 | from model.roi_align.modules.roi_align import RoIAlignAvg
14 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
15 | import time
16 | import pdb
17 | from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta,grad_reverse
18 |
class _fasterRCNN(nn.Module):
    """Faster R-CNN skeleton with an extra network on the early features.

    ``netD_pixel`` receives the output of ``RCNN_base1`` through
    ``grad_reverse`` and its output is returned as ``d_pixel``.  With ``lc``
    enabled it returns a pair whose second element is concatenated to every
    pooled RoI feature.

    This class reads several attributes it does not define itself:
    ``self.dout_base_model`` (needed already inside ``__init__``),
    ``self.RCNN_base1``, ``self.RCNN_base2``, ``self.netD_pixel``,
    ``self.RCNN_cls_score``, ``self.RCNN_bbox_pred`` and the methods
    ``_init_modules`` / ``_head_to_tail``.  They have to be supplied by the
    subclass.
    """

    def __init__(self, classes, class_agnostic,lc):
        """Create the RPN, the proposal-target layer and the RoI poolers.

        Args:
            classes: sequence of class names; its length becomes ``n_classes``.
            class_agnostic: stored as-is; ``forward`` uses it to decide whether
                the box regressor output is selected per class.
            lc: when true, the second output of ``netD_pixel`` is appended to
                the pooled RoI features.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        self.lc = lc
        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # NOTE(review): 1.0/16.0 is presumably the feature-map scale relative
        # to the input image - confirm against _RoIPooling / RoIAlignAvg.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        # The crop path samples a grid twice as large when it is followed by
        # a 2x2 max-pool (see forward()).
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

    def forward(self, im_data, im_info, gt_boxes, num_boxes,target=False,eta=1.0):
        """Run the detector on a batch.

        Args:
            im_data: input handed to ``self.RCNN_base1``; its first dimension
                is used as the batch size.
            im_info, gt_boxes, num_boxes: forwarded (through ``.data``) to the
                RPN and, in training mode, to the proposal-target layer.
            target: when true, return only ``d_pixel`` and skip detection.
            eta: passed to ``grad_reverse`` as ``lambd``.

        Returns:
            ``d_pixel`` alone when ``target`` is true; otherwise
            ``(rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel)``.  Outside
            training mode the four losses are 0 and ``rois_label`` is None.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is not 'crop', 'align' or
                'pool'.
        """
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to the first backbone stage
        base_feat1 = self.RCNN_base1(im_data)

        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            if not target:
                # context vector computed on detached features; only needed
                # when detection continues below
                _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        if target:
            return d_pixel

        # second backbone stage produces the base feature map
        base_feat = self.RCNN_base2(base_feat1)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # in the training phase, use the ground-truth boxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image targets so they line up with the pooled RoIs
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            # swap the last-axis order (x, y) -> (y, x) before cropping
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `pooled_feat` a few lines further down.
            raise ValueError(
                "Unsupported cfg.POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(cfg.POOLING_MODE))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        if self.lc:
            # Flatten the context feature into one row and repeat it for every
            # RoI (assumes a single image per batch - TODO confirm).
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        # restore the (batch, rois-per-image, ...) layout for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,d_pixel

    def _init_weights(self):
        """Initialise the RPN layers and the two detection heads."""
        def normal_init(m, mean, stddev, truncated=False):
            """
            weight initializer: truncated normal and random normal.
            """
            # m is a layer exposing .weight and .bias
            if truncated:
                m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
            else:
                m.weight.data.normal_(mean, stddev)
                # NOTE(review): the bias is reset only on this branch; the
                # truncated branch leaves it untouched - confirm intended.
                m.bias.data.zero_()

        normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)

    def create_architecture(self):
        """Build the backbone/head modules, then initialise their weights."""
        self._init_modules()
        self._init_weights()
150 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/vgg16.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Xinlei Chen
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.utils.config import cfg
17 |
18 | from model.faster_rcnn.faster_rcnn import _fasterRCNN
19 | import pdb
20 |
class vgg16(_fasterRCNN):
    """Faster R-CNN built on torchvision's VGG-16."""

    def __init__(self, classes, pretrained=False, class_agnostic=False):
        # These attributes are assigned before the parent constructor runs;
        # the parent reads self.dout_base_model while building the RPN.
        self.model_path = cfg.VGG_PATH
        self.dout_base_model = 512
        self.pretrained = pretrained
        self.class_agnostic = class_agnostic

        _fasterRCNN.__init__(self, classes, class_agnostic)

    def _init_modules(self):
        """Create RCNN_base, RCNN_top and the two prediction layers."""
        vgg = models.vgg16()
        if self.pretrained:
            print("Loading pretrained weights from %s" %(self.model_path))
            checkpoint = torch.load(self.model_path)
            # keep only the checkpoint entries the fresh model knows about
            matching = {}
            for key, value in checkpoint.items():
                if key in vgg.state_dict():
                    matching[key] = value
            vgg.load_state_dict(matching)

        # classifier without its final layer
        head_layers = list(vgg.classifier._modules.values())
        vgg.classifier = nn.Sequential(*head_layers[:-1])

        # feature extractor without the last maxpool layer
        feature_layers = list(vgg.features._modules.values())
        self.RCNN_base = nn.Sequential(*feature_layers[:-1])

        # freeze the layers before conv3 (the first ten entries)
        for idx in range(10):
            for param in self.RCNN_base[idx].parameters():
                param.requires_grad = False

        self.RCNN_top = vgg.classifier

        self.RCNN_cls_score = nn.Linear(4096, self.n_classes)

        # one box for all classes when class-agnostic, else one per class
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.RCNN_bbox_pred = nn.Linear(4096, bbox_outputs)

    def _head_to_tail(self, pool5):
        """Flatten the pooled RoI features and run them through RCNN_top."""
        flattened = pool5.view(pool5.size(0), -1)
        return self.RCNN_top(flattened)
64 |
65 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/vgg16_global.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Xinlei Chen
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.utils.config import cfg
17 |
18 | from model.faster_rcnn.faster_rcnn_global import _fasterRCNN
19 | import pdb
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
class netD(nn.Module):
    """Convolutional classifier over a 512-channel feature map.

    Three stride-2 ``conv3x3 -> BatchNorm2d -> ReLU -> dropout`` stages are
    followed by global average pooling and a linear layer with 2 outputs.
    When ``context`` is true, ``forward`` also returns the pooled 128-d
    feature.
    """

    def __init__(self, context=False):
        super(netD, self).__init__()
        # Attribute names and creation order are unchanged so state_dict keys
        # and parameter initialisation order stay the same.
        self.conv1 = conv3x3(512, 512, stride=2)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2 = conv3x3(512, 128, stride=2)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = conv3x3(128, 128, stride=2)
        self.bn3 = nn.BatchNorm2d(128)
        self.fc = nn.Linear(128, 2)
        self.context = context

    def forward(self, x):
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = F.dropout(F.relu(norm(conv(x))), training=self.training)

        # Average over the full spatial extent, then flatten to (-1, 128).
        pooled = F.avg_pool2d(x, (x.size(2), x.size(3)))
        feat = pooled.view(-1, 128)
        logits = self.fc(feat)
        if self.context:
            return logits, feat
        return logits
class netD_dc(nn.Module):
    """Three-layer fully connected classifier over 2048-d inputs.

    Two ``Linear -> BatchNorm1d -> ReLU -> dropout`` stages of width 100 are
    followed by a final linear layer that emits 2 scores per sample.
    """

    def __init__(self):
        super(netD_dc, self).__init__()
        # Attribute names and creation order are unchanged so state_dict keys
        # and parameter initialisation order stay the same.
        self.fc1 = nn.Linear(2048, 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.fc3 = nn.Linear(100, 2)

    def forward(self, x):
        hidden = x
        for linear, norm in ((self.fc1, self.bn1), (self.fc2, self.bn2)):
            hidden = F.relu(norm(linear(hidden)))
            # Dropout is only active while the module is in training mode.
            hidden = F.dropout(hidden, training=self.training)
        return self.fc3(hidden)
61 |
class vgg16(_fasterRCNN):
    """VGG-16 backed detector that also owns a ``netD`` head.

    Args:
        classes: sequence of class names, forwarded to ``_fasterRCNN``.
        pretrained: when true, load weights from ``cfg.VGG_PATH``.
        class_agnostic: predict a single box (4 values) instead of one box
            per class.
        gc: when true, ``netD`` also returns its pooled 128-d feature and the
            prediction layers take ``4096 + 128`` inputs. How the feature is
            joined to the head output is decided by the base class, which is
            not visible in this file.
    """

    def __init__(self, classes, pretrained=False, class_agnostic=False, gc=False):
        self.model_path = cfg.VGG_PATH
        self.dout_base_model = 512
        self.pretrained = pretrained
        self.class_agnostic = class_agnostic
        self.gc = gc

        _fasterRCNN.__init__(self, classes, class_agnostic, self.gc)

    def _init_modules(self):
        """Build backbone, ``netD``, head and prediction layers."""
        vgg = models.vgg16()
        if self.pretrained:
            print("Loading pretrained weights from %s" %(self.model_path))
            state_dict = torch.load(self.model_path)
            # Look the model keys up once instead of rebuilding the state
            # dict for every checkpoint entry.
            model_keys = vgg.state_dict()
            vgg.load_state_dict(
                {k: v for k, v in state_dict.items() if k in model_keys}
            )

        vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])

        # not using the last maxpool layer
        self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1])
        # Use the flag stored by this class; ``self.context`` is never
        # assigned here, and the global_local variant also passes ``self.gc``.
        self.netD = netD(context=self.gc)
        feat_d = 4096

        # Fix the layers before conv3:
        for layer in range(10):
            for p in self.RCNN_base[layer].parameters():
                p.requires_grad = False

        self.RCNN_top = vgg.classifier
        if self.gc:
            # Room for the 128-d feature returned by netD.
            feat_d += 128
        self.RCNN_cls_score = nn.Linear(feat_d, self.n_classes)
        if self.class_agnostic:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4)
        else:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4 * self.n_classes)

    def _head_to_tail(self, pool5):
        """Flatten each pooled sample and run it through ``self.RCNN_top``."""
        pool5_flat = pool5.view(pool5.size(0), -1)
        fc7 = self.RCNN_top(pool5_flat)

        return fc7
108 |
109 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/vgg16_global_local.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Xinlei Chen
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.faster_rcnn.faster_rcnn_global_local import _fasterRCNN
17 | #from model.faster_rcnn.faster_rcnn_imgandpixellevel_gradcam import _fasterRCNN
18 | from model.utils.config import cfg
19 |
20 | import pdb
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d`` without padding.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        padding=0,
        bias=False,
    )
    return layer
class netD_pixel(nn.Module):
    """Per-location classifier built from three 1x1 convolutions.

    The 256-channel input is passed through ReLU and two ``conv1x1 -> ReLU``
    stages (256 -> 256 -> 128); a final 1x1 convolution plus sigmoid gives
    one score per location, returned flattened to shape ``(-1, 1)``. When
    ``context`` is true, the spatially averaged 128-channel activation is
    returned as a second value.
    """

    def __init__(self, context=False):
        super(netD_pixel, self).__init__()
        self.conv1 = conv1x1(256, 256)
        self.conv2 = conv1x1(256, 128)
        self.conv3 = conv1x1(128, 1)

        self.context = context

    def forward(self, x):
        hidden = F.relu(self.conv1(F.relu(x)))
        hidden = F.relu(self.conv2(hidden))
        scores = F.sigmoid(self.conv3(hidden)).view(-1, 1)
        if not self.context:
            return scores
        # Average the 128-channel activation over its full spatial extent.
        feat = F.avg_pool2d(hidden, (hidden.size(2), hidden.size(3)))
        return scores, feat
51 |
52 |
class netD(nn.Module):
    """Convolutional classifier over a 512-channel feature map.

    Three stride-2 ``conv3x3 -> BatchNorm2d -> ReLU -> dropout`` stages are
    followed by global average pooling and a linear layer with 2 outputs.
    When ``context`` is true, ``forward`` also returns the pooled 128-d
    feature.
    """

    def __init__(self, context=False):
        super(netD, self).__init__()
        # Attribute names and creation order are unchanged so state_dict keys
        # and parameter initialisation order stay the same.
        self.conv1 = conv3x3(512, 512, stride=2)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2 = conv3x3(512, 128, stride=2)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = conv3x3(128, 128, stride=2)
        self.bn3 = nn.BatchNorm2d(128)
        self.fc = nn.Linear(128, 2)
        self.context = context

    def forward(self, x):
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = F.dropout(F.relu(norm(conv(x))), training=self.training)

        # Average over the full spatial extent, then flatten to (-1, 128).
        pooled = F.avg_pool2d(x, (x.size(2), x.size(3)))
        feat = pooled.view(-1, 128)
        logits = self.fc(feat)
        if self.context:
            return logits, feat
        return logits
class netD_dc(nn.Module):
    """Three-layer fully connected classifier over 2048-d inputs.

    Two ``Linear -> BatchNorm1d -> ReLU -> dropout`` stages of width 100 are
    followed by a final linear layer that emits 2 scores per sample.
    """

    def __init__(self):
        super(netD_dc, self).__init__()
        # Attribute names and creation order are unchanged so state_dict keys
        # and parameter initialisation order stay the same.
        self.fc1 = nn.Linear(2048, 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.fc3 = nn.Linear(100, 2)

    def forward(self, x):
        hidden = x
        for linear, norm in ((self.fc1, self.bn1), (self.fc2, self.bn2)):
            hidden = F.relu(norm(linear(hidden)))
            # Dropout is only active while the module is in training mode.
            hidden = F.dropout(hidden, training=self.training)
        return self.fc3(hidden)
90 |
class vgg16(_fasterRCNN):
    """VGG-16 backed detector with ``netD`` and ``netD_pixel`` heads.

    Args:
        classes: sequence of class names, forwarded to ``_fasterRCNN``.
        pretrained: when true, load weights from ``cfg.VGG_PATH``.
        class_agnostic: predict a single box (4 values) instead of one box
            per class.
        lc: passed to ``netD_pixel`` as ``context``; widens the prediction
            layers by 128 inputs.
        gc: passed to ``netD`` as ``context``; widens the prediction layers
            by 128 inputs.
    """

    def __init__(self, classes, pretrained=False, class_agnostic=False, lc=False, gc=False):
        self.model_path = cfg.VGG_PATH
        self.dout_base_model = 512
        self.pretrained = pretrained
        self.class_agnostic = class_agnostic
        self.lc = lc
        self.gc = gc

        _fasterRCNN.__init__(self, classes, class_agnostic, self.lc, self.gc)

    def _init_modules(self):
        """Build the split backbone, both heads and the prediction layers."""
        backbone = models.vgg16()
        if self.pretrained:
            print("Loading pretrained weights from %s" %(self.model_path))
            checkpoint = torch.load(self.model_path)
            # Only entries whose key exists in the fresh model are loaded.
            expected = backbone.state_dict()
            backbone.load_state_dict(
                {key: value for key, value in checkpoint.items() if key in expected}
            )

        feature_layers = list(backbone.features._modules.values())
        classifier_layers = list(backbone.classifier._modules.values())

        # The feature stack is cut after its first 14 layers; the second
        # part leaves out the last maxpool layer.
        self.RCNN_base1 = nn.Sequential(*feature_layers[:14])
        self.RCNN_base2 = nn.Sequential(*feature_layers[14:-1])

        self.netD = netD(context=self.gc)
        self.netD_pixel = netD_pixel(context=self.lc)

        # Each enabled context flag adds 128 inputs to the prediction layers.
        feat_d = 4096 + (128 if self.lc else 0) + (128 if self.gc else 0)

        # Fix the layers before conv3:
        for idx in range(10):
            for param in self.RCNN_base1[idx].parameters():
                param.requires_grad = False

        # The classifier without its final layer serves as the head.
        self.RCNN_top = nn.Sequential(*classifier_layers[:-1])

        self.RCNN_cls_score = nn.Linear(feat_d, self.n_classes)
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.RCNN_bbox_pred = nn.Linear(feat_d, bbox_outputs)

    def _head_to_tail(self, pool5):
        """Flatten each pooled sample and run it through ``self.RCNN_top``."""
        flattened = pool5.view(pool5.size(0), -1)
        return self.RCNN_top(flattened)
146 |
147 |
--------------------------------------------------------------------------------
/lib/model/faster_rcnn/vgg16_local.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Xinlei Chen
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | from torch.autograd import Variable
14 | import math
15 | import torchvision.models as models
16 | from model.utils.config import cfg
17 |
18 | from model.faster_rcnn.faster_rcnn_local import _fasterRCNN
19 | import pdb
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d`` without padding.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        padding=0,
        bias=False,
    )
    return layer
class netD_pixel(nn.Module):
    """Per-location classifier built from three 1x1 convolutions.

    The 256-channel input is passed through ReLU and two ``conv1x1 -> ReLU``
    stages (256 -> 256 -> 128); a final 1x1 convolution plus sigmoid gives a
    single-channel score map, returned with its spatial layout intact. When
    ``context`` is true, the spatially averaged 128-channel activation is
    returned as a second value.
    """

    def __init__(self, context=False):
        super(netD_pixel, self).__init__()
        self.conv1 = conv1x1(256, 256)
        self.conv2 = conv1x1(256, 128)
        self.conv3 = conv1x1(128, 1)
        self.context = context

    def forward(self, x):
        hidden = F.relu(self.conv1(F.relu(x)))
        hidden = F.relu(self.conv2(hidden))
        score_map = F.sigmoid(self.conv3(hidden))
        if not self.context:
            return score_map
        # Average the 128-channel activation over its full spatial extent.
        feat = F.avg_pool2d(hidden, (hidden.size(2), hidden.size(3)))
        return score_map, feat
46 |
47 |
class netD(nn.Module):
    """Convolutional classifier over a 512-channel feature map.

    Three stride-2 ``conv3x3 -> BatchNorm2d -> ReLU -> dropout`` stages are
    followed by global average pooling and a linear layer with 2 outputs.
    When ``context`` is true, ``forward`` also returns the pooled 128-d
    feature.
    """

    def __init__(self, context=False):
        super(netD, self).__init__()
        # Attribute names and creation order are unchanged so state_dict keys
        # and parameter initialisation order stay the same.
        self.conv1 = conv3x3(512, 512, stride=2)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2 = conv3x3(512, 128, stride=2)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = conv3x3(128, 128, stride=2)
        self.bn3 = nn.BatchNorm2d(128)
        self.fc = nn.Linear(128, 2)
        self.context = context

    def forward(self, x):
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = F.dropout(F.relu(norm(conv(x))), training=self.training)

        # Average over the full spatial extent, then flatten to (-1, 128).
        pooled = F.avg_pool2d(x, (x.size(2), x.size(3)))
        feat = pooled.view(-1, 128)
        logits = self.fc(feat)
        if self.context:
            return logits, feat
        return logits
class netD_dc(nn.Module):
    """Three-layer fully connected classifier over 2048-d inputs.

    Two ``Linear -> BatchNorm1d -> ReLU -> dropout`` stages of width 100 are
    followed by a final linear layer that emits 2 scores per sample.
    """

    def __init__(self):
        super(netD_dc, self).__init__()
        # Attribute names and creation order are unchanged so state_dict keys
        # and parameter initialisation order stay the same.
        self.fc1 = nn.Linear(2048, 100)
        self.bn1 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100, 100)
        self.bn2 = nn.BatchNorm1d(100)
        self.fc3 = nn.Linear(100, 2)

    def forward(self, x):
        hidden = x
        for linear, norm in ((self.fc1, self.bn1), (self.fc2, self.bn2)):
            hidden = F.relu(norm(linear(hidden)))
            # Dropout is only active while the module is in training mode.
            hidden = F.dropout(hidden, training=self.training)
        return self.fc3(hidden)
85 |
class vgg16(_fasterRCNN):
    """VGG-16 backed detector that also owns a ``netD_pixel`` head.

    Args:
        classes: sequence of class names, forwarded to ``_fasterRCNN``.
        pretrained: when true, load weights from ``cfg.VGG_PATH``.
        class_agnostic: predict a single box (4 values) instead of one box
            per class.
        lc: passed to ``netD_pixel`` as ``context``; widens the prediction
            layers by 128 inputs.
    """

    def __init__(self, classes, pretrained=False, class_agnostic=False, lc=False):
        self.model_path = cfg.VGG_PATH
        self.dout_base_model = 512
        self.pretrained = pretrained
        self.class_agnostic = class_agnostic
        self.lc = lc
        # The former ``self.gc = gc`` referenced a name that is not a
        # parameter of this constructor and raised NameError on every call.

        _fasterRCNN.__init__(self, classes, class_agnostic, self.lc)

    def _init_modules(self):
        """Build the split backbone, ``netD_pixel``, head and prediction layers."""
        vgg = models.vgg16()
        if self.pretrained:
            print("Loading pretrained weights from %s" %(self.model_path))
            state_dict = torch.load(self.model_path)
            # Look the model keys up once instead of rebuilding the state
            # dict for every checkpoint entry.
            model_keys = vgg.state_dict()
            vgg.load_state_dict(
                {k: v for k, v in state_dict.items() if k in model_keys}
            )

        vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])

        # The feature stack is cut after its first 14 layers; the second
        # part leaves out the last maxpool layer.
        self.RCNN_base1 = nn.Sequential(*list(vgg.features._modules.values())[:14])
        self.RCNN_base2 = nn.Sequential(*list(vgg.features._modules.values())[14:-1])
        self.netD_pixel = netD_pixel(context=self.lc)
        feat_d = 4096
        if self.lc:
            # Room for the 128-channel feature returned by netD_pixel.
            feat_d += 128

        # Fix the layers before conv3:
        for layer in range(10):
            for p in self.RCNN_base1[layer].parameters():
                p.requires_grad = False

        self.RCNN_top = vgg.classifier

        self.RCNN_cls_score = nn.Linear(feat_d, self.n_classes)
        if self.class_agnostic:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4)
        else:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4 * self.n_classes)

    def _head_to_tail(self, pool5):
        """Flatten each pooled sample and run it through ``self.RCNN_top``."""
        pool5_flat = pool5.view(pool5.size(0), -1)
        fc7 = self.RCNN_top(pool5_flat)

        return fc7
139 |
140 |
--------------------------------------------------------------------------------
/lib/model/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/model/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/nms/__init__.py
--------------------------------------------------------------------------------
/lib/model/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/nms/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._nms import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
def _import_symbols(locals):
    """Copy every attribute of ``_lib`` into the given namespace.

    Callable attributes are wrapped with ``_wrap_function`` first; every
    copied name is also appended to ``__all__``.
    """
    for symbol in dir(_lib):
        attr = getattr(_lib, symbol)
        locals[symbol] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/nms/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
# Inputs for the extension; the CUDA sources, headers and the WITH_CUDA
# define are only added when torch reports an available CUDA runtime.
sources, headers, defines = [], [], []
with_cuda = torch.cuda.is_available()

if with_cuda:
    print('Including CUDA code.')
    sources.append('src/nms_cuda.c')
    headers.append('src/nms_cuda.h')
    defines.append(('WITH_CUDA', None))

# Resolved directory of this script; the pre-compiled kernel object is
# referenced by absolute path.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
extra_objects = [os.path.join(this_file, 'src/nms_cuda_kernel.cu.o')]
print(extra_objects)

ffi = create_extension(
    '_ext.nms',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects,
)

if __name__ == '__main__':
    ffi.build()
38 |
--------------------------------------------------------------------------------
/lib/model/nms/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # CUDA_PATH=/usr/local/cuda/
4 |
5 | cd src
6 | echo "Compiling stnm kernels by nvcc..."
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 |
9 | cd ../
10 | python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_cpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import numpy as np
4 | import torch
5 |
def nms_cpu(dets, thresh):
    """Greedy non-maximum suppression computed with NumPy.

    Args:
        dets: 2-D tensor whose first five columns are ``x1, y1, x2, y2,
            score``. CUDA tensors and tensors that require grad are accepted;
            they are detached and moved to the CPU first.
        thresh: a box is dropped when its IoU with an already kept box is
            greater than this value.

    Returns:
        ``torch.IntTensor`` with the row indices of the kept boxes, ordered
        by decreasing score.
    """
    # Plain ``dets.numpy()`` raises for CUDA tensors and for tensors that
    # track gradients.
    dets = dets.detach().cpu().numpy()
    x1, y1, x2, y2, scores = (dets[:, col] for col in range(5))

    # Box extents use the inclusive-pixel (+1) convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = int(order[0])
        keep.append(i)
        rest = order[1:]

        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Continue with the boxes that do not overlap the kept one too much.
        order = rest[np.where(ovr <= thresh)[0]]

    return torch.IntTensor(keep)
35 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_gpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import torch
3 | import numpy as np
4 | from ._ext import nms
5 | import pdb
6 |
def nms_gpu(dets, thresh):
    """Run the compiled ``nms.nms_cuda`` routine on ``dets``.

    Two zeroed int tensors derived from ``dets`` receive the results: an
    ``(N, 1)`` buffer for kept indices and a one-element counter. The buffer
    is returned truncated to the reported count.
    """
    num_boxes = dets.size(0)
    kept = dets.new(num_boxes, 1).zero_().int()
    count = dets.new(1).zero_().int()
    nms.nms_cuda(kept, dets, count, thresh)
    return kept[:count[0]]
13 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
#include <vector>
#include <iostream>
11 |
// Evaluate a CUDA runtime call once and, if it did not return cudaSuccess,
// print the error string to stdout. Execution continues either way.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division rounded up: adds 1 when the division leaves a positive
// remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per bit of an unsigned long long mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
// Intersection-over-union of two boxes stored as {x1, y1, x2, y2, ...},
// using the inclusive-pixel (+1) convention for widths and heights.
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the overlap rectangle.
  const float left = max(a[0], b[0]);
  const float right = min(a[2], b[2]);
  const float top = max(a[1], b[1]);
  const float bottom = min(a[3], b[3]);

  // Clamp to zero when the boxes do not overlap.
  const float width = max(right - left + 1, 0.f);
  const float height = max(bottom - top + 1, 0.f);
  const float interS = width * height;

  const float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}
33 |
// Fill the pairwise suppression bitmask.
//
// The grid is 2-D: blockIdx.y selects a group of up to threadsPerBlock "row"
// boxes and blockIdx.x a group of up to threadsPerBlock "column" boxes. Each
// row thread writes one mask word whose bit i is set when its box overlaps
// column box i of this group by more than nms_overlap_thresh.
//
// n_boxes            number of boxes in dev_boxes
// nms_overlap_thresh IoU threshold
// dev_boxes          boxes, read with a fixed stride of 5 floats per box
// dev_mask           output, DIVUP(n_boxes, threadsPerBlock) words per box
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last group in each direction may hold fewer than threadsPerBlock boxes.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage this block's column boxes in shared memory, one box per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All column boxes must be staged before any thread compares against them.
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal, compare only against boxes after this one, which
    // skips the box itself and the earlier boxes of the same group.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
79 |
// Make device_id the current CUDA device unless it already is.
void _set_device(int device_id) {
  int current_device;
  // Restored the address-of argument: the text had been mangled into a
  // currency-sign character and no longer compiled.
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}
90 |
// Host-side NMS driver: copies the boxes to the device, runs nms_kernel to
// build the pairwise suppression mask, then selects boxes greedily on the CPU.
//
// keep_out           receives the indices of the kept boxes (host memory)
// num_out            receives the number of kept boxes
// boxes_host         boxes_num * boxes_dim floats in host memory
// boxes_dim          floats per box; NOTE(review): nms_kernel reads boxes
//                    with a fixed stride of 5, so this presumably must be 5
// nms_overlap_thresh IoU threshold
// device_id          CUDA device to run on
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  // Restored the launch configuration that had been stripped from the text.
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  // Restored the element type of both vectors (stripped from the text).
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // Value-initialised to zero; replaces the former memset on &remv[0].
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  // Walk the boxes in index order: a box is kept unless an earlier kept box
  // has already marked it as suppressed.
  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
145 |
--------------------------------------------------------------------------------
/lib/model/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | import torch
8 | from model.utils.config import cfg
9 | if torch.cuda.is_available():
10 | from model.nms.nms_gpu import nms_gpu
11 | from model.nms.nms_cpu import nms_cpu
12 |
def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    Args:
        dets: detections, one row per box.
        thresh: overlap threshold forwarded to the implementation.
        force_cpu: when true, always use ``nms_cpu``.

    Returns:
        ``[]`` when ``dets`` has no rows, otherwise whatever the selected
        implementation returns.
    """
    if dets.shape[0] == 0:
        return []
    # nms_gpu is only imported when CUDA is available, so anything that is
    # not a CUDA tensor is routed to the CPU path instead of failing with a
    # NameError or a tensor-type error.
    if force_cpu or not getattr(dets, "is_cuda", False):
        return nms_cpu(dets, thresh)
    return nms_gpu(dets, thresh)
22 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
#include <THC/THC.h>
#include <stdio.h>
3 | #include "nms_cuda_kernel.h"
4 |
5 | // this symbol will be resolved automatically from PyTorch libs
6 | extern THCState *state;
7 |
8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
9 | THCudaIntTensor *num_out, float nms_overlap_thresh) {
10 |
11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out),
12 | THCudaIntTensor_data(state, num_out),
13 | THCudaTensor_data(state, boxes_host),
14 | THCudaTensor_size(state, boxes_host, 0),
15 | THCudaTensor_size(state, boxes_host, 1),
16 | nms_overlap_thresh);
17 |
18 | return 1;
19 | }
20 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
// Earlier prototype, kept for reference:
// int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
//              THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);

// NMS entry point. keep_out and num_out are int tensors that receive the
// kept indices and their count; boxes_host holds the boxes and
// nms_overlap_thresh is the overlap threshold.
int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
             THCudaIntTensor *num_out, float nms_overlap_thresh);
6 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
#include <stdbool.h>
#include <stdio.h>
#include <vector>
#include <iostream>
12 | #include "nms_cuda_kernel.h"
13 |
// Run a CUDA runtime call, print a diagnostic with the source line if it
// failed, then synchronize the device. The argument is evaluated exactly
// once; the previous form expanded it a second time inside the error
// message, which re-executed the failing call.
#define CUDA_WARN(XXX) \
    do { \
      cudaError_t warn_status = (XXX); \
      if (warn_status != cudaSuccess) \
        std::cout << "CUDA Error: " << cudaGetErrorString(warn_status) \
                  << ", at line " << __LINE__ << std::endl; \
      cudaDeviceSynchronize(); \
    } while (0)

// Evaluate a CUDA runtime call once and, if it did not return cudaSuccess,
// print the error string to stdout. Execution continues either way.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division rounded up: adds 1 when the division leaves a positive
// remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per bit of an unsigned long long mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
30 |
// Intersection-over-union of two boxes stored as {x1, y1, x2, y2, ...},
// using the inclusive-pixel (+1) convention for widths and heights.
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the overlap rectangle.
  const float left = max(a[0], b[0]);
  const float right = min(a[2], b[2]);
  const float top = max(a[1], b[1]);
  const float bottom = min(a[3], b[3]);

  // Clamp to zero when the boxes do not overlap.
  const float width = max(right - left + 1, 0.f);
  const float height = max(bottom - top + 1, 0.f);
  const float interS = width * height;

  const float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}
40 |
// Fill the pairwise suppression bitmask.
//
// The grid is 2-D: blockIdx.y selects a group of up to threadsPerBlock "row"
// boxes and blockIdx.x a group of up to threadsPerBlock "column" boxes. Each
// row thread writes one mask word whose bit i is set when its box overlaps
// column box i of this group by more than nms_overlap_thresh.
//
// n_boxes            number of boxes in dev_boxes
// nms_overlap_thresh IoU threshold
// dev_boxes          boxes, read with a fixed stride of 5 floats per box
// dev_mask           output, DIVUP(n_boxes, threadsPerBlock) words per box
__global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
                           float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last group in each direction may hold fewer than threadsPerBlock boxes.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage this block's column boxes in shared memory, one box per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All column boxes must be staged before any thread compares against them.
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal, compare only against boxes after this one, which
    // skips the box itself and the earlier boxes of the same group.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
86 |
// NMS driver: runs nms_kernel to build the pairwise suppression mask,
// selects boxes greedily on the CPU, then copies the result into keep_out
// and num_out, which live on the GPU.
//
// keep_out           receives the kept box indices (GPU memory)
// num_out            receives the number of kept boxes (GPU memory)
// boxes_host         boxes_num * boxes_dim floats
// boxes_dim          floats per box; NOTE(review): nms_kernel reads boxes
//                    with a fixed stride of 5, so this presumably must be 5
// nms_overlap_thresh IoU threshold
void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
                  int boxes_dim, float nms_overlap_thresh) {

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  // NOTE(review): nms_cuda passes the data pointer of a CUDA tensor here, so
  // despite its name boxes_host presumably already points to device memory;
  // the copy kind is left unchanged. Confirm.
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);

  // Restored the launch configuration that had been stripped from the text.
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  // Restored the element type of both vectors (stripped from the text).
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // Value-initialised to zero; replaces the former memset on &remv[0].
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  // keep_out lives on the GPU, so the indices are collected in a CPU buffer
  // first and copied over afterwards.
  int* keep_out_cpu = new int[boxes_num];

  // Walk the boxes in index order: a box is kept unless an earlier kept box
  // has already marked it as suppressed.
  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out_cpu[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  // Copy only the entries that were written; the remainder of keep_out_cpu
  // is uninitialised and was previously copied to the GPU as well.
  CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, num_to_keep * sizeof(int),cudaMemcpyHostToDevice));

  // copy num_to_keep to num_out on gpu
  CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));

  // release cuda memory
  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
  // release cpu memory
  delete []keep_out_cpu;
}
162 |
--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Entry point of the CUDA non-maximum-suppression routine.
 *
 *   keep_out            destination for the indices of the kept boxes; the
 *                       implementation copies boxes_num ints into it with a
 *                       host-to-device cudaMemcpy
 *   num_out             destination for the number of kept boxes (one int,
 *                       also written with a host-to-device cudaMemcpy)
 *   boxes_host          box data (NOTE(review): memory space not visible
 *                       from this header - confirm against the .cu file)
 *   boxes_num           number of boxes
 *   boxes_dim           number of values stored per box
 *   nms_overlap_thresh  overlap threshold handed to the NMS kernel
 */
void nms_cuda_compute(int* keep_out,
                      int* num_out,
                      float* boxes_host,
                      int boxes_num,
                      int boxes_dim,
                      float nms_overlap_thresh);

#ifdef __cplusplus
}
#endif
11 |
--------------------------------------------------------------------------------
/lib/model/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_align/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_align/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_align import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
def _import_symbols(locals):
    """Expose every attribute of the compiled ``_lib`` through ``locals``.

    Callable attributes are wrapped with ``_wrap_function`` before being
    stored; everything else is copied unchanged.  Each exported name is also
    appended to the module-level ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_align/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
# Build configuration for the RoIAlign FFI extension.  The CPU sources are
# always compiled; the CUDA pieces are added only when a GPU is available.
sources = ['src/roi_align.c']
headers = ['src/roi_align.h']
defines = []
extra_objects = []
with_cuda = False

this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources.append('src/roi_align_cuda.c')
    headers.append('src/roi_align_cuda.h')
    defines.append(('WITH_CUDA', None))
    with_cuda = True

    # Object file produced beforehand by nvcc (see make.sh); it is linked
    # through an absolute path.
    extra_objects = [os.path.join(this_file, 'src/roi_align_kernel.cu.o')]

ffi = create_extension(
    '_ext.roi_align',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
39 |
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_align/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_align
4 |
5 |
6 | # TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """Autograd function that forwards to the compiled ``roi_align`` kernels.

    The instance itself carries the state needed by ``backward`` (the ROIs
    and the size of the input feature map), so one instance must be used
    for a single forward/backward pair.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        # Populated by forward() and read back by backward().
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        """Pool ``features`` over ``rois``.

        Returns a zero-initialised tensor of size
        ``(rois.size(0), channels, aligned_height, aligned_width)`` that the
        extension fills in place.
        """
        self.rois = rois
        self.feature_size = features.size()

        # Unpacking four values requires a 4-D feature tensor.
        _, num_channels, _, _ = features.size()
        num_rois = rois.size(0)

        output = features.new(num_rois, num_channels,
                              self.aligned_height,
                              self.aligned_width).zero_()

        # Pick the kernel matching the device that holds the features.
        if features.is_cuda:
            kernel = roi_align.roi_align_forward_cuda
        else:
            kernel = roi_align.roi_align_forward
        kernel(self.aligned_height, self.aligned_width, self.spatial_scale,
               features, rois, output)

        return output

    def backward(self, grad_output):
        """Return ``(grad_features, None)``; the ROIs receive no gradient.

        Only the CUDA backward kernel is called here, hence the assertion on
        ``grad_output.is_cuda``.
        """
        assert(self.feature_size is not None and grad_output.is_cuda)

        batch_size, num_channels, data_height, data_width = self.feature_size

        # Accumulator allocated with the ROI tensor's type; the kernel adds
        # the gradients into it in place.
        grad_input = self.rois.new(batch_size, num_channels,
                                   data_height, data_width).zero_()
        roi_align.roi_align_backward_cuda(self.aligned_height,
                                          self.aligned_width,
                                          self.spatial_scale,
                                          grad_output,
                                          self.rois,
                                          grad_input)

        return grad_input, None
52 |
--------------------------------------------------------------------------------
/lib/model/roi_align/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Build the RoIAlign extension: compile the CUDA kernel with nvcc, then let
# build.py compile the C sources and link the resulting object file.
# Must be run from the directory that contains this script.

# Stop at the first failing step so build.py never runs against a missing
# or stale kernel object.
set -e

CUDA_PATH=/usr/local/cuda/

# GPU architecture passed to nvcc; override with e.g. CUDA_ARCH=sm_61.
CUDA_ARCH="${CUDA_ARCH:-sm_52}"

cd src
echo "Compiling my_lib kernels by nvcc..."
nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch="${CUDA_ARCH}"

cd ../
python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_align/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.nn.functional import avg_pool2d, max_pool2d
3 | from ..functions.roi_align import RoIAlignFunction
4 |
5 |
class RoIAlign(Module):
    """Module wrapper that applies ``RoIAlignFunction`` with a fixed output size."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlign, self).__init__()

        # Normalise the configuration to plain Python numbers.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Run RoIAlign on ``features`` for the given ``rois``."""
        # A fresh function object per call: it stores per-call state.
        align = RoIAlignFunction(self.aligned_height,
                                 self.aligned_width,
                                 self.spatial_scale)
        return align(features, rois)
17 |
class RoIAlignAvg(Module):
    """RoIAlign followed by a 2x2, stride-1 average pooling.

    The aligned grid is sampled one cell larger in each dimension so that
    the pooling brings the result back to
    ``aligned_height x aligned_width``.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Return the average-pooled RoIAlign features for ``rois``."""
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 self.spatial_scale)
        oversampled = align(features, rois)
        return avg_pool2d(oversampled, kernel_size=2, stride=1)
30 |
class RoIAlignMax(Module):
    """RoIAlign followed by a 2x2, stride-1 max pooling.

    The aligned grid is sampled one cell larger in each dimension so that
    the pooling brings the result back to
    ``aligned_height x aligned_width``.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Return the max-pooled RoIAlign features for ``rois``."""
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 self.spatial_scale)
        oversampled = align(features, rois)
        return max_pool2d(oversampled, kernel_size=2, stride=1)
43 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 |
/*
 * Forward declarations of the CPU workers defined at the bottom of this
 * file, so the tensor-level wrappers can call them.
 */

/* Fills top_data (one value per (roi, channel, ph, pw)) from bottom_data. */
void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data);

/*
 * Accumulates the gradients in top_diff into bottom_diff.  The output
 * parameter is named bottom_diff to match the definition (the prototype
 * previously called it top_data).
 */
void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* bottom_diff);
15 |
16 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
17 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
18 | {
19 | //Grab the input tensor
20 | float * data_flat = THFloatTensor_data(features);
21 | float * rois_flat = THFloatTensor_data(rois);
22 |
23 | float * output_flat = THFloatTensor_data(output);
24 |
25 | // Number of ROIs
26 | int num_rois = THFloatTensor_size(rois, 0);
27 | int size_rois = THFloatTensor_size(rois, 1);
28 | if (size_rois != 5)
29 | {
30 | return 0;
31 | }
32 |
33 | // data height
34 | int data_height = THFloatTensor_size(features, 2);
35 | // data width
36 | int data_width = THFloatTensor_size(features, 3);
37 | // Number of channels
38 | int num_channels = THFloatTensor_size(features, 1);
39 |
40 | // do ROIAlignForward
41 | ROIAlignForwardCpu(data_flat, spatial_scale, num_rois, data_height, data_width, num_channels,
42 | aligned_height, aligned_width, rois_flat, output_flat);
43 |
44 | return 1;
45 | }
46 |
47 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
48 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad)
49 | {
50 | //Grab the input tensor
51 | float * top_grad_flat = THFloatTensor_data(top_grad);
52 | float * rois_flat = THFloatTensor_data(rois);
53 |
54 | float * bottom_grad_flat = THFloatTensor_data(bottom_grad);
55 |
56 | // Number of ROIs
57 | int num_rois = THFloatTensor_size(rois, 0);
58 | int size_rois = THFloatTensor_size(rois, 1);
59 | if (size_rois != 5)
60 | {
61 | return 0;
62 | }
63 |
64 | // batch size
65 | // int batch_size = THFloatTensor_size(bottom_grad, 0);
66 | // data height
67 | int data_height = THFloatTensor_size(bottom_grad, 2);
68 | // data width
69 | int data_width = THFloatTensor_size(bottom_grad, 3);
70 | // Number of channels
71 | int num_channels = THFloatTensor_size(bottom_grad, 1);
72 |
73 | // do ROIAlignBackward
74 | ROIAlignBackwardCpu(top_grad_flat, spatial_scale, num_rois, data_height,
75 | data_width, num_channels, aligned_height, aligned_width, rois_flat, bottom_grad_flat);
76 |
77 | return 1;
78 | }
79 |
80 | void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
81 | const int height, const int width, const int channels,
82 | const int aligned_height, const int aligned_width, const float * bottom_rois,
83 | float* top_data)
84 | {
85 | const int output_size = num_rois * aligned_height * aligned_width * channels;
86 |
87 | int idx = 0;
88 | for (idx = 0; idx < output_size; ++idx)
89 | {
90 | // (n, c, ph, pw) is an element in the aligned output
91 | int pw = idx % aligned_width;
92 | int ph = (idx / aligned_width) % aligned_height;
93 | int c = (idx / aligned_width / aligned_height) % channels;
94 | int n = idx / aligned_width / aligned_height / channels;
95 |
96 | float roi_batch_ind = bottom_rois[n * 5 + 0];
97 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
98 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
99 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
100 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
101 |
102 | // Force malformed ROI to be 1x1
103 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
104 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
105 | float bin_size_h = roi_height / (aligned_height - 1.);
106 | float bin_size_w = roi_width / (aligned_width - 1.);
107 |
108 | float h = (float)(ph) * bin_size_h + roi_start_h;
109 | float w = (float)(pw) * bin_size_w + roi_start_w;
110 |
111 | int hstart = fminf(floor(h), height - 2);
112 | int wstart = fminf(floor(w), width - 2);
113 |
114 | int img_start = roi_batch_ind * channels * height * width;
115 |
116 | // bilinear interpolation
117 | if (h < 0 || h >= height || w < 0 || w >= width)
118 | {
119 | top_data[idx] = 0.;
120 | }
121 | else
122 | {
123 | float h_ratio = h - (float)(hstart);
124 | float w_ratio = w - (float)(wstart);
125 | int upleft = img_start + (c * height + hstart) * width + wstart;
126 | int upright = upleft + 1;
127 | int downleft = upleft + width;
128 | int downright = downleft + 1;
129 |
130 | top_data[idx] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
131 | + bottom_data[upright] * (1. - h_ratio) * w_ratio
132 | + bottom_data[downleft] * h_ratio * (1. - w_ratio)
133 | + bottom_data[downright] * h_ratio * w_ratio;
134 | }
135 | }
136 | }
137 |
138 | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
139 | const int height, const int width, const int channels,
140 | const int aligned_height, const int aligned_width, const float * bottom_rois,
141 | float* bottom_diff)
142 | {
143 | const int output_size = num_rois * aligned_height * aligned_width * channels;
144 |
145 | int idx = 0;
146 | for (idx = 0; idx < output_size; ++idx)
147 | {
148 | // (n, c, ph, pw) is an element in the aligned output
149 | int pw = idx % aligned_width;
150 | int ph = (idx / aligned_width) % aligned_height;
151 | int c = (idx / aligned_width / aligned_height) % channels;
152 | int n = idx / aligned_width / aligned_height / channels;
153 |
154 | float roi_batch_ind = bottom_rois[n * 5 + 0];
155 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
156 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
157 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
158 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
159 |
160 | // Force malformed ROI to be 1x1
161 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
162 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
163 | float bin_size_h = roi_height / (aligned_height - 1.);
164 | float bin_size_w = roi_width / (aligned_width - 1.);
165 |
166 | float h = (float)(ph) * bin_size_h + roi_start_h;
167 | float w = (float)(pw) * bin_size_w + roi_start_w;
168 |
169 | int hstart = fminf(floor(h), height - 2);
170 | int wstart = fminf(floor(w), width - 2);
171 |
172 | int img_start = roi_batch_ind * channels * height * width;
173 |
174 | // bilinear interpolation
175 | if (h < 0 || h >= height || w < 0 || w >= width)
176 | {
177 | float h_ratio = h - (float)(hstart);
178 | float w_ratio = w - (float)(wstart);
179 | int upleft = img_start + (c * height + hstart) * width + wstart;
180 | int upright = upleft + 1;
181 | int downleft = upleft + width;
182 | int downright = downleft + 1;
183 |
184 | bottom_diff[upleft] += top_diff[idx] * (1. - h_ratio) * (1. - w_ratio);
185 | bottom_diff[upright] += top_diff[idx] * (1. - h_ratio) * w_ratio;
186 | bottom_diff[downleft] += top_diff[idx] * h_ratio * (1. - w_ratio);
187 | bottom_diff[downright] += top_diff[idx] * h_ratio * w_ratio;
188 | }
189 | }
190 | }
191 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
3 |
4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "roi_align_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 |
16 | // Number of ROIs
17 | int num_rois = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois != 5)
20 | {
21 | return 0;
22 | }
23 |
24 | // data height
25 | int data_height = THCudaTensor_size(state, features, 2);
26 | // data width
27 | int data_width = THCudaTensor_size(state, features, 3);
28 | // Number of channels
29 | int num_channels = THCudaTensor_size(state, features, 1);
30 |
31 | cudaStream_t stream = THCState_getCurrentStream(state);
32 |
33 | ROIAlignForwardLaucher(
34 | data_flat, spatial_scale, num_rois, data_height,
35 | data_width, num_channels, aligned_height,
36 | aligned_width, rois_flat,
37 | output_flat, stream);
38 |
39 | return 1;
40 | }
41 |
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 | // Grab the input tensor
46 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 | float * rois_flat = THCudaTensor_data(state, rois);
48 |
49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 |
51 | // Number of ROIs
52 | int num_rois = THCudaTensor_size(state, rois, 0);
53 | int size_rois = THCudaTensor_size(state, rois, 1);
54 | if (size_rois != 5)
55 | {
56 | return 0;
57 | }
58 |
59 | // batch size
60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 | // data height
62 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 | // data width
64 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 | // Number of channels
66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 |
68 | cudaStream_t stream = THCState_getCurrentStream(state);
69 | ROIAlignBackwardLaucher(
70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 | data_width, num_channels, aligned_height,
72 | aligned_width, rois_flat,
73 | bottom_grad_flat, stream);
74 |
75 | return 1;
76 | }
77 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 |
4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | #include
6 | #include
7 | #include
8 | #include "roi_align_kernel.h"
9 |
// Loop header for 1-D kernels: `i` starts at the thread's global index and
// advances by the total number of launched threads until it reaches `n`.
// `n` is parenthesised so that an expression argument keeps its meaning.
#define CUDA_1D_KERNEL_LOOP(i, n)                                \
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
         i += blockDim.x * gridDim.x)
13 |
14 |
15 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
16 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
17 | CUDA_1D_KERNEL_LOOP(index, nthreads) {
18 | // (n, c, ph, pw) is an element in the aligned output
19 | // int n = index;
20 | // int pw = n % aligned_width;
21 | // n /= aligned_width;
22 | // int ph = n % aligned_height;
23 | // n /= aligned_height;
24 | // int c = n % channels;
25 | // n /= channels;
26 |
27 | int pw = index % aligned_width;
28 | int ph = (index / aligned_width) % aligned_height;
29 | int c = (index / aligned_width / aligned_height) % channels;
30 | int n = index / aligned_width / aligned_height / channels;
31 |
32 | // bottom_rois += n * 5;
33 | float roi_batch_ind = bottom_rois[n * 5 + 0];
34 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
35 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
36 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
37 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
38 |
39 | // Force malformed ROIs to be 1x1
40 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
41 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
42 | float bin_size_h = roi_height / (aligned_height - 1.);
43 | float bin_size_w = roi_width / (aligned_width - 1.);
44 |
45 | float h = (float)(ph) * bin_size_h + roi_start_h;
46 | float w = (float)(pw) * bin_size_w + roi_start_w;
47 |
48 | int hstart = fminf(floor(h), height - 2);
49 | int wstart = fminf(floor(w), width - 2);
50 |
51 | int img_start = roi_batch_ind * channels * height * width;
52 |
53 | // bilinear interpolation
54 | if (h < 0 || h >= height || w < 0 || w >= width) {
55 | top_data[index] = 0.;
56 | } else {
57 | float h_ratio = h - (float)(hstart);
58 | float w_ratio = w - (float)(wstart);
59 | int upleft = img_start + (c * height + hstart) * width + wstart;
60 | int upright = upleft + 1;
61 | int downleft = upleft + width;
62 | int downright = downleft + 1;
63 |
64 | top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
65 | + bottom_data[upright] * (1. - h_ratio) * w_ratio
66 | + bottom_data[downleft] * h_ratio * (1. - w_ratio)
67 | + bottom_data[downright] * h_ratio * w_ratio;
68 | }
69 | }
70 | }
71 |
72 |
73 | int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
74 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
75 | const int kThreadsPerBlock = 1024;
76 | const int output_size = num_rois * aligned_height * aligned_width * channels;
77 | cudaError_t err;
78 |
79 |
80 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
81 | output_size, bottom_data, spatial_scale, height, width, channels,
82 | aligned_height, aligned_width, bottom_rois, top_data);
83 |
84 | err = cudaGetLastError();
85 | if(cudaSuccess != err) {
86 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
87 | exit( -1 );
88 | }
89 |
90 | return 1;
91 | }
92 |
93 |
94 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
95 | const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
96 | CUDA_1D_KERNEL_LOOP(index, nthreads) {
97 |
98 | // (n, c, ph, pw) is an element in the aligned output
99 | int pw = index % aligned_width;
100 | int ph = (index / aligned_width) % aligned_height;
101 | int c = (index / aligned_width / aligned_height) % channels;
102 | int n = index / aligned_width / aligned_height / channels;
103 |
104 | float roi_batch_ind = bottom_rois[n * 5 + 0];
105 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
106 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
107 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
108 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
109 | /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */
110 | /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */
111 | /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */
112 | /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */
113 |
114 | // Force malformed ROIs to be 1x1
115 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
116 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
117 | float bin_size_h = roi_height / (aligned_height - 1.);
118 | float bin_size_w = roi_width / (aligned_width - 1.);
119 |
120 | float h = (float)(ph) * bin_size_h + roi_start_h;
121 | float w = (float)(pw) * bin_size_w + roi_start_w;
122 |
123 | int hstart = fminf(floor(h), height - 2);
124 | int wstart = fminf(floor(w), width - 2);
125 |
126 | int img_start = roi_batch_ind * channels * height * width;
127 |
128 | // bilinear interpolation
129 | if (!(h < 0 || h >= height || w < 0 || w >= width)) {
130 | float h_ratio = h - (float)(hstart);
131 | float w_ratio = w - (float)(wstart);
132 | int upleft = img_start + (c * height + hstart) * width + wstart;
133 | int upright = upleft + 1;
134 | int downleft = upleft + width;
135 | int downright = downleft + 1;
136 |
137 | atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
138 | atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
139 | atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
140 | atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
141 | }
142 | }
143 | }
144 |
145 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
146 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
147 | const int kThreadsPerBlock = 1024;
148 | const int output_size = num_rois * aligned_height * aligned_width * channels;
149 | cudaError_t err;
150 |
151 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
152 | output_size, top_diff, spatial_scale, height, width, channels,
153 | aligned_height, aligned_width, bottom_diff, bottom_rois);
154 |
155 | err = cudaGetLastError();
156 | if(cudaSuccess != err) {
157 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
158 | exit( -1 );
159 | }
160 |
161 | return 1;
162 | }
163 |
164 |
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 |
--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

#ifdef __cplusplus
extern "C" {
#endif

/* Device kernel: computes one aligned output value per loop iteration. */
__global__ void ROIAlignForward(
    const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

/* Host launcher for ROIAlignForward on the given stream; returns 1. */
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois,
    const int height, const int width, const int channels,
    const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data, cudaStream_t stream);

/* Device kernel: scatters one output gradient per loop iteration. */
__global__ void ROIAlignBackward(
    const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

/* Host launcher for ROIAlignBackward on the given stream; returns 1. */
int ROIAlignBackwardLaucher(
    const float* top_diff, const float spatial_scale, const int batch_size,
    const int num_rois, const int height, const int width, const int channels,
    const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
34 |
35 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_crop/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_crop/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._crop_resize import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
def _import_symbols(locals):
    """Expose every attribute of the compiled ``_lib`` through ``locals``.

    Callable attributes are wrapped with ``_wrap_function``; anything else
    is copied unchanged instead of being passed to the function wrapper,
    matching the other extension loaders in this package.  Each exported
    name is also appended to the module-level ``__all__``.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            # Non-callable attributes are exported as they are.
            locals[symbol] = fn
        __all__.append(symbol)
11 |
12 | _import_symbols(locals())
13 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_crop import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
def _import_symbols(locals):
    """Publish the contents of the compiled ``_lib`` into ``locals``.

    Functions are stored wrapped by ``_wrap_function``; other attributes are
    stored unchanged.  Every name is recorded in the module-level
    ``__all__``.
    """
    for name in dir(_lib):
        member = getattr(_lib, name)
        exported = _wrap_function(member, _ffi) if callable(member) else member
        locals[name] = exported
        __all__.append(name)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | #this_file = os.path.dirname(__file__)
7 |
# Build configuration for the RoICrop FFI extension.  The CPU sources are
# always compiled; the CUDA sources and the pre-compiled kernel object are
# added only when a GPU is available.
sources = ['src/roi_crop.c']
headers = ['src/roi_crop.h']
defines = []
extra_objects = []
with_cuda = False

this_file = os.path.dirname(os.path.realpath(__file__))

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/roi_crop_cuda.c']
    headers += ['src/roi_crop_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

    # Link the kernel object only for CUDA builds: it is produced by nvcc
    # and none of the CPU sources need it, so a CPU-only build must not
    # depend on the file being present.
    extra_objects = [os.path.join(this_file, 'src/roi_crop_cuda_kernel.cu.o')]

print(this_file)

ffi = create_extension(
    '_ext.roi_crop',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
37 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_crop/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/crop_resize.py:
--------------------------------------------------------------------------------
1 | # functions/add.py
2 | import torch
3 | from torch.autograd import Function
4 | from .._ext import roi_crop
5 | from cffi import FFI
6 | ffi = FFI()
7 |
class RoICropFunction(Function):
    """Autograd function that forwards to the compiled ``roi_crop`` samplers.

    The inputs and the device index are kept on the instance so that
    ``backward`` can reuse them.
    """

    def forward(self, input1, input2):
        """Sample ``input1`` at the locations described by ``input2``.

        The result has size ``(input2.size(0), input1.size(1),
        input2.size(1), input2.size(2))`` and is placed on the same device
        as ``input1``.
        """
        self.input1 = input1
        self.input2 = input2
        self.device_c = ffi.new("int *")

        out_size = (input2.size()[0], input1.size()[1],
                    input2.size()[1], input2.size()[2])
        output = torch.zeros(*out_size)

        # -1 marks "CPU"; otherwise remember the active CUDA device.
        on_gpu = input1.is_cuda
        self.device = torch.cuda.current_device() if on_gpu else -1
        self.device_c[0] = self.device

        if on_gpu:
            output = output.cuda(self.device)
            roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
        else:
            roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
        return output

    def backward(self, grad_output):
        """Return the gradients with respect to ``input1`` and ``input2``."""
        # Zero-filled buffers shaped like the inputs; the extension writes
        # into them in place.
        grad_input1 = torch.zeros(self.input1.size())
        grad_input2 = torch.zeros(self.input2.size())

        if grad_output.is_cuda:
            grad_input1 = grad_input1.cuda(self.device)
            grad_input2 = grad_input2.cuda(self.device)
            roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
        else:
            roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output)
        return grad_input1, grad_input2
38 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/gridgen.py:
--------------------------------------------------------------------------------
1 | # functions/gridgen.py
2 | import torch
3 | from torch.autograd import Function
4 | import numpy as np
5 |
6 |
class AffineGridGenFunction(Function):
    """Generate sampling grids from batched 2x3 affine matrices.

    A fixed ``(height, width, 3)`` base grid of homogeneous coordinates is
    built once: channel 0 varies along the height axis and channel 1 along the
    width axis, both stepping from -1 in increments of ``2 / size``; channel 2
    is all ones.  ``forward`` multiplies that grid by each affine matrix.

    Args:
        height: number of grid rows.
        width: number of grid columns.
        lr: stored on the instance; not used by this class.
    """

    def __init__(self, height, width, lr=1):
        super(AffineGridGenFunction, self).__init__()
        self.lr = lr
        self.height, self.width = height, width

        rows = np.arange(-1, 1, 2.0 / self.height)
        cols = np.arange(-1, 1, 2.0 / self.width)
        grid = np.ones([self.height, self.width, 3], dtype=np.float32)
        grid[:, :, 0] = rows[:, np.newaxis]   # constant along each row
        grid[:, :, 1] = cols[np.newaxis, :]   # constant along each column
        self.grid = torch.from_numpy(grid)

    def forward(self, input1):
        """Apply the affine matrices in ``input1`` to the base grid.

        Args:
            input1: tensor of shape ``(batch, 2, 3)``.

        Returns:
            Tensor of shape ``(batch, height, width, 2)``.
        """
        self.input1 = input1
        batch = input1.size(0)

        # Replicate the base grid per batch item in input1's dtype/device.
        # copy_() performs the conversion; the previous code called the
        # non-existent Tensor.astype() here and raised AttributeError.
        self.batchgrid = input1.new(batch, self.height, self.width, 3)
        self.batchgrid.copy_(self.grid.unsqueeze(0).expand_as(self.batchgrid))

        # A single batched matmul covers every item; no per-item loop needed.
        flat_grid = self.batchgrid.view(batch, self.height * self.width, 3)
        output = torch.bmm(flat_grid, torch.transpose(input1, 1, 2))
        return output.view(batch, self.height, self.width, 2)

    def backward(self, grad_output):
        """Return the gradient w.r.t. ``input1`` (shape ``(batch, 2, 3)``)."""
        grad_input1 = self.input1.new(self.input1.size()).zero_()
        grad_flat = grad_output.view(-1, self.height * self.width, 2)
        grid_flat = self.batchgrid.view(-1, self.height * self.width, 3)
        return torch.baddbmm(grad_input1, torch.transpose(grad_flat, 1, 2), grid_flat)
47 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/roi_crop.py:
--------------------------------------------------------------------------------
1 | # functions/roi_crop.py
2 | import torch
3 | from torch.autograd import Function
4 | from .._ext import roi_crop
5 | import pdb
6 |
class RoICropFunction(Function):
    """CUDA-only legacy autograd function around the roi_crop bilinear sampler."""

    def forward(self, input1, input2):
        """Sample ``input1`` at the grid ``input2`` using the CUDA kernel.

        Clones of both inputs are stored for ``backward``.  The output has
        shape ``(input2.size(0), input1.size(1), input2.size(1), input2.size(2))``
        and is allocated with ``input2``'s tensor type.
        """
        self.input1 = input1.clone()
        self.input2 = input2.clone()

        num_grids, grid_h, grid_w = input2.size()[:3]
        output = input2.new(num_grids, input1.size(1), grid_h, grid_w).zero_()

        out_device = output.get_device()
        assert out_device == input1.get_device(), "output and input1 must on the same device"
        assert out_device == input2.get_device(), "output and input2 must on the same device"

        roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
        return output

    def backward(self, grad_output):
        """Return gradients for ``input1`` and ``input2``, filled by the CUDA kernel."""
        saved_images, saved_grids = self.input1, self.input2
        grad_input1 = saved_images.new(saved_images.size()).zero_()
        grad_input2 = saved_grids.new(saved_grids.size()).zero_()
        roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(saved_images, saved_grids, grad_input1, grad_input2, grad_output)
        return grad_input1, grad_input2
22 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build the roi_crop extension: compile the CUDA kernel to an object file in
# src/, then run build.py, which links that object into the Python extension.

# Abort on the first failing step. Without this, a failed `cd` or `nvcc` would
# still run build.py against a missing or stale roi_crop_cuda_kernel.cu.o.
set -e

# NOTE(review): CUDA_PATH is defined but not referenced below; nvcc is taken
# from PATH. Kept in case other tooling reads it -- confirm.
CUDA_PATH=/usr/local/cuda/

cd src
echo "Compiling my_lib kernels by nvcc..."
# -arch=sm_52 pins the target GPU architecture; adjust for other GPUs.
nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52

cd ../
python build.py
11 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_crop/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/roi_crop.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_crop import RoICropFunction
3 |
4 | class _RoICrop(Module):
5 | def __init__(self, layout = 'BHWD'):
6 | super(_RoICrop, self).__init__()
7 | def forward(self, input1, input2):
8 | return RoICropFunction()(input1, input2)
9 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop.h:
--------------------------------------------------------------------------------
/*
 * CPU (THFloatTensor) entry points of the bilinear sampler used by roi_crop.
 * Only the prototypes are declared here; the implementations live elsewhere.
 * Each function returns an int status.
 */

/* Forward pass: sample `inputImages` at the locations in `grids`, writing `output`. */
int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);

/* Backward pass: given `gradOutput`, fill `gradInputImages` and `gradGrids`. */
int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput);



/* Same pair of operations for the "BCHW" variant (presumably channel-first
 * input layout, judging by the name -- confirm against the implementation). */
int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);

int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput);
12 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "roi_crop_cuda_kernel.h"
5 |
6 | #define real float
7 |
8 | // this symbol will be resolved automatically from PyTorch libs
9 | extern THCState *state;
10 |
11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
12 | // we assume BHWD format in inputImages
13 | // we assume BHW(YX) format on grids
14 |
/*
 * Forward pass of the bilinear sampler on CUDA tensors.
 *
 * Unpacks sizes, data pointers and strides of `inputImages`, `grids` and
 * `output` and forwards them to BilinearSamplerBHWD_updateOutput_cuda_kernel
 * on the current THC stream.
 *
 * Argument-order notes (the kernel's parameter names are oc/ow/oh/ob,
 * ic/ih/iw/ib, gsb/gsc/gsh/gsw, ...):
 *   - output sizes are passed as dim 1, dim 3, dim 2, dim 0;
 *   - input sizes are passed as dim 1, dim 2, dim 3, dim 0;
 *   - image/output strides are passed in dim order 0, 1, 2, 3;
 *   - grid strides are passed in dim order 0, 3, 1, 2.
 * NOTE(review): this means dim 1 is handed to the kernel as the channel
 * count, which does not match the "BHWD" wording used in the comments of
 * this file -- confirm the intended layout against the kernel.
 *
 * Returns 1; calls THError("aborting") when the kernel reports failure.
 */
int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){

  int success = 0;
  success = BilinearSamplerBHWD_updateOutput_cuda_kernel(THCudaTensor_size(state, output, 1),
                                               THCudaTensor_size(state, output, 3),
                                               THCudaTensor_size(state, output, 2),
                                               THCudaTensor_size(state, output, 0),
                                               THCudaTensor_size(state, inputImages, 1),
                                               THCudaTensor_size(state, inputImages, 2),
                                               THCudaTensor_size(state, inputImages, 3),
                                               THCudaTensor_size(state, inputImages, 0),
                                               THCudaTensor_data(state, inputImages),
                                               THCudaTensor_stride(state, inputImages, 0),
                                               THCudaTensor_stride(state, inputImages, 1),
                                               THCudaTensor_stride(state, inputImages, 2),
                                               THCudaTensor_stride(state, inputImages, 3),
                                               THCudaTensor_data(state, grids),
                                               THCudaTensor_stride(state, grids, 0),
                                               THCudaTensor_stride(state, grids, 3),
                                               THCudaTensor_stride(state, grids, 1),
                                               THCudaTensor_stride(state, grids, 2),
                                               THCudaTensor_data(state, output),
                                               THCudaTensor_stride(state, output, 0),
                                               THCudaTensor_stride(state, output, 1),
                                               THCudaTensor_stride(state, output, 2),
                                               THCudaTensor_stride(state, output, 3),
                                               THCState_getCurrentStream(state));

  // A zero status from the kernel launcher is treated as fatal.
  if (!success) {
    THError("aborting");
  }
  return 1;
}
53 |
/*
 * Backward pass of the bilinear sampler on CUDA tensors.
 *
 * Unpacks sizes, data pointers and strides of the five tensors and forwards
 * them to BilinearSamplerBHWD_updateGradInput_cuda_kernel on the current THC
 * stream.  `gradInputImages` and `gradGrids` are the buffers handed to the
 * kernel for the results; `gradOutput` is the incoming gradient.
 *
 * Argument-order notes (mirrors the forward wrapper):
 *   - gradOutput sizes are passed as dim 1, dim 3, dim 2, dim 0;
 *   - input sizes are passed as dim 1, dim 2, dim 3, dim 0;
 *   - strides of inputImages, gradInputImages and gradOutput are passed in
 *     dim order 0, 1, 2, 3;
 *   - strides of grids and gradGrids are passed in dim order 0, 3, 1, 2.
 *
 * Returns 1; calls THError("aborting") when the kernel reports failure.
 */
int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
                                        THCudaTensor *gradGrids, THCudaTensor *gradOutput)
{

  int success = 0;
  success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(THCudaTensor_size(state, gradOutput, 1),
                                                  THCudaTensor_size(state, gradOutput, 3),
                                                  THCudaTensor_size(state, gradOutput, 2),
                                                  THCudaTensor_size(state, gradOutput, 0),
                                                  THCudaTensor_size(state, inputImages, 1),
                                                  THCudaTensor_size(state, inputImages, 2),
                                                  THCudaTensor_size(state, inputImages, 3),
                                                  THCudaTensor_size(state, inputImages, 0),
                                                  THCudaTensor_data(state, inputImages),
                                                  THCudaTensor_stride(state, inputImages, 0),
                                                  THCudaTensor_stride(state, inputImages, 1),
                                                  THCudaTensor_stride(state, inputImages, 2),
                                                  THCudaTensor_stride(state, inputImages, 3),
                                                  THCudaTensor_data(state, grids),
                                                  THCudaTensor_stride(state, grids, 0),
                                                  THCudaTensor_stride(state, grids, 3),
                                                  THCudaTensor_stride(state, grids, 1),
                                                  THCudaTensor_stride(state, grids, 2),
                                                  THCudaTensor_data(state, gradInputImages),
                                                  THCudaTensor_stride(state, gradInputImages, 0),
                                                  THCudaTensor_stride(state, gradInputImages, 1),
                                                  THCudaTensor_stride(state, gradInputImages, 2),
                                                  THCudaTensor_stride(state, gradInputImages, 3),
                                                  THCudaTensor_data(state, gradGrids),
                                                  THCudaTensor_stride(state, gradGrids, 0),
                                                  THCudaTensor_stride(state, gradGrids, 3),
                                                  THCudaTensor_stride(state, gradGrids, 1),
                                                  THCudaTensor_stride(state, gradGrids, 2),
                                                  THCudaTensor_data(state, gradOutput),
                                                  THCudaTensor_stride(state, gradOutput, 0),
                                                  THCudaTensor_stride(state, gradOutput, 1),
                                                  THCudaTensor_stride(state, gradOutput, 2),
                                                  THCudaTensor_stride(state, gradOutput, 3),
                                                  THCState_getCurrentStream(state));

  // A zero status from the kernel launcher is treated as fatal.
  if (!success) {
    THError("aborting");
  }
  return 1;
}
106 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.h:
--------------------------------------------------------------------------------
// CUDA (THCudaTensor) entry points of the roi_crop bilinear sampler.
//
// Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
// we assume BHWD format in inputImages
// we assume BHW(YX) format on grids
// NOTE(review): the implementation in roi_crop_cuda.c passes dim 1 of the
// image/output tensors to the kernel as the channel count, so the layout
// notes above may be stale -- confirm.

// Forward pass: sample `inputImages` at `grids` into `output`.
int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output);

// Backward pass: given `gradOutput`, fill `gradInputImages` and `gradGrids`.
int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
                                        THCudaTensor *gradGrids, THCudaTensor *gradOutput);
9 |
--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.h:
--------------------------------------------------------------------------------
/*
 * C-linkage declarations of the CUDA kernel launchers used by roi_crop_cuda.c.
 * The inline comments record which tensor dimension / stride the wrapper in
 * roi_crop_cuda.c passes for each parameter.
 */
#ifdef __cplusplus
extern "C" {
#endif


/* Forward launcher. The caller treats a zero return value as failure. */
int BilinearSamplerBHWD_updateOutput_cuda_kernel(/* size(output, 1) */int oc,
                                                 /* size(output, 3) */int ow,
                                                 /* size(output, 2) */int oh,
                                                 /* size(output, 0) */int ob,
                                                 /* size(inputImages, 1) */int ic,
                                                 /* size(inputImages, 2) */int ih,
                                                 /* size(inputImages, 3) */int iw,
                                                 /* size(inputImages, 0) */int ib,
                                                 /* inputImages data + strides of dims 0,1,2,3 */float *inputImages, int isb, int isc, int ish, int isw,
                                                 /* grids data + strides of dims 0,3,1,2 */float *grids, int gsb, int gsc, int gsh, int gsw,
                                                 /* output data + strides of dims 0,1,2,3 */float *output, int osb, int osc, int osh, int osw,
                                                 /* THCState_getCurrentStream(state) */cudaStream_t stream);

/* Backward launcher. The caller treats a zero return value as failure. */
int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/* size(gradOutput, 1) */int goc,
                                                    /* size(gradOutput, 3) */int gow,
                                                    /* size(gradOutput, 2) */int goh,
                                                    /* size(gradOutput, 0) */int gob,
                                                    /* size(inputImages, 1) */int ic,
                                                    /* size(inputImages, 2) */int ih,
                                                    /* size(inputImages, 3) */int iw,
                                                    /* size(inputImages, 0) */int ib,
                                                    /* inputImages data + strides of dims 0,1,2,3 */float *inputImages, int isb, int isc, int ish, int isw,
                                                    /* grids data + strides of dims 0,3,1,2 */float *grids, int gsb, int gsc, int gsh, int gsw,
                                                    /* gradInputImages data + strides of dims 0,1,2,3 */float *gradInputImages, int gisb, int gisc, int gish, int gisw,
                                                    /* gradGrids data + strides of dims 0,3,1,2 */float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw,
                                                    /* gradOutput data + strides of dims 0,1,2,3 */float *gradOutput, int gosb, int gosc, int gosh, int gosw,
                                                    /* THCState_getCurrentStream(state) */cudaStream_t stream);


#ifdef __cplusplus
}
#endif
38 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_pooling/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_pooling/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_pooling import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
def _import_symbols(locals):
    """Copy every attribute of the compiled ``_lib`` into the ``locals`` mapping.

    Callable attributes are wrapped with ``_wrap_function`` (bound to ``_ffi``);
    everything else is copied unchanged.  Each name is also appended to the
    module-level ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/build.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 |
# Build description for the roi_pooling FFI extension.  The CPU source is
# always compiled; the CUDA wrapper and the pre-built kernel object are added
# only when a CUDA device is available at build time.

this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)

sources = ['src/roi_pooling.c']
headers = ['src/roi_pooling.h']
defines = []
extra_objects = []
with_cuda = torch.cuda.is_available()

if with_cuda:
    print('Including CUDA code.')
    sources.append('src/roi_pooling_cuda.c')
    headers.append('src/roi_pooling_cuda.h')
    defines.append(('WITH_CUDA', None))
    # Object file holding the compiled CUDA kernel, resolved next to this script.
    extra_objects = [os.path.join(this_file, 'src/roi_pooling.cu.o')]

ffi = create_extension(
    '_ext.roi_pooling',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
37 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_pooling/functions/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_pooling
4 | import pdb
5 |
class RoIPoolFunction(Function):
    """Legacy (instance-style) autograd function for RoI max pooling.

    Args:
        pooled_height: number of output rows per RoI.
        pooled_width: number of output columns per RoI.
        spatial_scale: factor applied to RoI coordinates by the C/CUDA code.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale):
        ctx.pooled_width = pooled_width
        ctx.pooled_height = pooled_height
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = None

    def forward(ctx, features, rois):
        """Pool ``features`` (N, C, H, W) over each row of ``rois``.

        Returns a tensor of shape ``(num_rois, C, pooled_height, pooled_width)``.
        The feature size, the RoIs and (on CUDA) the argmax indices are kept
        on ``ctx`` for ``backward``.
        """
        ctx.feature_size = features.size()
        num_channels = ctx.feature_size[1]
        num_rois = rois.size(0)
        output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
        ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
        ctx.rois = rois
        if not features.is_cuda:
            # The CPU kernel reads the raw buffer as dense (N, H, W, C) data.
            # permute() alone only changes strides, so the buffer must be
            # materialised in that order with contiguous(); otherwise the
            # kernel would index NCHW memory with NHWC arithmetic.
            _features = features.permute(0, 2, 3, 1).contiguous()
            roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                            _features, rois, output)
        else:
            roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                                 features, rois, output, ctx.argmax)

        return output

    def backward(ctx, grad_output):
        """Return ``(grad_features, None)``; only the CUDA path is implemented."""
        assert(ctx.feature_size is not None and grad_output.is_cuda)
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()

        roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                              grad_output, ctx.rois, grad_input, ctx.argmax)

        # RoIs receive no gradient.
        return grad_input, None
39 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/roi_pooling/modules/__init__.py
--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_pool import RoIPoolFunction
3 |
4 |
5 | class _RoIPooling(Module):
6 | def __init__(self, pooled_height, pooled_width, spatial_scale):
7 | super(_RoIPooling, self).__init__()
8 |
9 | self.pooled_width = int(pooled_width)
10 | self.pooled_height = int(pooled_height)
11 | self.spatial_scale = float(spatial_scale)
12 |
13 | def forward(self, features, rois):
14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
15 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
6 | {
7 | // Grab the input tensor
8 | float * data_flat = THFloatTensor_data(features);
9 | float * rois_flat = THFloatTensor_data(rois);
10 |
11 | float * output_flat = THFloatTensor_data(output);
12 |
13 | // Number of ROIs
14 | int num_rois = THFloatTensor_size(rois, 0);
15 | int size_rois = THFloatTensor_size(rois, 1);
16 | // batch size
17 | int batch_size = THFloatTensor_size(features, 0);
18 | if(batch_size != 1)
19 | {
20 | return 0;
21 | }
22 | // data height
23 | int data_height = THFloatTensor_size(features, 1);
24 | // data width
25 | int data_width = THFloatTensor_size(features, 2);
26 | // Number of channels
27 | int num_channels = THFloatTensor_size(features, 3);
28 |
29 | // Set all element of the output tensor to -inf.
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1);
31 |
32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
33 | int index_roi = 0;
34 | int index_output = 0;
35 | int n;
36 | for (n = 0; n < num_rois; ++n)
37 | {
38 | int roi_batch_ind = rois_flat[index_roi + 0];
39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
43 | // CHECK_GE(roi_batch_ind, 0);
44 | // CHECK_LT(roi_batch_ind, batch_size);
45 |
46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height);
49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width);
50 |
51 | int index_data = roi_batch_ind * data_height * data_width * num_channels;
52 | const int output_area = pooled_width * pooled_height;
53 |
54 | int c, ph, pw;
55 | for (ph = 0; ph < pooled_height; ++ph)
56 | {
57 | for (pw = 0; pw < pooled_width; ++pw)
58 | {
59 | int hstart = (floor((float)(ph) * bin_size_h));
60 | int wstart = (floor((float)(pw) * bin_size_w));
61 | int hend = (ceil((float)(ph + 1) * bin_size_h));
62 | int wend = (ceil((float)(pw + 1) * bin_size_w));
63 |
64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
68 |
69 | const int pool_index = index_output + (ph * pooled_width + pw);
70 | int is_empty = (hend <= hstart) || (wend <= wstart);
71 | if (is_empty)
72 | {
73 | for (c = 0; c < num_channels * output_area; c += output_area)
74 | {
75 | output_flat[pool_index + c] = 0;
76 | }
77 | }
78 | else
79 | {
80 | int h, w, c;
81 | for (h = hstart; h < hend; ++h)
82 | {
83 | for (w = wstart; w < wend; ++w)
84 | {
85 | for (c = 0; c < num_channels; ++c)
86 | {
87 | const int index = (h * data_width + w) * num_channels + c;
88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
89 | {
90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index];
91 | }
92 | }
93 | }
94 | }
95 | }
96 | }
97 | }
98 |
99 | // Increment ROI index
100 | index_roi += size_rois;
101 | index_output += pooled_height * pooled_width * num_channels;
102 | }
103 | return 1;
104 | }
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
/* CPU RoI max-pooling forward pass; returns 1 on success, 0 when the input is
 * rejected.  See roi_pooling.c for the expected tensor layouts. */
int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
                        THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "roi_pooling_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 |
17 | // Number of ROIs
18 | int num_rois = THCudaTensor_size(state, rois, 0);
19 | int size_rois = THCudaTensor_size(state, rois, 1);
20 | if (size_rois != 5)
21 | {
22 | return 0;
23 | }
24 |
25 | // batch size
26 | // int batch_size = THCudaTensor_size(state, features, 0);
27 | // if (batch_size != 1)
28 | // {
29 | // return 0;
30 | // }
31 | // data height
32 | int data_height = THCudaTensor_size(state, features, 2);
33 | // data width
34 | int data_width = THCudaTensor_size(state, features, 3);
35 | // Number of channels
36 | int num_channels = THCudaTensor_size(state, features, 1);
37 |
38 | cudaStream_t stream = THCState_getCurrentStream(state);
39 |
40 | ROIPoolForwardLaucher(
41 | data_flat, spatial_scale, num_rois, data_height,
42 | data_width, num_channels, pooled_height,
43 | pooled_width, rois_flat,
44 | output_flat, argmax_flat, stream);
45 |
46 | return 1;
47 | }
48 |
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 | // Grab the input tensor
53 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 | float * rois_flat = THCudaTensor_data(state, rois);
55 |
56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 |
59 | // Number of ROIs
60 | int num_rois = THCudaTensor_size(state, rois, 0);
61 | int size_rois = THCudaTensor_size(state, rois, 1);
62 | if (size_rois != 5)
63 | {
64 | return 0;
65 | }
66 |
67 | // batch size
68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 | // if (batch_size != 1)
70 | // {
71 | // return 0;
72 | // }
73 | // data height
74 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 | // data width
76 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 | // Number of channels
78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 |
80 | cudaStream_t stream = THCState_getCurrentStream(state);
81 | ROIPoolBackwardLaucher(
82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 | data_width, num_channels, pooled_height,
84 | pooled_width, rois_flat,
85 | bottom_grad_flat, argmax_flat, stream);
86 |
87 | return 1;
88 | }
89 |
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
/* CUDA RoI max-pooling entry points.  Both return 0 when `rois` does not have
 * exactly 5 columns and 1 otherwise (see roi_pooling_cuda.c). */

/* Forward pass: `features`, `rois`, `output` and `argmax` are passed on to the
 * CUDA launcher. */
int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);

/* Backward pass: `top_grad`, `rois`, `bottom_grad` and the forward pass's
 * `argmax` are passed on to the CUDA launcher. */
int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);
--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
/*
 * C-linkage declarations of the CUDA launchers called from roi_pooling_cuda.c.
 * ("Laucher" is the spelling used by the definitions and all call sites.)
 */
#ifndef _ROI_POOLING_KERNEL
#define _ROI_POOLING_KERNEL

#ifdef __cplusplus
extern "C" {
#endif

/* Forward launcher: called with raw device pointers for the features
 * (`bottom_data`), RoIs (`bottom_rois`), pooled output (`top_data`) and
 * argmax buffer, on `stream`. */
int ROIPoolForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* top_data, int* argmax_data, cudaStream_t stream);


/* Backward launcher: called with the incoming gradient (`top_diff`), the RoIs,
 * the gradient buffer to fill (`bottom_diff`) and the forward pass's argmax
 * buffer, on `stream`. */
int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* bottom_diff, const int* argmax_data, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
25 |
26 |
--------------------------------------------------------------------------------
/lib/model/rpn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/rpn/__init__.py
--------------------------------------------------------------------------------
/lib/model/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | # --------------------------------------------------------
3 | # Faster R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick and Sean Bell
7 | # --------------------------------------------------------
8 |
9 | import numpy as np
10 | import pdb
11 |
12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
13 | #
14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
15 | # >> anchors
16 | #
17 | # anchors =
18 | #
19 | # -83 -39 100 56
20 | # -175 -87 192 104
21 | # -359 -183 376 200
22 | # -55 -55 72 72
23 | # -119 -119 136 136
24 | # -247 -247 264 264
25 | # -35 -79 52 96
26 | # -79 -167 96 184
27 | # -167 -343 184 360
28 |
29 | #array([[ -83., -39., 100., 56.],
30 | # [-175., -87., 192., 104.],
31 | # [-359., -183., 376., 200.],
32 | # [ -55., -55., 72., 72.],
33 | # [-119., -119., 136., 136.],
34 | # [-247., -247., 264., 264.],
35 | # [ -35., -79., 52., 96.],
36 | # [ -79., -167., 96., 184.],
37 | # [-167., -343., 184., 360.]])
38 |
# Compatibility shim: make the name `xrange` available on Python 3, where it
# no longer exists, by aliasing it to `range`.
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3
43 |
44 |
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Build the anchor (reference) windows for every (aspect ratio, scale)
    pair, starting from the reference window (0, 0, base_size - 1,
    base_size - 1).

    Returns an array with one (x1, y1, x2, y2) row per anchor, grouped by
    ratio and, within each ratio, ordered by scale.
    """

    last = base_size - 1
    reference = np.array([0, 0, last, last])
    per_ratio = _ratio_enum(reference, ratios)
    # Expand each ratio-specific window over all scales and stack the results.
    scaled_groups = [_scale_enum(window, scales) for window in per_ratio]
    return np.vstack(scaled_groups)
57 |
58 | def _whctrs(anchor):
59 | """
60 | Return width, height, x center, and y center for an anchor (window).
61 | """
62 |
63 | w = anchor[2] - anchor[0] + 1
64 | h = anchor[3] - anchor[1] + 1
65 | x_ctr = anchor[0] + 0.5 * (w - 1)
66 | y_ctr = anchor[1] + 0.5 * (h - 1)
67 | return w, h, x_ctr, y_ctr
68 |
69 | def _mkanchors(ws, hs, x_ctr, y_ctr):
70 | """
71 | Given a vector of widths (ws) and heights (hs) around a center
72 | (x_ctr, y_ctr), output a set of anchors (windows).
73 | """
74 |
75 | ws = ws[:, np.newaxis]
76 | hs = hs[:, np.newaxis]
77 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
78 | y_ctr - 0.5 * (hs - 1),
79 | x_ctr + 0.5 * (ws - 1),
80 | y_ctr + 0.5 * (hs - 1)))
81 | return anchors
82 |
def _ratio_enum(anchor, ratios):
    """
    Produce one anchor per aspect ratio, each centred on the given anchor.
    Widths are round(sqrt(area / ratio)) and heights round(width * ratio),
    where area is the area of the given anchor.
    """

    base_w, base_h, x_ctr, y_ctr = _whctrs(anchor)
    area = base_w * base_h
    ws = np.round(np.sqrt(area / ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)
95 |
def _scale_enum(anchor, scales):
    """
    Produce one anchor per scale by multiplying the given anchor's width and
    height by that scale while keeping its centre fixed.
    """

    base_w, base_h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(base_w * scales, base_h * scales, x_ctr, y_ctr)
106 |
if __name__ == '__main__':
    # Manual smoke test: time anchor generation and print the result.
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    # The interactive shell is a convenience only; do not fail the smoke
    # test on machines where IPython is not installed.
    try:
        from IPython import embed
    except ImportError:
        print('IPython is not installed; skipping interactive shell.')
    else:
        embed()
114 |
--------------------------------------------------------------------------------
/lib/model/rpn/proposal_layer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | # --------------------------------------------------------
3 | # Faster R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick and Sean Bell
7 | # --------------------------------------------------------
8 | # --------------------------------------------------------
9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu
10 | # --------------------------------------------------------
11 |
12 | import torch
13 | import torch.nn as nn
14 | import numpy as np
15 | import math
16 | import yaml
17 | from model.utils.config import cfg
18 | from .generate_anchors import generate_anchors
19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch
20 | from model.nms.nms_wrapper import nms
21 |
22 | import pdb
23 |
24 | DEBUG = False
25 |
26 | class _ProposalLayer(nn.Module):
27 | """
28 | Outputs object detection proposals by applying estimated bounding-box
29 | transformations to a set of regular boxes (called "anchors").
30 | """
31 |
32 | def __init__(self, feat_stride, scales, ratios):
33 | super(_ProposalLayer, self).__init__()
34 |
35 | self._feat_stride = feat_stride
36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales),
37 | ratios=np.array(ratios))).float()
38 | self._num_anchors = self._anchors.size(0)
39 |
40 | # rois blob: holds R regions of interest, each is a 5-tuple
41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a
42 | # rectangle (x1, y1, x2, y2)
43 | # top[0].reshape(1, 5)
44 | #
45 | # # scores blob: holds scores for R regions of interest
46 | # if len(top) > 1:
47 | # top[1].reshape(1, 1, 1, 1)
48 |
49 | def forward(self, input,target=False):
50 |
51 | # Algorithm:
52 | #
53 | # for each (H, W) location i
54 | # generate A anchor boxes centered on cell i
55 | # apply predicted bbox deltas at cell i to each of the A anchors
56 | # clip predicted boxes to image
57 | # remove predicted boxes with either height or width < threshold
58 | # sort all (proposal, score) pairs by score from highest to lowest
59 | # take top pre_nms_topN proposals before NMS
60 | # apply NMS with threshold 0.7 to remaining proposals
61 | # take after_nms_topN proposals after NMS
62 | # return the top proposals (-> RoIs top, scores top)
63 |
64 |
65 | # the first set of _num_anchors channels are bg probs
66 | # the second set are the fg probs
67 | scores = input[0][:, self._num_anchors:, :, :]
68 | bbox_deltas = input[1]
69 | im_info = input[2]
70 | cfg_key = input[3]
71 |
72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
74 | if target:
75 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N_TARGET
76 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
77 | min_size = cfg[cfg_key].RPN_MIN_SIZE
78 |
79 | batch_size = bbox_deltas.size(0)
80 |
81 | feat_height, feat_width = scores.size(2), scores.size(3)
82 | shift_x = np.arange(0, feat_width) * self._feat_stride
83 | shift_y = np.arange(0, feat_height) * self._feat_stride
84 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
85 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
86 | shift_x.ravel(), shift_y.ravel())).transpose())
87 | shifts = shifts.contiguous().type_as(scores).float()
88 |
89 | A = self._num_anchors
90 | K = shifts.size(0)
91 |
92 | self._anchors = self._anchors.type_as(scores)
93 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
94 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
95 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)
96 |
97 | # Transpose and reshape predicted bbox transformations to get them
98 | # into the same order as the anchors:
99 |
100 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
101 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
102 |
103 | # Same story for the scores:
104 | scores = scores.permute(0, 2, 3, 1).contiguous()
105 | scores = scores.view(batch_size, -1)
106 |
107 | # Convert anchors into proposals via bbox transformations
108 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
109 |
110 | # 2. clip predicted boxes to image
111 | proposals = clip_boxes(proposals, im_info, batch_size)
112 | # proposals = clip_boxes_batch(proposals, im_info, batch_size)
113 |
114 | # assign the score to 0 if it's non keep.
115 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])
116 |
117 | # trim keep index to make it euqal over batch
118 | # keep_idx = torch.cat(tuple(keep_idx), 0)
119 |
120 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
121 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
122 |
123 | # _, order = torch.sort(scores_keep, 1, True)
124 |
125 | scores_keep = scores
126 | proposals_keep = proposals
127 | _, order = torch.sort(scores_keep, 1, True)
128 |
129 | output = scores.new(batch_size, post_nms_topN, 5).zero_()
130 | for i in range(batch_size):
131 | # # 3. remove predicted boxes with either height or width < threshold
132 | # # (NOTE: convert min_size to input image scale stored in im_info[2])
133 | proposals_single = proposals_keep[i]
134 | scores_single = scores_keep[i]
135 |
136 | # # 4. sort all (proposal, score) pairs by score from highest to lowest
137 | # # 5. take top pre_nms_topN (e.g. 6000)
138 | order_single = order[i]
139 |
140 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
141 | order_single = order_single[:pre_nms_topN]
142 |
143 | proposals_single = proposals_single[order_single, :]
144 | scores_single = scores_single[order_single].view(-1,1)
145 |
146 | # 6. apply nms (e.g. threshold = 0.7)
147 | # 7. take after_nms_topN (e.g. 300)
148 | # 8. return the top proposals (-> RoIs top)
149 |
150 | keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
151 | keep_idx_i = keep_idx_i.long().view(-1)
152 |
153 | if post_nms_topN > 0:
154 | keep_idx_i = keep_idx_i[:post_nms_topN]
155 | proposals_single = proposals_single[keep_idx_i, :]
156 | scores_single = scores_single[keep_idx_i, :]
157 |
158 | # padding 0 at the end.
159 | num_proposal = proposals_single.size(0)
160 | output[i,:,0] = i
161 | output[i,:num_proposal,1:] = proposals_single
162 |
163 | return output
164 |
165 | def backward(self, top, propagate_down, bottom):
166 | """This layer does not propagate gradients."""
167 | pass
168 |
169 | def reshape(self, bottom, top):
170 | """Reshaping happens during the call to forward."""
171 | pass
172 |
173 | def _filter_boxes(self, boxes, min_size):
174 | """Remove all boxes with any side smaller than min_size."""
175 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
176 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
177 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs)))
178 | return keep
179 |
--------------------------------------------------------------------------------
/lib/model/rpn/rpn.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 |
7 | from model.utils.config import cfg
8 | from .proposal_layer import _ProposalLayer
9 | from .anchor_target_layer import _AnchorTargetLayer
10 | from model.utils.net_utils import _smooth_l1_loss
11 |
12 | import numpy as np
13 | import math
14 | import pdb
15 | import time
16 |
17 | class _RPN(nn.Module):
18 | """ region proposal network """
19 | def __init__(self, din):
20 | super(_RPN, self).__init__()
21 |
22 | self.din = din # get depth of input feature map, e.g., 512
23 | self.anchor_scales = cfg.ANCHOR_SCALES
24 | self.anchor_ratios = cfg.ANCHOR_RATIOS
25 | self.feat_stride = cfg.FEAT_STRIDE[0]
26 |
27 | # define the convrelu layers processing input feature map
28 | self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)
29 |
30 | # define bg/fg classifcation score layer
31 | self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors)
32 | self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)
33 |
34 | # define anchor box offset prediction layer
35 | self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors)
36 | self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)
37 |
38 | # define proposal layer
39 | self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
40 |
41 | # define anchor target layer
42 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
43 |
44 | self.rpn_loss_cls = 0
45 | self.rpn_loss_box = 0
46 |
47 | @staticmethod
48 | def reshape(x, d):
49 | input_shape = x.size()
50 | x = x.view(
51 | input_shape[0],
52 | int(d),
53 | int(float(input_shape[1] * input_shape[2]) / float(d)),
54 | input_shape[3]
55 | )
56 | return x
57 |
58 | def forward(self, base_feat, im_info, gt_boxes, num_boxes,target=False):
59 |
60 | batch_size = base_feat.size(0)
61 |
62 | # return feature map after convrelu layer
63 | rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
64 | # get rpn classification score
65 | rpn_cls_score = self.RPN_cls_score(rpn_conv1)
66 |
67 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
68 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
69 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
70 |
71 | # get rpn offsets to the anchor boxes
72 | rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)
73 |
74 | # proposal layer
75 | cfg_key = 'TRAIN' if self.training else 'TEST'
76 |
77 | rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
78 | im_info, cfg_key),target=target)
79 |
80 | self.rpn_loss_cls = 0
81 | self.rpn_loss_box = 0
82 |
83 | # generating training labels and build the rpn loss
84 | if self.training:
85 | assert gt_boxes is not None
86 |
87 | rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
88 |
89 | # compute classification loss
90 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
91 | rpn_label = rpn_data[0].view(batch_size, -1)
92 |
93 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
94 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
95 | rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
96 | rpn_label = Variable(rpn_label.long())
97 | self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
98 | fg_cnt = torch.sum(rpn_label.data.ne(0))
99 |
100 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
101 |
102 | # compute bbox regression loss
103 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
104 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
105 | rpn_bbox_targets = Variable(rpn_bbox_targets)
106 |
107 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
108 | rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])
109 |
110 | return rois, self.rpn_loss_cls, self.rpn_loss_box
111 |
--------------------------------------------------------------------------------
/lib/model/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/lib/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VisionLearningGroup/DA_Detection/730eaca8528d22ed3aa6b4dbc1965828a697cf9a/lib/model/utils/__init__.py
--------------------------------------------------------------------------------
/lib/model/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
# The `np.float` alias was removed from NumPy (1.24), so name the 64-bit
# float type explicitly; this is the same dtype the alias resolved to.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t
14 |
def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes,
                  np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-callable entry point; delegates to bbox_overlaps_c."""
    overlaps = bbox_overlaps_c(boxes, query_boxes)
    return overlaps
18 |
cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Intersection-over-union between every box and every query box.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the query box (inclusive pixel convention, hence the +1).
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Intersection width; non-positive means no horizontal overlap.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # Intersection height; only computed when widths overlap.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # Union = area(box) + area(query) - intersection.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
60 |
61 |
def bbox_intersections(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-callable entry point; delegates to bbox_intersections_c."""
    intersec = bbox_intersections_c(boxes, query_boxes)
    return intersec
66 |
67 |
cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    For each query box, the fraction of its area covered by each box.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    intersec: (N, K) ndarray of intersection area divided by query box area
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef unsigned int k, n
    for k in range(K):
        # Area of the query box (inclusive pixel convention, hence the +1).
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Intersection width; non-positive means no horizontal overlap.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # Intersection height; only computed when widths overlap.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    intersec[n, k] = iw * ih / box_area
    return intersec
--------------------------------------------------------------------------------
/lib/model/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | # from scipy.misc import imread, imresize
12 | import cv2
13 |
14 | try:
15 | xrange # Python 2
16 | except NameError:
17 | xrange = range # Python 3
18 |
19 |
def im_list_to_blob(ims):
    """Stack a list of images into one zero-padded float32 batch array.

    Images are assumed to be already prepared (means subtracted, BGR
    order, ...). Each image is copied into the top-left corner of its
    slot; the slot size is the per-axis maximum over all images and the
    channel axis is fixed at 3.
    """
    largest = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), largest[0], largest[1], 3),
                    dtype=np.float32)
    for index, image in enumerate(ims):
        rows, cols = image.shape[0], image.shape[1]
        blob[index, 0:rows, 0:cols, :] = image

    return blob
34 |
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean subtract and scale an image for use in a blob.

    Args:
        im: image array; its first two axes are treated as (rows, cols).
        pixel_means: value(s) subtracted from the image, broadcast by NumPy.
        target_size: desired length of the image's shorter side.
        max_size: accepted for interface compatibility; the cap on the
            longer side is not applied by this implementation.

    Returns:
        (resized float32 image, scale factor applied to both axes).
    """
    # Always work on a private copy: with copy=False a float32 input would
    # be returned as-is and the in-place subtraction below would silently
    # modify the caller's array.
    im = im.astype(np.float32, copy=True)
    im -= pixel_means
    im_size_min = np.min(im.shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    return im, im_scale
53 |
--------------------------------------------------------------------------------
/lib/pycocotools/UPSTREAM_REV:
--------------------------------------------------------------------------------
1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/lib/pycocotools/license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 |
24 | The views and conclusions contained in the software and documentation are those
25 | of the authors and should not be interpreted as representing official policies,
26 | either expressed or implied, of the FreeBSD Project.
27 |
--------------------------------------------------------------------------------
/lib/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | from . import _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | encode = _mask.encode
77 | decode = _mask.decode
78 | iou = _mask.iou
79 | merge = _mask.merge
80 | area = _mask.area
81 | toBbox = _mask.toBbox
82 | frPyObjects = _mask.frPyObjects
--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.c:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include
9 | #include
10 |
11 | uint umin( uint a, uint b ) { return (ab) ? a : b; }
13 |
14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
16 | if(cnts) for(siz j=0; jcnts[j]=cnts[j];
17 | }
18 |
19 | void rleFree( RLE *R ) {
20 | free(R->cnts); R->cnts=0;
21 | }
22 |
23 | void rlesInit( RLE **R, siz n ) {
24 | *R = (RLE*) malloc(sizeof(RLE)*n);
25 | for(siz i=0; i0 ) {
61 | c=umin(ca,cb); cc+=c; ct=0;
62 | ca-=c; if(!ca && a0) {
83 | crowd=iscrowd!=NULL && iscrowd[g];
84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb;
86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
88 | while( ct>0 ) {
89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
90 | ca-=c; if(!ca && ad?1:c=dy && xs>xe) || (dxye);
151 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
152 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
153 | if(dx>=dy) for( int d=0; d<=dx; d++ ) {
154 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
155 | } else for( int d=0; d<=dy; d++ ) {
156 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
157 | }
158 | }
159 | // get points along y-boundary and downsample
160 | free(x); free(y); k=m; m=0; double xd, yd;
161 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
162 | for( j=1; jw-1 ) continue;
165 | yd=(double)(v[j]h) yd=h; yd=ceil(yd);
167 | x[m]=(int) xd; y[m]=(int) yd; m++;
168 | }
169 | // compute rle encoding given y-boundary points
170 | k=m; a=malloc(sizeof(uint)*(k+1));
171 | for( j=0; j0) b[m++]=a[j++]; else {
177 | j++; if(jm, p=0; long x; bool more;
184 | char *s=malloc(sizeof(char)*m*6);
185 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
187 | while( more ) {
188 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
189 | if(more) c |= 0x20; c+=48; s[p++]=c;
190 | }
191 | }
192 | s[p]=0; return s;
193 | }
194 |
195 | void rleFrString( RLE *R, char *s, siz h, siz w ) {
196 | siz m=0, p=0, k; long x; bool more; uint *cnts;
197 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
198 | while( s[p] ) {
199 | x=0; k=0; more=1;
200 | while( more ) {
201 | char c=s[p]-48; x |= (c & 0x1f) << 5*k;
202 | more = c & 0x20; p++; k++;
203 | if(!more && (c & 0x10)) x |= -1 << 5*k;
204 | }
205 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
206 | }
207 | rleInit(R,h,w,m,cnts); free(cnts);
208 | }
209 |
--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
#include <stdbool.h>  /* bool is used by the prototypes in this header */

/* Short aliases used throughout the mask API. */
typedef unsigned int uint;
typedef unsigned long siz;
typedef unsigned char byte;
/* Bounding boxes are passed as a flat array of doubles. */
typedef double* BB;
/* Run-length encoded mask: height, width, number of counts, counts. */
typedef struct { siz h, w, m; uint *cnts; } RLE;
15 |
16 | // Initialize/destroy RLE.
17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
18 | void rleFree( RLE *R );
19 |
20 | // Initialize/destroy RLE array.
21 | void rlesInit( RLE **R, siz n );
22 | void rlesFree( RLE **R, siz n );
23 |
24 | // Encode binary masks using RLE.
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
26 |
27 | // Decode binary masks encoded via RLE.
28 | void rleDecode( const RLE *R, byte *mask, siz n );
29 |
30 | // Compute union or intersection of encoded masks.
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );
32 |
33 | // Compute area of encoded masks.
34 | void rleArea( const RLE *R, siz n, uint *a );
35 |
36 | // Compute intersection over union between masks.
37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
38 |
39 | // Compute intersection over union between bounding boxes.
40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
41 |
42 | // Get bounding boxes surrounding encoded masks.
43 | void rleToBbox( const RLE *R, BB bb, siz n );
44 |
45 | // Convert bounding boxes to encoded masks.
46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
47 |
48 | // Convert polygon to encoded mask.
49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
50 |
51 | // Get compressed string representation of encoded mask.
52 | char* rleToString( const RLE *R );
53 |
54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w );
56 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Xinlei Chen
6 | # --------------------------------------------------------
7 |
8 | """Compute minibatch blobs for training a Fast R-CNN network."""
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 |
13 | import numpy as np
14 | import numpy.random as npr
15 | from scipy.misc import imread
16 | from model.utils.config import cfg
17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
18 | import pdb
def get_minibatch(roidb, num_classes, seg_return=False):
    """Build the blob dictionary for a minibatch drawn from `roidb`.

    Only single-image minibatches are supported (enforced by assertions).
    The result holds 'data', 'gt_boxes' (x1, y1, x2, y2, cls), 'im_info'
    (blob height, blob width, scale), 'img_id' and 'path', plus 'seg_map'
    when `seg_return` is true.
    """
    num_images = len(roidb)
    # Pick a random training scale for every image in the batch.
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)

    # Load, mean-subtract and rescale the images.
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    blobs = {'data': im_blob}

    assert len(im_scales) == 1, "Single batch only"
    assert len(roidb) == 1, "Single batch only"

    entry = roidb[0]
    scale = im_scales[0]

    # Select ground-truth boxes: every non-background box, optionally
    # excluding those with an overlap entry <= -1 (COCO ''iscrowd'').
    is_foreground = entry['gt_classes'] != 0
    if cfg.TRAIN.USE_ALL_GT:
        gt_inds = np.where(is_foreground)[0]
    else:
        not_crowd = np.all(entry['gt_overlaps'].toarray() > -1.0, axis=1)
        gt_inds = np.where(is_foreground & not_crowd)[0]

    # gt boxes: (x1, y1, x2, y2, cls), coordinates in the rescaled image.
    gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
    gt_boxes[:, 0:4] = entry['boxes'][gt_inds, :] * scale
    gt_boxes[:, 4] = entry['gt_classes'][gt_inds]
    blobs['gt_boxes'] = gt_boxes
    blobs['im_info'] = np.array(
        [[im_blob.shape[1], im_blob.shape[2], scale]],
        dtype=np.float32)
    if seg_return:
        blobs['seg_map'] = entry['seg_map']
    # NOTE(review): assumes 'img_id' and 'path' are set unconditionally
    # (outside the seg_return branch) - confirm against the original layout.
    blobs['img_id'] = entry['img_id']
    blobs['path'] = entry['image']

    return blobs
57 |
def _get_image_blob(roidb, scale_inds):
    """Build an input blob from the images in the roidb at the given scales.

    Args:
        roidb: sequence of entries providing 'image' (path) and 'flipped'.
        scale_inds: for each entry, an index into cfg.TRAIN.SCALES.

    Returns:
        (blob, im_scales): the blob produced by im_list_to_blob from the
        preprocessed images, and the list of scale factors returned by
        prep_im_for_blob (one per image).
    """
    processed_ims = []
    im_scales = []
    for i, entry in enumerate(roidb):
        im = imread(entry['image'])

        if len(im.shape) == 2:
            # Grayscale: replicate the single channel three times.
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        elif im.shape[2] > 3:
            # Drop any channel beyond the first three (e.g. alpha) so that
            # the three-channel preprocessing below still applies.
            im = im[:, :, :3]
        # flip the channel, since the original one using cv2
        # rgb -> bgr
        im = im[:, :, ::-1]

        if entry['flipped']:
            # Horizontal mirror to match the flipped annotations.
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
89 |
--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import datasets
7 | import numpy as np
8 | from model.utils.config import cfg
9 | from datasets.factory import get_imdb
10 | import PIL
11 | import pdb
12 |
def prepare_roidb(imdb):
    """Enrich the imdb's roidb in place with quantities used for training.

    For every image the entry gains 'img_id', 'image' (path), 'width',
    'height', and, derived from the dense form of 'gt_overlaps',
    'max_overlaps' (row-wise maximum over the class columns) and
    'max_classes' (the column index of that maximum).

    Args:
        imdb: dataset object exposing roidb, num_images, image_index,
            image_path_at(i) and image_id_at(i).

    Raises:
        AssertionError: if a ROI with zero max overlap is not labelled
            background, or one with positive overlap is labelled background.
    """
    roidb = imdb.roidb

    # Read the image sizes, closing each file as soon as the size is known
    # instead of leaving the handles to the garbage collector.
    sizes = []
    for i in range(imdb.num_images):
        with PIL.Image.open(imdb.image_path_at(i)) as img:
            sizes.append(img.size)

    for i in range(len(imdb.image_index)):
        entry = roidb[i]
        entry['img_id'] = imdb.image_id_at(i)
        entry['image'] = imdb.image_path_at(i)
        entry['width'] = sizes[i][0]
        entry['height'] = sizes[i][1]
        # need gt_overlaps as a dense array for argmax
        gt_overlaps = entry['gt_overlaps'].toarray()
        # max overlap with gt over classes (columns)
        max_overlaps = gt_overlaps.max(axis=1)
        # gt class that had the max overlap
        max_classes = gt_overlaps.argmax(axis=1)
        entry['max_classes'] = max_classes
        entry['max_overlaps'] = max_overlaps
        # sanity checks
        # max overlap of 0 => class should be zero (background)
        zero_inds = np.where(max_overlaps == 0)[0]
        assert all(max_classes[zero_inds] == 0)
        # max overlap > 0 => class should not be zero (must be a fg class)
        nonzero_inds = np.where(max_overlaps > 0)[0]
        assert all(max_classes[nonzero_inds] != 0)
47 |
48 |
def rank_roidb_ratio(roidb):
    """Rank roidb entries by their (clamped) width / height ratio.

    Each entry gets a 'need_crop' flag: 1 when its aspect ratio lies outside
    [0.5, 2] (the ratio is then clamped to the nearest bound), 0 otherwise.

    Returns:
        (sorted_ratios, order): the clamped ratios in ascending order and
        the argsort indices that produce that order.
    """
    upper = 2      # largest ratio to preserve.
    lower = 0.5    # smallest ratio to preserve.

    ratios = []
    for entry in roidb:
        aspect = entry['width'] / float(entry['height'])

        too_wide = aspect > upper
        too_tall = aspect < lower
        entry['need_crop'] = 1 if (too_wide or too_tall) else 0

        if too_wide:
            aspect = upper
        elif too_tall:
            aspect = lower
        ratios.append(aspect)

    ratios = np.array(ratios)
    order = np.argsort(ratios)
    return ratios[order], order
74 |
def filter_roidb(roidb):
    """Remove, in place, every roidb entry that has no bounding boxes.

    Args:
        roidb: list of entries, each with a 'boxes' sequence.

    Returns:
        The same list object, now holding only entries with at least one box.
    """
    print('before filtering, there are %d images...' % (len(roidb)))
    # Slice assignment keeps the caller's list object (as the original
    # del-based loop did) while doing a single linear pass.
    roidb[:] = [entry for entry in roidb if len(entry['boxes']) != 0]
    print('after filtering, there are %d images...' % (len(roidb)))
    return roidb
87 |
def combined_roidb(imdb_names, training=True):
    """Load and combine the roidbs of one or more datasets.

    Args:
        imdb_names: dataset name, or several names joined with '+'.
        training: when true, entries without any boxes are filtered out.

    Returns:
        (imdb, roidb, ratio_list, ratio_index): the dataset object, the
        merged roidb, and the aspect-ratio ranking from rank_roidb_ratio.
    """

    def get_training_roidb(imdb):
        """Returns a roidb (Region of Interest database) for use in training."""
        if cfg.TRAIN.USE_FLIPPED:
            print('Appending horizontally-flipped training examples...')
            imdb.append_flipped_images()
            print('done')

        print('Preparing training data...')

        prepare_roidb(imdb)
        print('done')

        return imdb.roidb

    def get_roidb(imdb_name):
        """Instantiate one dataset and return its prepared roidb."""
        imdb = get_imdb(imdb_name)
        print('Loaded dataset `{:s}` for training'.format(imdb.name))
        imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
        print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
        roidb = get_training_roidb(imdb)
        return roidb

    names = imdb_names.split('+')
    roidbs = [get_roidb(s) for s in names]
    roidb = roidbs[0]

    if len(roidbs) > 1:
        for r in roidbs[1:]:
            roidb.extend(r)
        # The class list is taken from the second dataset; build it once
        # rather than once per merged roidb.
        tmp = get_imdb(names[1])
        imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
    else:
        imdb = get_imdb(imdb_names)

    if training:
        roidb = filter_roidb(roidb)

    ratio_list, ratio_index = rank_roidb_ratio(roidb)

    return imdb, roidb, ratio_list, ratio_index
133 |
--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | # --------------------------------------------------------
3 | # Fast R-CNN
4 | # Copyright (c) 2015 Microsoft
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # Written by Ross Girshick
7 | # --------------------------------------------------------
8 |
9 | import os
10 | from os.path import join as pjoin
11 | import numpy as np
12 | from distutils.core import setup
13 | from distutils.extension import Extension
14 | from Cython.Distutils import build_ext
15 |
16 |
def find_in_path(name, path):
    """Find a file in a search path.

    Args:
        name: file name to look for.
        path: directories joined with os.pathsep (like $PATH).

    Returns:
        Absolute path of the first match, or None when no directory
        contains the file.
    """
    # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
    for directory in path.split(os.pathsep):
        candidate = pjoin(directory, name)
        if os.path.exists(candidate):
            return os.path.abspath(candidate)
    return None
25 |
26 |
27 | # def locate_cuda():
28 | # """Locate the CUDA environment on the system
29 | #
30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
31 | # and values giving the absolute path to each directory.
32 | #
33 | # Starts by looking for the CUDAHOME env variable. If not found, everything
34 | # is based on finding 'nvcc' in the PATH.
35 | # """
36 | #
37 | # # first check if the CUDAHOME env variable is in use
38 | # if 'CUDAHOME' in os.environ:
39 | # home = os.environ['CUDAHOME']
40 | # nvcc = pjoin(home, 'bin', 'nvcc')
41 | # else:
42 | # # otherwise, search the PATH for NVCC
43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
45 | # if nvcc is None:
46 | # raise EnvironmentError('The nvcc binary could not be '
47 | # 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
48 | # home = os.path.dirname(os.path.dirname(nvcc))
49 | #
50 | # cudaconfig = {'home': home, 'nvcc': nvcc,
51 | # 'include': pjoin(home, 'include'),
52 | # 'lib64': pjoin(home, 'lib64')}
53 | # for k, v in cudaconfig.iteritems():
54 | # if not os.path.exists(v):
55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
56 | #
57 | # return cudaconfig
58 |
59 |
60 | # CUDA = locate_cuda()
61 |
# Obtain the numpy include directory. This logic works across numpy versions:
# np.get_include() is tried first, and the older np.get_numpy_include()
# spelling is used only when that attribute does not exist.
try:
    numpy_include = np.get_include()
except AttributeError:
    numpy_include = np.get_numpy_include()
67 |
68 |
def customize_compiler_for_nvcc(self):
    """Inject deep into distutils to customize how the dispatch
    to gcc/nvcc works.

    If you subclass UnixCCompiler, it's not trivial to get your subclass
    injected in, and still have the right customizations (i.e.
    distutils.sysconfig.customize_compiler) run on it. So instead of going
    the OO route, I have this. Note, it's kind of like a weird functional
    subclassing going on.

    Args:
        self: the compiler instance to patch. Its ``_compile`` method is
            replaced by a wrapper that expects ``extra_postargs`` to be a
            dict with a 'gcc' entry (and an 'nvcc' entry for .cu sources).

    Raises (from the installed wrapper):
        EnvironmentError: when a .cu source is compiled but no module-level
            ``CUDA`` configuration has been defined.
    """

    # tell the compiler it can process .cu
    self.src_extensions.append('.cu')

    # save references to the default compiler_so and _compile methods
    default_compiler_so = self.compiler_so
    default_compile = self._compile

    # now redefine the _compile method. This gets executed for each
    # object but distutils doesn't have the ability to change compilers
    # based on source extension: we add it.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        print(extra_postargs)
        if os.path.splitext(src)[1] == '.cu':
            # CUDA is only defined when the CUDA lookup at module level is
            # enabled; report that clearly instead of a bare NameError.
            try:
                nvcc = CUDA['nvcc']
            except NameError:
                raise EnvironmentError(
                    'Cannot compile %s: the CUDA toolchain has not been '
                    'located (no CUDA configuration is defined)' % src)
            # use only a subset of the extra_postargs, which are 1-1
            # translated from the extra_compile_args in the Extension class
            postargs = extra_postargs['nvcc']
            # use the cuda compiler for .cu files
            self.set_executable('compiler_so', nvcc)
        else:
            postargs = extra_postargs['gcc']

        try:
            default_compile(obj, src, ext, cc_args, postargs, pp_opts)
        finally:
            # reset the default compiler_so, which we might have changed
            # for cuda, even when the compilation itself failed
            self.compiler_so = default_compiler_so

    # inject our redefined _compile method into the class
    self._compile = _compile
106 |
107 |
# run the customize_compiler
class custom_build_ext(build_ext):
    """build_ext command that patches the compiler before building.

    The compiler instance is passed through customize_compiler_for_nvcc so
    that its _compile method dispatches on the source file extension; the
    regular build_ext.build_extensions then runs unchanged.
    """
    def build_extensions(self):
        # Patch first, so every extension is compiled through the wrapper.
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)
113 |
114 |
# Extensions to build. NOTE: extra_compile_args is a dict keyed by compiler
# name rather than the usual flat list; the _compile wrapper installed by
# customize_compiler_for_nvcc indexes it with 'gcc' (or 'nvcc' for .cu
# sources), so these definitions only work together with custom_build_ext.
ext_modules = [
    # Cython bounding-box helpers.
    Extension(
        "model.utils.cython_bbox",
        ["model/utils/bbox.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    # pycocotools mask API (C source plus Cython wrapper), compiled as C99.
    Extension(
        'pycocotools._mask',
        sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
        include_dirs=[numpy_include, 'pycocotools'],
        extra_compile_args={
            'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
    ),
]

setup(
    name='faster_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
137 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | cffi
3 | opencv-python
4 | scipy
5 | msgpack
6 | easydict
7 | matplotlib
8 | pyyaml
9 | tensorboardX
10 |
--------------------------------------------------------------------------------
/test_net_global_local.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow Faster R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Jiasen Lu, Jianwei Yang, based on code from Ross Girshick
5 | # --------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import os
11 | import sys
12 | import numpy as np
13 | import pprint
14 | import time
15 | import _init_paths
16 |
17 | import torch
18 |
19 | from torch.autograd import Variable
20 | import pickle
21 | from roi_data_layer.roidb import combined_roidb
22 | from roi_data_layer.roibatchLoader import roibatchLoader
23 | from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
24 | from model.rpn.bbox_transform import clip_boxes
25 | from model.nms.nms_wrapper import nms
26 | from model.rpn.bbox_transform import bbox_transform_inv
27 | #from model.utils.net_utils import save_net, load_net, vis_detections
28 | from model.utils.parser_func import parse_args,set_dataset_args
29 |
30 | import pdb
31 |
# Python 2/3 compatibility shim: on Python 3 the name xrange does not exist,
# so it is aliased to range.
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3



# Training hyper-parameters read from the config at import time.
# NOTE(review): the evaluation code in this script does not reference these
# three names; they look like leftovers from the training script.
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
42 |
if __name__ == '__main__':
    # Evaluation entry point: load a checkpoint, run the detector over the
    # validation imdb one image at a time, apply per-class NMS, dump all
    # detections to detections.pkl and run the dataset's evaluation.

    args = parse_args()

    print('Called with args:')
    print(args)
    args = set_dataset_args(args, test=True)
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    np.random.seed(cfg.RNG_SEED)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # Evaluation never uses flipped copies of the images.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # initialize the network here.
    from model.faster_rcnn.vgg16_global_local import vgg16
    from model.faster_rcnn.resnet_global_local import resnet

    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic, lc=args.lc, gc=args.gc)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic, lc=args.lc, gc=args.gc)
    else:
        # Fail with a clear error instead of dropping into the debugger and
        # then hitting a NameError on the undefined model.
        print("network is not defined")
        raise ValueError('unsupported --net value: {}'.format(args.net))

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (args.load_name))
    if args.cuda:
        checkpoint = torch.load(args.load_name)
    else:
        # Remap storages to the CPU so checkpoints saved from a GPU run can
        # be loaded on a machine without CUDA.
        checkpoint = torch.load(args.load_name,
                                map_location=lambda storage, loc: storage)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # initialize the tensor holders here; they are resized per image below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Normalisation constants for the box deltas, created once and placed on
    # the same device as the model (the original called .cuda() on every
    # iteration, which also broke CPU-only runs).
    bbox_stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
    bbox_means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        bbox_stds = bbox_stds.cuda()
        bbox_means = bbox_means.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    thresh = 0.0

    save_name = args.load_name.split('/')[-1]
    num_images = len(imdb.image_index)
    # all_boxes[class][image] holds that class's detections for that image.
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1,
                             imdb.num_classes, training=False, normalize=False, path_return=True)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # Placeholder with zero rows and five columns for classes without
    # detections.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, d_pred, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                box_deltas = box_deltas.view(-1, 4) * bbox_stds + bbox_means
                if args.class_agnostic:
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # No regression: use the proposals themselves. They stay torch
            # tensors (the original np.tile produced a numpy array that the
            # torch indexing below cannot consume) and are repeated once per
            # class along the last axis unless boxes are class agnostic.
            # assumes scores is (1, num_rois, num_classes) -- TODO confirm
            if args.class_agnostic:
                pred_boxes = boxes.clone()
            else:
                pred_boxes = boxes.repeat(1, 1, scores.size(2))

        # Undo the image rescaling recorded in im_info.
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Class 0 is skipped (background).
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r'
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
239 |
--------------------------------------------------------------------------------
/test_scripts/city2foggycity_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: city2foggycity_sample.sh <gpu_ids> <checkpoint_path>
# $1 selects the visible GPU(s); $2 is the checkpoint passed to --load_name.
CUDA_VISIBLE_DEVICES="$1" python test_net_global_local.py --cuda --net vgg16 --dataset foggy_cityscape --gc --lc --load_name "$2"
--------------------------------------------------------------------------------
/test_scripts/clipart_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: clipart_sample.sh <gpu_ids> <checkpoint_path>
# $1 selects the visible GPU(s); $2 is the checkpoint passed to --load_name.
CUDA_VISIBLE_DEVICES="$1" python test_net_global.py --cuda --net res101 --dataset clipart --gc --load_name "$2"
--------------------------------------------------------------------------------
/test_scripts/sim10k2cityscape_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: sim10k2cityscape_sample.sh <gpu_ids> <checkpoint_path>
# $1 selects the visible GPU(s); $2 is the checkpoint passed to --load_name.
CUDA_VISIBLE_DEVICES="$1" python test_net_global_local.py --cuda --net vgg16 --dataset cityscape_car --gc --lc --load_name "$2"
--------------------------------------------------------------------------------
/test_scripts/watercolor_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: watercolor_sample.sh <gpu_ids> <checkpoint_path>
# $1 selects the visible GPU(s); $2 is the checkpoint passed to --load_name.
CUDA_VISIBLE_DEVICES="$1" python test_net_global_local.py --cuda --net res101 --dataset water --gc --lc --load_name "$2"
--------------------------------------------------------------------------------
/train_scripts/city2foggycity_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: city2foggycity_sample.sh <gpu_ids> <save_dir>
# $1 selects the visible GPU(s); $2 is the directory passed to --save_dir.
CUDA_VISIBLE_DEVICES="$1" python trainval_net_global_local.py --cuda --net vgg16 --dataset cityscape --dataset_t foggy_cityscape --gc --lc --save_dir "$2"
--------------------------------------------------------------------------------
/train_scripts/clipart_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: clipart_sample.sh <gpu_ids> <save_dir>
# $1 selects the visible GPU(s); $2 is the directory passed to --save_dir.
CUDA_VISIBLE_DEVICES="$1" python trainval_net_global.py --cuda --net res101 --dataset pascal_voc_0712 --dataset_t clipart --gc --save_dir "$2"
--------------------------------------------------------------------------------
/train_scripts/sim10k2cityscape_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: sim10k2cityscape_sample.sh <gpu_ids> <save_dir>
# $1 selects the visible GPU(s); $2 is the directory passed to --save_dir.
CUDA_VISIBLE_DEVICES="$1" python trainval_net_global_local.py --cuda --net vgg16 --dataset sim10k --dataset_t cityscape_car --gc --lc --save_dir "$2"
--------------------------------------------------------------------------------
/train_scripts/watercolor_sample.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Usage: watercolor_sample.sh <gpu_ids> <save_dir>
# $1 selects the visible GPU(s); $2 is the directory passed to --save_dir.
CUDA_VISIBLE_DEVICES="$1" python trainval_net_global_local.py --cuda --net res101 --dataset pascal_voc_water --dataset_t water --gc --lc --save_dir "$2"
--------------------------------------------------------------------------------
| |