├── .gitignore ├── LICENSE ├── README.md ├── _init_paths.py ├── cfgs ├── res101.yml ├── res101_ls.yml ├── res50.yml └── vgg16.yml ├── debug ├── change_cache ├── change_cache_pascaal_modified ├── change_cache_pascal_modified ├── change_cache_pascal_modified_01.09.18_1 ├── change_cache_pascal_modified_01.09.18_2 ├── change_cache_pascal_modified_31.08.18 ├── change_cache_pascal_modified_31.08.18_2 ├── change_cache_pascal_modified_31.08.18_3 ├── change_cache_pascal_modified_31.08.18_4 ├── change_cache_pascal_modified_31.08.18_5 ├── change_cache_pascal_modified_31.08.18_6 ├── change_cache_pascal_modified_31.08.18_7 ├── change_cache_pascal_modified_31.08.18_8 ├── domain_test_1 ├── gt_roidb_01.09.18_2 ├── gt_roidb_01.09.18_3 ├── gt_roidb_01.09.18_4 ├── optimizer_params_01.09.18_1 ├── optimizer_params_01.09.18_2 ├── optimizer_params_01.09.18_3 ├── optimizer_params_01.09.18_4 ├── optimizer_params_01.09.18_5 ├── t ├── test_epoch_2 ├── test_epoch_3 ├── test_epoch_4 ├── test_epoch_5 ├── test_epoch_6 ├── test_epoch_7 ├── test_epoch_8 ├── test_epoch_9 ├── test_epoch_pascal ├── test_full_1 ├── test_full_2 ├── test_full_3 ├── test_full_4 ├── test_full_5 ├── test_full_6 ├── test_full_7 ├── test_new ├── test_new_paths_2 ├── test_new_paths_3 ├── test_new_paths_4 ├── train_full_1 ├── train_full_2 ├── train_full_3 ├── train_full_4 ├── train_full_5 ├── train_loop_04.09.18_1 ├── train_loop_04.09.18_2 ├── train_loop_04.09.18_3 ├── train_loop_04.09.18_4 ├── train_loop_04.09.18_5 ├── train_loop_post_da_14.09.18_1 ├── train_loop_post_da_14.09.18_2 └── train_loop_post_da_14.09.18_3 ├── demo.py ├── domain_adapt.py ├── from __future__ import absolute_import.py ├── images ├── img1.jpg ├── img2.jpg ├── img3.jpg ├── img4.jpg ├── img5.jpg ├── source_aachen_000000_000019_leftImg8bit.jpg ├── source_aachen_000001_000019_leftImg8bit.jpg ├── source_dusseldorf_000193_000019_leftImg8bit.jpg ├── source_hamburg_000000_102574_leftImg8bit.jpg ├── source_strasbourg_000000_018616_leftImg8bit.jpg ├── 
source_stuttgart_000071_000019_leftImg8bit.jpg ├── source_tubingen_000070_000019_leftImg8bit.jpg ├── source_ulm_000094_000019_leftImg8bit.jpg ├── source_zurich_000001_000019_leftImg8bit.jpg ├── source_zurich_000121_000019_leftImg8bit.jpg ├── target_aachen_000000_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_dusseldorf_000155_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_frankfurt_000000_002196_leftImg8bit_foggy_beta_0.02.jpg ├── target_frankfurt_000001_013710_leftImg8bit_foggy_beta_0.02.jpg ├── target_frankfurt_000001_019969_leftImg8bit_foggy_beta_0.02.jpg ├── target_frankfurt_000001_068063_leftImg8bit_foggy_beta_0.02.jpg ├── target_hamburg_000000_093325_leftImg8bit_foggy_beta_0.02.jpg ├── target_jena_000089_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_lindau_000016_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_lindau_000024_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_munster_000004_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_munster_000013_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_munster_000021_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_munster_000040_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_munster_000102_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_strasbourg_000001_002644_leftImg8bit_foggy_beta_0.02.jpg ├── target_stuttgart_000076_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_tubingen_000044_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_ulm_000032_000019_leftImg8bit_foggy_beta_0.02.jpg ├── target_weimar_000091_000019_leftImg8bit_foggy_beta_0.02.jpg └── target_zurich_000121_000019_leftImg8bit_foggy_beta_0.02.jpg ├── lib ├── datasets │ ├── README │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── create_test_file.py │ ├── ds_utils.py │ ├── factory.py │ ├── imagenet.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── pascal_voc_rbg.py │ ├── png_to_jpg_converter.py │ ├── test.txt │ ├── to_coco_format.py │ ├── tools │ │ └── 
mcg_munge.py │ ├── trainval.txt │ ├── trainval_old.txt │ ├── vg.py │ ├── vg_eval.py │ └── voc_eval.py ├── make.sh ├── make2.sh ├── model │ ├── __init__.py │ ├── faster_rcnn │ │ ├── __init__.py │ │ ├── faster_rcnn.py │ │ ├── resnet.py │ │ └── vgg16.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── make.sh │ │ ├── nms_cpu.py │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align.c │ │ │ ├── roi_align.h │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── crop_resize │ │ │ │ ├── __init__.py │ │ │ │ └── _crop_resize.so │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── make.sh │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h 
│ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── generate_anchors.py │ │ ├── proposal_layer.py │ │ ├── proposal_target_layer_cascade.py │ │ └── rpn.py │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── config.py │ │ ├── logger.py │ │ └── net_utils.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── minibatch.py │ ├── roibatchLoader.py │ └── roidb.py └── setup.py ├── new_trainval.py ├── output └── res101 │ └── voc_2007_test │ └── faster_rcnn_10 │ ├── aeroplane_pr.pkl │ ├── bicycle_pr.pkl │ ├── bird_pr.pkl │ ├── boat_pr.pkl │ ├── bottle_pr.pkl │ ├── bus_pr.pkl │ ├── car_pr.pkl │ ├── cat_pr.pkl │ ├── chair_pr.pkl │ ├── cow_pr.pkl │ ├── detections.pkl │ ├── diningtable_pr.pkl │ ├── dog_pr.pkl │ ├── horse_pr.pkl │ ├── motorbike_pr.pkl │ ├── motorcycle_pr.pkl │ ├── person_pr.pkl │ ├── pottedplant_pr.pkl │ ├── rider_pr.pkl │ ├── sheep_pr.pkl │ ├── sofa_pr.pkl │ ├── train_pr.pkl │ ├── truck_pr.pkl │ └── tvmonitor_pr.pkl ├── remove_bad_annot_img.py ├── requirements.txt ├── resources ├── img_rois_0.999_threshold.jpg └── in_bala_01_c027a_05052018073503_P1166.jpg ├── standard_train ├── LICENSE ├── README.md ├── _init_paths.py ├── cfgs │ ├── res101.yml │ ├── res101_ls.yml │ ├── res50.yml │ └── vgg16.yml ├── dataloader.py ├── demo.py ├── lib │ ├── datasets │ │ ├── README │ │ ├── VOCdevkit-matlab-wrapper │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── create_test_file.py │ │ ├── ds_utils.py │ │ ├── factory.py │ │ ├── imagenet.py │ │ ├── imdb.py │ │ ├── pascal_voc.py │ │ ├── pascal_voc_rbg.py │ │ ├── tools │ │ │ └── mcg_munge.py │ │ ├── vg.py │ │ ├── vg_eval.py │ │ └── voc_eval.py │ ├── 
make.sh │ ├── make2.sh │ ├── model │ │ ├── __init__.py │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── faster_rcnn.py │ │ │ ├── resnet.py │ │ │ └── vgg16.py │ │ ├── nms │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── nms │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── make.sh │ │ │ ├── nms_cpu.py │ │ │ ├── nms_gpu.py │ │ │ ├── nms_kernel.cu │ │ │ ├── nms_wrapper.py │ │ │ └── src │ │ │ │ ├── nms_cuda.h │ │ │ │ ├── nms_cuda_kernel.cu │ │ │ │ └── nms_cuda_kernel.h │ │ ├── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ └── src │ │ │ │ ├── roi_align.c │ │ │ │ ├── roi_align.h │ │ │ │ ├── roi_align_cuda.c │ │ │ │ ├── roi_align_cuda.h │ │ │ │ ├── roi_align_kernel.cu │ │ │ │ └── roi_align_kernel.h │ │ ├── roi_crop │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize │ │ │ │ │ └── __init__.py │ │ │ │ └── roi_crop │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ └── src │ │ │ │ ├── roi_crop.c │ │ │ │ ├── roi_crop.h │ │ │ │ ├── roi_crop_cuda.c │ │ │ │ ├── roi_crop_cuda.h │ │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ │ └── roi_crop_cuda_kernel.h │ │ ├── roi_pooling │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pooling │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ └── src │ │ │ │ ├── roi_pooling.c │ │ │ │ ├── roi_pooling.h │ │ │ │ ├── roi_pooling_cuda.c │ │ │ 
│ ├── roi_pooling_cuda.h │ │ │ │ ├── roi_pooling_kernel.cu │ │ │ │ └── roi_pooling_kernel.h │ │ ├── rpn │ │ │ ├── __init__.py │ │ │ ├── anchor_target_layer.py │ │ │ ├── bbox_transform.py │ │ │ ├── generate_anchors.py │ │ │ ├── proposal_layer.py │ │ │ ├── proposal_target_layer_cascade.py │ │ │ └── rpn.py │ │ └── utils │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── bbox.pyx │ │ │ ├── blob.py │ │ │ ├── config.py │ │ │ ├── logger.py │ │ │ └── net_utils.py │ ├── pycocotools │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── _mask.c │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ ├── license.txt │ │ ├── mask.py │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── minibatch.py │ │ ├── roibatchLoader.py │ │ └── roidb.py │ └── setup.py ├── normal_plot.py ├── parsexml.py ├── requirements.txt ├── test_net.py └── trainval_net.py ├── test_net.py └── trainval_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | 3 | # READ THIS BEFORE YOU REFACTOR ME 4 | # 5 | # setup.py uses the list of patterns in this file to decide 6 | # what to delete, but it's not 100% sound. So, for example, 7 | # if you delete aten/build/ because it's redundant with build/, 8 | # aten/build/ will stop being cleaned. So be careful when 9 | # refactoring this file! 
10 | 11 | ## PyTorch 12 | 13 | .mypy_cache 14 | *.pyc 15 | */*.pyc 16 | */*.so* 17 | */**/__pycache__ 18 | */**/*.dylib* 19 | */**/*.pyc 20 | */**/*.pyd 21 | */**/*.so* 22 | */**/**/*.pyc 23 | */**/**/**/*.pyc 24 | */**/**/**/**/*.pyc 25 | aten/build/ 26 | aten/src/ATen/Config.h 27 | aten/src/ATen/cuda/CUDAConfig.h 28 | build/ 29 | dist/ 30 | docs/src/**/* 31 | test/.coverage 32 | test/cpp/api/mnist 33 | test/data/gpu_tensors.pt 34 | test/data/legacy_modules.t7 35 | test/data/legacy_serialized.pt 36 | test/data/linear.pt 37 | test/htmlcov 38 | third_party/build/ 39 | tools/shared/_utils_internal.py 40 | torch.egg-info/ 41 | torch/csrc/autograd/generated/* 42 | torch/csrc/cudnn/cuDNN.cpp 43 | torch/csrc/generated 44 | torch/csrc/generic/TensorMethods.cpp 45 | torch/csrc/jit/generated/* 46 | torch/csrc/nn/THCUNN.cpp 47 | torch/csrc/nn/THCUNN.cwrap 48 | torch/csrc/nn/THNN_generic.cpp 49 | torch/csrc/nn/THNN_generic.cwrap 50 | torch/csrc/nn/THNN_generic.h 51 | torch/csrc/nn/THNN.cpp 52 | torch/csrc/nn/THNN.cwrap 53 | torch/lib/*.a* 54 | torch/lib/*.dll* 55 | torch/lib/*.dylib* 56 | torch/lib/*.h 57 | torch/lib/*.lib 58 | torch/lib/*.so* 59 | torch/lib/build 60 | torch/lib/cmake 61 | torch/lib/include 62 | torch/lib/pkgconfig 63 | torch/lib/protoc 64 | torch/lib/tmp_install 65 | torch/lib/torch_shm_manager 66 | torch/version.py 67 | 68 | # IPython notebook checkpoints 69 | .ipynb_checkpoints 70 | 71 | # Editor temporaries 72 | *.swn 73 | *.swo 74 | *.swp 75 | *.swm 76 | *~ 77 | 78 | # macOS dir files 79 | .DS_Store 80 | 81 | # Symbolic files 82 | tools/shared/cwrap_common.py 83 | 84 | # Ninja files 85 | .ninja_deps 86 | .ninja_log 87 | compile_commands.json 88 | *.egg-info/ 89 | docs/source/scripts/activation_images/ 90 | 91 | ## General 92 | 93 | # Compiled Object files 94 | *.slo 95 | *.lo 96 | *.o 97 | *.cuo 98 | *.obj 99 | 100 | # Compiled Dynamic libraries 101 | *.so 102 | *.dylib 103 | *.dll 104 | 105 | # Compiled Static libraries 106 | *.lai 107 | *.la 108 | *.a 
109 | *.lib 110 | 111 | # Compiled protocol buffers 112 | *.pb.h 113 | *.pb.cc 114 | *_pb2.py 115 | 116 | # Compiled python 117 | *.pyc 118 | *.pyd 119 | 120 | # Compiled MATLAB 121 | *.mex* 122 | 123 | # IPython notebook checkpoints 124 | .ipynb_checkpoints 125 | 126 | # Editor temporaries 127 | *.swn 128 | *.swo 129 | *.swp 130 | *~ 131 | 132 | # Sublime Text settings 133 | *.sublime-workspace 134 | *.sublime-project 135 | 136 | # Eclipse Project settings 137 | *.*project 138 | .settings 139 | 140 | # QtCreator files 141 | *.user 142 | 143 | # PyCharm files 144 | .idea 145 | 146 | # Visual Studio Code files 147 | .vscode 148 | .vs 149 | 150 | # OSX dir files 151 | .DS_Store 152 | 153 | ## Caffe2 154 | 155 | # build, distribute, and bins (+ python proto bindings) 156 | build 157 | build_host_protoc 158 | build_android 159 | build_ios 160 | /build_* 161 | .build_debug/* 162 | .build_release/* 163 | distribute/* 164 | *.testbin 165 | *.bin 166 | cmake_build 167 | .cmake_build 168 | gen 169 | .setuptools-cmake-build 170 | .pytest_cache 171 | aten/build/* 172 | 173 | # Bram 174 | plsdontbreak 175 | 176 | # Generated documentation 177 | docs/_site 178 | docs/gathered 179 | _site 180 | doxygen 181 | docs/dev 182 | 183 | # LevelDB files 184 | *.sst 185 | *.ldb 186 | LOCK 187 | LOG* 188 | CURRENT 189 | MANIFEST-* 190 | 191 | # generated version file 192 | caffe2/version.py 193 | 194 | # setup.py intermediates 195 | .eggs 196 | caffe2.egg-info 197 | 198 | # Atom/Watchman required file 199 | .watchmanconfig 200 | 201 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jianwei Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
"""Make the repository's bundled packages importable.

Importing this module prepends two directories, both resolved relative to
this file, to ``sys.path``: ``lib`` and ``data/coco/PythonAPI``.
"""
import os.path as osp
import sys


def add_path(path):
    """Prepend *path* to ``sys.path`` unless it is already listed there."""
    if path in sys.path:
        return
    sys.path.insert(0, path)


# Directory containing this file; every path below is resolved against it.
this_dir = osp.dirname(__file__)

lib_path = osp.join(this_dir, 'lib')
coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')

# Registration order matters: the path added last ends up first in sys.path.
add_path(lib_path)
add_path(coco_path)
CROP_RESIZE_WITH_MAX_POOL: False 19 | -------------------------------------------------------------------------------- /cfgs/res101_ls.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | SCALES: [800] 13 | DOUBLE_BIAS: False 14 | LEARNING_RATE: 0.001 15 | TEST: 16 | HAS_RPN: True 17 | SCALES: [800] 18 | MAX_SIZE: 1200 19 | RPN_POST_NMS_TOP_N: 1000 20 | POOLING_SIZE: 7 21 | POOLING_MODE: align 22 | CROP_RESIZE_WITH_MAX_POOL: False 23 | -------------------------------------------------------------------------------- /cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | # IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | WEIGHT_DECAY: 0.0001 13 | DOUBLE_BIAS: False 14 | SNAPSHOT_PREFIX: res50_faster_rcnn 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | -------------------------------------------------------------------------------- /cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | BATCH_SIZE: 256 10 | LEARNING_RATE: 0.01 11 | TEST: 12 | HAS_RPN: True 13 | POOLING_MODE: align 14 | CROP_RESIZE_WITH_MAX_POOL: False 15 | -------------------------------------------------------------------------------- /debug/change_cache: 
-------------------------------------------------------------------------------- 1 | Called with args: 2 | Namespace(adaption_lr=False, batch_size=1, checkepoch=1, checkpoint=0, checkpoint_interval=10000, checksession=1, class_agnostic=False, cuda=True, da=True, dataset='pascal_voc', disp_interval=100, large_scale=False, lr=0.001, lr_decay_gamma=0.1, lr_decay_step=50000, mGPUs=False, max_epochs=10, net='res101', num_workers=0, optimizer='sgd', resume=False, save_dir='data/pretrained_model', session=1, src_dataset='city', start_epoch=1, tar_dataset='fcity', use_tfboard=False) 3 | Using config: 4 | {'ANCHOR_RATIOS': [0.5, 1, 2], 5 | 'ANCHOR_SCALES': [8, 16, 32], 6 | 'CROP_RESIZE_WITH_MAX_POOL': False, 7 | 'CUDA': False, 8 | 'DATA_DIR': '/home/divyam/FRCN/faster-rcnn.pytorch/data', 9 | 'DEDUP_BOXES': 0.0625, 10 | 'EPS': 1e-14, 11 | 'EXP_DIR': 'res101', 12 | 'FEAT_STRIDE': [16], 13 | 'GPU_ID': 0, 14 | 'MATLAB': 'matlab', 15 | 'MAX_NUM_GT_BOXES': 20, 16 | 'MOBILENET': {'DEPTH_MULTIPLIER': 1.0, 17 | 'FIXED_LAYERS': 5, 18 | 'REGU_DEPTH': False, 19 | 'WEIGHT_DECAY': 4e-05}, 20 | 'PIXEL_MEANS': array([[[102.9801, 115.9465, 122.7717]]]), 21 | 'POOLING_MODE': 'align', 22 | 'POOLING_SIZE': 7, 23 | 'RESNET': {'FIXED_BLOCKS': 1, 'MAX_POOL': False}, 24 | 'RNG_SEED': 3, 25 | 'ROOT_DIR': '/home/divyam/FRCN/faster-rcnn.pytorch', 26 | 'TEST': {'BBOX_REG': True, 27 | 'HAS_RPN': True, 28 | 'MAX_SIZE': 1000, 29 | 'MODE': 'nms', 30 | 'NMS': 0.3, 31 | 'PROPOSAL_METHOD': 'gt', 32 | 'RPN_MIN_SIZE': 16, 33 | 'RPN_NMS_THRESH': 0.7, 34 | 'RPN_POST_NMS_TOP_N': 300, 35 | 'RPN_PRE_NMS_TOP_N': 6000, 36 | 'RPN_TOP_N': 5000, 37 | 'SCALES': [600], 38 | 'SVM': False}, 39 | 'TRAIN': {'ASPECT_GROUPING': False, 40 | 'BATCH_SIZE': 128, 41 | 'BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 42 | 'BBOX_NORMALIZE_MEANS': [0.0, 0.0, 0.0, 0.0], 43 | 'BBOX_NORMALIZE_STDS': [0.1, 0.1, 0.2, 0.2], 44 | 'BBOX_NORMALIZE_TARGETS': True, 45 | 'BBOX_NORMALIZE_TARGETS_PRECOMPUTED': True, 46 | 'BBOX_REG': True, 47 | 
'BBOX_THRESH': 0.5, 48 | 'BG_THRESH_HI': 0.5, 49 | 'BG_THRESH_LO': 0.0, 50 | 'BIAS_DECAY': False, 51 | 'BN_TRAIN': False, 52 | 'DISPLAY': 20, 53 | 'DOUBLE_BIAS': False, 54 | 'FG_FRACTION': 0.25, 55 | 'FG_THRESH': 0.5, 56 | 'GAMMA': 0.1, 57 | 'HAS_RPN': True, 58 | 'IMS_PER_BATCH': 1, 59 | 'LEARNING_RATE': 0.001, 60 | 'MAX_SIZE': 1000, 61 | 'MOMENTUM': 0.9, 62 | 'PROPOSAL_METHOD': 'gt', 63 | 'RPN_BATCHSIZE': 256, 64 | 'RPN_BBOX_INSIDE_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 65 | 'RPN_CLOBBER_POSITIVES': False, 66 | 'RPN_FG_FRACTION': 0.5, 67 | 'RPN_MIN_SIZE': 8, 68 | 'RPN_NEGATIVE_OVERLAP': 0.3, 69 | 'RPN_NMS_THRESH': 0.7, 70 | 'RPN_POSITIVE_OVERLAP': 0.7, 71 | 'RPN_POSITIVE_WEIGHT': -1.0, 72 | 'RPN_POST_NMS_TOP_N': 2000, 73 | 'RPN_PRE_NMS_TOP_N': 12000, 74 | 'SCALES': [600], 75 | 'SNAPSHOT_ITERS': 5000, 76 | 'SNAPSHOT_KEPT': 3, 77 | 'SNAPSHOT_PREFIX': 'res101_faster_rcnn', 78 | 'STEPSIZE': [30000], 79 | 'SUMMARY_INTERVAL': 180, 80 | 'TRIM_HEIGHT': 600, 81 | 'TRIM_WIDTH': 600, 82 | 'TRUNCATED': False, 83 | 'USE_ALL_GT': True, 84 | 'USE_FLIPPED': True, 85 | 'USE_GT': False, 86 | 'WEIGHT_DECAY': 0.0001}, 87 | 'USE_GPU_NMS': True} 88 | Loading src, tar datasets 89 | ROOT_DIR /home/divyam/FRCN/faster-rcnn.pytorch/lib/datasets/../.. 90 | image dataset path: /home/divyam/FRCN/faster-rcnn.pytorch/data/src/cityscapes/VOCdevkit2007/VOC2007 91 | sample image from dataset: source_tubingen_000027_000019_leftImg8bit 92 | 93 | Loaded dataset `voc_2007_trainval` for training 94 | Set proposal method: gt 95 | Appending horizontally-flipped training examples... 96 | voc_2007_trainval gt roidb loaded from /home/divyam/FRCN/faster-rcnn.pytorch/data/cache/voc_2007_trainval_gt_roidb.pkl 97 | done 98 | Preparing training data... 99 | done 100 | ROOT_DIR /home/divyam/FRCN/faster-rcnn.pytorch/lib/datasets/../.. 
"""Domain-adaptation heads and losses built around a gradient reversal layer."""
import torch
import torch.nn as nn
from torch.autograd import Function, Variable
import numpy as np


class GRL(Function):
    """Gradient reversal layer.

    The forward pass is the identity; the backward pass multiplies the
    incoming gradient by ``-beta``.

    Implemented as a static-method autograd ``Function`` and invoked through
    ``GRL.apply(x, beta)``. The legacy form (state stored in ``__init__``,
    called as ``GRL(beta)(x)``) is rejected by current PyTorch releases.
    """

    @staticmethod
    def forward(ctx, x, beta=1):
        # Stash the scale factor for the backward pass.
        ctx.beta = beta
        # view_as returns a new tensor object, so autograd records this node
        # instead of treating the call as a no-op on the input.
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # One gradient per forward input: reversed and scaled for ``x``,
        # ``None`` for the non-tensor ``beta``.
        return grad_output * (-1) * ctx.beta, None


def grad_reverse(x, beta=1):
    """Return ``x`` unchanged, with gradients flowing back scaled by ``-beta``."""
    return GRL.apply(x, beta)


class domain_img_cls(nn.Module):
    """Image-level domain classifier operating on a convolutional feature map.

    Two 1x1 convolutions (``in_channels -> 512 -> 1``) followed by a sigmoid
    yield one probability per spatial location, returned flattened to 1-D.

    Args:
        net: Backbone name. ``"res101"`` selects 1024 input channels; any
            other value selects 512.
    """

    def __init__(self, net):
        super(domain_img_cls, self).__init__()
        if net == "res101":
            in_channels = 1024
        else:
            in_channels = 512
        self.conv_1 = nn.Conv2d(in_channels=in_channels, out_channels=512,
                                kernel_size=1, padding=0, stride=1)
        self.relu = nn.ReLU()
        self.conv_2 = nn.Conv2d(in_channels=512, out_channels=1,
                                kernel_size=1, padding=0, stride=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, beta=1):
        """Classify every location of ``x``; gradients into ``x`` are reversed.

        Args:
            x: Feature map whose channel dimension matches the ``net`` chosen
                at construction time.
            beta: Scale applied to the reversed gradient.

        Returns:
            1-D tensor of sigmoid outputs, one per (sample, location).
        """
        x = grad_reverse(x, beta)
        x = self.conv_1(x)
        x = self.relu(x)
        x = self.conv_2(x)
        x = self.sigmoid(x)
        x = x.view(-1)

        return x


class domain_inst_cls(nn.Module):
    """Instance-level domain classifier operating on per-instance features.

    Three fully connected layers (``in_channels -> 1024 -> 1024 -> 1``) with
    ReLU and dropout after the first two, followed by a sigmoid.

    Args:
        net: Backbone name. ``"res101"`` selects 2048 input features; any
            other value selects 4096.
    """

    def __init__(self, net):
        super(domain_inst_cls, self).__init__()
        if net == "res101":
            in_channels = 2048
        else:
            in_channels = 4096
        self.fc_1 = nn.Linear(in_channels, 1024)
        self.fc_2 = nn.Linear(1024, 1024)
        self.fc_3 = nn.Linear(1024, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, beta=1):
        """Classify each row of ``x``; gradients into ``x`` are reversed.

        Args:
            x: 2-D tensor with one feature vector per row.
            beta: Scale applied to the reversed gradient.

        Returns:
            1-D tensor of sigmoid outputs, one per row of ``x``.
        """
        x = grad_reverse(x, beta)
        x = self.fc_1(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc_2(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc_3(x)
        x = self.sigmoid(x)
        x = x.view(-1)

        return x


def domain_loss(logits, labels):
    """Binary cross-entropy between domain predictions and a constant label.

    Args:
        logits: 1-D tensor of probabilities (sigmoid outputs, despite the
            parameter name) produced by one of the domain classifiers.
        labels: Domain flag. ``0`` means source (target value 0.0); any
            other value means target (target value 1.0).

    Returns:
        Scalar tensor holding the mean BCE loss.
    """
    count = logits.size(0)
    # Allocate the targets on the same device as the predictions so the loss
    # works on CPU as well as on GPU.
    if labels == 0:
        # For source
        labels = torch.zeros(count, dtype=torch.float32, device=logits.device)
    else:
        # For target
        labels = torch.ones(count, dtype=torch.float32, device=logits.device)

    loss = nn.BCELoss()
    return loss(logits, labels)


def consistency_loss(source_logits, target_logits):
    """L1 distance between the averaged source score and each target score.

    ``source_logits`` is reduced to a scalar (its sum divided by its first
    dimension) and compared with every element of ``target_logits``.

    Args:
        source_logits: Tensor of domain predictions to be averaged.
        target_logits: 1-D tensor of domain predictions compared against
            that average.

    Returns:
        Scalar tensor: the mean absolute difference.
    """
    device = target_logits.device
    target = torch.zeros(target_logits.size(0), dtype=torch.float32,
                         device=device)
    source_mean = torch.sum(source_logits) / source_logits.size(0)
    # Broadcast the scalar average to the shape of the target predictions.
    source_logits = torch.ones(target_logits.size(), device=device) * source_mean
    loss = nn.L1Loss()

    return loss(source_logits - target_logits, target)
-------------------------------------------------------------------------------- /images/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/img2.jpg -------------------------------------------------------------------------------- /images/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/img3.jpg -------------------------------------------------------------------------------- /images/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/img4.jpg -------------------------------------------------------------------------------- /images/img5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/img5.jpg -------------------------------------------------------------------------------- /images/source_aachen_000000_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_aachen_000000_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_aachen_000001_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_aachen_000001_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- 
/images/source_dusseldorf_000193_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_dusseldorf_000193_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_hamburg_000000_102574_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_hamburg_000000_102574_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_strasbourg_000000_018616_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_strasbourg_000000_018616_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_stuttgart_000071_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_stuttgart_000071_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_tubingen_000070_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_tubingen_000070_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_ulm_000094_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_ulm_000094_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_zurich_000001_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_zurich_000001_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/source_zurich_000121_000019_leftImg8bit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/source_zurich_000121_000019_leftImg8bit.jpg -------------------------------------------------------------------------------- /images/target_aachen_000000_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_aachen_000000_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_dusseldorf_000155_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_dusseldorf_000155_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_frankfurt_000000_002196_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_frankfurt_000000_002196_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_frankfurt_000001_013710_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_frankfurt_000001_013710_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_frankfurt_000001_019969_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_frankfurt_000001_019969_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_frankfurt_000001_068063_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_frankfurt_000001_068063_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_hamburg_000000_093325_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_hamburg_000000_093325_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_jena_000089_000019_leftImg8bit_foggy_beta_0.02.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_jena_000089_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_lindau_000016_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_lindau_000016_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_lindau_000024_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_lindau_000024_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_munster_000004_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_munster_000004_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_munster_000013_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_munster_000013_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_munster_000021_000019_leftImg8bit_foggy_beta_0.02.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_munster_000021_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_munster_000040_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_munster_000040_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_munster_000102_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_munster_000102_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_strasbourg_000001_002644_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_strasbourg_000001_002644_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_stuttgart_000076_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_stuttgart_000076_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- 
/images/target_tubingen_000044_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_tubingen_000044_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_ulm_000032_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_ulm_000032_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_weimar_000091_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_weimar_000091_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /images/target_zurich_000121_000019_leftImg8bit_foggy_beta_0.02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/images/target_zurich_000121_000019_leftImg8bit_foggy_beta_0.02.jpg -------------------------------------------------------------------------------- /lib/datasets/README: -------------------------------------------------------------------------------- 1 | Modified trainval to have only source data.` 2 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = 
get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/create_test_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | with open('test.txt', 'w') as f: 3 | for file in os.listdir(): 4 | if file=='to_coco_format.py' or file=='create_test_file.py': 5 | continue 6 | else: 7 | f.write(file+"\n") -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import 
print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/png_to_jpg_converter.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | for img_file in os.listdir(): 4 | try: 5 | print(img_file) 6 | img = Image.open(img_file) 7 | rgb_im = img.convert('RGB') 8 | #print(img_file[:-3]) 9 | #assert 1<0 10 | rgb_im.save(img_file[:-3] + "jpg") 11 | except: 12 | print("failed:", img_file) -------------------------------------------------------------------------------- /lib/datasets/to_coco_format.py: -------------------------------------------------------------------------------- 1 | import os 2 | for img_name in os.listdir(): 3 | if img_name == 
"to_coco_format.py": 4 | continue 5 | os.rename(img_name, "COCO_train2014_"+img_name) -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /lib/datasets/vg_eval.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Fast/er R-CNN 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Bharath Hariharan 6 | # -------------------------------------------------------- 7 | 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import numpy as np 11 | from .voc_eval import voc_ap 12 | 13 | def vg_eval( detpath, 14 | gt_roidb, 15 | image_index, 16 | classindex, 17 | ovthresh=0.5, 18 | use_07_metric=False, 19 | eval_attributes=False): 20 | """rec, prec, ap, sorted_scores, npos = voc_eval( 21 | detpath, 22 | gt_roidb, 23 | image_index, 24 | classindex, 25 | [ovthresh], 26 | [use_07_metric]) 27 | 28 | Top level function that does the Visual Genome evaluation. 29 | 30 | detpath: Path to detections 31 | gt_roidb: List of ground truth structs. 32 | image_index: List of image ids. 33 | classindex: Category index 34 | [ovthresh]: Overlap threshold (default = 0.5) 35 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 36 | (default False) 37 | """ 38 | # extract gt objects for this class 39 | class_recs = {} 40 | npos = 0 41 | for item,imagename in zip(gt_roidb,image_index): 42 | if eval_attributes: 43 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :] 44 | else: 45 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :] 46 | difficult = np.zeros((bbox.shape[0],)).astype(np.bool) 47 | det = [False] * bbox.shape[0] 48 | npos = npos + sum(~difficult) 49 | class_recs[str(imagename)] = {'bbox': bbox, 50 | 'difficult': difficult, 51 | 'det': det} 52 | if npos == 0: 53 | # No ground truth examples 54 | return 0,0,0,0,npos 55 | 56 | # read dets 57 | with open(detpath, 'r') as f: 58 | lines = f.readlines() 59 | if len(lines) == 0: 60 | # No detection examples 61 | return 0,0,0,0,npos 62 | 63 | splitlines 
= [x.strip().split(' ') for x in lines] 64 | image_ids = [x[0] for x in splitlines] 65 | confidence = np.array([float(x[1]) for x in splitlines]) 66 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 67 | 68 | # sort by confidence 69 | sorted_ind = np.argsort(-confidence) 70 | sorted_scores = -np.sort(-confidence) 71 | BB = BB[sorted_ind, :] 72 | image_ids = [image_ids[x] for x in sorted_ind] 73 | 74 | # go down dets and mark TPs and FPs 75 | nd = len(image_ids) 76 | tp = np.zeros(nd) 77 | fp = np.zeros(nd) 78 | for d in range(nd): 79 | R = class_recs[image_ids[d]] 80 | bb = BB[d, :].astype(float) 81 | ovmax = -np.inf 82 | BBGT = R['bbox'].astype(float) 83 | 84 | if BBGT.size > 0: 85 | # compute overlaps 86 | # intersection 87 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 88 | iymin = np.maximum(BBGT[:, 1], bb[1]) 89 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 90 | iymax = np.minimum(BBGT[:, 3], bb[3]) 91 | iw = np.maximum(ixmax - ixmin + 1., 0.) 92 | ih = np.maximum(iymax - iymin + 1., 0.) 93 | inters = iw * ih 94 | 95 | # union 96 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 97 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 98 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 99 | 100 | overlaps = inters / uni 101 | ovmax = np.max(overlaps) 102 | jmax = np.argmax(overlaps) 103 | 104 | if ovmax > ovthresh: 105 | if not R['difficult'][jmax]: 106 | if not R['det'][jmax]: 107 | tp[d] = 1. 108 | R['det'][jmax] = 1 109 | else: 110 | fp[d] = 1. 111 | else: 112 | fp[d] = 1. 
113 | 114 | # compute precision recall 115 | fp = np.cumsum(fp) 116 | tp = np.cumsum(tp) 117 | rec = tp / float(npos) 118 | # avoid divide by zero in case the first detection matches a difficult 119 | # ground truth 120 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 121 | ap = voc_ap(rec, prec, use_07_metric) 122 | 123 | return rec, prec, ap, sorted_scores, npos 124 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | export CUDA_PATH=/usr/local/cuda/ 6 | #You may also want to ad the following 7 | #export C_INCLUDE_PATH=/opt/cuda/include 8 | 9 | export CXXFLAGS="-std=c++11" 10 | export CFLAGS="-std=c99" 11 | 12 | python setup.py build_ext --inplace 13 | rm -rf build 14 | 15 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 16 | -gencode arch=compute_35,code=sm_35 \ 17 | -gencode arch=compute_50,code=sm_50 \ 18 | -gencode arch=compute_52,code=sm_52 \ 19 | -gencode arch=compute_60,code=sm_60 \ 20 | -gencode arch=compute_61,code=sm_61 " 21 | 22 | # compile NMS 23 | cd model/nms/src 24 | echo "Compiling nms kernels by nvcc..." 25 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 26 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 27 | 28 | cd ../ 29 | python build.py 30 | 31 | # compile roi_pooling 32 | cd ../../ 33 | cd model/roi_pooling/src 34 | echo "Compiling roi pooling kernels by nvcc..." 35 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 36 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 37 | cd ../ 38 | python build.py 39 | 40 | # compile roi_align 41 | cd ../../ 42 | cd model/roi_align/src 43 | echo "Compiling roi align kernels by nvcc..." 
44 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 45 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 46 | cd ../ 47 | python build.py 48 | 49 | # compile roi_crop 50 | cd ../../ 51 | cd model/roi_crop/src 52 | echo "Compiling roi crop kernels by nvcc..." 53 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 54 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 55 | cd ../ 56 | python build.py 57 | -------------------------------------------------------------------------------- /lib/make2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | export CUDA_PATH=/usr/local/cuda/ 6 | export PATH=/usr/local/cuda-8.0/bin:$PATH 7 | export LD_LIBRARY_PATH=/usr/local/cuda-8.0/lib64:$LD_LIBRARY_PATH 8 | 9 | 10 | python setup.py build_ext --inplace 11 | rm -rf build 12 | 13 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 14 | -gencode arch=compute_35,code=sm_35 \ 15 | -gencode arch=compute_50,code=sm_50 \ 16 | -gencode arch=compute_52,code=sm_52 \ 17 | -gencode arch=compute_60,code=sm_60 \ 18 | -gencode arch=compute_61,code=sm_61 " 19 | 20 | # compile NMS 21 | cd model/nms/src 22 | echo "Compiling nms kernels by nvcc..." 23 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 24 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 25 | 26 | cd ../ 27 | python build.py 28 | 29 | # compile roi_pooling 30 | cd ../../ 31 | cd model/roi_pooling/src 32 | echo "Compiling roi pooling kernels by nvcc..." 33 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 34 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 35 | cd ../ 36 | python build.py 37 | 38 | # compile roi_align 39 | cd ../../ 40 | cd model/roi_align/src 41 | echo "Compiling roi align kernels by nvcc..." 
42 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 43 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 44 | cd ../ 45 | python build.py 46 | 47 | # compile roi_crop 48 | cd ../../ 49 | cd model/roi_crop/src 50 | echo "Compiling roi crop kernels by nvcc..." 51 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 52 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 53 | cd ../ 54 | python build.py 55 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/lib/model/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | import math 15 | import torchvision.models as models 16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 17 | import pdb 18 | 19 | class vgg16(_fasterRCNN): 20 | def __init__(self, classes, pretrained=False, 
class_agnostic=False): 21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 22 | self.dout_base_model = 512 23 | self.pretrained = pretrained 24 | self.class_agnostic = class_agnostic 25 | 26 | _fasterRCNN.__init__(self, classes, class_agnostic) 27 | 28 | def _init_modules(self): 29 | vgg = models.vgg16() 30 | if self.pretrained: 31 | print("Loading pretrained weights from %s" %(self.model_path)) 32 | state_dict = torch.load(self.model_path) 33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 34 | 35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) 36 | 37 | # not using the last maxpool layer 38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 39 | 40 | # Fix the layers before conv3: 41 | for layer in range(10): 42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 43 | 44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 45 | 46 | self.RCNN_top = vgg.classifier 47 | 48 | # not using the last maxpool layer 49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 50 | 51 | if self.class_agnostic: 52 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 53 | else: 54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 55 | 56 | def _head_to_tail(self, pool5): 57 | 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.RCNN_top(pool5_flat) 60 | 61 | return fc7 62 | 63 | -------------------------------------------------------------------------------- /lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/lib/model/nms/__init__.py 
-------------------------------------------------------------------------------- /lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | 
extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling stnm kernels by nvcc..." 7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 15 | order = scores.argsort()[::-1] 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]]) 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] 33 | 34 | return torch.IntTensor(keep) 35 | -------------------------------------------------------------------------------- /lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 
1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_gpu(dets, thresh) if force_cpu == False else nms_cpu(dets, thresh) 22 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | 
def _import_symbols(locals):
    """Publish every attribute of the compiled ``_lib`` into *locals*.

    Callable attributes are wrapped with ``_wrap_function`` (bound to
    ``_ffi``); anything else is copied through unchanged. Every attribute
    name is also appended to the module's ``__all__``.
    """
    for symbol in dir(_lib):
        attr = getattr(_lib, symbol)
        locals[symbol] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(symbol)
# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """Legacy-style autograd function wrapping the compiled RoI-align kernels.

    The instance stores the pooling configuration plus the ``rois`` and
    feature size seen in ``forward`` so that ``backward`` can reuse them.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        """Store the output size and the scale handed to the kernels.

        Args:
            aligned_height: output height per RoI (converted with ``int``).
            aligned_width: output width per RoI (converted with ``int``).
            spatial_scale: scale factor forwarded to the kernels
                (converted with ``float``).
        """
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        """Pool ``features`` over ``rois``.

        Args:
            features: 4-D feature tensor (batch, channels, height, width).
            rois: tensor with one region per row.

        Returns:
            Tensor of shape ``(num_rois, channels, aligned_height,
            aligned_width)`` of the same type as ``features``.
        """
        self.rois = rois
        self.feature_size = features.size()

        # Unpacking keeps the implicit "features must be 4-D" check.
        _, num_channels, _, _ = features.size()
        num_rois = rois.size(0)

        output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
        if features.is_cuda:
            roi_align.roi_align_forward_cuda(self.aligned_height,
                                             self.aligned_width,
                                             self.spatial_scale, features,
                                             rois, output)
        else:
            roi_align.roi_align_forward(self.aligned_height,
                                        self.aligned_width,
                                        self.spatial_scale, features,
                                        rois, output)

        return output

    def backward(self, grad_output):
        """Scatter ``grad_output`` back onto the feature map.

        Returns:
            ``(grad_input, None)``; the RoIs receive no gradient.
        """
        assert self.feature_size is not None

        batch_size, num_channels, data_height, data_width = self.feature_size

        grad_input = self.rois.new(batch_size, num_channels, data_height,
                                   data_width).zero_()
        if grad_output.is_cuda:
            roi_align.roi_align_backward_cuda(self.aligned_height,
                                              self.aligned_width,
                                              self.spatial_scale, grad_output,
                                              self.rois, grad_input)
        else:
            # forward supports CPU tensors, so mirror that here instead of
            # asserting CUDA; roi_align_backward is declared in roi_align.h.
            roi_align.roi_align_backward(self.aligned_height,
                                         self.aligned_width,
                                         self.spatial_scale, grad_output,
                                         self.rois, grad_input)

        return grad_input, None
class RoIAlignAvg(Module):
    """RoI-align followed by a 2x2, stride-1 average pooling.

    The align step is run one cell larger in each direction than the
    requested size, and the pooling then shrinks each dimension by one.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        """Record the requested output size and the spatial scale.

        Args:
            aligned_height: output height, converted with ``int``.
            aligned_width: output width, converted with ``int``.
            spatial_scale: scale factor, converted with ``float``.
        """
        super(RoIAlignAvg, self).__init__()

        self.aligned_height = int(aligned_height)
        self.aligned_width = int(aligned_width)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Return the averaged RoI-aligned features for ``rois``."""
        oversampled_align = RoIAlignFunction(self.aligned_height + 1,
                                             self.aligned_width + 1,
                                             self.spatial_scale)
        aligned = oversampled_align(features, rois)
        return avg_pool2d(aligned, kernel_size=2, stride=1)
self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); 3 | 4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale, 5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t 
stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | 
THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/lib/model/roi_crop/__init__.py 
def _import_symbols(locals):
    """Publish every attribute of the compiled ``_lib`` into *locals*.

    Callable attributes are wrapped with ``_wrap_function`` (bound to
    ``_ffi``). Non-callable attributes such as constants are copied through
    unchanged instead of being wrapped as if they were functions, which
    matches the loaders of the other ``_ext`` packages in this repository.
    Every attribute name is also appended to ``__all__``.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            locals[symbol] = fn
        __all__.append(symbol)
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = 
class AffineGridGenFunction(Function):
    """Legacy-style autograd function that maps affine parameters to a grid.

    A fixed ``(height, width, 3)`` base grid of homogeneous coordinates
    ``(row, col, 1)`` is built once. ``forward`` multiplies it by the
    transposed per-sample matrices in ``input1``.
    """

    def __init__(self, height, width, lr=1):
        """Build the base grid.

        Args:
            height: number of grid rows.
            width: number of grid columns.
            lr: stored on the instance; not used by this class itself.
        """
        super(AffineGridGenFunction, self).__init__()
        self.lr = lr
        self.height, self.width = height, width
        self.grid = np.zeros([self.height, self.width, 3], dtype=np.float32)
        # Channel 0 varies along the rows, channel 1 along the columns; both
        # start at -1 and advance in steps of 2/size (the end point 1 is excluded).
        self.grid[:, :, 0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats=self.width, axis=0).T, 0)
        self.grid[:, :, 1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats=self.height, axis=0), 0)
        self.grid[:, :, 2] = np.ones([self.height, width])
        self.grid = torch.from_numpy(self.grid.astype(np.float32))

    def forward(self, input1):
        """Apply the matrices in ``input1`` to the base grid.

        Args:
            input1: tensor of shape ``(batch, 2, 3)``.

        Returns:
            Tensor of shape ``(batch, height, width, 2)``.
        """
        self.input1 = input1
        batch = input1.size(0)

        # self.grid is a torch tensor at this point, so it has no ``astype``;
        # ``type_as`` converts it to input1's tensor type once, up front.
        grid = self.grid.type_as(input1)
        self.batchgrid = input1.new(batch, self.height, self.width, 3).zero_()
        for i in range(batch):
            self.batchgrid[i] = grid

        # One batched matmul covers every sample; no per-sample loop is needed.
        output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3),
                           torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)

        return output

    def backward(self, grad_output):
        """Return the gradient with respect to ``input1``.

        Uses the batched grid saved by ``forward``; the result has the same
        shape as ``input1``.
        """
        grad_input1 = self.input1.new(self.input1.size()).zero_()

        grad_input1 = torch.baddbmm(grad_input1,
                                    torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1, 2),
                                    self.batchgrid.view(-1, self.height*self.width, 3))
        return grad_input1
input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 
class _RoICrop(Module):
    """Module wrapper that applies ``RoICropFunction`` to its two inputs."""

    def __init__(self, layout = 'BHWD'):
        # ``layout`` is accepted but not stored or used by this class.
        super(_RoICrop, self).__init__()

    def forward(self, input1, input2):
        """Run a fresh ``RoICropFunction`` on ``input1`` and ``input2``."""
        crop_fn = RoICropFunction()
        return crop_fn(input1, input2)
-------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor 
def _import_symbols(locals):
    """Expose the compiled ``_lib`` attributes through *locals*.

    Callables are wrapped with ``_wrap_function`` using ``_ffi``; all other
    attributes are stored as they are. Each name is appended to ``__all__``.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if not callable(fn):
            exported = fn
        else:
            exported = _wrap_function(fn, _ffi)
        locals[symbol] = exported
        __all__.append(symbol)
class RoIPoolFunction(Function):
    """Legacy-style autograd function wrapping the compiled RoI-pooling kernels.

    The pooling configuration, the RoIs, the feature size and the argmax
    buffer are stored on the instance so ``backward`` can reuse them.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale):
        """Store the output size and the scale handed to the kernels."""
        ctx.pooled_width = pooled_width
        ctx.pooled_height = pooled_height
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = None

    def forward(ctx, features, rois):
        """Pool ``features`` over ``rois``.

        Args:
            features: 4-D feature tensor (batch, channels, height, width).
            rois: tensor with one region per row.

        Returns:
            Tensor of shape ``(num_rois, channels, pooled_height,
            pooled_width)`` of the same type as ``features``.
        """
        ctx.feature_size = features.size()
        # Unpacking keeps the implicit "features must be 4-D" check.
        _, num_channels, _, _ = ctx.feature_size
        num_rois = rois.size(0)
        output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
        ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
        ctx.rois = rois
        if not features.is_cuda:
            # The CPU kernel reads the raw buffer as (batch, height, width,
            # channels). permute() only changes strides, so the data must be
            # copied into that layout before the pointer is handed to C.
            _features = features.permute(0, 2, 3, 1).contiguous()
            roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                            _features, rois, output)
        else:
            roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                                 features, rois, output, ctx.argmax)

        return output

    def backward(ctx, grad_output):
        """Route ``grad_output`` back to the feature map (CUDA only).

        Returns:
            ``(grad_input, None)``; the RoIs receive no gradient.
        """
        # Only the CUDA backward kernel is called here, so CPU gradients
        # are rejected up front.
        assert(ctx.feature_size is not None and grad_output.is_cuda)
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()

        roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                              grad_output, ctx.rois, grad_input, ctx.argmax)

        return grad_input, None
7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = 
hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | 
// if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, 
stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- 
def generate_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    """Generate anchor (reference) windows.

    Anchors are produced by enumerating aspect ratios X scales with respect
    to a reference (0, 0, base_size - 1, base_size - 1) window.

    Args:
        base_size: side length of the square reference window.
        ratios: height/width aspect ratios; any sequence or array of numbers.
        scales: multipliers applied to each ratio anchor; any sequence or
            array of numbers. The default equals ``2 ** np.arange(3, 6)``.

    Returns:
        A ``(len(ratios) * len(scales), 4)`` float array of
        ``(x1, y1, x2, y2)`` rows, grouped by ratio and then by scale.
    """
    # Coerce once so lists, tuples and arrays all behave the same, and so that
    # integer ratios never trigger integer division.
    ratios = np.asarray(ratios, dtype=float)
    scales = np.asarray(scales)

    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    return np.vstack([_scale_enum(anchor, scales) for anchor in ratio_anchors])


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Build anchors from widths (ws) and heights (hs) around one center.

    ``ws`` and ``hs`` are 1-D arrays of equal length; the result has one
    ``(x1, y1, x2, y2)`` row per entry.
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    return np.hstack((x_ctr - 0.5 * (ws - 1),
                      y_ctr - 0.5 * (hs - 1),
                      x_ctr + 0.5 * (ws - 1),
                      y_ctr + 0.5 * (hs - 1)))


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    ratios = np.asarray(ratios, dtype=float)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    # Keep the area roughly constant while changing the aspect ratio.
    size_ratios = (w * h) / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    scales = np.asarray(scales)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(w * scales, h * scales, x_ctr, y_ctr)
def bbox_intersections(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-visible entry point for ``bbox_intersections_c``.

    Both arguments are 2-D arrays of ``DTYPE_t``; they are forwarded
    unchanged and the result of ``bbox_intersections_c`` is returned as is.
    """
    return bbox_intersections_c(boxes, query_boxes)
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).
    Every image is copied into the top-left corner of its slot; the rest of
    the slot stays zero, so images of different sizes are zero-padded to the
    largest height and width in the list.

    Args:
        ims: non-empty sequence of ``(H, W, 3)`` arrays.

    Returns:
        A float32 array of shape ``(len(ims), max_H, max_W, 3)``.

    Raises:
        ValueError: if ``ims`` is empty.
    """
    if len(ims) == 0:
        # Fail with a clear message instead of an opaque NumPy reduction error.
        raise ValueError('im_list_to_blob requires at least one image')

    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im

    return blob
images.""" 24 | 25 | img_summaries = [] 26 | for i, img in enumerate(images): 27 | # Write the image to a string 28 | try: 29 | s = StringIO() 30 | except: 31 | s = BytesIO() 32 | scipy.misc.toimage(img).save(s, format="png") 33 | 34 | # Create an Image object 35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 36 | height=img.shape[0], 37 | width=img.shape[1]) 38 | # Create a Summary value 39 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 40 | 41 | # Create and write Summary 42 | summary = tf.Summary(value=img_summaries) 43 | self.writer.add_summary(summary, step) 44 | 45 | def histo_summary(self, tag, values, step, bins=1000): 46 | """Log a histogram of the tensor of values.""" 47 | 48 | # Create a histogram using numpy 49 | counts, bin_edges = np.histogram(values, bins=bins) 50 | 51 | # Fill the fields of the histogram proto 52 | hist = tf.HistogramProto() 53 | hist.min = float(np.min(values)) 54 | hist.max = float(np.max(values)) 55 | hist.num = int(np.prod(values.shape)) 56 | hist.sum = float(np.sum(values)) 57 | hist.sum_squares = float(np.sum(values**2)) 58 | 59 | # Drop the start of the first bin 60 | bin_edges = bin_edges[1:] 61 | 62 | # Add bin edges and counts 63 | for edge in bin_edges: 64 | hist.bucket_limit.append(edge) 65 | for c in counts: 66 | hist.bucket.append(c) 67 | 68 | # Create and write Summary 69 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 70 | self.writer.add_summary(summary, step) 71 | self.writer.flush() 72 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 
32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 
67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | 
"""Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import numpy.random as npr 15 | from scipy.misc import imread 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | def get_minibatch(roidb, num_classes): 20 | """Given a roidb, construct a minibatch sampled from it.""" 21 | num_images = len(roidb) 22 | # Sample random scales to use for each image in this batch 23 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 24 | size=num_images) 25 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 26 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 27 | format(num_images, cfg.TRAIN.BATCH_SIZE) 28 | 29 | # Get the input image blob, formatted for caffe 30 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 31 | 32 | blobs = {'data': im_blob} 33 | 34 | assert len(im_scales) == 1, "Single batch only" 35 | assert len(roidb) == 1, "Single batch only" 36 | 37 | # gt boxes: (x1, y1, x2, y2, cls) 38 | if cfg.TRAIN.USE_ALL_GT: 39 | # Include all ground truth boxes 40 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 41 | else: 42 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 43 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 44 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 45 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 46 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 47 | blobs['gt_boxes'] = gt_boxes 48 | blobs['im_info'] = np.array( 49 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 50 | dtype=np.float32) 51 | 52 | blobs['img_id'] = roidb[0]['img_id'] 53 | 54 | return blobs 55 | 56 | def _get_image_blob(roidb, scale_inds): 57 | """Builds an input blob from the images 
in the roidb at the specified 58 | scales. 59 | """ 60 | num_images = len(roidb) 61 | 62 | processed_ims = [] 63 | im_scales = [] 64 | for i in range(num_images): 65 | #im = cv2.imread(roidb[i]['image']) 66 | im = imread(roidb[i]['image']) 67 | 68 | if len(im.shape) == 2: 69 | im = im[:,:,np.newaxis] 70 | im = np.concatenate((im,im,im), axis=2) 71 | # flip the channel, since the original one using cv2 72 | # rgb -> bgr 73 | im = im[:,:,::-1] 74 | 75 | if roidb[i]['flipped']: 76 | im = im[:, ::-1, :] 77 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 78 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 79 | cfg.TRAIN.MAX_SIZE) 80 | im_scales.append(im_scale) 81 | processed_ims.append(im) 82 | 83 | # Create a blob to hold the input images 84 | blob = im_list_to_blob(processed_ims) 85 | 86 | return blob, im_scales 87 | -------------------------------------------------------------------------------- /output/res101/voc_2007_test/faster_rcnn_10/detections.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/output/res101/voc_2007_test/faster_rcnn_10/detections.pkl -------------------------------------------------------------------------------- /remove_bad_annot_img.py: -------------------------------------------------------------------------------- 1 | import os 2 | with open('debug_caltech.txt', 'r') as f: 3 | bad_files = f.readlines() 4 | for bad_file in bad_files: 5 | os.rename("/home/divyam/dafrcnn/dafrcnn-pytorch/data/src/caltech/coco/images/"+"COCO_train2014_"+bad_file[:-1]+".jpg", 6 | "/home/divyam/dafrcnn/dafrcnn-pytorch/data/src/caltech/coco/images_no_annots/"+"COCO_train2014_"+bad_file[:-1]+".jpg") -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | cffi 3 | opencv-python 
4 | scipy 5 | msgpack 6 | easydict 7 | matplotlib 8 | pyyaml 9 | tensorboardX 10 | -------------------------------------------------------------------------------- /resources/img_rois_0.999_threshold.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/resources/img_rois_0.999_threshold.jpg -------------------------------------------------------------------------------- /resources/in_bala_01_c027a_05052018073503_P1166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/resources/in_bala_01_c027a_05052018073503_P1166.jpg -------------------------------------------------------------------------------- /standard_train/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jianwei Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
def add_path(path):
    """Put ``path`` at the front of ``sys.path`` unless it is already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)


# Directory that contains this file; the paths below are built from it.
this_dir = osp.dirname(__file__)

# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)

# Add the COCO Python API to PYTHONPATH
coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')
add_path(coco_path)
16 | HAS_RPN: True 17 | SCALES: [800] 18 | MAX_SIZE: 1200 19 | RPN_POST_NMS_TOP_N: 1000 20 | POOLING_SIZE: 7 21 | POOLING_MODE: align 22 | CROP_RESIZE_WITH_MAX_POOL: False 23 | -------------------------------------------------------------------------------- /standard_train/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | # IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | WEIGHT_DECAY: 0.0001 13 | DOUBLE_BIAS: False 14 | SNAPSHOT_PREFIX: res50_faster_rcnn 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | -------------------------------------------------------------------------------- /standard_train/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | BATCH_SIZE: 256 10 | LEARNING_RATE: 0.01 11 | TEST: 12 | HAS_RPN: True 13 | POOLING_MODE: align 14 | CROP_RESIZE_WITH_MAX_POOL: False 15 | -------------------------------------------------------------------------------- /standard_train/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/standard_train/dataloader.py -------------------------------------------------------------------------------- /standard_train/lib/datasets/README: -------------------------------------------------------------------------------- 1 | Modified trainval to have only source data.` 2 | -------------------------------------------------------------------------------- 
/standard_train/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /standard_train/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! 
%s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /standard_train/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /standard_train/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /standard_train/lib/datasets/create_test_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | with open('test.txt', 'w') as f: 3 | for file in os.listdir(): 4 | if file=='to_coco_format.py' or file=='create_test_file.py': 5 | continue 6 | else: 7 | f.write(file+"\n") -------------------------------------------------------------------------------- /standard_train/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under 
def unique_boxes(boxes, scale=1.0):
    """Return the sorted indices of the first occurrence of each unique box.

    Boxes are compared after multiplying by ``scale`` and rounding.  Rows are
    compared as a whole; the previous weighted-sum hash (weights 1, 1e3, 1e6,
    1e9) mapped distinct boxes to the same value as soon as a coordinate
    reached 1000, silently dropping valid boxes.
    """
    rounded = np.atleast_2d(np.round(boxes * scale))
    if rounded.size == 0:
        # Nothing to deduplicate; keep the integer index dtype of np.unique.
        return np.zeros((0,), dtype=np.intp)
    _, index = np.unique(rounded, axis=0, return_index=True)
    return np.sort(index)


def xywh_to_xyxy(boxes):
    """Convert [x y w h] box format to [x1 y1 x2 y2] format.

    The far corner is computed as ``x + w - 1`` / ``y + h - 1``.
    """
    return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))


def xyxy_to_xywh(boxes):
    """Convert [x1 y1 x2 y2] box format to [x y w h] format.

    Width and height are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``.
    """
    return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))


def validate_boxes(boxes, width=0, height=0):
    """Check that a set of boxes are valid.

    Raises ``AssertionError`` unless every box satisfies
    ``0 <= x1 <= x2 < width`` and ``0 <= y1 <= y2 < height``.

    NOTE: with the default ``width=0`` / ``height=0`` no non-empty box set can
    pass, so callers are expected to supply the image size.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    assert (x1 >= 0).all()
    assert (y1 >= 0).all()
    assert (x2 >= x1).all()
    assert (y2 >= y1).all()
    assert (x2 < width).all()
    assert (y2 < height).all()


def filter_small_boxes(boxes, min_size):
    """Return the indices of boxes whose extent is at least ``min_size``.

    The extent is measured as ``x2 - x1`` and ``y2 - y1``.  Both dimensions
    use the same inclusive comparison; the height test previously used a
    strict ``>``, so a box exactly ``min_size`` tall was rejected while one
    exactly ``min_size`` wide was kept.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    keep = np.where((w >= min_size) & (h >= min_size))[0]
    return keep
def vg_eval(detpath,
            gt_roidb,
            image_index,
            classindex,
            ovthresh=0.5,
            use_07_metric=False,
            eval_attributes=False):
    """rec, prec, ap, sorted_scores, npos = vg_eval(
                                detpath,
                                gt_roidb,
                                image_index,
                                classindex,
                                [ovthresh],
                                [use_07_metric],
                                [eval_attributes])

    Top level function that does the Visual Genome evaluation.

    detpath: Path to detections. Each line is space separated:
        image id, confidence, then the box coordinates.
    gt_roidb: List of ground truth structs.
    image_index: List of image ids.
    classindex: Category index
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    [eval_attributes]: When True, ground truth boxes are selected by
        matching ``classindex`` against the rows of ``gt_attributes``
        instead of ``gt_classes`` (default False)

    Returns ``(0, 0, 0, 0, npos)`` when there are no ground truth boxes for
    the class or the detection file is empty.
    """
    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for item, imagename in zip(gt_roidb, image_index):
        if eval_attributes:
            rows = np.where(np.any(
                item['gt_attributes'].toarray() == classindex, axis=1))[0]
        else:
            rows = np.where(item['gt_classes'] == classindex)[0]
        bbox = item['boxes'][rows, :]
        # No box is marked difficult here.  The builtin ``bool`` is used
        # because the ``np.bool`` alias was removed in NumPy 1.24.
        difficult = np.zeros((bbox.shape[0],), dtype=bool)
        det = [False] * bbox.shape[0]
        npos += int(np.sum(~difficult))
        class_recs[str(imagename)] = {'bbox': bbox,
                                      'difficult': difficult,
                                      'det': det}
    if npos == 0:
        # No ground truth examples
        return 0, 0, 0, 0, npos

    # read dets
    with open(detpath, 'r') as f:
        lines = f.readlines()
    if len(lines) == 0:
        # No detection examples
        return 0, 0, 0, 0, npos

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = -np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    # First detection that claims this ground truth box.
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # Ground truth box already claimed by a higher-scoring
                    # detection: count as a duplicate.
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap, sorted_scores, npos
arch=compute_61,code=sm_61 " 21 | 22 | # compile NMS 23 | cd model/nms/src 24 | echo "Compiling nms kernels by nvcc..." 25 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 26 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 27 | 28 | cd ../ 29 | python build.py 30 | 31 | # compile roi_pooling 32 | cd ../../ 33 | cd model/roi_pooling/src 34 | echo "Compiling roi pooling kernels by nvcc..." 35 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 36 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 37 | cd ../ 38 | python build.py 39 | 40 | # compile roi_align 41 | cd ../../ 42 | cd model/roi_align/src 43 | echo "Compiling roi align kernels by nvcc..." 44 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 45 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 46 | cd ../ 47 | python build.py 48 | 49 | # compile roi_crop 50 | cd ../../ 51 | cd model/roi_crop/src 52 | echo "Compiling roi crop kernels by nvcc..." 53 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 54 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 55 | cd ../ 56 | python build.py 57 | -------------------------------------------------------------------------------- /standard_train/lib/make2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | export CUDA_PATH=/usr/local/cuda/ 6 | export PATH=/usr/local/cuda-8.0/bin:$PATH 7 | export LD_LIBRARY_PATH=/usr/local/cuda-8.0/lib64:$LD_LIBRARY_PATH 8 | 9 | 10 | python setup.py build_ext --inplace 11 | rm -rf build 12 | 13 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 14 | -gencode arch=compute_35,code=sm_35 \ 15 | -gencode arch=compute_50,code=sm_50 \ 16 | -gencode arch=compute_52,code=sm_52 \ 17 | -gencode arch=compute_60,code=sm_60 \ 18 | -gencode arch=compute_61,code=sm_61 " 19 | 20 | # compile NMS 21 | cd model/nms/src 22 | echo "Compiling nms kernels by nvcc..." 
23 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 24 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 25 | 26 | cd ../ 27 | python build.py 28 | 29 | # compile roi_pooling 30 | cd ../../ 31 | cd model/roi_pooling/src 32 | echo "Compiling roi pooling kernels by nvcc..." 33 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 34 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 35 | cd ../ 36 | python build.py 37 | 38 | # compile roi_align 39 | cd ../../ 40 | cd model/roi_align/src 41 | echo "Compiling roi align kernels by nvcc..." 42 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 43 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 44 | cd ../ 45 | python build.py 46 | 47 | # compile roi_crop 48 | cd ../../ 49 | cd model/roi_crop/src 50 | echo "Compiling roi crop kernels by nvcc..." 51 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 52 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 53 | cd ../ 54 | python build.py 55 | -------------------------------------------------------------------------------- /standard_train/lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/standard_train/lib/model/__init__.py -------------------------------------------------------------------------------- /standard_train/lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/standard_train/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /standard_train/lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # 
class vgg16(_fasterRCNN):
    """Faster R-CNN detector built on a torchvision VGG-16 network."""

    def __init__(self, classes, pretrained=False, class_agnostic=False):
        """Store the backbone settings, then run the base-class constructor.

        classes: passed through to ``_fasterRCNN``.
        pretrained: load weights from ``self.model_path`` when True.
        class_agnostic: predict a single box per RoI instead of one per class.
        """
        self.model_path = 'data/pretrained_model/vgg16_caffe.pth'
        self.dout_base_model = 512
        self.pretrained = pretrained
        self.class_agnostic = class_agnostic

        _fasterRCNN.__init__(self, classes, class_agnostic)

    def _init_modules(self):
        """Create the feature extractor, the fc head and the two predictors."""
        backbone = models.vgg16()
        if self.pretrained:
            print("Loading pretrained weights from %s" %(self.model_path))
            checkpoint = torch.load(self.model_path)
            # Only keep checkpoint entries the torchvision model knows about.
            matching = {name: tensor for name, tensor in checkpoint.items()
                        if name in backbone.state_dict()}
            backbone.load_state_dict(matching)

        # Drop the final layer of the VGG classifier.
        head_layers = list(backbone.classifier._modules.values())
        backbone.classifier = nn.Sequential(*head_layers[:-1])

        # not using the last maxpool layer
        feature_layers = list(backbone.features._modules.values())
        self.RCNN_base = nn.Sequential(*feature_layers[:-1])

        # Fix the layers before conv3:
        for idx in range(10):
            for param in self.RCNN_base[idx].parameters():
                param.requires_grad = False

        self.RCNN_top = backbone.classifier

        # Classification scores over all classes.
        self.RCNN_cls_score = nn.Linear(4096, self.n_classes)

        # Box regression: 4 outputs in total, or 4 outputs per class.
        box_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.RCNN_bbox_pred = nn.Linear(4096, box_outputs)

    def _head_to_tail(self, pool5):
        """Flatten the pooled RoI features and run them through ``RCNN_top``."""
        flattened = pool5.view(pool5.size(0), -1)
        return self.RCNN_top(flattened)
headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /standard_train/lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling stnm kernels by nvcc..." 
def nms_cpu(dets, thresh):
    """Greedy non-maximum suppression computed with NumPy.

    dets: tensor whose rows are (x1, y1, x2, y2, score); it is converted
        with ``dets.numpy()``.
    thresh: a box is dropped when its overlap (IoU) with an already kept,
        higher-scoring box is greater than this value.

    Returns a ``torch.IntTensor`` holding the kept row indices, ordered by
    descending score.
    """
    boxes = dets.numpy()
    left, top, right, bottom, confidence = (boxes[:, col] for col in range(5))

    # Box areas, using the +1 (inclusive pixel) width and height.
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = confidence.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best = remaining.item(0)
        rest = remaining[1:]
        selected.append(best)

        # Intersection of the best box with every remaining candidate.
        overlap_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest])
            - np.maximum(left[best], left[rest]) + 1)
        overlap_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest])
            - np.maximum(top[best], top[rest]) + 1)
        intersection = overlap_w * overlap_h
        iou = intersection / (box_area[best] + box_area[rest] - intersection)

        # Only candidates that overlap little enough survive this round.
        remaining = rest[iou <= thresh]

    return torch.IntTensor(selected)
def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    dets: detections to filter; an input with zero rows returns ``[]``.
    thresh: overlap threshold forwarded to the selected implementation.
    force_cpu: use the CPU implementation even when CUDA is available.

    The GPU implementation is only imported when CUDA is available, so the
    CPU implementation is also used whenever CUDA is missing; previously the
    default ``force_cpu=False`` raised ``NameError`` on such machines.
    """
    if dets.shape[0] == 0:
        return []
    # ---numpy version---
    # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    # ---pytorch version---
    if force_cpu or not torch.cuda.is_available():
        return nms_cpu(dets, thresh)
    return nms_gpu(dets, thresh)
def _import_symbols(locals):
    """Re-export every attribute of the compiled ``_lib`` into ``locals``.

    Callable attributes are passed through ``_wrap_function`` together with
    ``_ffi``; everything else is copied unchanged. Each exported name is also
    appended to the module-level ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)
# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """Legacy (instance-style) autograd function wrapping the roi_align extension.

    The instance keeps the RoIs and the feature-map size seen in ``forward``
    so that ``backward`` can size and fill the gradient buffer.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        """Store the output size and the scale passed on to the extension."""
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        """Pool ``features`` over ``rois``.

        Args:
            features: 4-D feature tensor; dim 1 is used as the channel count.
            rois: 2-D tensor with one RoI per row.

        Returns:
            Tensor of shape ``(num_rois, channels, aligned_height,
            aligned_width)`` of the same type as ``features``.
        """
        self.rois = rois
        self.feature_size = features.size()

        num_channels = features.size(1)
        num_rois = rois.size(0)

        output = features.new(num_rois, num_channels, self.aligned_height,
                              self.aligned_width).zero_()
        # Look up only the kernel that is needed: a CPU-only build of the
        # extension does not export the *_cuda symbols.
        if features.is_cuda:
            kernel = roi_align.roi_align_forward_cuda
        else:
            kernel = roi_align.roi_align_forward
        kernel(self.aligned_height, self.aligned_width, self.spatial_scale,
               features, rois, output)

        return output

    def backward(self, grad_output):
        """Scatter ``grad_output`` back onto the feature map.

        Returns:
            ``(grad_features, None)``; the RoIs receive no gradient.
        """
        assert self.feature_size is not None, \
            "backward() called before forward()"

        batch_size, num_channels, data_height, data_width = self.feature_size

        # Allocate from grad_output so the buffer always matches the type and
        # device of the tensor the kernel reads from.
        grad_input = grad_output.new(batch_size, num_channels, data_height,
                                     data_width).zero_()
        # roi_align.h declares a CPU backward with the same argument order as
        # the CUDA one, so host tensors no longer have to be rejected.
        if grad_output.is_cuda:
            kernel = roi_align.roi_align_backward_cuda
        else:
            kernel = roi_align.roi_align_backward
        kernel(self.aligned_height, self.aligned_width, self.spatial_scale,
               grad_output, self.rois, grad_input)

        return grad_input, None
class RoIAlignAvg(Module):
    """RoI-align followed by a 2x2, stride-1 average pool."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()
        self.aligned_width, self.aligned_height = (int(aligned_width),
                                                   int(aligned_height))
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # The align step is asked for one extra row and column; the 2x2
        # stride-1 pool then brings the result back to the configured size.
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 self.spatial_scale)
        aligned = align(features, rois)
        return avg_pool2d(aligned, kernel_size=2, stride=1)


class RoIAlignMax(Module):
    """RoI-align followed by a 2x2, stride-1 max pool."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()
        self.aligned_width, self.aligned_height = (int(aligned_width),
                                                   int(aligned_height))
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Same one-larger align trick as RoIAlignAvg, reduced with a max.
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 self.spatial_scale)
        aligned = align(features, rois)
        return max_pool2d(aligned, kernel_size=2, stride=1)
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // 
data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 
def _import_symbols(locals):
    """Re-export every attribute of the compiled ``_lib`` into ``locals``.

    Functions are wrapped with ``_wrap_function`` (bound to ``_ffi``); any
    other attribute is exported unchanged. Each exported name is appended to
    the module-level ``__all__``.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        # Only callables may go through _wrap_function; wrapping a plain value
        # would replace it with a function object. This is the same guard the
        # roi_align, roi_crop and roi_pooling _ext packages use.
        if callable(fn):
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            locals[symbol] = fn
        __all__.append(symbol)
# Build description for the roi_crop extension: the CPU sources are always
# compiled, the CUDA wrapper and the pre-built kernel object are added only
# when CUDA is available.
sources = ['src/roi_crop.c']
headers = ['src/roi_crop.h']
defines = []
with_cuda = False
# create_extension() below always receives extra_objects, so it has to exist
# on CPU-only machines too (it used to be defined only in the CUDA branch,
# which made the script fail with a NameError there).
extra_objects = []

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/roi_crop_cuda.c']
    headers += ['src/roi_crop_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

    # The kernel object is produced by make.sh (nvcc) before this script runs;
    # it is referenced by absolute path.
    this_file = os.path.dirname(os.path.realpath(__file__))
    print(this_file)
    extra_objects = ['src/roi_crop_cuda_kernel.cu.o']
    extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

ffi = create_extension(
    '_ext.roi_crop',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
class RoICropFunction(Function):
    """Legacy-style autograd function around the BHWD bilinear sampler.

    ``forward`` keeps both inputs on the instance so that ``backward`` can
    hand them to the gradient kernel.
    """

    def forward(self, input1, input2):
        """Sample ``input1`` at the locations given by ``input2``.

        Args:
            input1: source tensor handed to the sampler.
            input2: sampling grid handed to the sampler.

        Returns:
            Tensor of shape ``(input2.size(0), input1.size(1),
            input2.size(1), input2.size(2))`` living next to ``input1``.
        """
        self.input1 = input1
        self.input2 = input2
        self.device_c = ffi.new("int *")
        # Remember the device of the data itself; the "current" CUDA device
        # can be a different one when several GPUs are in use.
        self.device = input1.get_device() if input1.is_cuda else -1
        self.device_c[0] = self.device

        # Allocate directly next to input1 instead of building the buffer on
        # the host and copying it over.
        output = input1.new(input2.size()[0], input1.size()[1],
                            input2.size()[1], input2.size()[2]).zero_()
        if input1.is_cuda:
            roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
        else:
            roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
        return output

    def backward(self, grad_output):
        """Return the gradients with respect to ``input1`` and ``input2``."""
        # Zero-filled buffers created next to the tensors they belong to.
        grad_input1 = self.input1.new(self.input1.size()).zero_()
        grad_input2 = self.input2.new(self.input2.size()).zero_()
        if grad_output.is_cuda:
            roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(
                self.input1, self.input2, grad_input1, grad_input2, grad_output)
        else:
            roi_crop.BilinearSamplerBHWD_updateGradInput(
                self.input1, self.input2, grad_input1, grad_input2, grad_output)
        return grad_input1, grad_input2
class AffineGridGenFunction(Function):
    """Legacy-style autograd function turning affine matrices into sampling grids.

    A fixed ``height x width`` grid of homogeneous coordinates
    ``(row, col, 1)`` is built once; ``forward`` multiplies it with the
    per-sample matrices.
    """

    def __init__(self, height, width, lr=1):
        """Precompute the base grid.

        Args:
            height: number of grid rows.
            width: number of grid columns.
            lr: stored on the instance; not used by this class itself.
        """
        super(AffineGridGenFunction, self).__init__()
        self.lr = lr
        self.height, self.width = height, width
        # linspace(..., endpoint=False) gives the same samples as
        # arange(-1, 1, 2.0 / n) but is guaranteed to return exactly n of
        # them; a float-step arange can be off by one element.
        rows = np.linspace(-1, 1, self.height, endpoint=False)
        cols = np.linspace(-1, 1, self.width, endpoint=False)
        grid = np.ones([self.height, self.width, 3], dtype=np.float32)
        grid[:, :, 0] = rows[:, np.newaxis]   # channel 0 varies along rows
        grid[:, :, 1] = cols[np.newaxis, :]   # channel 1 varies along columns
        self.grid = torch.from_numpy(grid)

    def forward(self, input1):
        """Apply each matrix in ``input1`` to the base grid.

        Args:
            input1: tensor of shape ``(N, 2, 3)``.

        Returns:
            Tensor of shape ``(N, height, width, 2)``.
        """
        self.input1 = input1
        batch = input1.size(0)
        # torch tensors have no ``astype``; type_as() gives the grid the type
        # of the input, and one expand replaces the per-sample copy loop.
        grid = self.grid.type_as(input1)
        self.batchgrid = grid.unsqueeze(0).expand(
            batch, self.height, self.width, 3).contiguous()

        # A single batched product already covers every sample.
        output = torch.bmm(
            self.batchgrid.view(batch, self.height * self.width, 3),
            torch.transpose(input1, 1, 2))
        return output.view(batch, self.height, self.width, 2)

    def backward(self, grad_output):
        """Return the gradient with respect to ``input1``, shape ``(N, 2, 3)``."""
        # contiguous() keeps view() valid for strided incoming gradients.
        flat_grad = grad_output.contiguous().view(
            -1, self.height * self.width, 2)
        grad_input1 = torch.bmm(
            torch.transpose(flat_grad, 1, 2),
            self.batchgrid.view(-1, self.height * self.width, 3))
        return grad_input1
7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divyam02/dafrcnn-pytorch/4a9c54cd029426a18fff09b4158917cd4d82a85e/standard_train/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /standard_train/lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- 
/standard_train/lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | 
def _import_symbols(locals):
    """Publish the contents of the compiled ``_lib`` in ``locals``.

    Every name reported by ``dir(_lib)`` is exported: callables are wrapped
    by ``_wrap_function`` with ``_ffi``, other attributes are stored as they
    are. The names are collected in the module-level ``__all__``.
    """
    for exported in dir(_lib):
        value = getattr(_lib, exported)
        if not callable(value):
            locals[exported] = value
        else:
            locals[exported] = _wrap_function(value, _ffi)
        __all__.append(exported)
class RoIPoolFunction(Function):
    """Legacy (instance-style) autograd function wrapping the roi_pooling extension.

    The instance itself serves as the context: ``forward`` stores the feature
    size, the RoIs and the argmax buffer on it for ``backward``.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale):
        """Store the pooled output size and the scale passed to the kernels."""
        ctx.pooled_width = pooled_width
        ctx.pooled_height = pooled_height
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = None

    def forward(ctx, features, rois):
        """Max-pool ``features`` over ``rois``.

        Args:
            features: 4-D feature tensor in ``(batch, channels, height,
                width)`` order.
            rois: 2-D tensor with one RoI per row.

        Returns:
            Tensor of shape ``(num_rois, channels, pooled_height,
            pooled_width)`` of the same type as ``features``.
        """
        ctx.feature_size = features.size()
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        num_rois = rois.size(0)
        output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
        ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
        ctx.rois = rois
        if not features.is_cuda:
            # The C implementation walks the raw buffer as packed
            # (batch, height, width, channels) data. permute() only changes
            # strides, so the result must be made contiguous before its
            # memory really has that layout.
            _features = features.permute(0, 2, 3, 1).contiguous()
            roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                            _features, rois, output)
        else:
            roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                                 features, rois, output, ctx.argmax)

        return output

    def backward(ctx, grad_output):
        """Route ``grad_output`` back to the feature map (CUDA tensors only).

        Returns:
            ``(grad_features, None)``; the RoIs receive no gradient.
        """
        assert ctx.feature_size is not None, "backward() called before forward()"
        # Only the CUDA backward kernel is called here.
        assert grad_output.is_cuda, "RoI pooling backward requires CUDA tensors"
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()

        roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                              grad_output, ctx.rois, grad_input, ctx.argmax)

        return grad_input, None
-------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = 
hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /standard_train/lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /standard_train/lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = 
THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, 
rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /standard_train/lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /standard_train/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /standard_train/lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- 
def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    ``ratios`` defaults to [0.5, 1, 2] and ``scales`` to 2**np.arange(3, 6).
    Each may be a scalar or any 1-D array-like. The defaults are built per
    call, so no mutable object is shared between calls.

    Returns a (len(ratios) * len(scales), 4) array of (x1, y1, x2, y2) rows,
    grouped by ratio and then ordered by scale.
    """
    if ratios is None:
        ratios = [0.5, 1, 2]
    if scales is None:
        scales = 2 ** np.arange(3, 6)
    # atleast_1d lets a bare number stand for a single ratio or scale; the
    # helpers index their inputs with [:, np.newaxis] and need one axis.
    ratios = np.atleast_1d(ratios)
    scales = np.atleast_1d(scales)

    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors


def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """
    # Coordinates are inclusive, hence the +1 on width and height.
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows), one row per
    (width, height) pair.
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors


def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    # Divide the area by the ratio, then derive rounded width and height.
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 
def im_list_to_blob(ims, num_channels=3):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).

    Every image is copied into the top-left corner of its slice of a
    zero-filled float32 array of shape
    (len(ims), max height, max width, num_channels), so smaller images are
    zero-padded on the bottom and right.

    ``num_channels`` defaults to 3, the previously hard-coded value.

    Raises ValueError when ``ims`` is empty.
    """
    if len(ims) == 0:
        # The max-reduction below would fail on an empty list with a less
        # helpful ValueError from numpy.
        raise ValueError('im_list_to_blob needs at least one image')

    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], num_channels),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im

    return blob
a list of images.""" 24 | 25 | img_summaries = [] 26 | for i, img in enumerate(images): 27 | # Write the image to a string 28 | try: 29 | s = StringIO() 30 | except: 31 | s = BytesIO() 32 | scipy.misc.toimage(img).save(s, format="png") 33 | 34 | # Create an Image object 35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 36 | height=img.shape[0], 37 | width=img.shape[1]) 38 | # Create a Summary value 39 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 40 | 41 | # Create and write Summary 42 | summary = tf.Summary(value=img_summaries) 43 | self.writer.add_summary(summary, step) 44 | 45 | def histo_summary(self, tag, values, step, bins=1000): 46 | """Log a histogram of the tensor of values.""" 47 | 48 | # Create a histogram using numpy 49 | counts, bin_edges = np.histogram(values, bins=bins) 50 | 51 | # Fill the fields of the histogram proto 52 | hist = tf.HistogramProto() 53 | hist.min = float(np.min(values)) 54 | hist.max = float(np.max(values)) 55 | hist.num = int(np.prod(values.shape)) 56 | hist.sum = float(np.sum(values)) 57 | hist.sum_squares = float(np.sum(values**2)) 58 | 59 | # Drop the start of the first bin 60 | bin_edges = bin_edges[1:] 61 | 62 | # Add bin edges and counts 63 | for edge in bin_edges: 64 | hist.bucket_limit.append(edge) 65 | for c in counts: 66 | hist.bucket.append(c) 67 | 68 | # Create and write Summary 69 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 70 | self.writer.add_summary(summary, step) 71 | self.writer.flush() 72 | -------------------------------------------------------------------------------- /standard_train/lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /standard_train/lib/pycocotools/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /standard_train/lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /standard_train/lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 
32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 
67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /standard_train/lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /standard_train/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /standard_train/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # 
def _get_image_blob(roidb, scale_inds):
    """Build the input blob for the images listed in ``roidb``.

    Image ``i`` is resized so that its shorter side matches
    ``cfg.TRAIN.SCALES[scale_inds[i]]``. Returns the stacked blob together
    with the list of scale factors applied, one per image.
    """
    scales_used = []
    prepared = []
    for idx, entry in enumerate(roidb):
        image = imread(entry['image'])

        # Single-channel images are replicated into three identical channels.
        if len(image.shape) == 2:
            plane = image[:, :, np.newaxis]
            image = np.concatenate((plane, plane, plane), axis=2)

        # imread gives RGB; the rest of the pipeline was written for cv2,
        # which is BGR, so reverse the channel axis.
        image = image[:, :, ::-1]

        # Horizontally mirrored roidb entries get a mirrored image.
        if entry['flipped']:
            image = image[:, ::-1, :]

        wanted_size = cfg.TRAIN.SCALES[scale_inds[idx]]
        image, factor = prep_im_for_blob(image, cfg.PIXEL_MEANS, wanted_size,
                                         cfg.TRAIN.MAX_SIZE)
        scales_used.append(factor)
        prepared.append(image)

    # Pack the processed images into one zero-padded array
    return im_list_to_blob(prepared), scales_used
# Collect the distinct <object>/<name> labels that occur in the annotation
# files listed in check.txt (one base name per line, without the ".xml"
# suffix), then print them.
obj_struct = {}
with open('check.txt', 'r') as f:
    for line in f:
        # Strip the line terminator (and stray whitespace) instead of blindly
        # dropping the last character: the final line may have no trailing
        # newline, and CRLF files would otherwise keep a '\r' in the name.
        stem = line.strip()
        if not stem:
            # Blank lines do not name a file.
            continue
        filename = stem + '.xml'
        try:
            tree = ET.parse(filename)
            for obj in tree.findall('object'):
                # A missing <name> child makes find() return None, so the
                # attribute access raises and the file is reported below.
                obj_struct[str(obj.find('name').text)] = True
        except Exception:
            # Best effort: report the unreadable / malformed file and keep
            # scanning. Unlike a bare ``except:``, this still lets
            # KeyboardInterrupt and SystemExit stop the script.
            print("weird file", filename)
print(obj_struct.keys())