├── .spyproject ├── codestyle.ini ├── encoding.ini ├── vcs.ini └── workspace.ini ├── README.md ├── data ├── VOCdevkit2007 │ └── train_faster_rcnn.sh ├── VOCdevkit2012 │ └── train_faster_rcnn.sh ├── imagenet_weights │ └── train_faster_rcnn.sh └── selective_search_data │ └── train_faster_rcnn.sh ├── experiments ├── cfgs │ ├── change_log.txt │ ├── mobile.yml │ ├── res101-lg.yml │ ├── res101.yml │ ├── res50.yml │ └── vgg16.yml ├── logs │ ├── 46.67 │ │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-11_08-16-58 │ │ └── vgg16_voc_2007_trainval__vgg16.txt.2019-03-11_08-16-28 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-03_19-21-38 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-06_02-46-04 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-06_08-16-58 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-08_08-06-10 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-08_09-03-31 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-08_11-20-57 │ ├── test_vgg16_voc_2007_trainval_.txt.2019-03-10_18-07-42 │ ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-03_19-20-42 │ ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-05_08-20-12 │ ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-06_08-16-31 │ ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-08_08-05-45 │ ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-08_09-03-06 │ ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-08_11-20-27 │ └── vgg16_voc_2007_trainval__vgg16.txt.2019-03-09_20-05-42 └── scripts │ ├── convert_vgg16.sh │ ├── test_faster_rcnn.sh │ ├── test_faster_rcnn_notime.sh │ ├── train_faster_rcnn.sh │ └── train_faster_rcnn_notime.sh ├── lib ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── coco.cpython-36.pyc │ │ ├── coco.cpython-37.pyc │ │ ├── ds_utils.cpython-36.pyc │ │ ├── ds_utils.cpython-37.pyc │ │ ├── factory.cpython-36.pyc │ │ ├── factory.cpython-37.pyc │ │ ├── imdb.cpython-36.pyc │ │ 
├── imdb.cpython-37.pyc │ │ ├── pascal_voc.cpython-36.pyc │ │ ├── pascal_voc.cpython-37.pyc │ │ ├── voc_eval.cpython-36.pyc │ │ └── voc_eval.cpython-37.pyc │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── tools │ │ └── mcg_munge.py │ └── voc_eval.py ├── layer_utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── anchor_target_layer.cpython-36.pyc │ │ ├── anchor_target_layer.cpython-37.pyc │ │ ├── generate_anchors.cpython-36.pyc │ │ ├── generate_anchors.cpython-37.pyc │ │ ├── proposal_layer.cpython-36.pyc │ │ ├── proposal_layer.cpython-37.pyc │ │ ├── proposal_target_layer.cpython-36.pyc │ │ ├── proposal_target_layer.cpython-37.pyc │ │ ├── proposal_top_layer.cpython-36.pyc │ │ ├── proposal_top_layer.cpython-37.pyc │ │ ├── snippets.cpython-36.pyc │ │ └── snippets.cpython-37.pyc │ ├── anchor_target_layer.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_top_layer.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── crop_and_resize.cpython-36.pyc │ │ │ └── crop_and_resize.cpython-37.pyc │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── __init__.cpython-37.pyc │ │ │ └── crop_and_resize │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── __init__.cpython-37.pyc │ │ │ │ └── _crop_and_resize.so │ │ ├── build.py │ │ ├── crop_and_resize.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── crop_and_resize.c │ │ │ ├── crop_and_resize.h │ │ │ ├── crop_and_resize_gpu.c │ │ │ ├── crop_and_resize_gpu.h │ │ │ └── cuda │ │ │ ├── crop_and_resize_kernel.cu │ │ │ ├── crop_and_resize_kernel.cu.o │ │ │ └── crop_and_resize_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-36.pyc │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── 
__pycache__ │ │ │ │ └── __init__.cpython-36.pyc │ │ │ └── roi_pooling │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ └── __init__.cpython-36.pyc │ │ │ │ └── _roi_pooling.so │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── roi_pool.cpython-36.pyc │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.cu.o │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ ├── roi_ring_pooling │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ └── __init__.cpython-36.pyc │ │ │ └── roi_ring_pooling │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ └── __init__.cpython-36.pyc │ │ │ │ └── _roi_ring_pooling.so │ │ ├── build.py │ │ ├── functions │ │ │ ├── __pycache__ │ │ │ │ └── roi_ring_pool.cpython-36.pyc │ │ │ └── roi_ring_pool.py │ │ ├── modules │ │ │ └── roi_ring_pool.py │ │ └── src │ │ │ ├── roi_ring_pooling.c │ │ │ ├── roi_ring_pooling.h │ │ │ ├── roi_ring_pooling_cuda.c │ │ │ ├── roi_ring_pooling_cuda.h │ │ │ ├── roi_ring_pooling_kernel.cu │ │ │ ├── roi_ring_pooling_kernel.cu.o │ │ │ └── roi_ring_pooling_kernel.h │ └── snippets.py ├── layers │ ├── __pycache__ │ │ └── recurrent_linear.cpython-36.pyc │ └── recurrent_linear.py ├── make.sh ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── apmetric.cpython-36.pyc │ │ ├── bbox_transform.cpython-36.pyc │ │ ├── bbox_transform.cpython-37.pyc │ │ ├── config.cpython-36.pyc │ │ ├── config.cpython-37.pyc │ │ ├── nms_wrapper.cpython-36.pyc │ │ ├── nms_wrapper.cpython-37.pyc │ │ ├── test.cpython-36.pyc │ │ ├── test.cpython-37.pyc │ │ ├── train_val.cpython-36.pyc │ │ └── train_val.cpython-37.pyc │ ├── apmetric.py │ ├── bbox_transform.py │ ├── config.py │ ├── nms_wrapper.py │ ├── test.py │ └── 
train_val.py ├── nets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── mobilenet_v1.cpython-36.pyc │ │ ├── mobilenet_v1.cpython-37.pyc │ │ ├── network.cpython-36.pyc │ │ ├── network.cpython-37.pyc │ │ ├── resnet_v1.cpython-36.pyc │ │ ├── resnet_v1.cpython-37.pyc │ │ ├── vgg16.cpython-36.pyc │ │ └── vgg16.cpython-37.pyc │ ├── mobilenet_v1.py │ ├── network.py │ ├── resnet_v1.py │ └── vgg16.py ├── nms │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── pth_nms.cpython-36.pyc │ │ └── pth_nms.cpython-37.pyc │ ├── _ext │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── __init__.cpython-37.pyc │ │ └── nms │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── __init__.cpython-37.pyc │ │ │ └── _nms.so │ ├── build.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── roi_data_layer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── layer.cpython-36.pyc │ │ ├── layer.cpython-37.pyc │ │ ├── minibatch.cpython-36.pyc │ │ ├── minibatch.cpython-37.pyc │ │ ├── roidb.cpython-36.pyc │ │ └── roidb.cpython-37.pyc │ ├── layer.py │ ├── minibatch.py │ └── roidb.py └── utils │ ├── .gitignore │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── bbox.cpython-36.pyc │ ├── bbox.cpython-37.pyc │ ├── blob.cpython-36.pyc │ ├── blob.cpython-37.pyc │ ├── timer.cpython-36.pyc │ ├── timer.cpython-37.pyc │ ├── visualization.cpython-36.pyc │ └── visualization.cpython-37.pyc │ ├── bbox.py │ ├── blob.py │ ├── timer.py │ └── visualization.py ├── output └── train_faster_rcnn.sh ├── tensorboard └── vgg16 │ └── voc_2007_trainval │ └── default_val │ └── 
events.out.tfevents.1552263409.vasgaoweithu-Precision-Tower-7910 └── tools ├── __pycache__ └── _init_paths.cpython-36.pyc ├── _init_paths.py ├── demo.py ├── test_net.py └── trainval_net.py /.spyproject/codestyle.ini: -------------------------------------------------------------------------------- 1 | [codestyle] 2 | indentation = True 3 | 4 | [main] 5 | version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/encoding.ini: -------------------------------------------------------------------------------- 1 | [encoding] 2 | text_encoding = utf-8 3 | 4 | [main] 5 | version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/vcs.ini: -------------------------------------------------------------------------------- 1 | [vcs] 2 | use_version_control = False 3 | version_control_system = 4 | 5 | [main] 6 | version = 0.1.0 7 | 8 | -------------------------------------------------------------------------------- /.spyproject/workspace.ini: -------------------------------------------------------------------------------- 1 | [workspace] 2 | restore_data_on_startup = True 3 | save_data_on_exit = True 4 | save_history = True 5 | save_non_project_files = False 6 | 7 | [main] 8 | version = 0.1.0 9 | <<<<<<< HEAD 10 | recent_files = ['/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/nets/vgg16.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/nets/network.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/layer_utils/roi_ring_pooling/functions/roi_ring_pool.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/tools/trainval_net.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/layers/recurrent_linear.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/tools/test_net.py', 
'/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/make.sh', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/model/config.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/model/test.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/lib/model/train_val.py', '/media/vasgaoweithu/0BCB122F0BCB122F/vasgaowei/demo/Pytorch_MELM/experiments/cfgs/vgg16.yml'] 11 | ======= 12 | recent_files = [] 13 | >>>>>>> bd73dd11c938cb9256829ec3559daaab1fc77b74 14 | 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pytorch_MELM 2 | **News. This repo now supports pytorch-1.0 and higher versions!!! I borrowed code and some implementation ideas from [mmdetection](https://github.com/open-mmlab/mmdetection).** 3 | 4 | This is a simplified version of MELM with context in pytorch for the paper "Min-Entropy Latent Model for Weakly Supervised Object Detection", which was accepted at [CVPR2018](http://openaccess.thecvf.com/content_cvpr_2018/papers/Wan_Min-Entropy_Latent_Model_CVPR_2018_paper.pdf) and [TPAMI](https://ieeexplore.ieee.org/document/8640243). 5 | 6 | This implementation is based on [Winfrand's](https://github.com/Winfrand/MELM), which is the official version based on torch7 and lua. This implementation is also based on ruotianluo's [pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn). 
7 | 8 | **And trained on PASCAL_VOC 2007 trainval and tested on PASCAL_VOC test with VGG16 backbone, I got a performance mAP 47.98 a little better than the paper's result** 9 | 10 | # If you find MELM useful and use this code, please cite our paper: 11 | ``` 12 | @inproceedings{wan2018min, 13 | title={Min-Entropy Latent Model for Weakly Supervised Object Detection}, 14 | author={Wan, Fang and Wei, Pengxu and Jiao, Jianbin and Han, Zhenjun and Ye, Qixiang}, 15 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 16 | pages={1297--1306}, 17 | year={2018} 18 | } 19 | ``` 20 | ``` 21 | @article{wan2019Pami, 22 | author = {Fang Wan and 23 | Pengxu Wei and 24 | Jianbin Jiao and 25 | Zhenjun Han and 26 | Qixiang Ye}, 27 | title = {Min-Entropy Latent Model for Weakly Supervised Object Detection}, 28 | journal = {{IEEE} Trans. Pattern Anal. Mach. Intell.}, 29 | volume = {DOI:10.1109/TPAMI.2019.2898858}, 30 | year = {2019} 31 | } 32 | ``` 33 | 34 | 35 | # Prerequisites 36 | * Nvidia GPU 1080Ti 37 | * Ubuntu 16.04 LTS 38 | * python **3.6** 39 | * pytorch **0.4** is required. For pytorch **1.0** or higher version, please go to the **pytorch1.0** version. 40 | * tensorflow, tensorboard and [tensorboardX](https://github.com/lanpa/tensorboardX) for visualizing training and validation curve. 41 | 42 | # Installation 43 | 1. Clone the repository 44 | ```Shell 45 | git clone https://github.com/vasgaowei/pytorch_MELM.git 46 | ``` 47 | 2. Compile the modules(nms, roi_pooling, roi_ring_pooling and roi_align) 48 | ``` 49 | cd pytorch_MELM/lib 50 | bash make.sh 51 | ``` 52 | # Setup the data 53 | 54 | 1. 
Download the training, validation, test data and the VOCdevkit 55 | ``` 56 | cd pytorch_MELM/ 57 | mkdir data 58 | cd data/ 59 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 60 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 61 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 62 | ``` 63 | 64 | 65 | 2. Extract all of these tars into one directory named VOCdevkit 66 | ``` 67 | tar xvf VOCtrainval_06-Nov-2007.tar 68 | tar xvf VOCtest_06-Nov-2007.tar 69 | tar xvf VOCdevkit_08-Jun-2007.tar 70 | ``` 71 | 3. Create symlinks for PASCAL VOC dataset or just rename the VOCdevkit to VOCdevkit2007 72 | ``` 73 | cd pytorch_MELM/data 74 | ln -s VOCdevkit VOCdevkit2007 75 | ``` 76 | 4. It should have this basic structure 77 | ``` 78 | $VOCdevkit2007/ # development kit 79 | $VOCdevkit2007/VOC2007/ # image sets, annotations, etc 80 | $VOCdevkit2007/VOCcode/ # VOC utility code 81 | ``` 82 | For PASCAL VOC 2010 and PASCAL VOC 2012, just follow similar steps. 83 | 84 | # Download the pre-trained ImageNet models 85 | Download the pre-trained ImageNet models from https://drive.google.com/drive/folders/0B1_fAEgxdnvJSmF3YUlZcHFqWTQ 86 | or download from https://drive.google.com/drive/folders/1FV6ZOHOxLMQjE4ujTNOObI7lN8USH0v_?usp=sharing, put it in data/imagenet_weights and rename it vgg16.pth. The folder has the following form. 
87 | ``` 88 | $ data/imagenet_weights/vgg16.pth 89 | $ data/imagenet_weights/res50.pth 90 | ``` 91 | # Download the Selective Search proposals for PASCAL VOC 2007 92 | Download it from: https://dl.dropboxusercontent.com/s/orrt7o6bp6ae0tc/selective_search_data.tgz 93 | and unzip it; the final folder has the following form 94 | ``` 95 | $ data/selective_search_data/voc_2007_train.mat 96 | $ data/selective_search_data/voc_2007_test.mat 97 | $ data/selective_search_data/voc_2007_trainval.mat 98 | ``` 99 | # Train your own model 100 | For vgg16 backbone, we can train the model using the following commands 101 | ``` 102 | ./experiments/scripts/train_faster_rcnn.sh 0 pascal_voc vgg16 103 | ``` 104 | And for test, we can use the following commands 105 | ``` 106 | ./experiments/scripts/test_faster_rcnn.sh 0 pascal_voc vgg16 107 | ``` 108 | # Visualizing some detection results 109 | I have pretrained a MELM_pytorch model on PASCAL VOC 2007 based on vgg16 backbone; you can download it from https://drive.google.com/drive/folders/1FV6ZOHOxLMQjE4ujTNOObI7lN8USH0v_?usp=sharing, save it as 110 | output/vgg16/voc_2007_trainval/default/vgg16_MELM.pth and run the following commands. 111 | ``` 112 | cd pytorch_MELM 113 | python ./tools/demo.py --net vgg16 --dataset pascal_voc 114 | ``` 115 | You can also visualize the training and validation curves. 
116 | ``` 117 | tensorboard --logdir tensorboard/vgg16/voc_2007_trainval/ 118 | ``` 119 | 120 | -------------------------------------------------------------------------------- /data/VOCdevkit2007/train_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | alias time='/usr/bin/time' 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | GPU_ID=$1 11 | DATASET=$2 12 | NET=$3 13 | 14 | array=( $@ ) 15 | len=${#array[@]} 16 | EXTRA_ARGS=${array[@]:3:$len} 17 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 18 | 19 | case ${DATASET} in 20 | pascal_voc) 21 | TRAIN_IMDB="voc_2007_trainval" 22 | TEST_IMDB="voc_2007_test" 23 | STEPSIZE="[50000]" 24 | ITERS=100000 25 | ANCHORS="[8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | pascal_voc_0712) 29 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 30 | TEST_IMDB="voc_2007_test" 31 | STEPSIZE="[80000]" 32 | ITERS=110000 33 | ANCHORS="[8,16,32]" 34 | RATIOS="[0.5,1,2]" 35 | ;; 36 | coco) 37 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 38 | TEST_IMDB="coco_2014_minival" 39 | STEPSIZE="[350000]" 40 | ITERS=490000 41 | ANCHORS="[4,8,16,32]" 42 | RATIOS="[0.5,1,2]" 43 | ;; 44 | *) 45 | echo "No dataset given" 46 | exit 47 | ;; 48 | esac 49 | 50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 51 | exec &> >(tee -a "$LOG") 52 | echo Logging output to "$LOG" 53 | 54 | set +x 55 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | else 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | fi 60 | set -x 61 | 62 | if [ ! -f ${NET_FINAL}.index ]; then 63 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 65 | --weight data/imagenet_weights/${NET}.pth \ 66 | --imdb ${TRAIN_IMDB} \ 67 | --imdbval ${TEST_IMDB} \ 68 | --iters ${ITERS} \ 69 | --cfg experiments/cfgs/${NET}.yml \ 70 | --tag ${EXTRA_ARGS_SLUG} \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 74 | else 75 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 76 | --weight data/imagenet_weights/${NET}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | echo $@ 88 | ./experiments/scripts/test_faster_rcnn.sh $@ 89 | -------------------------------------------------------------------------------- /data/VOCdevkit2012/train_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | alias time='/usr/bin/time' 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | GPU_ID=$1 11 | DATASET=$2 12 | NET=$3 13 | 14 | array=( $@ ) 15 | len=${#array[@]} 16 | EXTRA_ARGS=${array[@]:3:$len} 17 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 18 | 19 | case ${DATASET} in 20 | pascal_voc) 21 | TRAIN_IMDB="voc_2007_trainval" 22 | TEST_IMDB="voc_2007_test" 23 | STEPSIZE="[50000]" 24 | ITERS=100000 25 | ANCHORS="[8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | pascal_voc_0712) 29 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 30 | TEST_IMDB="voc_2007_test" 31 | STEPSIZE="[80000]" 32 | ITERS=110000 33 | ANCHORS="[8,16,32]" 34 | RATIOS="[0.5,1,2]" 35 | ;; 36 | coco) 37 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 38 | TEST_IMDB="coco_2014_minival" 39 | STEPSIZE="[350000]" 40 | ITERS=490000 41 | ANCHORS="[4,8,16,32]" 42 | 
RATIOS="[0.5,1,2]" 43 | ;; 44 | *) 45 | echo "No dataset given" 46 | exit 47 | ;; 48 | esac 49 | 50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 51 | exec &> >(tee -a "$LOG") 52 | echo Logging output to "$LOG" 53 | 54 | set +x 55 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | else 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | fi 60 | set -x 61 | 62 | if [ ! -f ${NET_FINAL}.index ]; then 63 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 65 | --weight data/imagenet_weights/${NET}.pth \ 66 | --imdb ${TRAIN_IMDB} \ 67 | --imdbval ${TEST_IMDB} \ 68 | --iters ${ITERS} \ 69 | --cfg experiments/cfgs/${NET}.yml \ 70 | --tag ${EXTRA_ARGS_SLUG} \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 74 | else 75 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 76 | --weight data/imagenet_weights/${NET}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | echo $@ 88 | ./experiments/scripts/test_faster_rcnn.sh $@ 89 | -------------------------------------------------------------------------------- /data/imagenet_weights/train_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | alias time='/usr/bin/time' 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | GPU_ID=$1 11 | DATASET=$2 12 | NET=$3 13 | 14 | array=( $@ ) 15 | len=${#array[@]} 16 | EXTRA_ARGS=${array[@]:3:$len} 17 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 18 
| 19 | case ${DATASET} in 20 | pascal_voc) 21 | TRAIN_IMDB="voc_2007_trainval" 22 | TEST_IMDB="voc_2007_test" 23 | STEPSIZE="[50000]" 24 | ITERS=100000 25 | ANCHORS="[8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | pascal_voc_0712) 29 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 30 | TEST_IMDB="voc_2007_test" 31 | STEPSIZE="[80000]" 32 | ITERS=110000 33 | ANCHORS="[8,16,32]" 34 | RATIOS="[0.5,1,2]" 35 | ;; 36 | coco) 37 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 38 | TEST_IMDB="coco_2014_minival" 39 | STEPSIZE="[350000]" 40 | ITERS=490000 41 | ANCHORS="[4,8,16,32]" 42 | RATIOS="[0.5,1,2]" 43 | ;; 44 | *) 45 | echo "No dataset given" 46 | exit 47 | ;; 48 | esac 49 | 50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 51 | exec &> >(tee -a "$LOG") 52 | echo Logging output to "$LOG" 53 | 54 | set +x 55 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | else 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | fi 60 | set -x 61 | 62 | if [ ! -f ${NET_FINAL}.index ]; then 63 | if [[ ! 
-z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 65 | --weight data/imagenet_weights/${NET}.pth \ 66 | --imdb ${TRAIN_IMDB} \ 67 | --imdbval ${TEST_IMDB} \ 68 | --iters ${ITERS} \ 69 | --cfg experiments/cfgs/${NET}.yml \ 70 | --tag ${EXTRA_ARGS_SLUG} \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 74 | else 75 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 76 | --weight data/imagenet_weights/${NET}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | echo $@ 88 | ./experiments/scripts/test_faster_rcnn.sh $@ 89 | -------------------------------------------------------------------------------- /data/selective_search_data/train_faster_rcnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | alias time='/usr/bin/time' 4 | 5 | set -x 6 | set -e 7 | 8 | export PYTHONUNBUFFERED="True" 9 | 10 | GPU_ID=$1 11 | DATASET=$2 12 | NET=$3 13 | 14 | array=( $@ ) 15 | len=${#array[@]} 16 | EXTRA_ARGS=${array[@]:3:$len} 17 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 18 | 19 | case ${DATASET} in 20 | pascal_voc) 21 | TRAIN_IMDB="voc_2007_trainval" 22 | TEST_IMDB="voc_2007_test" 23 | STEPSIZE="[50000]" 24 | ITERS=100000 25 | ANCHORS="[8,16,32]" 26 | RATIOS="[0.5,1,2]" 27 | ;; 28 | pascal_voc_0712) 29 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 30 | TEST_IMDB="voc_2007_test" 31 | STEPSIZE="[80000]" 32 | ITERS=110000 33 | ANCHORS="[8,16,32]" 34 | RATIOS="[0.5,1,2]" 35 | ;; 36 | coco) 37 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 38 | TEST_IMDB="coco_2014_minival" 39 | STEPSIZE="[350000]" 40 | ITERS=490000 41 | ANCHORS="[4,8,16,32]" 42 | 
RATIOS="[0.5,1,2]" 43 | ;; 44 | *) 45 | echo "No dataset given" 46 | exit 47 | ;; 48 | esac 49 | 50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 51 | exec &> >(tee -a "$LOG") 52 | echo Logging output to "$LOG" 53 | 54 | set +x 55 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | else 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | fi 60 | set -x 61 | 62 | if [ ! -f ${NET_FINAL}.index ]; then 63 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 65 | --weight data/imagenet_weights/${NET}.pth \ 66 | --imdb ${TRAIN_IMDB} \ 67 | --imdbval ${TEST_IMDB} \ 68 | --iters ${ITERS} \ 69 | --cfg experiments/cfgs/${NET}.yml \ 70 | --tag ${EXTRA_ARGS_SLUG} \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 74 | else 75 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 76 | --weight data/imagenet_weights/${NET}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | echo $@ 88 | ./experiments/scripts/test_faster_rcnn.sh $@ 89 | -------------------------------------------------------------------------------- /experiments/cfgs/change_log.txt: -------------------------------------------------------------------------------- 1 | 2018/11/22/9:37 2 | In the vgg16.yml files, The orginal TEST.NMS value is 0.3 but changed to 0.4 for 3 | implementation for WSDNN 4 | 5 | 2018/11/23/14:19 6 | In vgg16.yml files, the original POOLING_MODE == crop, however changed to roi_align 7 | for implementation for WSDNN 8 | 9 | 
2018/11/24/8:37 10 | In vgg16.yml file, the original TRAIN.WEIGHT_DECAY == 0.0001, and changed to 0.005 11 | Also ss_boxes whose widths or heights are less than 20 are removed 12 | 13 | 2018/11/24/9:01 14 | In vgg16.yml files, the TRAIN.SCALES is {600,}, however changed to {480, 576, 688, 864, 1200} 15 | TEST.SCALES is {688, } 16 | 17 | 2018/11/24/9:23 18 | In vgg16.yml file, the TRAIN.MAX_SIZE is 1000, however changed to 1200 19 | 20 | 2018/11/24/20:33 21 | In lib/datasets/pascal_voc.py, changed classes from 21 classes to 20 classes 22 | original: 23 | self._classes = ('__background__', # always index 0 24 | 'aeroplane', 'bicycle', 'bird', 'boat', 25 | 'bottle', 'bus', 'car', 'cat', 'chair', 26 | 'cow', 'diningtable', 'dog', 'horse', 27 | 'motorbike', 'person', 'pottedplant', 28 | 'sheep', 'sofa', 'train', 'tvmonitor') 29 | new: 30 | self._classes = ( 31 | 'aeroplane', 'bicycle', 'bird', 'boat', 32 | 'bottle', 'bus', 'car', 'cat', 'chair', 33 | 'cow', 'diningtable', 'dog', 'horse', 34 | 'motorbike', 'person', 'pottedplant', 35 | 'sheep', 'sofa', 'train', 'tvmonitor') 36 | In network.py, added a score_det_net 37 | 38 | In vgg16.yml file, changed TRAIN.STEPSIZE from 30000 to 60000 39 | 40 | 41 | 2018/11/25/9:59 42 | In vgg16.yml, the learning rate is 0.0001, and changed to 0.0005. 
43 | 44 | 2018/11/26/11:14 45 | In vgg16.yml, the TEST.SCLAES changed from [688] to [480, 576, 688, 864, 1200] 46 | for multiple scale test 47 | 48 | 49 | -------------------------------------------------------------------------------- /experiments/cfgs/mobile.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: mobile 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: mobile_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res101-lg.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101-lg 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | SCALES: [800] 15 | MAX_SIZE: 1333 16 | TEST: 17 | HAS_RPN: True 18 | SCALES: [800] 19 | MAX_SIZE: 1333 20 | RPN_POST_NMS_TOP_N: 1000 21 | POOLING_MODE: crop 22 | ANCHOR_SCALES: [2,4,8,16,32] 23 | -------------------------------------------------------------------------------- /experiments/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res101_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: 
crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | DOUBLE_BIAS: False 13 | SNAPSHOT_PREFIX: res50_faster_rcnn 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_MODE: crop 17 | -------------------------------------------------------------------------------- /experiments/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: False 4 | IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: selective_search 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | SNAPSHOT_PREFIX: vgg16_MELM 13 | LEARNING_RATE: 0.001 14 | WEIGHT_DECAY: 0.0005 15 | SCALES: [480, 576, 688, 864, 1200] 16 | MAX_SIZE: 2000 17 | STEPSIZE: [50000,] 18 | MIL_RECURRENT_STEP: 20000 19 | MIL_RECURRECT_WEIGHT: 0.09 20 | TEST: 21 | HAS_RPN: False 22 | PROPOSAL_METHOD: selective_search 23 | NMS: 0.3 24 | SCALES: [480, 576, 688, 864, 1200] 25 | POOLING_MODE: roi_align 26 | -------------------------------------------------------------------------------- /experiments/scripts/convert_vgg16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | 8 | GPU_ID=$1 9 | DATASET=$2 10 | NET=vgg16 11 | 12 | array=( $@ ) 13 | len=${#array[@]} 14 | EXTRA_ARGS=${array[@]:2:$len} 15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 16 | 17 | case ${DATASET} in 18 | pascal_voc) 19 | TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | 
#!/bin/bash
# Evaluate a trained MELM snapshot with tools/test_net.py.
#
# Usage:
#   ./experiments/scripts/test_faster_rcnn.sh GPU_ID DATASET NET [EXTRA_ARGS...]
#
#   GPU_ID      exported as CUDA_VISIBLE_DEVICES for the python process
#   DATASET     one of: pascal_voc, pascal_voc_0712, coco
#   NET         network name; selects experiments/cfgs/${NET}.yml
#   EXTRA_ARGS  forwarded verbatim after `--set`; the same words joined with
#               "_" form the run tag / output sub-directory

set -x
set -e

export PYTHONUNBUFFERED="True"

GPU_ID=$1
DATASET=$2
NET=$3

# Quote "$@" so array indices line up with the positional parameters and the
# arguments are not subject to filename expansion while being copied.
array=( "$@" )
len=${#array[@]}
EXTRA_ARGS=${array[@]:3:$len}
EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}

case ${DATASET} in
  pascal_voc)
    TRAIN_IMDB="voc_2007_trainval"
    TEST_IMDB="voc_2007_test"
    ITERS=100000
    ANCHORS="[8,16,32]"
    RATIOS="[0.5,1,2]"
    ;;
  pascal_voc_0712)
    TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
    TEST_IMDB="voc_2007_test"
    ITERS=110000
    ANCHORS="[8,16,32]"
    RATIOS="[0.5,1,2]"
    ;;
  coco)
    TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
    TEST_IMDB="coco_2014_minival"
    ITERS=490000
    ANCHORS="[4,8,16,32]"
    RATIOS="[0.5,1,2]"
    ;;
  *)
    echo "No dataset given"
    # A bare `exit` would return the status of the echo above (0) and make an
    # invalid invocation look successful to callers.
    exit 1
    ;;
esac

LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.$(date +'%Y-%m-%d_%H-%M-%S')"
# tee cannot create missing parent directories.
mkdir -p "$(dirname "$LOG")"
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

# Snapshot written by training; runs without extra arguments use "default"
# as the tag directory.
set +x
NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG:-default}/${NET}_MELM_iter_${ITERS}.pth
set -x

# --tag is only passed when extra arguments were supplied.
TAG_ARGS=()
if [[ -n ${EXTRA_ARGS_SLUG} ]]; then
  TAG_ARGS=(--tag "${EXTRA_ARGS_SLUG}")
fi

# ANCHORS/RATIOS are quoted because "[8,16,32]" is a valid glob pattern and
# would otherwise expand to any matching single-character file name in the
# working directory.  EXTRA_ARGS is intentionally left unquoted so that it is
# split back into separate KEY VALUE words.
CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \
  --imdb "${TEST_IMDB}" \
  --model "${NET_FINAL}" \
  --cfg "experiments/cfgs/${NET}.yml" \
  "${TAG_ARGS[@]}" \
  --net "${NET}" \
  --set ANCHOR_SCALES "${ANCHORS}" ANCHOR_RATIOS "${RATIOS}" \
        ${EXTRA_ARGS}
TRAIN_IMDB="voc_2007_trainval" 20 | TEST_IMDB="voc_2007_test" 21 | ITERS=70000 22 | ANCHORS="[8,16,32]" 23 | RATIOS="[0.5,1,2]" 24 | ;; 25 | pascal_voc_0712) 26 | TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval" 27 | TEST_IMDB="voc_2007_test" 28 | ITERS=110000 29 | ANCHORS="[8,16,32]" 30 | RATIOS="[0.5,1,2]" 31 | ;; 32 | coco) 33 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 34 | TEST_IMDB="coco_2014_minival" 35 | ITERS=490000 36 | ANCHORS="[4,8,16,32]" 37 | RATIOS="[0.5,1,2]" 38 | ;; 39 | *) 40 | echo "No dataset given" 41 | exit 42 | ;; 43 | esac 44 | 45 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 46 | exec &> >(tee -a "$LOG") 47 | echo Logging output to "$LOG" 48 | 49 | set +x 50 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 51 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 52 | else 53 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 54 | fi 55 | set -x 56 | 57 | if [[ ! 
#!/bin/bash
# Train a MELM detector with tools/trainval_net.py, then evaluate it by
# delegating to experiments/scripts/test_faster_rcnn.sh with the same
# arguments.
#
# Usage:
#   ./experiments/scripts/train_faster_rcnn.sh GPU_ID DATASET NET [EXTRA_ARGS...]
#
#   GPU_ID      exported as CUDA_VISIBLE_DEVICES for the python process
#   DATASET     one of: pascal_voc, pascal_voc_0712, coco
#   NET         network name; selects experiments/cfgs/${NET}.yml and
#               data/imagenet_weights/${NET}.pth
#   EXTRA_ARGS  forwarded verbatim after `--set`; the same words joined with
#               "_" form the run tag / output sub-directory

# NOTE: bash does not expand aliases in non-interactive shells unless
# `shopt -s expand_aliases` is set, and `time` is not invoked below, so this
# line is inert; it is kept only to leave the script's environment unchanged.
alias time='/usr/bin/time'

set -x
set -e

export PYTHONUNBUFFERED="True"

GPU_ID=$1
DATASET=$2
NET=$3

# Quote "$@" so array indices line up with the positional parameters and the
# arguments are not subject to filename expansion while being copied.
array=( "$@" )
len=${#array[@]}
EXTRA_ARGS=${array[@]:3:$len}
EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}

case ${DATASET} in
  pascal_voc)
    TRAIN_IMDB="voc_2007_trainval"
    TEST_IMDB="voc_2007_test"
    STEPSIZE="[50000]"
    ITERS=100000
    ANCHORS="[8,16,32]"
    RATIOS="[0.5,1,2]"
    ;;
  pascal_voc_0712)
    TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
    TEST_IMDB="voc_2007_test"
    STEPSIZE="[80000]"
    ITERS=110000
    ANCHORS="[8,16,32]"
    RATIOS="[0.5,1,2]"
    ;;
  coco)
    TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
    TEST_IMDB="coco_2014_minival"
    STEPSIZE="[350000]"
    ITERS=490000
    ANCHORS="[4,8,16,32]"
    RATIOS="[0.5,1,2]"
    ;;
  *)
    echo "No dataset given"
    # A bare `exit` would return the status of the echo above (0) and make an
    # invalid invocation look successful to callers.
    exit 1
    ;;
esac

LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.$(date +'%Y-%m-%d_%H-%M-%S')"
# tee cannot create missing parent directories.
mkdir -p "$(dirname "$LOG")"
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

# Final snapshot path; runs without extra arguments use "default" as the tag
# directory.  This is the same path test_faster_rcnn.sh passes as --model.
set +x
NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG:-default}/${NET}_MELM_iter_${ITERS}.pth
set -x

# --tag is only passed when extra arguments were supplied.
TAG_ARGS=()
if [[ -n ${EXTRA_ARGS_SLUG} ]]; then
  TAG_ARGS=(--tag "${EXTRA_ARGS_SLUG}")
fi

# Skip training when the final snapshot already exists.  The previous check
# looked for "${NET_FINAL}.index" (a TensorFlow checkpoint artefact), which is
# never present next to a .pth snapshot, so training was always re-run.
if [ ! -f "${NET_FINAL}" ]; then
  # ANCHORS/RATIOS/STEPSIZE are quoted because values such as "[8,16,32]" are
  # valid glob patterns.  EXTRA_ARGS is intentionally left unquoted so that it
  # is split back into separate KEY VALUE words.
  CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
    --weight "data/imagenet_weights/${NET}.pth" \
    --imdb "${TRAIN_IMDB}" \
    --imdbval "${TEST_IMDB}" \
    --iters "${ITERS}" \
    --cfg "experiments/cfgs/${NET}.yml" \
    "${TAG_ARGS[@]}" \
    --net "${NET}" \
    --set ANCHOR_SCALES "${ANCHORS}" ANCHOR_RATIOS "${RATIOS}" \
    TRAIN.STEPSIZE "${STEPSIZE}" ${EXTRA_ARGS}
fi

echo "$@"
./experiments/scripts/test_faster_rcnn.sh "$@"
| ITERS=110000 31 | ANCHORS="[8,16,32]" 32 | RATIOS="[0.5,1,2]" 33 | ;; 34 | coco) 35 | TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival" 36 | TEST_IMDB="coco_2014_minival" 37 | STEPSIZE="[900000]" 38 | ITERS=1190000 39 | ANCHORS="[4,8,16,32]" 40 | RATIOS="[0.5,1,2]" 41 | ;; 42 | *) 43 | echo "No dataset given" 44 | exit 45 | ;; 46 | esac 47 | 48 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 49 | exec &> >(tee -a "$LOG") 50 | echo Logging output to "$LOG" 51 | 52 | set +x 53 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 54 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 55 | else 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | fi 58 | set -x 59 | 60 | if [ ! -f ${NET_FINAL}.index ]; then 61 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 62 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 63 | --weight data/imagenet_weights/${NET}.pth \ 64 | --imdb ${TRAIN_IMDB} \ 65 | --imdbval ${TEST_IMDB} \ 66 | --iters ${ITERS} \ 67 | --cfg experiments/cfgs/${NET}.yml \ 68 | --tag ${EXTRA_ARGS_SLUG} \ 69 | --net ${NET} \ 70 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 71 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 72 | else 73 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 74 | --weight data/imagenet_weights/${NET}.pth \ 75 | --imdb ${TRAIN_IMDB} \ 76 | --imdbval ${TEST_IMDB} \ 77 | --iters ${ITERS} \ 78 | --cfg experiments/cfgs/${NET}.yml \ 79 | --net ${NET} \ 80 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 81 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 82 | fi 83 | fi 84 | 85 | ./experiments/scripts/test_faster_rcnn_notime.sh $@ 86 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = 
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detections for every class and print the per-class APs.
%
%   res = voc_eval(path, comp_id, test_set, output_dir)
%
%   path        directory handed to get_voc_opts to obtain VOCopts
%   comp_id     competition id substituted into VOCopts.detrespath
%   test_set    image set name stored in VOCopts.testset
%   output_dir  directory that receives <cls>_pr.jpg and <cls>_pr.mat
%
%   Returns a struct array with one element per entry of VOCopts.classes,
%   each carrying the fields recall, prec, ap and ap_auc.

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

% Summary: one AP per class (as a percentage), followed by their mean.
fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');

function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate a single class and save its PR curve and metrics.
%
%   Writes [output_dir '/' cls '_pr.jpg'] (only when evaluation runs) and
%   [output_dir '/' cls '_pr.mat'] (always).  When evaluation is skipped the
%   returned recall/prec are empty and ap/ap_auc are 0.

test_set = VOCopts.testset;
% Characters from the 4th onward; presumably VOCopts.dataset looks like
% 'VOC2007' so this yields the year string -- TODO confirm against VOCinit.
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is computed but not used anywhere in this function.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate unless this is the 'test' set of a dataset newer than 2007.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  % Save the current figure as a JPEG next to the .mat results.
  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

% Undo the addpath above so the caller's MATLAB path is left unchanged.
rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/coco.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/coco.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/coco.cpython-37.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/ds_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/ds_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/ds_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/ds_utils.cpython-37.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/factory.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/factory.cpython-37.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/imdb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/imdb.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/imdb.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/imdb.cpython-37.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/pascal_voc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/pascal_voc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/pascal_voc.cpython-37.pyc -------------------------------------------------------------------------------- /lib/datasets/__pycache__/voc_eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/voc_eval.cpython-36.pyc 
def unique_boxes(boxes, scale=1.0):
    """Return the sorted indices of the first occurrence of each unique box.

    Each row of ``boxes`` (four coordinates) is multiplied by ``scale``,
    rounded, and collapsed into a single number by a dot product with
    ``[1, 1e3, 1e6, 1e9]``; rows with equal hashes are treated as duplicates.
    """
    v = np.array([1, 1e3, 1e6, 1e9])
    hashes = np.round(boxes * scale).dot(v)
    _, index = np.unique(hashes, return_index=True)
    return np.sort(index)


def xywh_to_xyxy(boxes):
    """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
    # Inclusive pixel convention: x2 = x + w - 1.
    return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))


def xyxy_to_xywh(boxes):
    """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
    # Inclusive pixel convention: w = x2 - x1 + 1.
    return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))


def validate_boxes(boxes, width=0, height=0):
    """Assert that every [x1 y1 x2 y2] box is well formed and inside the image.

    Raises ``AssertionError`` when a coordinate is negative, when ``x2 < x1``
    or ``y2 < y1``, or when ``x2 >= width`` / ``y2 >= height``.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    assert (x1 >= 0).all()
    assert (y1 >= 0).all()
    assert (x2 >= x1).all()
    assert (y2 >= y1).all()
    assert (x2 < width).all()
    assert (y2 < height).all()


def filter_small_boxes(boxes, min_size):
    """Return indices of boxes whose width and height are both >= ``min_size``.

    Width and height are measured as ``x2 - x1`` and ``y2 - y1``.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    # Use the same comparison for both sides; the height test used to be a
    # strict ``>``, which dropped boxes whose height equalled the threshold
    # while keeping boxes whose width equalled it.
    keep = np.where((w >= min_size) & (h >= min_size))[0]
    return keep
def munge(src_dir):
    """Move every file in ``src_dir`` into the nested ``MCG/mat`` layout.

    A file ``<name>.mat`` is moved to
    ``MCG/mat/<first 14 chars of name>/<first 22 chars of name>/<name>.mat``,
    relative to the current working directory.  Destination directories are
    created as needed and each move is reported on stdout.
    """
    # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
    # want:      ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat

    for fn in os.listdir(src_dir):
        base, _ = os.path.splitext(fn)
        # first 14 chars / first 22 chars / all chars + .mat
        # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
        first = base[:14]
        second = base[:22]
        dst_dir = os.path.join('MCG', 'mat', first, second)
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        src = os.path.join(src_dir, fn)
        dst = os.path.join(dst_dir, fn)
        # Parenthesised single-argument form: valid as a statement in
        # Python 2 and as a function call in Python 3 (the bare statement
        # form was a SyntaxError under Python 3).
        print('MV: {} -> {}'.format(src, dst))
        os.rename(src, dst)
def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    Returns a list with one dict per ``<object>`` element, holding the keys
    ``name``, ``pose``, ``truncated`` (int), ``difficult`` (int) and ``bbox``
    (``[xmin, ymin, xmax, ymax]`` as ints).
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)

    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """ ap = voc_ap(rec, prec, [use_07_metric])
    Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11 point method (default:False).
    """
    if use_07_metric:
        # 11 point metric: average the best precision reachable at each
        # recall threshold 0.0, 0.1, ..., 1.0
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def _load_annotations(annopath, imagenames, cachefile):
    """Return {imagename: parse_rec(...)}; read from / written to ``cachefile``."""
    if not os.path.isfile(cachefile):
        # load annotations
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
        return recs

    # load
    with open(cachefile, 'rb') as f:
        try:
            recs = pickle.load(f)
        except UnicodeDecodeError:
            # Fallback for caches that cannot be decoded with the default
            # encoding.  The failed attempt has already consumed part of the
            # stream, so rewind before retrying.
            f.seek(0)
            recs = pickle.load(f, encoding='bytes')
    return recs


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False,
             use_diff=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                cachedir,
                                [ovthresh],
                                [use_07_metric],
                                [use_diff])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
        Each line is "<image id> <confidence> <xmin> <ymin> <xmax> <ymax>",
        separated by single spaces.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations (created if missing)
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    [use_diff]: If True, objects flagged "difficult" are treated like any
        other ground truth instead of being ignored (default False)

    Returns (rec, prec, ap): cumulative recall and precision arrays over the
    detections sorted by decreasing confidence, and the average precision.
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        # makedirs (not mkdir) so that a nested cache directory works too
        os.makedirs(cachedir)
    # NOTE: os.path.join discards ``cachedir`` when ``imagesetfile`` is an
    # absolute path, in which case the cache file ends up next to the image
    # set file.  Kept as is so that existing caches are still found.
    cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    recs = _load_annotations(annopath, imagenames, cachefile)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # builtin ``bool`` instead of the ``np.bool`` alias, which was
        # removed in NumPy 1.24
        if use_diff:
            difficult = np.zeros(len(R), dtype=bool)
        else:
            difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)

    if BB.shape[0] > 0:
        # sort by confidence
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # go down dets and mark TPs and FPs
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            ovmax = -np.inf
            BBGT = R['bbox'].astype(float)

            if BBGT.size > 0:
                # compute overlaps
                # intersection
                ixmin = np.maximum(BBGT[:, 0], bb[0])
                iymin = np.maximum(BBGT[:, 1], bb[1])
                ixmax = np.minimum(BBGT[:, 2], bb[2])
                iymax = np.minimum(BBGT[:, 3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                # union
                uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                       (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                       (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

                overlaps = inters / uni
                ovmax = np.max(overlaps)
                jmax = np.argmax(overlaps)

            if ovmax > ovthresh:
                # a match with a "difficult" box counts as neither TP nor FP
                if not R['difficult'][jmax]:
                    if not R['det'][jmax]:
                        tp[d] = 1.
                        R['det'][jmax] = 1
                    else:
                        # ground truth already claimed by a higher-scoring
                        # detection: duplicate
                        fp[d] = 1.
            else:
                fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/anchor_target_layer.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/generate_anchors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/generate_anchors.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/generate_anchors.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/generate_anchors.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/proposal_layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_layer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/proposal_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_layer.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/proposal_target_layer.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_target_layer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/proposal_target_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_target_layer.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/proposal_top_layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_top_layer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/proposal_top_layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_top_layer.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/snippets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/snippets.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/__pycache__/snippets.cpython-37.pyc: -------------------------------------------------------------------------------- 
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Build RPN training targets (labels, box targets and loss weights).

    Same procedure as the anchor target layer in the original Fast/er R-CNN.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` is read, as
            ``(height, width)``.
        gt_boxes: ground-truth boxes with five columns (the first four are
            used as box coordinates for the regression targets).
        im_info: ``im_info[0]`` is the image height, ``im_info[1]`` the width.
        _feat_stride: unused here.
        all_anchors: ``(total_anchors, 4)`` array of anchor boxes.
        num_anchors: number of anchors per spatial location (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, A * height, width)``; the other three have shape
        ``(1, height, width, A * 4)``. Label values: 1 positive, 0 negative,
        -1 "don't care".
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((len(inds_inside),), -1, dtype=np.float32)

    # overlaps between the anchors and the gt boxes: overlaps[ex, gt].
    # np.float64 is what the removed alias ``np.float`` used to mean.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression target of each anchor w.r.t. its best-overlapping gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only the positive ones have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape((1, height, width, A * 4))
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights


def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items.

    Returns a float32 array with ``count`` rows that is ``fill`` everywhere
    except at ``inds``, where it holds ``data``.
    """
    if len(data.shape) == 1:
        ret = np.empty((count,), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret


def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    ``ex_rois`` must have 4 columns and ``gt_rois`` 5; only the first four
    columns of ``gt_rois`` are passed to ``bbox_transform``.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    return bbox_transform(torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy()
# ---- lib/layer_utils/generate_anchors.py ----

def generate_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    ``ratios`` and ``scales`` may be any sequence or array; they are
    converted with ``np.asarray``. Returns an array with one
    ``(x1, y1, x2, y2)`` row per (ratio, scale) pair, ratios varying slowest.
    """
    ratios = np.asarray(ratios)
    scales = np.asarray(scales)
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    return np.vstack([_scale_enum(anchor, scales) for anchor in ratio_anchors])


def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    return np.hstack((x_ctr - 0.5 * (ws - 1),
                      y_ctr - 0.5 * (hs - 1),
                      x_ctr + 0.5 * (ws - 1),
                      y_ctr + 0.5 * (hs - 1)))


def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """
    ratios = np.asarray(ratios)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    # widths are rounded first; heights follow from the rounded widths
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """
    scales = np.asarray(scales)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    return _mkanchors(ws, hs, x_ctr, y_ctr)


if __name__ == '__main__':
    import time

    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed

    embed()


# ---- lib/layer_utils/proposal_layer.py ----

def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D tensor; the last-dimension channels from
            ``num_anchors`` onward are used as proposal scores
            (presumably the foreground probabilities -- confirm with caller).
        rpn_bbox_pred: box deltas, viewed as ``(-1, 4)``.
        im_info: the first two entries are passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or UTF-8 ``bytes``).
        _feat_stride: unused here.
        anchors: anchor boxes matching ``rpn_bbox_pred`` row for row.
        num_anchors: number of anchors per location.

    Returns:
        ``(blob, scores)``: ``blob`` is ``(N, 5)`` with a zero batch-index
        column followed by the kept proposals; ``scores`` is ``(N, 1)``.
    """
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Always restore the column shape so the concatenation below works even
    # when no pre-NMS limit is configured.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores


# ---- lib/layer_utils/proposal_target_layer.py ----

def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.

    Returns ``(rois, roi_scores, labels, bbox_targets, bbox_inside_weights,
    bbox_outside_weights)`` with shapes ``(-1, 5)``, ``(-1,)``, ``(-1, 1)``
    and ``(-1, 4 * _num_classes)`` for the last three.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    all_rois = rpn_rois
    all_scores = rpn_scores

    # Include ground-truth boxes in the set of candidate rois
    if cfg.TRAIN.USE_GT:
        zeros = rpn_rois.new_zeros(gt_boxes.shape[0], 1)
        all_rois = torch.cat(
            (all_rois, torch.cat((zeros, gt_boxes[:, :-1]), 1)), 0)
        # ground-truth boxes get a score of zero
        all_scores = torch.cat((all_scores, zeros), 0)

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, all_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    rois = rois.view(-1, 5)
    roi_scores = roi_scores.view(-1)
    labels = labels.view(-1, 1)
    bbox_targets = bbox_targets.view(-1, _num_classes * 4)
    bbox_inside_weights = bbox_inside_weights.view(-1, _num_classes * 4)
    bbox_outside_weights = (bbox_inside_weights > 0).float()

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights


def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Bounding-box regression targets (bbox_target_data) are stored in a
    compact form N x (class, tx, ty, tw, th)

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets).

    Returns:
        bbox_target (tensor): N x 4K blob of regression targets
        bbox_inside_weights (tensor): N x 4K blob of loss weights
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = clss.new_zeros(clss.numel(), 4 * num_classes)
    bbox_inside_weights = clss.new_zeros(bbox_targets.shape)
    # rows with class 0 keep all-zero targets and weights
    inds = (clss > 0).nonzero().view(-1)
    if inds.numel() > 0:
        clss = clss[inds].contiguous().view(-1, 1)
        dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4)
        # the four target columns of class c are 4c .. 4c + 3
        dim2_inds = torch.cat([4 * clss, 4 * clss + 1, 4 * clss + 2, 4 * clss + 3], 1).long()
        bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:]
        bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(
            cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds)

    return bbox_targets, bbox_inside_weights


def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Returns an ``N x 5`` tensor: ``labels`` as the first column followed by
    the (optionally normalized) regression targets.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                   / targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return torch.cat(
        [labels.unsqueeze(1), targets], 1)


def _random_pick(inds, size, replace, device):
    """Randomly pick ``size`` entries of the 1-D index tensor ``inds``."""
    chosen = npr.choice(np.arange(0, inds.numel()), size=int(size), replace=replace)
    return inds[torch.from_numpy(chosen).long().to(device)]


def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Raises:
        RuntimeError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        all_rois[:, 1:5].data,
        gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI).
    # Use a logical AND: summing two boolean masks and comparing with 2 never
    # matches when comparisons return bool tensors.
    bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
               (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)).nonzero().view(-1)

    device = gt_boxes.device
    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = _random_pick(fg_inds, fg_rois_per_image, False, device)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = _random_pick(bg_inds, bg_rois_per_image, to_replace, device)
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = _random_pick(fg_inds, rois_per_image, to_replace, device)
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = _random_pick(bg_inds, rois_per_image, to_replace, device)
        fg_rois_per_image = 0
    else:
        raise RuntimeError(
            'no foreground or background RoIs to sample from; '
            'check the FG_THRESH / BG_THRESH_HI / BG_THRESH_LO settings')

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights


# ---- lib/layer_utils/proposal_top_layer.py ----

def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """A layer that just selects the top region proposals
    without using non-maximal suppression,
    For details please see the technical report

    Returns ``(blob, scores)`` where ``blob`` has a zero batch-index column
    followed by the ``cfg.TEST.RPN_TOP_N`` selected, clipped proposals.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.view(-1, 4)
    scores = scores.contiguous().view(-1, 1)

    length = scores.size(0)
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = torch.from_numpy(npr.choice(length, size=rpn_top_n, replace=True)).long().to(anchors.device)
    else:
        top_inds = scores.sort(0, descending=True)[1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.view(rpn_top_n)

    # Do the selection here
    anchors = anchors[top_inds, :].contiguous()
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous()
    scores = scores[top_inds].contiguous()

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat([batch_inds, proposals], 1)
    return blob, scores
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-37.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
__all__ = []


def _import_symbols(locals):
    """Copy every attribute of the compiled ``_lib`` into ``locals``.

    Callable attributes are wrapped with ``_wrap_function(fn, _ffi)``; all
    other attributes are copied unchanged. Each attribute name is also
    appended to ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)


_import_symbols(locals())
# CPU-only sources are currently disabled:
# sources = ['src/crop_and_resize.c']
# headers = ['src/crop_and_resize.h']

sources = []
headers = []
defines = []
# Must be defined even without CUDA: it is passed to create_extension below.
with_cuda = False

extra_objects = []
if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/crop_and_resize_gpu.c']
    headers += ['src/crop_and_resize_gpu.h']
    defines += [('WITH_CUDA', None)]
    extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o']
    with_cuda = True

extra_compile_args = ['-std=c99']

# Resolve every path relative to the directory containing this script.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
sources = [os.path.join(this_file, fname) for fname in sources]
headers = [os.path.join(this_file, fname) for fname in headers]
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

ffi = create_extension(
    '_ext.crop_and_resize',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects,
    extra_compile_args=extra_compile_args
)

if __name__ == '__main__':
    ffi.build()
crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | 
-------------------------------------------------------------------------------- /lib/layer_utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResizeFunction, CropAndResize 5 | 6 | 7 | class RoIAlign(nn.Module): 8 | 9 | def __init__(self, crop_height, crop_width, extrapolation_value=0, transform_fpcoor=True): 10 | super(RoIAlign, self).__init__() 11 | 12 | self.crop_height = crop_height 13 | self.crop_width = crop_width 14 | self.extrapolation_value = extrapolation_value 15 | self.transform_fpcoor = transform_fpcoor 16 | 17 | def forward(self, featuremap, boxes, box_ind): 18 | """ 19 | RoIAlign based on crop_and_resize. 20 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 21 | :param featuremap: NxCxHxW 22 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 23 | :param box_ind: M 24 | :return: MxCxoHxoW 25 | """ 26 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 27 | image_height, image_width = featuremap.size()[2:4] 28 | 29 | if self.transform_fpcoor: 30 | spacing_w = (x2 - x1) / float(self.crop_width) 31 | spacing_h = (y2 - y1) / float(self.crop_height) 32 | 33 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 34 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 35 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 36 | nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1) 37 | 38 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 39 | else: 40 | x1 = x1 / float(image_width - 1) 41 | x2 = x2 / float(image_width - 1) 42 | y1 = y1 / float(image_height - 1) 43 | y2 = y2 / float(image_height - 1) 44 | boxes = torch.cat((y1, x1, y2, x2), 1) 45 | 46 | boxes = boxes.detach().contiguous() 47 | box_ind = box_ind.detach() 48 | return 
CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) 49 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = 
THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width, depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 
| const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- 
/lib/layer_utils/roi_pooling/_ext/roi_pooling/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/roi_pooling/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/_ext/roi_pooling/_roi_pooling.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/roi_pooling/_roi_pooling.so -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_pooling_cuda.c'] 15 | headers += ['src/roi_pooling_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_pooling_kernel.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_pooling', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- 
/lib/layer_utils/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/functions/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/functions/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/functions/__pycache__/roi_pool.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/functions/__pycache__/roi_pool.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, 
num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, 
self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Set all element of the output tensor to -inf. 
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = 
hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/src/roi_pooling.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = 
THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, 
bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /lib/layer_utils/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* 
argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/_ext/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_ring_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/__pycache__/__init__.cpython-36.pyc 
-------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/_roi_ring_pooling.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/_roi_ring_pooling.so -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_ring_pooling.c'] 8 | headers = ['src/roi_ring_pooling.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_ring_pooling_cuda.c'] 15 | headers += ['src/roi_ring_pooling_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_ring_pooling_kernel.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_ring_pooling', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/functions/__pycache__/roi_ring_pool.cpython-36.pyc: -------------------------------------------------------------------------------- 
class RoIRingPoolFunction(Function):
    """Autograd function that max-pools features over a rectangular ring.

    Written in the legacy, instance-based ``torch.autograd.Function`` style:
    the pooling configuration is stored on the instance by ``__init__`` and
    the instance is then called with ``(features, rois)``.  The first
    parameter of every method is spelled ``ctx`` but it is the instance
    itself.  Only the CUDA kernels of the compiled ``roi_ring_pooling``
    extension are used.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale, scale_inner, scale_outer):
        """Store the pooling configuration.

        Args:
            pooled_height: height of the pooled output grid.
            pooled_width: width of the pooled output grid.
            spatial_scale: factor passed through to the CUDA kernels.
            scale_inner: scale of the inner box relative to the RoI
                (0 zeroes the inner box, see ``RectangularRing``).
            scale_outer: scale of the outer box relative to the RoI.
        """
        ctx.pooled_height = pooled_height
        ctx.pooled_width = pooled_width
        ctx.spatial_scale = spatial_scale
        ctx.scale_inner = scale_inner
        ctx.scale_outer = scale_outer
        # Filled in by forward(); backward() refuses to run without it.
        ctx.feature_size = None

    def forward(ctx, features, rois):
        """Pool ``features`` over the ring derived from every row of ``rois``.

        Args:
            features: 4-D tensor whose size unpacks as
                (batch, channels, height, width).
            rois: 2-D tensor with one RoI per row; columns 1..4 are read as
                x1, y1, x2, y2.

        Returns:
            Tensor of shape (num_rois, channels, pooled_height, pooled_width).
        """
        ctx.feature_size = features.size()
        # The 4-way unpack doubles as a check that ``features`` is 4-D.
        _, num_channels, _, _ = ctx.feature_size
        num_rois = rois.size(0)

        output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
        # Filled by the forward kernel and handed back to the backward kernel.
        ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()

        ctx.rois = rois
        # 9 columns per RoI: column 0, outer box (1..4), inner box (5..8).
        ctx.processed_rois = features.new(num_rois, 9).zero_()
        RectangularRing(rois, ctx.processed_rois, ctx.spatial_scale, ctx.scale_inner, ctx.scale_outer)

        roi_ring_pooling.roi_ring_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                                       features, ctx.processed_rois, output, ctx.argmax)
        return output

    def backward(ctx, grad_output):
        """Route ``grad_output`` back to the feature map.

        Returns:
            ``(grad_input, None)``: a gradient shaped like the features seen
            in ``forward`` and no gradient for the RoIs.
        """
        assert(ctx.feature_size is not None and grad_output.is_cuda)
        batch_size, num_channels, data_height, data_width = ctx.feature_size
        grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()

        roi_ring_pooling.roi_ring_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                                                        grad_output, ctx.processed_rois, grad_input, ctx.argmax)
        return grad_input, None


def RectangularRing(rois, processed_rois, spatial_scale, scale_inner, scale_outer):
    """Fill ``processed_rois`` with the outer and inner box of every RoI.

    Both boxes share the centre of the RoI; their half extents are the RoI's
    half extents multiplied by ``scale_outer`` and ``scale_inner``.

    Args:
        rois: 2-D tensor, one RoI per row, columns 1..4 = x1, y1, x2, y2.
        processed_rois: tensor with at least 9 columns, written in place:
            columns 1..4 receive the outer box, columns 5..8 the inner box.
            Column 0 is not written by this function.
        spatial_scale: accepted for interface compatibility; not used here.
        scale_inner: inner box scale; when it equals 0, columns 5 and up are
            set to 0.
        scale_outer: outer box scale.

    Returns:
        Always 1.
    """
    # Detach so the in-place write never records autograd history; the
    # previous torch.tensor(...) copy-construction had the same effect but
    # triggers PyTorch's copy-construct warning on every call.
    boxes = rois.detach()
    ctr_x = (boxes[:, 1] + boxes[:, 3]) / 2
    ctr_y = (boxes[:, 2] + boxes[:, 4]) / 2
    half_w = (boxes[:, 3] - boxes[:, 1]) / 2
    half_h = (boxes[:, 4] - boxes[:, 2]) / 2

    ring = torch.stack(
        [
            ctr_x - half_w * scale_outer,
            ctr_y - half_h * scale_outer,
            ctr_x + half_w * scale_outer,
            ctr_y + half_h * scale_outer,
            ctr_x - half_w * scale_inner,
            ctr_y - half_h * scale_inner,
            ctr_x + half_w * scale_inner,
            ctr_y + half_h * scale_inner,
        ],
        dim=1,
    )
    # Cast to the RoI dtype first, exactly as the original per-column
    # torch.tensor(..., dtype=rois.dtype) did.
    processed_rois[:, 1:9] = ring.to(rois.dtype)

    if scale_inner == 0:
        processed_rois[:, 5:] = 0

    return 1
// batch size 18 | int batch_size = THFloatTensor_size(features, 0); 19 | if(batch_size != 1) 20 | { 21 | return 0; 22 | } 23 | // data height 24 | int data_height = THFloatTensor_size(features, 1); 25 | // data width 26 | int data_width = THFloatTensor_size(features, 2); 27 | // Number of channels 28 | int num_channels = THFloatTensor_size(features, 3); 29 | 30 | // Set all element of the output tensor to -inf. 31 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 32 | 33 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 34 | int index_roi = 0; 35 | int index_output = 0; 36 | int n; 37 | for (n = 0; n < num_rois; ++n) 38 | { 39 | int roi_batch_ind = rois_flat[index_roi + 0]; 40 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 41 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 42 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 43 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 44 | int roi_start_w_in = round(rois_flat[index_roi + 5] * spatial_scale); 45 | int roi_start_h_in = round(rois_flat[index_roi + 6] * spatial_scale); 46 | int roi_end_w_in = round(rois_flat[index_roi + 7] * spatial_scale); 47 | int roi_end_h_in = round(rois_flat[index_roi + 8] * spatial_scale); 48 | // CHECK_GE(roi_batch_ind, 0); 49 | // CHECK_LT(roi_batch_ind, batch_size); 50 | 51 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 52 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 53 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 54 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 55 | 56 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 57 | const int output_area = pooled_width * pooled_height; 58 | 59 | int c, ph, pw; 60 | for (ph = 0; ph < pooled_height; ++ph) 61 | { 62 | for (pw = 0; pw < pooled_width; ++pw) 63 | { 64 | int hstart = (floor((float)(ph) * bin_size_h)); 65 | int wstart = (floor((float)(pw) * 
bin_size_w)); 66 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 67 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 68 | 69 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 70 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 71 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 72 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 73 | 74 | const int pool_index = index_output + (ph * pooled_width + pw); 75 | int is_empty = (hend <= hstart) || (wend <= wstart); 76 | if (is_empty) 77 | { 78 | for (c = 0; c < num_channels * output_area; c += output_area) 79 | { 80 | output_flat[pool_index + c] = 0; 81 | } 82 | } 83 | else 84 | { 85 | int h, w, c; 86 | for (h = hstart; h < hend; ++h) 87 | { 88 | for (w = wstart; w < wend; ++w) 89 | { 90 | if(!( w > roi_start_w_in && w < roi_end_w_in && h > roi_start_h_in && h < roi_end_h_in)) 91 | { 92 | for (c = 0; c < num_channels; ++c) 93 | { 94 | const int index = (h * data_width + w) * num_channels + c; 95 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 96 | { 97 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 98 | } 99 | } 100 | } 101 | //for (c = 0; c < num_channels; ++c) 102 | //{ 103 | // const int index = (h * data_width + w) * num_channels + c; 104 | // if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 105 | // { 106 | // output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 107 | // } 108 | //} 109 | } 110 | } 111 | } 112 | } 113 | } 114 | 115 | // Increment ROI index 116 | index_roi += size_rois; 117 | index_output += pooled_height * pooled_width * num_channels; 118 | } 119 | return 1; 120 | } -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_ring_pooling_forward(int pooled_height, int 
pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_ring_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_ring_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 9) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIRingPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_ring_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, 
THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 9) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIRingPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_ring_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | int roi_ring_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 4 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- 
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_kernel.cu.o -------------------------------------------------------------------------------- /lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_RING_POOLING_KERNEL 2 | #define _ROI_RING_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIRingPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIRingPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /lib/layer_utils/snippets.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from 
# Build the compiled extension modules (RoI pooling, RoI ring pooling,
# RoI align, NMS).
#
# Every `cd` below is relative, so the script must be started from the
# directory that contains layer_utils/ and nms/.  Each section compiles its
# CUDA kernel with nvcc, runs that module's build.py, and then returns to the
# starting directory so the next section's relative path resolves.

# Target architectures handed to every nvcc invocation.
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
           -gencode arch=compute_35,code=sm_35 \
           -gencode arch=compute_50,code=sm_50 \
           -gencode arch=compute_52,code=sm_52 \
           -gencode arch=compute_60,code=sm_60 \
           -gencode arch=compute_61,code=sm_61 \
           -gencode arch=compute_70,code=sm_70 \
           -gencode arch=compute_70,code=compute_70
"

# Build RoiPooling module
cd layer_utils/roi_pooling/src
echo "Compiling roi_pooling kernels by nvcc..."
nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ..
python build.py
cd ../../

# Build RoiRingpooling module
cd layer_utils/roi_ring_pooling/src
echo "Compiling roi_ring_pooling kernels by nvcc"
nvcc -c -o roi_ring_pooling_kernel.cu.o roi_ring_pooling_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ..
python build.py
# Return to the starting directory.  Without this step the RoIAlign and NMS
# sections below would run from layer_utils/roi_ring_pooling and their
# relative `cd` targets would not exist.
cd ../../

# Build RoIAlign
cd layer_utils/roi_align/src/cuda
echo 'Compiling crop_and_resize kernels by nvcc...'
nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../../
python build.py
cd ../../

# Build NMS
cd nms/src/cuda
echo "Compiling nms kernels by nvcc..."
nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../../
python build.py
cd ../
import config 2 | -------------------------------------------------------------------------------- /lib/model/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/apmetric.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/apmetric.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/bbox_transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/bbox_transform.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/bbox_transform.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/bbox_transform.cpython-37.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/config.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/nms_wrapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/nms_wrapper.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/nms_wrapper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/nms_wrapper.cpython-37.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/test.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/test.cpython-36.pyc -------------------------------------------------------------------------------- /lib/model/__pycache__/test.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/test.cpython-37.pyc 
class AveragePrecisionMeter(object):
    """Accumulate per-class scores and targets and report average precision.

    ``add`` appends a batch of scores and targets, ``value`` returns one
    average precision per column over everything added since the last
    ``reset``.

    Args:
        difficult_examples (bool): forwarded to ``average_precision``; when
            true, entries whose target equals 0 are skipped.
    """

    def __init__(self, difficult_examples=False):
        super(AveragePrecisionMeter, self).__init__()
        self.reset()
        self.difficult_examples = difficult_examples

    def reset(self):
        """Drop everything accumulated so far."""
        self.scores = torch.FloatTensor(torch.FloatStorage())
        self.targets = torch.LongTensor(torch.LongStorage())

    def add(self, output, target):
        """Append a batch of scores and targets.

        Args:
            output: tensor or numpy array of scores, 1-D or 2-D with one
                column per class.  A 1-D input is treated as one column.
            target: tensor or numpy array of targets, same layout.

        Raises:
            AssertionError: if an input is neither 1-D nor 2-D, or its
                column count differs from previously added batches.
        """
        if not torch.is_tensor(output):
            output = torch.from_numpy(output)
        if not torch.is_tensor(target):
            target = torch.from_numpy(target)

        if output.dim() == 1:
            output = output.view(-1, 1)
        else:
            assert output.dim() == 2, \
                'wrong output size (should be 1D or 2D with one column \
                per class)'
        if target.dim() == 1:
            target = target.view(-1, 1)
        else:
            assert target.dim() == 2, \
                'wrong target size (should be 1D or 2D with one column \
                per class)'
        if self.scores.numel() > 0:
            # A different column count would make the resize_ below silently
            # re-interpret the rows that are already stored.
            assert output.size(1) == self.scores.size(1), \
                'dimensions for output should match previously added examples.'
            assert target.size(1) == self.targets.size(1), \
                'dimensions for target should match previously added examples.'

        # make sure storage is of sufficient size (grow by 1.5x plus the
        # incoming batch so repeated adds do not reallocate every time)
        if self.scores.storage().size() < self.scores.numel() + output.numel():
            new_size = math.ceil(self.scores.storage().size() * 1.5)
            self.scores.storage().resize_(int(new_size + output.numel()))
            self.targets.storage().resize_(int(new_size + output.numel()))

        # store scores and targets after the rows that are already present
        offset = self.scores.size(0) if self.scores.dim() > 0 else 0
        self.scores.resize_(offset + output.size(0), output.size(1))
        self.targets.resize_(offset + target.size(0), target.size(1))
        self.scores.narrow(0, offset, output.size(0)).copy_(output)
        self.targets.narrow(0, offset, target.size(0)).copy_(target)

    def value(self):
        """Return the average precision of every class.

        Returns:
            ``0`` when nothing has been added, otherwise a 1-D float tensor
            with one entry per score column.
        """
        if self.scores.numel() == 0:
            return 0
        num_classes = self.scores.size(1)
        ap = torch.zeros(num_classes)
        for k in range(num_classes):
            ap[k] = AveragePrecisionMeter.average_precision(
                self.scores[:, k], self.targets[:, k], self.difficult_examples)
        return ap

    @staticmethod
    def average_precision(output, target, difficult_examples=True):
        """Average precision of one class.

        Examples are visited in order of decreasing score.  Precision is
        sampled at every example whose target equals 1 and the samples are
        averaged.

        Args:
            output: 1-D tensor of scores.
            target: 1-D tensor of targets, indexed like ``output``.
            difficult_examples: when true, examples whose target equals 0 are
                skipped entirely (they count neither as hit nor as miss).

        Returns:
            The average precision as a float; ``0.0`` when no example has
            target 1 (previously a ZeroDivisionError).
        """
        _, order = torch.sort(output, dim=0, descending=True)

        pos_count = 0.
        total_count = 0.
        precision_sum = 0.
        for idx in order:
            label = target[idx]
            if difficult_examples and label == 0:
                continue
            total_count += 1
            if label == 1:
                pos_count += 1
                precision_sum += pos_count / total_count

        if pos_count == 0:
            # No positive example: precision is never sampled.
            return 0.0
        return precision_sum / pos_count
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.

    Args:
        boxes: tensor of shape (N, 4 * K); every consecutive group of four
            columns is one box.  Columns 0 and 2 of each group are clamped to
            ``[0, im_shape[1] - 1]``, columns 1 and 3 to
            ``[0, im_shape[0] - 1]``.
        im_shape: indexable whose first two entries bound the boxes
            (presumably (height, width) - confirm against the caller).

    Returns:
        A new tensor of shape (N, 4 * K) with the clamped coordinates.  An
        empty ``boxes`` is returned unchanged, because ``view(0, -1, 4)``
        cannot infer the middle dimension and used to raise.
    """
    if boxes.numel() == 0:
        return boxes

    num_rows = boxes.size(0)
    grouped = boxes.view(num_rows, -1, 4)
    x_max = im_shape[1] - 1
    y_max = im_shape[0] - 1
    clipped = torch.stack(
        [
            grouped[:, :, 0].clamp(0, x_max),
            grouped[:, :, 1].clamp(0, y_max),
            grouped[:, :, 2].clamp(0, x_max),
            grouped[:, :, 3].clamp(0, y_max),
        ],
        2,
    )
    return clipped.view(num_rows, -1)
16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__init__.py -------------------------------------------------------------------------------- /lib/nets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/mobilenet_v1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/mobilenet_v1.cpython-37.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/network.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/network.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/network.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/network.cpython-37.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/resnet_v1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/resnet_v1.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/resnet_v1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/resnet_v1.cpython-37.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/vgg16.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/vgg16.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nets/__pycache__/vgg16.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/vgg16.cpython-37.pyc 
class ResNet(torchvision.models.resnet.ResNet):
    """torchvision ResNet with strides rearranged for use as a detection backbone.

    After the stock torchvision constructor runs, the first block of layer2
    and layer3 gets its stride moved onto ``conv1``, the first block of
    layer4 is forced to stride 1, and the classification-only modules
    (``avgpool`` and ``fc``) are removed.

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``).
        layers: number of blocks in each of the four stages.
        num_classes: forwarded to the torchvision constructor; the ``fc``
            layer it sizes is deleted afterwards.
    """

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__(block, layers, num_classes)
        # change to match the caffe resnet
        for i in range(2, 4):
            first_block = getattr(self, 'layer%d' % i)[0]
            first_block.conv1.stride = (2, 2)
            first_block.conv2.stride = (1, 1)
        # use stride 1 for the last conv4 layer (same as tf-faster-rcnn)
        last_stage_block = self.layer4[0]
        # conv1 must be reset as well: torchvision's BasicBlock carries its
        # stride on conv1, so resetting only conv2 and the downsample path
        # leaves the residual branch and the shortcut with different spatial
        # sizes. For Bottleneck conv1 is a stride-1 1x1 conv, so this is a
        # no-op there (assumes torchvision's standard block layout).
        last_stage_block.conv1.stride = (1, 1)
        last_stage_block.conv2.stride = (1, 1)
        last_stage_block.downsample[0].stride = (1, 1)

        # The backbone is only used as a feature extractor.
        del self.avgpool, self.fc
def resnet50(pretrained=False):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet

    Returns:
        The ``ResNet`` backbone built from ``Bottleneck`` blocks with stage
        depths ``[3, 4, 6, 3]``.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        # This module never defines or imports `model_urls`, so the bare name
        # raised NameError. Use torchvision's table instead.
        # NOTE(review): assumes the installed torchvision still exposes
        # `torchvision.models.resnet.model_urls` - confirm for the pinned version.
        checkpoint = model_zoo.load_url(torchvision.models.resnet.model_urls['resnet50'])
        # ResNet.__init__ deletes `avgpool` and `fc`, so the ImageNet
        # checkpoint holds keys this model lacks; keep only the keys the
        # trimmed model owns, otherwise the strict load fails.
        own_keys = model.state_dict()
        model.load_state_dict({k: v for k, v in checkpoint.items() if k in own_keys})
    return model
/ float(self._feat_stride[0]), ] 99 | self._num_layers = num_layers 100 | self._net_conv_channels = 1024 101 | self._fc7_channels = 2048 102 | 103 | def _crop_pool_layer(self, bottom, rois): 104 | return Network._crop_pool_layer(self, bottom, rois, cfg.RESNET.MAX_POOL) 105 | 106 | def _image_to_head(self): 107 | net_conv = self._layers['head'](self._image) 108 | self._act_summaries['conv'] = net_conv 109 | 110 | return net_conv 111 | 112 | def _head_to_tail(self, pool5): 113 | fc7 = self.resnet.layer4(pool5).mean(3).mean(2) # average pooling after layer4 114 | return fc7 115 | 116 | def _init_head_tail(self): 117 | # choose different blocks for different number of layers 118 | if self._num_layers == 50: 119 | self.resnet = resnet50() 120 | 121 | elif self._num_layers == 101: 122 | self.resnet = resnet101() 123 | 124 | elif self._num_layers == 152: 125 | self.resnet = resnet152() 126 | 127 | else: 128 | # other numbers are not supported 129 | raise NotImplementedError 130 | 131 | # Fix blocks 132 | for p in self.resnet.bn1.parameters(): p.requires_grad=False 133 | for p in self.resnet.conv1.parameters(): p.requires_grad=False 134 | assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4) 135 | if cfg.RESNET.FIXED_BLOCKS >= 3: 136 | for p in self.resnet.layer3.parameters(): p.requires_grad=False 137 | if cfg.RESNET.FIXED_BLOCKS >= 2: 138 | for p in self.resnet.layer2.parameters(): p.requires_grad=False 139 | if cfg.RESNET.FIXED_BLOCKS >= 1: 140 | for p in self.resnet.layer1.parameters(): p.requires_grad=False 141 | 142 | def set_bn_fix(m): 143 | classname = m.__class__.__name__ 144 | if classname.find('BatchNorm') != -1: 145 | for p in m.parameters(): p.requires_grad=False 146 | 147 | self.resnet.apply(set_bn_fix) 148 | 149 | # Build resnet. 
class MELM_vgg16(Network):
    """VGG16-based network.

    The VGG16 feature stack without its final max-pool is registered as the
    'head'; the VGG16 classifier without its last layer turns pooled region
    features into fc7.
    """

    def __init__(self):
        Network.__init__(self)
        self._feat_stride = [16, ]
        self._feat_compress = [1. / float(self._feat_stride[0]), ]
        self._net_conv_channels = 512
        self._fc7_channels = 4096

    def _init_head_tail(self):
        """Create the VGG16 modules and register the convolutional head."""
        self.vgg = models.vgg16()
        # Remove fc8
        self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1])

        # Fix the layers before conv3:
        for layer in range(10):
            for p in self.vgg.features[layer].parameters():
                p.requires_grad = False

        # not using the last maxpool layer
        self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1])

    def _image_to_head(self):
        """Run the head on ``self._image`` and record the activation."""
        net_conv = self._layers['head'](self._image)
        self._act_summaries['conv'] = net_conv

        return net_conv

    def _head_to_tail(self, pool5):
        """Flatten ``pool5`` per row and apply the truncated classifier.

        The result is also stored in ``self._predictions['fc7']``.
        """
        pool5_flat = pool5.view(pool5.size(0), -1)
        fc7 = self.vgg.classifier(pool5_flat)
        self._predictions['fc7'] = fc7

        return fc7

    def load_pretrained_cnn(self, state_dict):
        """Load the entries of ``state_dict`` whose keys exist in ``self.vgg``.

        Extra keys in ``state_dict`` (e.g. the removed last classifier layer)
        are ignored.
        """
        # Build the model's own state dict once; calling state_dict() inside
        # the comprehension rebuilt it for every checkpoint key.
        own_keys = self.vgg.state_dict()
        self.vgg.load_state_dict({k: v for k, v in state_dict.items() if k in own_keys})
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/pth_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/pth_nms.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/pth_nms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/pth_nms.cpython-37.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/__pycache__/__init__.cpython-37.pyc 
def _import_symbols(locals):
    """Copy every attribute of the compiled ``_lib`` into ``locals``.

    Callable attributes are wrapped with ``_wrap_function`` (bound to
    ``_ffi``); everything else is copied unchanged. Each attribute name is
    also appended to the module's ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)
def pth_nms(dets, thresh):
    """Run non-maximum suppression through the compiled ``nms`` extension.

    dets has to be a tensor.

    Args:
        dets: 2-D tensor whose columns 0-3 are read as x1, y1, x2, y2 and
            column 4 as the score. CPU and CUDA tensors are both accepted.
        thresh: overlap threshold forwarded to the extension.

    Returns:
        LongTensor of row indices into ``dets`` that survive suppression
        (on the GPU when ``dets`` is a CUDA tensor).
    """
    if dets.numel() == 0:
        # Nothing to suppress. Returning here also avoids launching the CUDA
        # kernel with an empty grid.
        empty = torch.LongTensor(0)
        return empty.cuda(dets.get_device()) if dets.is_cuda else empty

    scores = dets[:, 4]
    order = scores.sort(0, descending=True)[1]

    if not dets.is_cuda:
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)

        # cpu_nms rejects non-contiguous boxes; a sliced or transposed `dets`
        # would otherwise fail its argument check.
        dets = dets.contiguous()

        keep = torch.LongTensor(dets.size(0))
        num_out = torch.LongTensor(1)
        nms.cpu_nms(keep, num_out, dets, order, areas, thresh)

        return keep[:num_out[0]]

    # The GPU routine expects the boxes already sorted by score, so it
    # returns positions in the sorted tensor; map them back through `order`.
    dets = dets[order].contiguous()

    keep = torch.LongTensor(dets.size(0))
    num_out = torch.LongTensor(1)
    nms.gpu_nms(keep, num_out, dets, thresh)

    return order[keep[:num_out[0]].cuda()].contiguous()
// Launch nms_kernel over every (row block, column block) pair of boxes.
//   boxes_num          - number of boxes in boxes_dev
//   boxes_dev          - device pointer to the boxes, 5 floats per box
//   mask_dev           - device pointer receiving one 64-bit suppression
//                        mask per (box, column block)
//   nms_overlap_thresh - IoU above which a later box is marked suppressed
void _nms(int boxes_num, float * boxes_dev,
          unsigned long long * mask_dev, float nms_overlap_thresh) {

  // A grid with a zero dimension is an invalid launch configuration.
  if (boxes_num <= 0) {
    return;
  }

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  // The execution configuration had been stripped from the launch
  // ("nms_kernel<<>>"), which does not compile; `blocks` and `threads`
  // above are the grid and block dimensions.
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
}
-------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap 
with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // 
------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | 
* num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/layer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/layer.cpython-37.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/minibatch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/minibatch.cpython-37.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc -------------------------------------------------------------------------------- /lib/roi_data_layer/__pycache__/roidb.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/roidb.cpython-37.pyc 
class RoIDataLayer(object):
    """Fast R-CNN data layer used for training.

    Walks a shuffled permutation of the roidb and turns each slice of
    ``cfg.TRAIN.IMS_PER_BATCH`` entries into minibatch blobs.
    """

    def __init__(self, roidb, num_classes, random=False):
        """Set the roidb to be used by this layer during training."""
        self._roidb = roidb
        self._num_classes = num_classes
        # Also set a random flag
        self._random = random
        self._shuffle_roidb_inds()

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb and reset the cursor."""
        # If the random flag is set,
        # then the database is shuffled according to system time
        # Useful for the validation set
        if self._random:
            st0 = np.random.get_state()
            millis = int(round(time.time() * 1000)) % 4294967295
            np.random.seed(millis)

        try:
            if cfg.TRAIN.ASPECT_GROUPING:
                widths = np.array([r['width'] for r in self._roidb])
                heights = np.array([r['height'] for r in self._roidb])
                horz = (widths >= heights)
                vert = np.logical_not(horz)
                horz_inds = np.where(horz)[0]
                vert_inds = np.where(vert)[0]
                inds = np.hstack((
                    np.random.permutation(horz_inds),
                    np.random.permutation(vert_inds)))
                # Pairs are shuffled as units. With an odd number of entries
                # reshape(-1, 2) raises, so the unpaired last index is kept
                # aside and appended after the shuffled pairs.
                num_paired = len(inds) - len(inds) % 2
                leftover = inds[num_paired:]
                pairs = np.reshape(inds[:num_paired], (-1, 2))
                row_perm = np.random.permutation(np.arange(pairs.shape[0]))
                self._perm = np.hstack((
                    np.reshape(pairs[row_perm, :], (-1,)),
                    leftover))
            else:
                self._perm = np.random.permutation(np.arange(len(self._roidb)))
        finally:
            # Restore the random state, even if shuffling failed, so the
            # time-based seed never leaks into the caller's RNG stream.
            if self._random:
                np.random.set_state(st0)

        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch."""
        # NOTE(review): with `>=` the final full batch of a permutation is
        # never served before reshuffling; kept as-is to preserve the
        # existing sampling sequence.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        self._cur += cfg.TRAIN.IMS_PER_BATCH

        return db_inds

    def _get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch.

        The blobs are built synchronously by ``get_minibatch`` from the
        roidb entries selected by ``_get_next_minibatch_inds``.
        """
        db_inds = self._get_next_minibatch_inds()
        minibatch_db = [self._roidb[i] for i in db_inds]
        return get_minibatch(minibatch_db, self._num_classes)

    def forward(self):
        """Get the blobs for the next minibatch and return them."""
        blobs = self._get_next_minibatch()
        return blobs
def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it.

    Only single-image minibatches are supported (enforced by the asserts
    below).

    Args:
        roidb: list holding exactly one roidb entry.  The entry is read for
            'gt_classes', 'gt_overlaps' (needs ``.toarray()``), 'boxes' and
            'image_level_labels'; the image itself is loaded by
            ``_get_image_blob``.
        num_classes: accepted for interface compatibility; not used here.

    Returns:
        dict with keys 'data', 'gt_boxes' (x1, y1, x2, y2, cls), 'im_info',
        'image_level_labels' and 'ss_boxes' (0, x1, y1, x2, y2).
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    assert (cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)

    # Get the input image blob
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    blobs = {'data': im_blob}

    assert len(im_scales) == 1, "Single batch only"
    assert len(roidb) == 1, "Single batch only"

    entry = roidb[0]
    scale = im_scales[0]

    # gt boxes: (x1, y1, x2, y2, cls)
    if cfg.TRAIN.USE_ALL_GT:
        # Include all ground truth boxes
        gt_inds = np.where(entry['gt_classes'] != -1)[0]
    else:
        # For the COCO ground truth boxes, exclude the ones that are
        # ''iscrowd''.  The parentheses matter: `&` binds tighter than `!=`,
        # so the unparenthesised form compared against `0 & mask` (always 0)
        # and silently ignored the crowd mask.
        not_crowd = np.all(entry['gt_overlaps'].toarray() > -1.0, axis=1)
        gt_inds = np.where((entry['gt_classes'] != 0) & not_crowd)[0]
    gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
    gt_boxes[:, 0:4] = entry['boxes'][gt_inds, :] * scale
    gt_boxes[:, 4] = entry['gt_classes'][gt_inds]
    blobs['gt_boxes'] = gt_boxes
    blobs['im_info'] = np.array(
        [im_blob.shape[1], im_blob.shape[2], scale],
        dtype=np.float32)
    blobs['image_level_labels'] = entry['image_level_labels']

    # Add ss_boxes into the blob (changed for WSDNN): only the rows whose
    # class is -1 are kept, which removes the gt rois from ss_boxes.
    ss_inds = np.where(entry['gt_classes'] == -1)[0]
    ss_boxes = np.empty((len(ss_inds), 5), dtype=np.float32)
    ss_boxes[:, 1:] = entry['boxes'][ss_inds, :] * scale
    ss_boxes[:, 0] = 0  # first column is the image index within the batch
    blobs['ss_boxes'] = ss_boxes

    return blobs
The class with maximum overlap is also 22 | recorded. 23 | """ 24 | roidb = imdb.roidb 25 | if not (imdb.name.startswith('coco')): 26 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 27 | for i in range(imdb.num_images)] 28 | for i in range(len(imdb.image_index)): 29 | roidb[i]['image'] = imdb.image_path_at(i) 30 | if not (imdb.name.startswith('coco')): 31 | roidb[i]['width'] = sizes[i][0] 32 | roidb[i]['height'] = sizes[i][1] 33 | # need gt_overlaps as a dense array for argmax 34 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 35 | # max overlap with gt over classes (columns) 36 | max_overlaps = gt_overlaps.max(axis=1) 37 | # gt class that had the max overlap 38 | max_classes = gt_overlaps.argmax(axis=1) 39 | roidb[i]['max_classes'] = max_classes 40 | roidb[i]['max_overlaps'] = max_overlaps 41 | # sanity checks 42 | # max overlap of 0 => class should be zero (background) 43 | zero_inds = np.where(max_overlaps == 0)[0] 44 | assert all(max_classes[zero_inds] == 0) 45 | # max overlap > 0 => class should not be zero (must be a fg class) 46 | nonzero_inds = np.where(max_overlaps > 0)[0] 47 | #assert all(max_classes[nonzero_inds] != 0) 48 | -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/utils/__pycache__/__init__.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/bbox.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/bbox.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/bbox.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/bbox.cpython-37.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/blob.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/blob.cpython-36.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/blob.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/blob.cpython-37.pyc 
def _as_float_tensor(x):
    """Return *x* as a floating-point torch tensor.

    ndarrays are wrapped with ``torch.from_numpy``; integer inputs are cast
    to float32 so the IoU division below is never an integer division.
    """
    if isinstance(x, np.ndarray):
        if not np.issubdtype(x.dtype, np.floating):
            x = x.astype(np.float32)
        return torch.from_numpy(x)
    if not x.is_floating_point():
        x = x.float()
    return x


def bbox_overlaps(boxes, query_boxes):
    """Compute pairwise IoU between two sets of boxes.

    Box extents are inclusive pixel coordinates, hence the ``+ 1`` terms.

    Parameters
    ----------
    boxes: (N, 4) ndarray or tensor, rows are (x1, y1, x2, y2)
    query_boxes: (K, 4) ndarray or tensor, rows are (x1, y1, x2, y2)

    Returns
    -------
    overlaps: (N, K) overlap between boxes and query_boxes.  An ndarray is
        returned when ``boxes`` is an ndarray, otherwise a tensor.  The two
        arguments may be of different kinds; ``query_boxes`` is converted to
        the dtype and device of ``boxes``.
    """
    return_ndarray = isinstance(boxes, np.ndarray)
    boxes = _as_float_tensor(boxes)
    query_boxes = _as_float_tensor(query_boxes).to(boxes)

    box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \
        (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
        (query_boxes[:, 3] - query_boxes[:, 1] + 1)

    # (N, 1) against (1, K) broadcasts to the full (N, K) grid; negative
    # extents mean the boxes do not intersect and are clamped to zero.
    iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t())
          - torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0)
    ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t())
          - torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0)
    intersection = iw * ih
    union = box_areas.view(-1, 1) + query_areas.view(1, -1) - intersection
    overlaps = intersection / union

    # If the input was an ndarray, turn the overlaps back into an ndarray.
    return overlaps.detach().numpy() if return_ndarray else overlaps
class Timer(object):
    """A simple timer that keeps independent statistics per name.

    Call ``tic(name)`` to start and ``toc(name)`` to stop; totals, call
    counts and running averages are accumulated separately for each name.
    """

    def __init__(self):
        self._total_time = {}
        self._calls = {}
        self._start_time = {}
        self._diff = {}
        self._average_time = {}

    def tic(self, name='default'):
        """Start (or restart) the timer called *name*."""
        # Wait for queued GPU work first so it is not billed to this interval.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # perf_counter is monotonic, so measured intervals cannot go
        # negative or jump when the system clock is adjusted.
        self._start_time[name] = time.perf_counter()

    def toc(self, name='default', average=True):
        """Stop the timer called *name* and record the elapsed interval.

        Args:
            name: timer to stop; ``tic(name)`` must have been called.
            average: if True return the running average over all calls for
                this name, otherwise return the interval just measured.

        Raises:
            KeyError: if ``tic`` was never called for *name*.
        """
        if name not in self._start_time:
            raise KeyError('toc({!r}) called before tic({!r})'.format(name, name))
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        self._diff[name] = time.perf_counter() - self._start_time[name]
        self._total_time[name] = self._total_time.get(name, 0.) + self._diff[name]
        self._calls[name] = self._calls.get(name, 0) + 1
        self._average_time[name] = self._total_time[name] / self._calls[name]
        if average:
            return self._average_time[name]
        else:
            return self._diff[name]

    def average_time(self, name='default'):
        """Return the mean interval for *name* (0.0 before the first toc)."""
        return self._average_time.get(name, 0.)

    def total_time(self, name='default'):
        """Return the accumulated time for *name* (0.0 before the first toc)."""
        return self._total_time.get(name, 0.)
def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4):
    """Draw one labelled box outline onto *image* and return the image.

    Args:
        image: object accepted by ``ImageDraw.Draw``; it is drawn on in place.
        xmin, ymin, xmax, ymax: box corners, in the image's pixel coordinates.
        display_str: label text, drawn on a filled strip whose bottom edge is
            the box's bottom edge.
        font: PIL font used both to measure and to render the label.
        color: outline and label-background colour.
        thickness: outline width in pixels.
    """
    draw = ImageDraw.Draw(image)
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)
    text_bottom = bottom

    # `font.getsize` was removed in Pillow 10.  The right/bottom edges of
    # `getbbox` are the same numbers the old call returned, so the layout is
    # unchanged; older Pillow fonts without `getbbox` still use `getsize`.
    if hasattr(font, 'getbbox'):
        _, _, text_width, text_height = font.getbbox(display_str)
    else:
        text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)

    # Filled background strip for the label, then the label itself.
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin),
         (left + text_width, text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)

    return image
| TEST_IMDB="coco_2014_minival" 39 | STEPSIZE="[350000]" 40 | ITERS=490000 41 | ANCHORS="[4,8,16,32]" 42 | RATIOS="[0.5,1,2]" 43 | ;; 44 | *) 45 | echo "No dataset given" 46 | exit 47 | ;; 48 | esac 49 | 50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`" 51 | exec &> >(tee -a "$LOG") 52 | echo Logging output to "$LOG" 53 | 54 | set +x 55 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 56 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth 57 | else 58 | NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth 59 | fi 60 | set -x 61 | 62 | if [ ! -f ${NET_FINAL}.index ]; then 63 | if [[ ! -z ${EXTRA_ARGS_SLUG} ]]; then 64 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 65 | --weight data/imagenet_weights/${NET}.pth \ 66 | --imdb ${TRAIN_IMDB} \ 67 | --imdbval ${TEST_IMDB} \ 68 | --iters ${ITERS} \ 69 | --cfg experiments/cfgs/${NET}.yml \ 70 | --tag ${EXTRA_ARGS_SLUG} \ 71 | --net ${NET} \ 72 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 73 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 74 | else 75 | CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \ 76 | --weight data/imagenet_weights/${NET}.pth \ 77 | --imdb ${TRAIN_IMDB} \ 78 | --imdbval ${TEST_IMDB} \ 79 | --iters ${ITERS} \ 80 | --cfg experiments/cfgs/${NET}.yml \ 81 | --net ${NET} \ 82 | --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \ 83 | TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS} 84 | fi 85 | fi 86 | 87 | echo $@ 88 | ./experiments/scripts/test_faster_rcnn.sh $@ 89 | -------------------------------------------------------------------------------- /tensorboard/vgg16/voc_2007_trainval/default_val/events.out.tfevents.1552263409.vasgaoweithu-Precision-Tower-7910: -------------------------------------------------------------------------------- 
def add_path(path):
    """Prepend *path* to ``sys.path`` unless it is already listed."""
    if path not in sys.path:
        sys.path.insert(0, path)


# Resolve against this file's absolute location: a relative __file__ would
# otherwise produce sys.path entries that only work from the current
# working directory and break after a chdir.
this_dir = osp.dirname(osp.abspath(__file__))

# Project library directory (<repo>/lib).
lib_path = osp.normpath(osp.join(this_dir, '..', 'lib'))
add_path(lib_path)

# COCO Python API directory (<repo>/data/coco/PythonAPI).
coco_path = osp.normpath(osp.join(this_dir, '..', 'data', 'coco', 'PythonAPI'))
add_path(coco_path)
13 | """ 14 | from __future__ import absolute_import 15 | from __future__ import division 16 | from __future__ import print_function 17 | 18 | import _init_paths 19 | from model.config import cfg 20 | from model.test import im_detect 21 | from model.nms_wrapper import nms 22 | from datasets.factory import get_imdb 23 | 24 | 25 | from utils.timer import Timer 26 | import matplotlib.pyplot as plt 27 | import numpy as np 28 | import os, cv2 29 | import argparse 30 | 31 | from nets.vgg16 import vgg16 32 | from nets.resnet_v1 import resnetv1 33 | 34 | import torch 35 | 36 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 37 | 38 | CLASSES = ('__background__', 39 | 'aeroplane', 'bicycle', 'bird', 'boat', 40 | 'bottle', 'bus', 'car', 'cat', 'chair', 41 | 'cow', 'diningtable', 'dog', 'horse', 42 | 'motorbike', 'person', 'pottedplant', 43 | 'sheep', 'sofa', 'train', 'tvmonitor') 44 | CLASSES = ( 45 | 'aeroplane', 'bicycle', 'bird', 'boat', 46 | 'bottle', 'bus', 'car', 'cat', 'chair', 47 | 'cow', 'diningtable', 'dog', 'horse', 48 | 'motorbike', 'person', 'pottedplant', 49 | 'sheep', 'sofa', 'train', 'tvmonitor') 50 | 51 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),'res101': ('res101_faster_rcnn_iter_%d.pth',)} 52 | DATASETS= {'pascal_voc': ('voc_2007_trainval',),'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)} 53 | 54 | def vis_detections(im, class_name, dets, thresh=0.5): 55 | """Draw detected bounding boxes.""" 56 | inds = np.where(dets[:, -1] >= thresh)[0] 57 | if len(inds) == 0: 58 | #print('hahaha') 59 | return 60 | 61 | im = im[:, :, (2, 1, 0)] 62 | fig, ax = plt.subplots(figsize=(12, 12)) 63 | ax.imshow(im, aspect='equal') 64 | for i in inds: 65 | bbox = dets[i, :4] 66 | score = dets[i, -1] 67 | 68 | ax.add_patch( 69 | plt.Rectangle((bbox[0], bbox[1]), 70 | bbox[2] - bbox[0], 71 | bbox[3] - bbox[1], fill=False, 72 | edgecolor='red', linewidth=3.5) 73 | ) 74 | ax.text(bbox[0], bbox[1] - 2, 75 | '{:s} {:.3f}'.format(class_name, score), 76 | 
if __name__ == '__main__':
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals
    args = parse_args()

    # model path
    demonet = args.demo_net
    dataset = args.dataset
    saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default',
                               NETS[demonet][0] % (100000 if dataset == 'pascal_voc' else 110000))

    if not os.path.isfile(saved_model):
        raise IOError(('{:s} not found.\nDid you download the proper networks from '
                       'our server and place them properly?').format(saved_model))

    # Images are drawn from VOC 2007 test for every supported --dataset.
    # Previously the name was only bound for 'pascal_voc', so the default
    # 'pascal_voc_0712' died with a NameError on the next line.
    test_name = 'voc_2007_test'

    imdb = get_imdb(test_name)
    roidb = imdb.roidb

    # load network
    if demonet == 'vgg16':
        net = vgg16()
    elif demonet == 'res101':
        net = resnetv1(num_layers=101)
    else:
        raise NotImplementedError
    net.create_architecture(20,
                            tag='default', anchor_scales=[8, 16, 32])

    # map_location keeps every tensor on the CPU while loading.
    net.load_state_dict(torch.load(saved_model, map_location=lambda storage, loc: storage))

    net.eval()
    if not torch.cuda.is_available():
        net._device = 'cpu'
    net.to(net._device)

    print('Loaded network {:s}'.format(saved_model))

    # Run the demo on a fixed slice of the test set (images 10..19).
    index = np.arange(10, 20)

    for i in index:
        im_path = imdb.image_path_at(i)
        demo(net, im_path, roidb[i])

    plt.show()
def parse_args():
    """Parse the command-line arguments of the test script.

    Prints the usage text and exits with status 1 when the script is
    invoked without any argument.

    Returns:
        argparse.Namespace with attributes cfg_file, model, weight,
        imdb_name, comp_mode, max_per_image, tag, net and set_cfgs.
    """
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    parser.add_argument('--model', dest='model',
                        help='model to test',
                        default=None, type=str)
    # The script's main section reads args.weight when --model is absent,
    # so the option has to exist; it defaults to None.
    parser.add_argument('--weight', dest='weight',
                        help='initialization weights file (names the output '
                             'when --model is not given)',
                        default=None, type=str)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')
    parser.add_argument('--num_dets', dest='max_per_image',
                        help='max number of detections per image',
                        default=100, type=int)
    parser.add_argument('--tag', dest='tag',
                        help='tag of the model',
                        default='', type=str)
    parser.add_argument('--net', dest='net',
                        help='vgg16, res50, res101, res152, mobile',
                        default='res50', type=str)
    # Everything after --set is collected verbatim as config key/value pairs.
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # The parser may not declare --weight, so read it defensively instead of
    # failing with AttributeError whenever --model is omitted.
    init_weight = getattr(args, 'weight', None)

    # if has model, get the name from it
    # if does not, then use the initialization weights' name, falling back
    # to the network name when neither file was given
    if args.model:
        filename = os.path.splitext(os.path.basename(args.model))[0]
    elif init_weight:
        filename = os.path.splitext(os.path.basename(init_weight))[0]
    else:
        filename = args.net

    tag = args.tag
    tag = tag if tag else 'default'
    filename = tag + '/' + filename

    imdb = get_imdb(args.imdb_name)
    roidb = imdb.roidb
    imdb.competition_mode(args.comp_mode)

    # load network
    if args.net == 'vgg16':
        net = MELM_vgg16()
    elif args.net == 'res50':
        net = resnetv1(num_layers=50)
    elif args.net == 'res101':
        net = resnetv1(num_layers=101)
    elif args.net == 'res152':
        net = resnetv1(num_layers=152)
    elif args.net == 'mobile':
        net = mobilenetv1()
    else:
        raise NotImplementedError

    # load model
    net.create_architecture(imdb.num_classes, tag='default',
                            anchor_scales=cfg.ANCHOR_SCALES,
                            anchor_ratios=cfg.ANCHOR_RATIOS)

    net.eval()
    if not torch.cuda.is_available():
        net._device = 'cpu'
    net.to(net._device)

    if args.model:
        print(('Loading model check point from {:s}').format(args.model))
        # map_location keeps every tensor on the CPU while loading.
        net.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
        print('Loaded.')
    elif init_weight:
        # NOTE(review): nothing is read from the weights file in this
        # branch; only the messages are printed.  Confirm whether the
        # initial weights were meant to be loaded here.
        print(('Loading initial weights from {:s}').format(init_weight))
        print('Loaded.')
    else:
        print('No --model or --weight given; testing the network as initialized.')

    test_net(net, imdb, roidb, filename, max_per_image=args.max_per_image)
if __name__ == '__main__':
    # Entry point of the training script: parse the command line, apply
    # config overrides, load the training and validation roidbs and start
    # train_net().
    args = parse_args()

    print('Called with args:')
    print(args)

    # Config file first, then the key/value overrides collected by --set.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # Only the vgg16 backbone is wired up below, although --net advertises
    # more choices and defaults to 'res50'. Reject anything else before the
    # datasets are loaded, and say why, instead of raising a bare
    # NotImplementedError after all the loading work has been done.
    if args.net != 'vgg16':
        raise NotImplementedError(
            "Training is only implemented for --net vgg16 (got '{}')".format(
                args.net))

    np.random.seed(cfg.RNG_SEED)

    # train set
    imdb, roidb = combined_roidb(args.imdb_name)
    print('{:d} roidb entries'.format(len(roidb)))

    # output directory where the models are saved
    output_dir = get_output_dir(imdb, args.tag)
    print('Output will be saved to `{:s}`'.format(output_dir))

    # tensorboard directory where the summaries are saved during training
    tb_dir = get_output_tb_dir(imdb, args.tag)
    print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir))

    # also add the validation set, but with no flipping images; the flag is
    # switched off only while the validation roidb is built, then restored
    orgflip = cfg.TRAIN.USE_FLIPPED
    cfg.TRAIN.USE_FLIPPED = False
    _, valroidb = combined_roidb(args.imdbval_name)
    print('{:d} validation roidb entries'.format(len(valroidb)))
    cfg.TRAIN.USE_FLIPPED = orgflip

    # load network (args.net was validated above)
    net = MELM_vgg16()

    train_net(net, imdb, roidb, valroidb, output_dir, tb_dir,
              pretrained_model=args.weight,
              max_iters=args.max_iters)