├── .spyproject
    ├── codestyle.ini
    ├── encoding.ini
    ├── vcs.ini
    └── workspace.ini
├── README.md
├── data
    ├── VOCdevkit2007
    │   └── train_faster_rcnn.sh
    ├── VOCdevkit2012
    │   └── train_faster_rcnn.sh
    ├── imagenet_weights
    │   └── train_faster_rcnn.sh
    └── selective_search_data
    │   └── train_faster_rcnn.sh
├── experiments
    ├── cfgs
    │   ├── change_log.txt
    │   ├── mobile.yml
    │   ├── res101-lg.yml
    │   ├── res101.yml
    │   ├── res50.yml
    │   └── vgg16.yml
    ├── logs
    │   ├── 46.67
    │   │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-11_08-16-58
    │   │   └── vgg16_voc_2007_trainval__vgg16.txt.2019-03-11_08-16-28
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-03_19-21-38
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-06_02-46-04
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-06_08-16-58
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-08_08-06-10
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-08_09-03-31
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-08_11-20-57
    │   ├── test_vgg16_voc_2007_trainval_.txt.2019-03-10_18-07-42
    │   ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-03_19-20-42
    │   ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-05_08-20-12
    │   ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-06_08-16-31
    │   ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-08_08-05-45
    │   ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-08_09-03-06
    │   ├── vgg16_voc_2007_trainval__vgg16.txt.2019-03-08_11-20-27
    │   └── vgg16_voc_2007_trainval__vgg16.txt.2019-03-09_20-05-42
    └── scripts
    │   ├── convert_vgg16.sh
    │   ├── test_faster_rcnn.sh
    │   ├── test_faster_rcnn_notime.sh
    │   ├── train_faster_rcnn.sh
    │   └── train_faster_rcnn_notime.sh
├── lib
    ├── datasets
    │   ├── VOCdevkit-matlab-wrapper
    │   │   ├── get_voc_opts.m
    │   │   ├── voc_eval.m
    │   │   └── xVOCap.m
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── coco.cpython-36.pyc
    │   │   ├── coco.cpython-37.pyc
    │   │   ├── ds_utils.cpython-36.pyc
    │   │   ├── ds_utils.cpython-37.pyc
    │   │   ├── factory.cpython-36.pyc
    │   │   ├── factory.cpython-37.pyc
    │   │   ├── imdb.cpython-36.pyc
    │   │   ├── imdb.cpython-37.pyc
    │   │   ├── pascal_voc.cpython-36.pyc
    │   │   ├── pascal_voc.cpython-37.pyc
    │   │   ├── voc_eval.cpython-36.pyc
    │   │   └── voc_eval.cpython-37.pyc
    │   ├── coco.py
    │   ├── ds_utils.py
    │   ├── factory.py
    │   ├── imdb.py
    │   ├── pascal_voc.py
    │   ├── tools
    │   │   └── mcg_munge.py
    │   └── voc_eval.py
    ├── layer_utils
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── anchor_target_layer.cpython-36.pyc
    │   │   ├── anchor_target_layer.cpython-37.pyc
    │   │   ├── generate_anchors.cpython-36.pyc
    │   │   ├── generate_anchors.cpython-37.pyc
    │   │   ├── proposal_layer.cpython-36.pyc
    │   │   ├── proposal_layer.cpython-37.pyc
    │   │   ├── proposal_target_layer.cpython-36.pyc
    │   │   ├── proposal_target_layer.cpython-37.pyc
    │   │   ├── proposal_top_layer.cpython-36.pyc
    │   │   ├── proposal_top_layer.cpython-37.pyc
    │   │   ├── snippets.cpython-36.pyc
    │   │   └── snippets.cpython-37.pyc
    │   ├── anchor_target_layer.py
    │   ├── generate_anchors.py
    │   ├── proposal_layer.py
    │   ├── proposal_target_layer.py
    │   ├── proposal_top_layer.py
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   ├── crop_and_resize.cpython-36.pyc
    │   │   │   └── crop_and_resize.cpython-37.pyc
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   │   └── __init__.cpython-37.pyc
    │   │   │   └── crop_and_resize
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │       ├── __init__.cpython-36.pyc
    │   │   │   │       └── __init__.cpython-37.pyc
    │   │   │   │   └── _crop_and_resize.so
    │   │   ├── build.py
    │   │   ├── crop_and_resize.py
    │   │   ├── roi_align.py
    │   │   └── src
    │   │   │   ├── crop_and_resize.c
    │   │   │   ├── crop_and_resize.h
    │   │   │   ├── crop_and_resize_gpu.c
    │   │   │   ├── crop_and_resize_gpu.h
    │   │   │   └── cuda
    │   │   │       ├── crop_and_resize_kernel.cu
    │   │   │       ├── crop_and_resize_kernel.cu.o
    │   │   │       └── crop_and_resize_kernel.h
    │   ├── roi_pooling
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   └── __init__.cpython-36.pyc
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   └── __init__.cpython-36.pyc
    │   │   │   └── roi_pooling
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │       └── __init__.cpython-36.pyc
    │   │   │   │   └── _roi_pooling.so
    │   │   ├── build.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   │   └── roi_pool.cpython-36.pyc
    │   │   │   └── roi_pool.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pool.py
    │   │   └── src
    │   │   │   ├── roi_pooling.c
    │   │   │   ├── roi_pooling.cu.o
    │   │   │   ├── roi_pooling.h
    │   │   │   ├── roi_pooling_cuda.c
    │   │   │   ├── roi_pooling_cuda.h
    │   │   │   ├── roi_pooling_kernel.cu
    │   │   │   └── roi_pooling_kernel.h
    │   ├── roi_ring_pooling
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   └── __init__.cpython-36.pyc
    │   │   │   └── roi_ring_pooling
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │       └── __init__.cpython-36.pyc
    │   │   │   │   └── _roi_ring_pooling.so
    │   │   ├── build.py
    │   │   ├── functions
    │   │   │   ├── __pycache__
    │   │   │   │   └── roi_ring_pool.cpython-36.pyc
    │   │   │   └── roi_ring_pool.py
    │   │   ├── modules
    │   │   │   └── roi_ring_pool.py
    │   │   └── src
    │   │   │   ├── roi_ring_pooling.c
    │   │   │   ├── roi_ring_pooling.h
    │   │   │   ├── roi_ring_pooling_cuda.c
    │   │   │   ├── roi_ring_pooling_cuda.h
    │   │   │   ├── roi_ring_pooling_kernel.cu
    │   │   │   ├── roi_ring_pooling_kernel.cu.o
    │   │   │   └── roi_ring_pooling_kernel.h
    │   └── snippets.py
    ├── layers
    │   ├── __pycache__
    │   │   └── recurrent_linear.cpython-36.pyc
    │   └── recurrent_linear.py
    ├── make.sh
    ├── model
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── apmetric.cpython-36.pyc
    │   │   ├── bbox_transform.cpython-36.pyc
    │   │   ├── bbox_transform.cpython-37.pyc
    │   │   ├── config.cpython-36.pyc
    │   │   ├── config.cpython-37.pyc
    │   │   ├── nms_wrapper.cpython-36.pyc
    │   │   ├── nms_wrapper.cpython-37.pyc
    │   │   ├── test.cpython-36.pyc
    │   │   ├── test.cpython-37.pyc
    │   │   ├── train_val.cpython-36.pyc
    │   │   └── train_val.cpython-37.pyc
    │   ├── apmetric.py
    │   ├── bbox_transform.py
    │   ├── config.py
    │   ├── nms_wrapper.py
    │   ├── test.py
    │   └── train_val.py
    ├── nets
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── mobilenet_v1.cpython-36.pyc
    │   │   ├── mobilenet_v1.cpython-37.pyc
    │   │   ├── network.cpython-36.pyc
    │   │   ├── network.cpython-37.pyc
    │   │   ├── resnet_v1.cpython-36.pyc
    │   │   ├── resnet_v1.cpython-37.pyc
    │   │   ├── vgg16.cpython-36.pyc
    │   │   └── vgg16.cpython-37.pyc
    │   ├── mobilenet_v1.py
    │   ├── network.py
    │   ├── resnet_v1.py
    │   └── vgg16.py
    ├── nms
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── pth_nms.cpython-36.pyc
    │   │   └── pth_nms.cpython-37.pyc
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   └── __init__.cpython-37.pyc
    │   │   └── nms
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │       ├── __init__.cpython-36.pyc
    │   │   │       └── __init__.cpython-37.pyc
    │   │   │   └── _nms.so
    │   ├── build.py
    │   ├── pth_nms.py
    │   └── src
    │   │   ├── cuda
    │   │       ├── nms_kernel.cu
    │   │       ├── nms_kernel.cu.o
    │   │       └── nms_kernel.h
    │   │   ├── nms.c
    │   │   ├── nms.h
    │   │   ├── nms_cuda.c
    │   │   └── nms_cuda.h
    ├── roi_data_layer
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── layer.cpython-36.pyc
    │   │   ├── layer.cpython-37.pyc
    │   │   ├── minibatch.cpython-36.pyc
    │   │   ├── minibatch.cpython-37.pyc
    │   │   ├── roidb.cpython-36.pyc
    │   │   └── roidb.cpython-37.pyc
    │   ├── layer.py
    │   ├── minibatch.py
    │   └── roidb.py
    └── utils
    │   ├── .gitignore
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── __init__.cpython-36.pyc
    │       ├── __init__.cpython-37.pyc
    │       ├── bbox.cpython-36.pyc
    │       ├── bbox.cpython-37.pyc
    │       ├── blob.cpython-36.pyc
    │       ├── blob.cpython-37.pyc
    │       ├── timer.cpython-36.pyc
    │       ├── timer.cpython-37.pyc
    │       ├── visualization.cpython-36.pyc
    │       └── visualization.cpython-37.pyc
    │   ├── bbox.py
    │   ├── blob.py
    │   ├── timer.py
    │   └── visualization.py
├── output
    └── train_faster_rcnn.sh
├── tensorboard
    └── vgg16
    │   └── voc_2007_trainval
    │       └── default_val
    │           └── events.out.tfevents.1552263409.vasgaoweithu-Precision-Tower-7910
└── tools
    ├── __pycache__
        └── _init_paths.cpython-36.pyc
    ├── _init_paths.py
    ├── demo.py
    ├── test_net.py
    └── trainval_net.py


/.spyproject/codestyle.ini:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 | 
4 | [main]
5 | version = 0.1.0
6 | 
7 | 


--------------------------------------------------------------------------------
/.spyproject/encoding.ini:
--------------------------------------------------------------------------------
1 | [encoding]
2 | text_encoding = utf-8
3 | 
4 | [main]
5 | version = 0.1.0
6 | 
7 | 


--------------------------------------------------------------------------------
/.spyproject/vcs.ini:
--------------------------------------------------------------------------------
1 | [vcs]
2 | use_version_control = False
3 | version_control_system = 
4 | 
5 | [main]
6 | version = 0.1.0
7 | 
8 | 


--------------------------------------------------------------------------------
/.spyproject/workspace.ini:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | restore_data_on_startup = True
 3 | save_data_on_exit = True
 4 | save_history = True
 5 | save_non_project_files = False
 6 | 
 7 | [main]
 8 | version = 0.1.0
 9 | recent_files = []
10 | 
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pytorch_MELM
  2 | **News: this repo now supports pytorch-1.0 and higher versions! I borrowed code and some implementation ideas from [mmdetection](https://github.com/open-mmlab/mmdetection).**
  3 | 
  4 | This is a simplified version of MELM with context in pytorch for the paper "Min-Entropy Latent Model for Weakly Supervised Object Detection", which was accepted at [CVPR2018](http://openaccess.thecvf.com/content_cvpr_2018/papers/Wan_Min-Entropy_Latent_Model_CVPR_2018_paper.pdf) and [TPAMI](https://ieeexplore.ieee.org/document/8640243).
  5 | 
  6 | This implementation is based on [Winfrand's](https://github.com/Winfrand/MELM) which is the official version based on torch7 and lua. This implementation is also based on ruotianluo's [pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn).
  7 | 
  8 | **Trained on PASCAL_VOC 2007 trainval and tested on PASCAL_VOC 2007 test with a VGG16 backbone, this implementation reaches a mAP of 47.98, slightly better than the paper's result.**
  9 | 
 10 | # If you find MELM useful and use this code, please cite our paper:
 11 | ```
 12 | @inproceedings{wan2018min,
 13 |   title={Min-Entropy Latent Model for Weakly Supervised Object Detection},
 14 |   author={Wan, Fang and Wei, Pengxu and Jiao, Jianbin and Han, Zhenjun and Ye, Qixiang},
 15 |   booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
 16 |   pages={1297--1306},
 17 |   year={2018}
 18 | }
 19 | ```
 20 | ```
 21 | @article{wan2019Pami,
 22 |   author    = {Fang Wan and 
 23 |                Pengxu Wei and
 24 |                Jianbin Jiao and
 25 |                Zhenjun Han and 
 26 |                Qixiang Ye},
 27 |   title     = {Min-Entropy Latent Model for Weakly Supervised Object Detection},
 28 |   journal   = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
 29 |   doi       = {10.1109/TPAMI.2019.2898858},
 30 |   year      = {2019}
 31 | }
 32 | ```
 33 | 
 34 | 
 35 | # Prerequisites
 36 | * Nvidia GPU 1080Ti
 37 | * Ubuntu 16.04 LTS
 38 | * python **3.6**
 39 | * pytorch **0.4** is required. For pytorch **1.0** or higher version, please go to the **pytorch1.0** version.
 40 | * tensorflow, tensorboard and [tensorboardX](https://github.com/lanpa/tensorboardX) for visualizing the training and validation curves.
 41 | 
 42 | # Installation
 43 | 1. Clone the repository
 44 |   ```Shell
 45 |   git clone https://github.com/vasgaowei/pytorch_MELM.git
 46 |   ```
 47 | 2. Compile the modules(nms, roi_pooling, roi_ring_pooling and roi_align)
 48 |   ```
 49 |   cd pytorch_MELM/lib
 50 |   bash make.sh
 51 |   ```
 52 | # Setup the data
 53 | 
 54 | 1. Download the training, validation, test data and the VOCdevkit
 55 |   ```
 56 |   cd pytorch_MELM/
 57 |   mkdir data
 58 |   cd data/
 59 |   wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
 60 |   wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
 61 |   wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
 62 |   ```
 63 |   
 64 |   
 65 | 2. Extract all of these tars into one directory named VOCdevkit
 66 |   ```
 67 |   tar xvf VOCtrainval_06-Nov-2007.tar
 68 |   tar xvf VOCtest_06-Nov-2007.tar
 69 |   tar xvf VOCdevkit_08-Jun-2007.tar
 70 |   ```
 71 | 3. Create symlinks for PASCAL VOC dataset or just rename the VOCdevkit to VOCdevkit2007
 72 |   ```
 73 |   cd pytorch_MELM/data
 74 |   ln -s VOCdevkit VOCdevkit2007
 75 |   ```
 76 | 4. It should have this basic structure
 77 |   ```
 78 |   $VOCdevkit2007/                     # development kit
 79 |   $VOCdevkit2007/VOC2007/             # image sets, annotations, etc
 80 |   $VOCdevkit2007/VOCcode/             # VOC utility code
 81 |   ```
 82 |   For PASCAL VOC 2010 and PASCAL VOC 2012, just follow similar steps.
 83 |   
 84 | # Download the pre-trained ImageNet models
 85 |   Download the pre-trained ImageNet models from https://drive.google.com/drive/folders/0B1_fAEgxdnvJSmF3YUlZcHFqWTQ
 86 |   or from https://drive.google.com/drive/folders/1FV6ZOHOxLMQjE4ujTNOObI7lN8USH0v_?usp=sharing, put the file in data/imagenet_weights and rename it vgg16.pth. The folder has the following form.
 87 |   ```
 88 |   $ data/imagenet_weights/vgg16.pth
 89 |   $ data/imagenet_weights/res50.pth
 90 |   ```
 91 | # Download the Selective Search proposals for PASCAL VOC 2007
 92 |   Download it from: https://dl.dropboxusercontent.com/s/orrt7o6bp6ae0tc/selective_search_data.tgz
 93 |   and extract it; the final folder has the following form
 94 |   ```
 95 |   $ data/selective_search_data/voc_2007_train.mat
 96 |   $ data/selective_search_data/voc_2007_test.mat
 97 |   $ data/selective_search_data/voc_2007_trainval.mat
 98 |   ```
 99 | # Train your own model
100 |   For vgg16 backbone, we can train the model using the following commands
101 |   ```
102 |   ./experiments/scripts/train_faster_rcnn.sh 0 pascal_voc vgg16
103 |   ```
104 |   And for testing, we can use the following command
105 |   ```
106 |   ./experiments/scripts/test_faster_rcnn.sh 0 pascal_voc vgg16
107 |   ```
108 | # Visualizing some detection results
109 |   I have pretrained a pytorch_MELM model on PASCAL VOC 2007 based on the vgg16 backbone. You can download it from https://drive.google.com/drive/folders/1FV6ZOHOxLMQjE4ujTNOObI7lN8USH0v_?usp=sharing, put it at
110 |   output/vgg16/voc_2007_trainval/default/vgg16_MELM.pth and run the following commands.
111 |   ```
112 |   cd pytorch_MELM
113 |   python ./tools/demo.py --net vgg16 --dataset pascal_voc
114 |   ```
115 |   You can also visualize the training and validation curves.
116 |   ```
117 |   tensorboard --logdir tensorboard/vgg16/voc_2007_trainval/
118 |   ```
119 |   
120 | 


--------------------------------------------------------------------------------
/data/VOCdevkit2007/train_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train a detector on the selected dataset, then run the matching test script
 3 | # with the same arguments.
 4 | #
 5 | # Usage: train_faster_rcnn.sh GPU_ID DATASET NET [EXTRA_ARGS...]
 6 | #   GPU_ID      passed to the trainer as CUDA_VISIBLE_DEVICES
 7 | #   DATASET     pascal_voc | pascal_voc_0712 | coco
 8 | #   NET         selects data/imagenet_weights/${NET}.pth and experiments/cfgs/${NET}.yml
 9 | #   EXTRA_ARGS  appended after --set; joined with '_' they also form the run tag
10 | #
11 | # All paths are relative, so invoke this from the repository root.
12 | 
13 | alias time='/usr/bin/time'
14 | 
15 | set -x
16 | set -e
17 | 
18 | export PYTHONUNBUFFERED="True"
19 | 
20 | GPU_ID=$1
21 | DATASET=$2
22 | NET=$3
23 | 
24 | # Keep the extra arguments as an array so each one reaches the trainer intact
25 | # (no re-splitting or glob expansion); the space-joined form feeds the slug.
26 | EXTRA_ARGS_ARRAY=( "${@:4}" )
27 | EXTRA_ARGS="${EXTRA_ARGS_ARRAY[*]}"
28 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
29 | 
30 | case ${DATASET} in
31 |   pascal_voc)
32 |     TRAIN_IMDB="voc_2007_trainval"
33 |     TEST_IMDB="voc_2007_test"
34 |     STEPSIZE="[50000]"
35 |     ITERS=100000
36 |     ANCHORS="[8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   pascal_voc_0712)
40 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
41 |     TEST_IMDB="voc_2007_test"
42 |     STEPSIZE="[80000]"
43 |     ITERS=110000
44 |     ANCHORS="[8,16,32]"
45 |     RATIOS="[0.5,1,2]"
46 |     ;;
47 |   coco)
48 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
49 |     TEST_IMDB="coco_2014_minival"
50 |     STEPSIZE="[350000]"
51 |     ITERS=490000
52 |     ANCHORS="[4,8,16,32]"
53 |     RATIOS="[0.5,1,2]"
54 |     ;;
55 |   *)
56 |     # An unrecognised dataset is an error: report on stderr and fail.
57 |     echo "No dataset given" >&2
58 |     exit 1
59 |     ;;
60 | esac
61 | 
62 | # tee cannot create the log if the directory is missing, so make sure it exists.
63 | mkdir -p experiments/logs
64 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
65 | exec &> >(tee -a "$LOG")
66 | echo Logging output to "$LOG"
67 | 
68 | set +x
69 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
70 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
71 | else
72 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
73 | fi
74 | set -x
75 | 
76 | # Skip training when the final snapshot already exists. NET_FINAL already ends
77 | # in .pth; the old check looked for ${NET_FINAL}.index instead.
78 | # NOTE(review): confirm the trainer writes exactly ${NET_FINAL}.
79 | if [ ! -f "${NET_FINAL}" ]; then
80 |   # --tag is only passed when extra arguments were given.
81 |   TAG_ARGS=()
82 |   if [[ -n ${EXTRA_ARGS_SLUG} ]]; then
83 |     TAG_ARGS=( --tag "${EXTRA_ARGS_SLUG}" )
84 |   fi
85 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
86 |     --weight "data/imagenet_weights/${NET}.pth" \
87 |     --imdb "${TRAIN_IMDB}" \
88 |     --imdbval "${TEST_IMDB}" \
89 |     --iters "${ITERS}" \
90 |     --cfg "experiments/cfgs/${NET}.yml" \
91 |     "${TAG_ARGS[@]}" \
92 |     --net "${NET}" \
93 |     --set ANCHOR_SCALES "${ANCHORS}" ANCHOR_RATIOS "${RATIOS}" \
94 |     TRAIN.STEPSIZE "${STEPSIZE}" "${EXTRA_ARGS_ARRAY[@]}"
95 | fi
96 | 
97 | echo "$@"
98 | ./experiments/scripts/test_faster_rcnn.sh "$@"
99 | 


--------------------------------------------------------------------------------
/data/VOCdevkit2012/train_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train a detector on the selected dataset, then run the matching test script
 3 | # with the same arguments.
 4 | #
 5 | # Usage: train_faster_rcnn.sh GPU_ID DATASET NET [EXTRA_ARGS...]
 6 | #   GPU_ID      passed to the trainer as CUDA_VISIBLE_DEVICES
 7 | #   DATASET     pascal_voc | pascal_voc_0712 | coco
 8 | #   NET         selects data/imagenet_weights/${NET}.pth and experiments/cfgs/${NET}.yml
 9 | #   EXTRA_ARGS  appended after --set; joined with '_' they also form the run tag
10 | #
11 | # All paths are relative, so invoke this from the repository root.
12 | 
13 | alias time='/usr/bin/time'
14 | 
15 | set -x
16 | set -e
17 | 
18 | export PYTHONUNBUFFERED="True"
19 | 
20 | GPU_ID=$1
21 | DATASET=$2
22 | NET=$3
23 | 
24 | # Keep the extra arguments as an array so each one reaches the trainer intact
25 | # (no re-splitting or glob expansion); the space-joined form feeds the slug.
26 | EXTRA_ARGS_ARRAY=( "${@:4}" )
27 | EXTRA_ARGS="${EXTRA_ARGS_ARRAY[*]}"
28 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
29 | 
30 | case ${DATASET} in
31 |   pascal_voc)
32 |     TRAIN_IMDB="voc_2007_trainval"
33 |     TEST_IMDB="voc_2007_test"
34 |     STEPSIZE="[50000]"
35 |     ITERS=100000
36 |     ANCHORS="[8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   pascal_voc_0712)
40 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
41 |     TEST_IMDB="voc_2007_test"
42 |     STEPSIZE="[80000]"
43 |     ITERS=110000
44 |     ANCHORS="[8,16,32]"
45 |     RATIOS="[0.5,1,2]"
46 |     ;;
47 |   coco)
48 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
49 |     TEST_IMDB="coco_2014_minival"
50 |     STEPSIZE="[350000]"
51 |     ITERS=490000
52 |     ANCHORS="[4,8,16,32]"
53 |     RATIOS="[0.5,1,2]"
54 |     ;;
55 |   *)
56 |     # An unrecognised dataset is an error: report on stderr and fail.
57 |     echo "No dataset given" >&2
58 |     exit 1
59 |     ;;
60 | esac
61 | 
62 | # tee cannot create the log if the directory is missing, so make sure it exists.
63 | mkdir -p experiments/logs
64 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
65 | exec &> >(tee -a "$LOG")
66 | echo Logging output to "$LOG"
67 | 
68 | set +x
69 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
70 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
71 | else
72 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
73 | fi
74 | set -x
75 | 
76 | # Skip training when the final snapshot already exists. NET_FINAL already ends
77 | # in .pth; the old check looked for ${NET_FINAL}.index instead.
78 | # NOTE(review): confirm the trainer writes exactly ${NET_FINAL}.
79 | if [ ! -f "${NET_FINAL}" ]; then
80 |   # --tag is only passed when extra arguments were given.
81 |   TAG_ARGS=()
82 |   if [[ -n ${EXTRA_ARGS_SLUG} ]]; then
83 |     TAG_ARGS=( --tag "${EXTRA_ARGS_SLUG}" )
84 |   fi
85 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
86 |     --weight "data/imagenet_weights/${NET}.pth" \
87 |     --imdb "${TRAIN_IMDB}" \
88 |     --imdbval "${TEST_IMDB}" \
89 |     --iters "${ITERS}" \
90 |     --cfg "experiments/cfgs/${NET}.yml" \
91 |     "${TAG_ARGS[@]}" \
92 |     --net "${NET}" \
93 |     --set ANCHOR_SCALES "${ANCHORS}" ANCHOR_RATIOS "${RATIOS}" \
94 |     TRAIN.STEPSIZE "${STEPSIZE}" "${EXTRA_ARGS_ARRAY[@]}"
95 | fi
96 | 
97 | echo "$@"
98 | ./experiments/scripts/test_faster_rcnn.sh "$@"
99 | 


--------------------------------------------------------------------------------
/data/imagenet_weights/train_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train a detector on the selected dataset, then run the matching test script
 3 | # with the same arguments.
 4 | #
 5 | # Usage: train_faster_rcnn.sh GPU_ID DATASET NET [EXTRA_ARGS...]
 6 | #   GPU_ID      passed to the trainer as CUDA_VISIBLE_DEVICES
 7 | #   DATASET     pascal_voc | pascal_voc_0712 | coco
 8 | #   NET         selects data/imagenet_weights/${NET}.pth and experiments/cfgs/${NET}.yml
 9 | #   EXTRA_ARGS  appended after --set; joined with '_' they also form the run tag
10 | #
11 | # All paths are relative, so invoke this from the repository root.
12 | 
13 | alias time='/usr/bin/time'
14 | 
15 | set -x
16 | set -e
17 | 
18 | export PYTHONUNBUFFERED="True"
19 | 
20 | GPU_ID=$1
21 | DATASET=$2
22 | NET=$3
23 | 
24 | # Keep the extra arguments as an array so each one reaches the trainer intact
25 | # (no re-splitting or glob expansion); the space-joined form feeds the slug.
26 | EXTRA_ARGS_ARRAY=( "${@:4}" )
27 | EXTRA_ARGS="${EXTRA_ARGS_ARRAY[*]}"
28 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
29 | 
30 | case ${DATASET} in
31 |   pascal_voc)
32 |     TRAIN_IMDB="voc_2007_trainval"
33 |     TEST_IMDB="voc_2007_test"
34 |     STEPSIZE="[50000]"
35 |     ITERS=100000
36 |     ANCHORS="[8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   pascal_voc_0712)
40 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
41 |     TEST_IMDB="voc_2007_test"
42 |     STEPSIZE="[80000]"
43 |     ITERS=110000
44 |     ANCHORS="[8,16,32]"
45 |     RATIOS="[0.5,1,2]"
46 |     ;;
47 |   coco)
48 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
49 |     TEST_IMDB="coco_2014_minival"
50 |     STEPSIZE="[350000]"
51 |     ITERS=490000
52 |     ANCHORS="[4,8,16,32]"
53 |     RATIOS="[0.5,1,2]"
54 |     ;;
55 |   *)
56 |     # An unrecognised dataset is an error: report on stderr and fail.
57 |     echo "No dataset given" >&2
58 |     exit 1
59 |     ;;
60 | esac
61 | 
62 | # tee cannot create the log if the directory is missing, so make sure it exists.
63 | mkdir -p experiments/logs
64 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
65 | exec &> >(tee -a "$LOG")
66 | echo Logging output to "$LOG"
67 | 
68 | set +x
69 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
70 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
71 | else
72 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
73 | fi
74 | set -x
75 | 
76 | # Skip training when the final snapshot already exists. NET_FINAL already ends
77 | # in .pth; the old check looked for ${NET_FINAL}.index instead.
78 | # NOTE(review): confirm the trainer writes exactly ${NET_FINAL}.
79 | if [ ! -f "${NET_FINAL}" ]; then
80 |   # --tag is only passed when extra arguments were given.
81 |   TAG_ARGS=()
82 |   if [[ -n ${EXTRA_ARGS_SLUG} ]]; then
83 |     TAG_ARGS=( --tag "${EXTRA_ARGS_SLUG}" )
84 |   fi
85 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
86 |     --weight "data/imagenet_weights/${NET}.pth" \
87 |     --imdb "${TRAIN_IMDB}" \
88 |     --imdbval "${TEST_IMDB}" \
89 |     --iters "${ITERS}" \
90 |     --cfg "experiments/cfgs/${NET}.yml" \
91 |     "${TAG_ARGS[@]}" \
92 |     --net "${NET}" \
93 |     --set ANCHOR_SCALES "${ANCHORS}" ANCHOR_RATIOS "${RATIOS}" \
94 |     TRAIN.STEPSIZE "${STEPSIZE}" "${EXTRA_ARGS_ARRAY[@]}"
95 | fi
96 | 
97 | echo "$@"
98 | ./experiments/scripts/test_faster_rcnn.sh "$@"
99 | 


--------------------------------------------------------------------------------
/data/selective_search_data/train_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Train a detector on the selected dataset, then run the matching test script
 3 | # with the same arguments.
 4 | #
 5 | # Usage: train_faster_rcnn.sh GPU_ID DATASET NET [EXTRA_ARGS...]
 6 | #   GPU_ID      passed to the trainer as CUDA_VISIBLE_DEVICES
 7 | #   DATASET     pascal_voc | pascal_voc_0712 | coco
 8 | #   NET         selects data/imagenet_weights/${NET}.pth and experiments/cfgs/${NET}.yml
 9 | #   EXTRA_ARGS  appended after --set; joined with '_' they also form the run tag
10 | #
11 | # All paths are relative, so invoke this from the repository root.
12 | 
13 | alias time='/usr/bin/time'
14 | 
15 | set -x
16 | set -e
17 | 
18 | export PYTHONUNBUFFERED="True"
19 | 
20 | GPU_ID=$1
21 | DATASET=$2
22 | NET=$3
23 | 
24 | # Keep the extra arguments as an array so each one reaches the trainer intact
25 | # (no re-splitting or glob expansion); the space-joined form feeds the slug.
26 | EXTRA_ARGS_ARRAY=( "${@:4}" )
27 | EXTRA_ARGS="${EXTRA_ARGS_ARRAY[*]}"
28 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
29 | 
30 | case ${DATASET} in
31 |   pascal_voc)
32 |     TRAIN_IMDB="voc_2007_trainval"
33 |     TEST_IMDB="voc_2007_test"
34 |     STEPSIZE="[50000]"
35 |     ITERS=100000
36 |     ANCHORS="[8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   pascal_voc_0712)
40 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
41 |     TEST_IMDB="voc_2007_test"
42 |     STEPSIZE="[80000]"
43 |     ITERS=110000
44 |     ANCHORS="[8,16,32]"
45 |     RATIOS="[0.5,1,2]"
46 |     ;;
47 |   coco)
48 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
49 |     TEST_IMDB="coco_2014_minival"
50 |     STEPSIZE="[350000]"
51 |     ITERS=490000
52 |     ANCHORS="[4,8,16,32]"
53 |     RATIOS="[0.5,1,2]"
54 |     ;;
55 |   *)
56 |     # An unrecognised dataset is an error: report on stderr and fail.
57 |     echo "No dataset given" >&2
58 |     exit 1
59 |     ;;
60 | esac
61 | 
62 | # tee cannot create the log if the directory is missing, so make sure it exists.
63 | mkdir -p experiments/logs
64 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
65 | exec &> >(tee -a "$LOG")
66 | echo Logging output to "$LOG"
67 | 
68 | set +x
69 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
70 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
71 | else
72 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
73 | fi
74 | set -x
75 | 
76 | # Skip training when the final snapshot already exists. NET_FINAL already ends
77 | # in .pth; the old check looked for ${NET_FINAL}.index instead.
78 | # NOTE(review): confirm the trainer writes exactly ${NET_FINAL}.
79 | if [ ! -f "${NET_FINAL}" ]; then
80 |   # --tag is only passed when extra arguments were given.
81 |   TAG_ARGS=()
82 |   if [[ -n ${EXTRA_ARGS_SLUG} ]]; then
83 |     TAG_ARGS=( --tag "${EXTRA_ARGS_SLUG}" )
84 |   fi
85 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
86 |     --weight "data/imagenet_weights/${NET}.pth" \
87 |     --imdb "${TRAIN_IMDB}" \
88 |     --imdbval "${TEST_IMDB}" \
89 |     --iters "${ITERS}" \
90 |     --cfg "experiments/cfgs/${NET}.yml" \
91 |     "${TAG_ARGS[@]}" \
92 |     --net "${NET}" \
93 |     --set ANCHOR_SCALES "${ANCHORS}" ANCHOR_RATIOS "${RATIOS}" \
94 |     TRAIN.STEPSIZE "${STEPSIZE}" "${EXTRA_ARGS_ARRAY[@]}"
95 | fi
96 | 
97 | echo "$@"
98 | ./experiments/scripts/test_faster_rcnn.sh "$@"
99 | 


--------------------------------------------------------------------------------
/experiments/cfgs/change_log.txt:
--------------------------------------------------------------------------------
 1 | 2018/11/22/9:37
 2 | In the vgg16.yml files, The original TEST.NMS value is 0.3 but changed to 0.4 for 
 3 | implementation for WSDNN 
 4 | 
 5 | 2018/11/23/14:19
 6 | In vgg16.yml files, the original POOLING_MODE == crop, however changed to roi_align
 7 | for implementation for WSDNN
 8 | 
 9 | 2018/11/24/8:37
10 | In vgg16.yml file, the original TRAIN.WEIGHT_DECAY == 0.0001, and changed to 0.005
11 | Also ss_boxes whose widths or heights are less than 20 are removed
12 | 
13 | 2018/11/24/9:01
14 | In vgg16.yml files, the TRAIN.SCALES is {600,}, however changed to {480, 576, 688, 864, 1200}
15 | TEST.SCALES is {688, }
16 | 
17 | 2018/11/24/9:23
18 | In vgg16.yml file, the TRAIN.MAX_SIZE is 1000, however changed to 1200
19 | 
20 | 2018/11/24/20:33
21 | In lib/datasets/pascal_voc.py, changed classes from 21 classes to 20 classes
22 | original:
23 | self._classes = ('__background__',  # always index 0
24 |                      'aeroplane', 'bicycle', 'bird', 'boat',
25 |                      'bottle', 'bus', 'car', 'cat', 'chair',
26 |                      'cow', 'diningtable', 'dog', 'horse',
27 |                      'motorbike', 'person', 'pottedplant',
28 |                      'sheep', 'sofa', 'train', 'tvmonitor')
29 | new:
30 | self._classes = (
31 |                      'aeroplane', 'bicycle', 'bird', 'boat',
32 |                      'bottle', 'bus', 'car', 'cat', 'chair',
33 |                      'cow', 'diningtable', 'dog', 'horse',
34 |                      'motorbike', 'person', 'pottedplant',
35 |                      'sheep', 'sofa', 'train', 'tvmonitor')
36 | In network.py, added a score_det_net
37 | 
38 | In vgg16.yml file, changed TRAIN.STEPSIZE from 30000 to 60000
39 | 
40 | 
41 | 2018/11/25/9:59
42 | In vgg16.yml, the learning rate was 0.0001; it was changed to 0.0005.
43 | 
44 | 2018/11/26/11:14
45 | In vgg16.yml, TEST.SCALES was changed from [688] to [480, 576, 688, 864, 1200]
46 | for multi-scale testing.
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/experiments/cfgs/mobile.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: mobile
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   DOUBLE_BIAS: False
13 |   SNAPSHOT_PREFIX: mobile_faster_rcnn
14 | TEST:
15 |   HAS_RPN: True
16 | POOLING_MODE: crop
17 | 


--------------------------------------------------------------------------------
/experiments/cfgs/res101-lg.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101-lg
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   DOUBLE_BIAS: False
13 |   SNAPSHOT_PREFIX: res101_faster_rcnn
14 |   SCALES: [800]
15 |   MAX_SIZE: 1333
16 | TEST:
17 |   HAS_RPN: True
18 |   SCALES: [800]
19 |   MAX_SIZE: 1333
20 |   RPN_POST_NMS_TOP_N: 1000
21 | POOLING_MODE: crop
22 | ANCHOR_SCALES: [2,4,8,16,32]
23 | 


--------------------------------------------------------------------------------
/experiments/cfgs/res101.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   DOUBLE_BIAS: False
13 |   SNAPSHOT_PREFIX: res101_faster_rcnn
14 | TEST:
15 |   HAS_RPN: True
16 | POOLING_MODE: crop
17 | 


--------------------------------------------------------------------------------
/experiments/cfgs/res50.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res50
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   DOUBLE_BIAS: False
13 |   SNAPSHOT_PREFIX: res50_faster_rcnn
14 | TEST:
15 |   HAS_RPN: True
16 | POOLING_MODE: crop
17 | 


--------------------------------------------------------------------------------
/experiments/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: vgg16
 2 | TRAIN:
 3 |   HAS_RPN: False
 4 |   IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: selective_search
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   SNAPSHOT_PREFIX: vgg16_MELM
13 |   LEARNING_RATE: 0.001
14 |   WEIGHT_DECAY: 0.0005
15 |   SCALES: [480, 576, 688, 864, 1200]
16 |   MAX_SIZE: 2000
17 |   STEPSIZE: [50000,]
18 |   MIL_RECURRENT_STEP: 20000
19 |   MIL_RECURRECT_WEIGHT: 0.09
20 | TEST:
21 |   HAS_RPN: False
22 |   PROPOSAL_METHOD: selective_search
23 |   NMS: 0.3
24 |   SCALES: [480, 576, 688, 864, 1200]
25 | POOLING_MODE: roi_align
26 | 


--------------------------------------------------------------------------------
/experiments/scripts/convert_vgg16.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | GPU_ID=$1
 9 | DATASET=$2
10 | NET=vgg16
11 | 
12 | array=( $@ )
13 | len=${#array[@]}
14 | EXTRA_ARGS=${array[@]:2:$len}
15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
16 | 
17 | case ${DATASET} in
18 |   pascal_voc)
19 |     TRAIN_IMDB="voc_2007_trainval"
20 |     TEST_IMDB="voc_2007_test"
21 |     ITERS=70000
22 |     ANCHORS="[8,16,32]"
23 |     RATIOS="[0.5,1,2]"
24 |     ;;
25 |   pascal_voc_0712)
26 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
27 |     TEST_IMDB="voc_2007_test"
28 |     ITERS=110000
29 |     ANCHORS="[8,16,32]"
30 |     RATIOS="[0.5,1,2]"
31 |     ;;
32 |   coco)
33 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
34 |     TEST_IMDB="coco_2014_minival"
35 |     ITERS=490000
36 |     ANCHORS="[4,8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   *)
40 |     echo "No dataset given"  # also reached when DATASET is set but not one of the names above
41 |     exit 1  # a bare 'exit' would return echo's status (0) and report success to the caller
42 |     ;;
43 | esac
44 | 
45 | set +x
46 | NET_FINAL=${NET}_faster_rcnn_iter_${ITERS}
47 | set -x
48 | 
49 | if [ ! -f ${NET_FINAL}.index ]; then
50 |     if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
51 |         CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \
52 |             --snapshot ${NET_FINAL} \
53 |             --imdb ${TRAIN_IMDB} \
54 |             --iters ${ITERS} \
55 |             --cfg experiments/cfgs/${NET}.yml \
56 |             --tag ${EXTRA_ARGS_SLUG} \
57 |             --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS}
58 |     else
59 |         CUDA_VISIBLE_DEVICES=${GPU_ID} time python ./tools/convert_from_depre.py \
60 |             --snapshot ${NET_FINAL} \
61 |             --imdb ${TRAIN_IMDB} \
62 |             --iters ${ITERS} \
63 |             --cfg experiments/cfgs/${NET}.yml \
64 |             --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} ${EXTRA_ARGS}
65 |     fi
66 | fi
67 | 
68 | 


--------------------------------------------------------------------------------
/experiments/scripts/test_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | GPU_ID=$1
 9 | DATASET=$2
10 | NET=$3
11 | 
12 | array=( $@ )
13 | len=${#array[@]}
14 | EXTRA_ARGS=${array[@]:3:$len}
15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
16 | 
17 | case ${DATASET} in
18 |   pascal_voc)
19 |     TRAIN_IMDB="voc_2007_trainval"
20 |     TEST_IMDB="voc_2007_test"
21 |     ITERS=100000
22 |     ANCHORS="[8,16,32]"
23 |     RATIOS="[0.5,1,2]"
24 |     ;;
25 |   pascal_voc_0712)
26 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
27 |     TEST_IMDB="voc_2007_test"
28 |     ITERS=110000
29 |     ANCHORS="[8,16,32]"
30 |     RATIOS="[0.5,1,2]"
31 |     ;;
32 |   coco)
33 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
34 |     TEST_IMDB="coco_2014_minival"
35 |     ITERS=490000
36 |     ANCHORS="[4,8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   *)
40 |     echo "No dataset given"  # also reached when DATASET is set but not one of the names above
41 |     exit 1  # a bare 'exit' would return echo's status (0) and report success to the caller
42 |     ;;
43 | esac
44 | 
45 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
46 | exec &> >(tee -a "$LOG")
47 | echo Logging output to "$LOG"
48 | 
49 | set +x
50 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
51 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_MELM_iter_${ITERS}.pth
52 | else
53 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_MELM_iter_${ITERS}.pth
54 | fi
55 | set -x
56 | 
57 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
58 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \
59 |     --imdb ${TEST_IMDB} \
60 |     --model ${NET_FINAL} \
61 |     --cfg experiments/cfgs/${NET}.yml \
62 |     --tag ${EXTRA_ARGS_SLUG} \
63 |     --net ${NET} \
64 |     --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
65 |           ${EXTRA_ARGS}
66 | else
67 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \
68 |     --imdb ${TEST_IMDB} \
69 |     --model ${NET_FINAL} \
70 |     --cfg experiments/cfgs/${NET}.yml \
71 |     --net ${NET} \
72 |     --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
73 |           ${EXTRA_ARGS}
74 | fi
75 | 
76 | 


--------------------------------------------------------------------------------
/experiments/scripts/test_faster_rcnn_notime.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | GPU_ID=$1
 9 | DATASET=$2
10 | NET=$3
11 | 
12 | array=( $@ )
13 | len=${#array[@]}
14 | EXTRA_ARGS=${array[@]:3:$len}
15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
16 | 
17 | case ${DATASET} in
18 |   pascal_voc)
19 |     TRAIN_IMDB="voc_2007_trainval"
20 |     TEST_IMDB="voc_2007_test"
21 |     ITERS=70000
22 |     ANCHORS="[8,16,32]"
23 |     RATIOS="[0.5,1,2]"
24 |     ;;
25 |   pascal_voc_0712)
26 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
27 |     TEST_IMDB="voc_2007_test"
28 |     ITERS=110000
29 |     ANCHORS="[8,16,32]"
30 |     RATIOS="[0.5,1,2]"
31 |     ;;
32 |   coco)
33 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
34 |     TEST_IMDB="coco_2014_minival"
35 |     ITERS=490000
36 |     ANCHORS="[4,8,16,32]"
37 |     RATIOS="[0.5,1,2]"
38 |     ;;
39 |   *)
40 |     echo "No dataset given"  # also reached when DATASET is set but not one of the names above
41 |     exit 1  # a bare 'exit' would return echo's status (0) and report success to the caller
42 |     ;;
43 | esac
44 | 
45 | LOG="experiments/logs/test_${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
46 | exec &> >(tee -a "$LOG")
47 | echo Logging output to "$LOG"
48 | 
49 | set +x
50 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
51 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
52 | else
53 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
54 | fi
55 | set -x
56 | 
57 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
58 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \
59 |     --imdb ${TEST_IMDB} \
60 |     --model ${NET_FINAL} \
61 |     --cfg experiments/cfgs/${NET}.yml \
62 |     --tag ${EXTRA_ARGS_SLUG} \
63 |     --net ${NET} \
64 |     --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
65 |           ${EXTRA_ARGS}
66 | else
67 |   CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/test_net.py \
68 |     --imdb ${TEST_IMDB} \
69 |     --model ${NET_FINAL} \
70 |     --cfg experiments/cfgs/${NET}.yml \
71 |     --net ${NET} \
72 |     --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
73 |           ${EXTRA_ARGS}
74 | fi
75 | 
76 | 


--------------------------------------------------------------------------------
/experiments/scripts/train_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | alias time='/usr/bin/time'
 4 | 
 5 | set -x
 6 | set -e
 7 | 
 8 | export PYTHONUNBUFFERED="True"
 9 | 
10 | GPU_ID=$1
11 | DATASET=$2
12 | NET=$3
13 | 
14 | array=( $@ )
15 | len=${#array[@]}
16 | EXTRA_ARGS=${array[@]:3:$len}
17 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
18 | 
19 | case ${DATASET} in
20 |   pascal_voc)
21 |     TRAIN_IMDB="voc_2007_trainval"
22 |     TEST_IMDB="voc_2007_test"
23 |     STEPSIZE="[50000]"
24 |     ITERS=100000
25 |     ANCHORS="[8,16,32]"
26 |     RATIOS="[0.5,1,2]"
27 |     ;;
28 |   pascal_voc_0712)
29 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
30 |     TEST_IMDB="voc_2007_test"
31 |     STEPSIZE="[80000]"
32 |     ITERS=110000
33 |     ANCHORS="[8,16,32]"
34 |     RATIOS="[0.5,1,2]"
35 |     ;;
36 |   coco)
37 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
38 |     TEST_IMDB="coco_2014_minival"
39 |     STEPSIZE="[350000]"
40 |     ITERS=490000
41 |     ANCHORS="[4,8,16,32]"
42 |     RATIOS="[0.5,1,2]"
43 |     ;;
44 |   *)
45 |     echo "No dataset given"  # also reached when DATASET is set but not one of the names above
46 |     exit 1  # a bare 'exit' would return echo's status (0) and report success to the caller
47 |     ;;
48 | esac
49 | 
50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
51 | exec &> >(tee -a "$LOG")
52 | echo Logging output to "$LOG"
53 | 
54 | set +x
55 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
56 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_MELM_iter_${ITERS}.pth
57 | else
58 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_MELM_iter_${ITERS}.pth
59 | fi
60 | set -x
61 | 
62 | if [ ! -f ${NET_FINAL}.index ]; then
63 |   if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
64 |     CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
65 |       --weight data/imagenet_weights/${NET}.pth \
66 |       --imdb ${TRAIN_IMDB} \
67 |       --imdbval ${TEST_IMDB} \
68 |       --iters ${ITERS} \
69 |       --cfg experiments/cfgs/${NET}.yml \
70 |       --tag ${EXTRA_ARGS_SLUG} \
71 |       --net ${NET} \
72 |       --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
73 |       TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
74 |   else
75 |     CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
76 |       --weight data/imagenet_weights/${NET}.pth \
77 |       --imdb ${TRAIN_IMDB} \
78 |       --imdbval ${TEST_IMDB} \
79 |       --iters ${ITERS} \
80 |       --cfg experiments/cfgs/${NET}.yml \
81 |       --net ${NET} \
82 |       --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
83 |       TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
84 |   fi
85 | fi
86 | 
87 | echo $@
88 | ./experiments/scripts/test_faster_rcnn.sh $@
89 | 


--------------------------------------------------------------------------------
/experiments/scripts/train_faster_rcnn_notime.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | set -e
 5 | 
 6 | export PYTHONUNBUFFERED="True"
 7 | 
 8 | GPU_ID=$1
 9 | DATASET=$2
10 | NET=$3
11 | 
12 | array=( $@ )
13 | len=${#array[@]}
14 | EXTRA_ARGS=${array[@]:3:$len}
15 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
16 | 
17 | case ${DATASET} in
18 |   pascal_voc)
19 |     TRAIN_IMDB="voc_2007_trainval"
20 |     TEST_IMDB="voc_2007_test"
21 |     STEPSIZE="[50000]"
22 |     ITERS=70000
23 |     ANCHORS="[8,16,32]"
24 |     RATIOS="[0.5,1,2]"
25 |     ;;
26 |   pascal_voc_0712)
27 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
28 |     TEST_IMDB="voc_2007_test"
29 |     STEPSIZE="[80000]"
30 |     ITERS=110000
31 |     ANCHORS="[8,16,32]"
32 |     RATIOS="[0.5,1,2]"
33 |     ;;
34 |   coco)
35 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
36 |     TEST_IMDB="coco_2014_minival"
37 |     STEPSIZE="[900000]"
38 |     ITERS=1190000
39 |     ANCHORS="[4,8,16,32]"
40 |     RATIOS="[0.5,1,2]"
41 |     ;;
42 |   *)
43 |     echo "No dataset given"  # also reached when DATASET is set but not one of the names above
44 |     exit 1  # a bare 'exit' would return echo's status (0) and report success to the caller
45 |     ;;
46 | esac
47 | 
48 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
49 | exec &> >(tee -a "$LOG")
50 | echo Logging output to "$LOG"
51 | 
52 | set +x
53 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
54 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
55 | else
56 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
57 | fi
58 | set -x
59 | 
60 | if [ ! -f ${NET_FINAL}.index ]; then
61 |   if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
62 |     CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
63 |       --weight data/imagenet_weights/${NET}.pth \
64 |       --imdb ${TRAIN_IMDB} \
65 |       --imdbval ${TEST_IMDB} \
66 |       --iters ${ITERS} \
67 |       --cfg experiments/cfgs/${NET}.yml \
68 |       --tag ${EXTRA_ARGS_SLUG} \
69 |       --net ${NET} \
70 |       --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
71 |       TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
72 |   else
73 |     CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
74 |       --weight data/imagenet_weights/${NET}.pth \
75 |       --imdb ${TRAIN_IMDB} \
76 |       --imdbval ${TEST_IMDB} \
77 |       --iters ${ITERS} \
78 |       --cfg experiments/cfgs/${NET}.yml \
79 |       --net ${NET} \
80 |       --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
81 |       TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
82 |   fi
83 | fi
84 | 
85 | ./experiments/scripts/test_faster_rcnn_notime.sh $@
86 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
 1 | function VOCopts = get_voc_opts(path)
 2 | 
 3 | tmp = pwd;
 4 | cd(path);
 5 | try
 6 |   addpath('VOCcode');
 7 |   VOCinit;
 8 | catch
 9 |   rmpath('VOCcode');
10 |   cd(tmp);
11 |   error(sprintf('VOCcode directory not found under %s', path));
12 | end
13 | rmpath('VOCcode');
14 | cd(tmp);
15 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
 1 | function res = voc_eval(path, comp_id, test_set, output_dir)
 2 | 
 3 | VOCopts = get_voc_opts(path);
 4 | VOCopts.testset = test_set;
 5 | 
 6 | for i = 1:length(VOCopts.classes)
 7 |   cls = VOCopts.classes{i};
 8 |   res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
 9 | end
10 | 
11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
12 | fprintf('Results:\n');
13 | aps = [res(:).ap]';
14 | fprintf('%.1f\n', aps * 100);
15 | fprintf('%.1f\n', mean(aps) * 100);
16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 | 
18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
19 | 
20 | test_set = VOCopts.testset;
21 | year = VOCopts.dataset(4:end);
22 | 
23 | addpath(fullfile(VOCopts.datadir, 'VOCcode'));
24 | 
25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls);
26 | 
27 | recall = [];
28 | prec = [];
29 | ap = 0;
30 | ap_auc = 0;
31 | 
32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
33 | if do_eval
34 |   % Bug in VOCevaldet requires that tic has been called first
35 |   tic;
36 |   [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
37 |   ap_auc = xVOCap(recall, prec);
38 | 
39 |   % force plot limits
40 |   ylim([0 1]);
41 |   xlim([0 1]);
42 | 
43 |   print(gcf, '-djpeg', '-r0', ...
44 |         [output_dir '/' cls '_pr.jpg']);
45 | end
46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);
47 | 
48 | res.recall = recall;
49 | res.prec = prec;
50 | res.ap = ap;
51 | res.ap_auc = ap_auc;
52 | 
53 | save([output_dir '/' cls '_pr.mat'], ...
54 |      'res', 'recall', 'prec', 'ap', 'ap_auc');
55 | 
56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | mrec=[0 ; rec ; 1];
 5 | mpre=[0 ; prec ; 0];
 6 | for i=numel(mpre)-1:-1:1
 7 |     mpre(i)=max(mpre(i),mpre(i+1));
 8 | end
 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 | 


--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/coco.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/coco.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/coco.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/coco.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/ds_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/ds_utils.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/ds_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/ds_utils.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/factory.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/factory.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/factory.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/factory.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/imdb.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/imdb.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/imdb.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/imdb.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/pascal_voc.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/pascal_voc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/pascal_voc.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/voc_eval.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/voc_eval.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/datasets/__pycache__/voc_eval.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/datasets/__pycache__/voc_eval.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast/er R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Ross Girshick
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | 
12 | 
13 | def unique_boxes(boxes, scale=1.0):
14 |   """Return indices of unique boxes."""
15 |   v = np.array([1, 1e3, 1e6, 1e9])
16 |   hashes = np.round(boxes * scale).dot(v)
17 |   _, index = np.unique(hashes, return_index=True)
18 |   return np.sort(index)
19 | 
20 | 
21 | def xywh_to_xyxy(boxes):
22 |   """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
23 |   return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1))
24 | 
25 | 
26 | def xyxy_to_xywh(boxes):
27 |   """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
28 |   return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1))
29 | 
30 | 
31 | def validate_boxes(boxes, width=0, height=0):
32 |   """Check that a set of boxes are valid."""
33 |   x1 = boxes[:, 0]
34 |   y1 = boxes[:, 1]
35 |   x2 = boxes[:, 2]
36 |   y2 = boxes[:, 3]
37 |   assert (x1 >= 0).all()
38 |   assert (y1 >= 0).all()
39 |   assert (x2 >= x1).all()
40 |   assert (y2 >= y1).all()
41 |   assert (x2 < width).all()
42 |   assert (y2 < height).all()
43 | 
44 | 
45 | def filter_small_boxes(boxes, min_size):
46 |   w = boxes[:, 2] - boxes[:, 0]
47 |   h = boxes[:, 3] - boxes[:, 1]
48 |   keep = np.where((w >= min_size) & (h > min_size))[0]
49 |   return keep
50 | 


--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Factory method for easily getting imdbs by name."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | __sets = {}
14 | from datasets.pascal_voc import pascal_voc
15 | from datasets.coco import coco
16 | 
17 | 
18 | import numpy as np
19 | 
20 | # Set up voc_<year>_<split> 
21 | for year in ['2007', '2012']:
22 |   for split in ['train', 'val', 'trainval', 'test']:
23 |     name = 'voc_{}_{}'.format(year, split)
24 |     __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))
25 | 
26 | for year in ['2007', '2012']:
27 |   for split in ['train', 'val', 'trainval', 'test']:
28 |     name = 'voc_{}_{}_diff'.format(year, split)
29 |     __sets[name] = (lambda split=split, year=year: pascal_voc(split, year, use_diff=True))
30 | 
31 | 
32 | # Set up coco_2014_<split>
33 | for year in ['2014']:
34 |   for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
35 |     name = 'coco_{}_{}'.format(year, split)
36 |     __sets[name] = (lambda split=split, year=year: coco(split, year))
37 | 
38 | # Set up coco_2015_<split>
39 | for year in ['2015']:
40 |   for split in ['test', 'test-dev']:
41 |     name = 'coco_{}_{}'.format(year, split)
42 |     __sets[name] = (lambda split=split, year=year: coco(split, year))
43 | 
44 | 
45 | 
46 | def get_imdb(name):
47 |   """Get an imdb (image database) by name."""
48 |   if name not in __sets:
49 |     raise KeyError('Unknown dataset: {}'.format(name))
50 |   return __sets[name]()
51 | 
52 | 
53 | def list_imdbs():
54 |   """List all registered imdbs."""
55 |   return list(__sets.keys())
56 | 


--------------------------------------------------------------------------------
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from
 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
 6 | so that it's consistent with those computed by Jan Hosang (see:
 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
 8 |   computing/research/object-recognition-and-scene-understanding/how-
 9 |   good-are-detection-proposals-really/)
10 | 
11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
13 | """
14 | 
15 | def munge(src_dir):
16 |     # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
17 |     # want:      ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
18 | 
19 |     files = os.listdir(src_dir)
20 |     for fn in files:
21 |         base, ext = os.path.splitext(fn)
22 |         # first 14 chars / first 22 chars / all chars + .mat
23 |         # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
24 |         first = base[:14]
25 |         second = base[:22]
26 |         dst_dir = os.path.join('MCG', 'mat', first, second)
27 |         if not os.path.exists(dst_dir):
28 |             os.makedirs(dst_dir)
29 |         src = os.path.join(src_dir, fn)
30 |         dst = os.path.join(dst_dir, fn)
31 |         print 'MV: {} -> {}'.format(src, dst)
32 |         os.rename(src, dst)
33 | 
34 | if __name__ == '__main__':
35 |     # src_dir should look something like:
36 |     #  src_dir = 'MCG-COCO-val2014-boxes'
37 |     src_dir = sys.argv[1]
38 |     munge(src_dir)
39 | 


--------------------------------------------------------------------------------
/lib/datasets/voc_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import xml.etree.ElementTree as ET
 11 | import os
 12 | import pickle
 13 | import numpy as np
 14 | 
 15 | def parse_rec(filename):
 16 |   """ Parse a PASCAL VOC xml file """
 17 |   tree = ET.parse(filename)
 18 |   objects = []
 19 |   for obj in tree.findall('object'):
 20 |     obj_struct = {}
 21 |     obj_struct['name'] = obj.find('name').text
 22 |     obj_struct['pose'] = obj.find('pose').text
 23 |     obj_struct['truncated'] = int(obj.find('truncated').text)
 24 |     obj_struct['difficult'] = int(obj.find('difficult').text)
 25 |     bbox = obj.find('bndbox')
 26 |     obj_struct['bbox'] = [int(bbox.find('xmin').text),
 27 |                           int(bbox.find('ymin').text),
 28 |                           int(bbox.find('xmax').text),
 29 |                           int(bbox.find('ymax').text)]
 30 |     objects.append(obj_struct)
 31 | 
 32 |   return objects
 33 | 
 34 | 
 35 | def voc_ap(rec, prec, use_07_metric=False):
 36 |   """ ap = voc_ap(rec, prec, [use_07_metric])
 37 |   Compute VOC AP given precision and recall.
 38 |   If use_07_metric is true, uses the
 39 |   VOC 07 11 point method (default:False).
 40 |   """
 41 |   if use_07_metric:
 42 |     # 11 point metric
 43 |     ap = 0.
 44 |     for t in np.arange(0., 1.1, 0.1):
 45 |       if np.sum(rec >= t) == 0:
 46 |         p = 0
 47 |       else:
 48 |         p = np.max(prec[rec >= t])
 49 |       ap = ap + p / 11.
 50 |   else:
 51 |     # correct AP calculation
 52 |     # first append sentinel values at the end
 53 |     mrec = np.concatenate(([0.], rec, [1.]))
 54 |     mpre = np.concatenate(([0.], prec, [0.]))
 55 | 
 56 |     # compute the precision envelope
 57 |     for i in range(mpre.size - 1, 0, -1):
 58 |       mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 59 | 
 60 |     # to calculate area under PR curve, look for points
 61 |     # where X axis (recall) changes value
 62 |     i = np.where(mrec[1:] != mrec[:-1])[0]
 63 | 
 64 |     # and sum (\Delta recall) * prec
 65 |     ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 66 |   return ap
 67 | 
 68 | 
 69 | def voc_eval(detpath,
 70 |              annopath,
 71 |              imagesetfile,
 72 |              classname,
 73 |              cachedir,
 74 |              ovthresh=0.5,
 75 |              use_07_metric=False,
 76 |              use_diff=False):
 77 |   """rec, prec, ap = voc_eval(detpath,
 78 |                               annopath,
 79 |                               imagesetfile,
 80 |                               classname,
 81 |                               [ovthresh],
 82 |                               [use_07_metric])
 83 | 
 84 |   Top level function that does the PASCAL VOC evaluation.
 85 | 
 86 |   detpath: Path to detections
 87 |       detpath.format(classname) should produce the detection results file.
 88 |   annopath: Path to annotations
 89 |       annopath.format(imagename) should be the xml annotations file.
 90 |   imagesetfile: Text file containing the list of images, one image per line.
 91 |   classname: Category name (duh)
 92 |   cachedir: Directory for caching the annotations
 93 |   [ovthresh]: Overlap threshold (default = 0.5)
 94 |   [use_07_metric]: Whether to use VOC07's 11 point AP computation
 95 |       (default False)
 96 |   """
 97 |   # assumes detections are in detpath.format(classname)
 98 |   # assumes annotations are in annopath.format(imagename)
 99 |   # assumes imagesetfile is a text file with each line an image name
100 |   # cachedir caches the annotations in a pickle file
101 | 
102 |   # first load gt
103 |   if not os.path.isdir(cachedir):
104 |     os.mkdir(cachedir)
105 |   cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
106 |   # read list of images
107 |   with open(imagesetfile, 'r') as f:
108 |     lines = f.readlines()
109 |   imagenames = [x.strip() for x in lines]
110 | 
111 |   if not os.path.isfile(cachefile):
112 |     # load annotations
113 |     recs = {}
114 |     for i, imagename in enumerate(imagenames):
115 |       recs[imagename] = parse_rec(annopath.format(imagename))
116 |       if i % 100 == 0:
117 |         print('Reading annotation for {:d}/{:d}'.format(
118 |           i + 1, len(imagenames)))
119 |     # save
120 |     print('Saving cached annotations to {:s}'.format(cachefile))
121 |     with open(cachefile, 'wb') as f:
122 |       pickle.dump(recs, f)
123 |   else:
124 |     # load
125 |     with open(cachefile, 'rb') as f:
126 |       try:
127 |         recs = pickle.load(f)
128 |       except:
129 |         recs = pickle.load(f, encoding='bytes')
130 | 
131 |   # extract gt objects for this class
132 |   class_recs = {}
133 |   npos = 0
134 |   for imagename in imagenames:
135 |     R = [obj for obj in recs[imagename] if obj['name'] == classname]
136 |     bbox = np.array([x['bbox'] for x in R])
137 |     if use_diff:
138 |       difficult = np.array([False for x in R]).astype(np.bool)
139 |     else:
140 |       difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
141 |     det = [False] * len(R)
142 |     npos = npos + sum(~difficult)
143 |     class_recs[imagename] = {'bbox': bbox,
144 |                              'difficult': difficult,
145 |                              'det': det}
146 | 
147 |   # read dets
148 |   detfile = detpath.format(classname)
149 |   with open(detfile, 'r') as f:
150 |     lines = f.readlines()
151 | 
152 |   splitlines = [x.strip().split(' ') for x in lines]
153 |   image_ids = [x[0] for x in splitlines]
154 |   confidence = np.array([float(x[1]) for x in splitlines])
155 |   BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
156 | 
157 |   nd = len(image_ids)
158 |   tp = np.zeros(nd)
159 |   fp = np.zeros(nd)
160 | 
161 |   if BB.shape[0] > 0:
162 |     # sort by confidence
163 |     sorted_ind = np.argsort(-confidence)
164 |     sorted_scores = np.sort(-confidence)
165 |     BB = BB[sorted_ind, :]
166 |     image_ids = [image_ids[x] for x in sorted_ind]
167 | 
168 |     # go down dets and mark TPs and FPs
169 |     for d in range(nd):
170 |       R = class_recs[image_ids[d]]
171 |       bb = BB[d, :].astype(float)
172 |       ovmax = -np.inf
173 |       BBGT = R['bbox'].astype(float)
174 | 
175 |       if BBGT.size > 0:
176 |         # compute overlaps
177 |         # intersection
178 |         ixmin = np.maximum(BBGT[:, 0], bb[0])
179 |         iymin = np.maximum(BBGT[:, 1], bb[1])
180 |         ixmax = np.minimum(BBGT[:, 2], bb[2])
181 |         iymax = np.minimum(BBGT[:, 3], bb[3])
182 |         iw = np.maximum(ixmax - ixmin + 1., 0.)
183 |         ih = np.maximum(iymax - iymin + 1., 0.)
184 |         inters = iw * ih
185 | 
186 |         # union
187 |         uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
188 |                (BBGT[:, 2] - BBGT[:, 0] + 1.) *
189 |                (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
190 | 
191 |         overlaps = inters / uni
192 |         ovmax = np.max(overlaps)
193 |         jmax = np.argmax(overlaps)
194 | 
195 |       if ovmax > ovthresh:
196 |         if not R['difficult'][jmax]:
197 |           if not R['det'][jmax]:
198 |             tp[d] = 1.
199 |             R['det'][jmax] = 1
200 |           else:
201 |             fp[d] = 1.
202 |       else:
203 |         fp[d] = 1.
204 | 
205 |   # compute precision recall
206 |   fp = np.cumsum(fp)
207 |   tp = np.cumsum(tp)
208 |   rec = tp / float(npos)
209 |   # avoid divide by zero in case the first detection matches a difficult
210 |   # ground truth
211 |   prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
212 |   ap = voc_ap(rec, prec, use_07_metric)
213 | 
214 |   return rec, prec, ap
215 | 


--------------------------------------------------------------------------------
/lib/layer_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/anchor_target_layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/anchor_target_layer.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/anchor_target_layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/anchor_target_layer.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/generate_anchors.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/generate_anchors.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/generate_anchors.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/generate_anchors.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/proposal_layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_layer.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/proposal_layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_layer.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/proposal_target_layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_target_layer.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/proposal_target_layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_target_layer.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/proposal_top_layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_top_layer.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/proposal_top_layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/proposal_top_layer.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/snippets.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/snippets.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/__pycache__/snippets.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/__pycache__/snippets.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/anchor_target_layer.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Xinlei Chen
  6 | # --------------------------------------------------------
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import os
 12 | from model.config import cfg
 13 | import numpy as np
 14 | import numpy.random as npr
 15 | from utils.bbox import bbox_overlaps
 16 | from model.bbox_transform import bbox_transform
 17 | import torch
 18 | 
 19 | 
 20 | def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
 21 |   """Same as the anchor target layer in original Fast/er RCNN """
 22 |   A = num_anchors
 23 |   total_anchors = all_anchors.shape[0]
 24 |   K = total_anchors / num_anchors
 25 | 
 26 |   # allow boxes to sit over the edge by a small amount
 27 |   _allowed_border = 0
 28 | 
 29 |   # map of shape (..., H, W)
 30 |   height, width = rpn_cls_score.shape[1:3]
 31 | 
 32 |   # only keep anchors inside the image
 33 |   inds_inside = np.where(
 34 |     (all_anchors[:, 0] >= -_allowed_border) &
 35 |     (all_anchors[:, 1] >= -_allowed_border) &
 36 |     (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
 37 |     (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
 38 |   )[0]
 39 | 
 40 |   # keep only inside anchors
 41 |   anchors = all_anchors[inds_inside, :]
 42 | 
 43 |   # label: 1 is positive, 0 is negative, -1 is dont care
 44 |   labels = np.empty((len(inds_inside),), dtype=np.float32)
 45 |   labels.fill(-1)
 46 | 
 47 |   # overlaps between the anchors and the gt boxes
 48 |   # overlaps (ex, gt)
 49 |   overlaps = bbox_overlaps(
 50 |     np.ascontiguousarray(anchors, dtype=np.float),
 51 |     np.ascontiguousarray(gt_boxes, dtype=np.float))
 52 |   argmax_overlaps = overlaps.argmax(axis=1)
 53 |   max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
 54 |   gt_argmax_overlaps = overlaps.argmax(axis=0)
 55 |   gt_max_overlaps = overlaps[gt_argmax_overlaps,
 56 |                              np.arange(overlaps.shape[1])]
 57 |   gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
 58 | 
 59 |   if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
 60 |     # assign bg labels first so that positive labels can clobber them
 61 |     # first set the negatives
 62 |     labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
 63 | 
 64 |   # fg label: for each gt, anchor with highest overlap
 65 |   labels[gt_argmax_overlaps] = 1
 66 | 
 67 |   # fg label: above threshold IOU
 68 |   labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
 69 | 
 70 |   if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
 71 |     # assign bg labels last so that negative labels can clobber positives
 72 |     labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
 73 | 
 74 |   # subsample positive labels if we have too many
 75 |   num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
 76 |   fg_inds = np.where(labels == 1)[0]
 77 |   if len(fg_inds) > num_fg:
 78 |     disable_inds = npr.choice(
 79 |       fg_inds, size=(len(fg_inds) - num_fg), replace=False)
 80 |     labels[disable_inds] = -1
 81 | 
 82 |   # subsample negative labels if we have too many
 83 |   num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
 84 |   bg_inds = np.where(labels == 0)[0]
 85 |   if len(bg_inds) > num_bg:
 86 |     disable_inds = npr.choice(
 87 |       bg_inds, size=(len(bg_inds) - num_bg), replace=False)
 88 |     labels[disable_inds] = -1
 89 | 
 90 |   bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
 91 |   bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
 92 | 
 93 |   bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
 94 |   # only the positive ones have regression targets
 95 |   bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
 96 | 
 97 |   bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
 98 |   if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
 99 |     # uniform weighting of examples (given non-uniform sampling)
100 |     num_examples = np.sum(labels >= 0)
101 |     positive_weights = np.ones((1, 4)) * 1.0 / num_examples
102 |     negative_weights = np.ones((1, 4)) * 1.0 / num_examples
103 |   else:
104 |     assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
105 |             (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
106 |     positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
107 |                         np.sum(labels == 1))
108 |     negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
109 |                         np.sum(labels == 0))
110 |   bbox_outside_weights[labels == 1, :] = positive_weights
111 |   bbox_outside_weights[labels == 0, :] = negative_weights
112 | 
113 |   # map up to original set of anchors
114 |   labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
115 |   bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
116 |   bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
117 |   bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
118 | 
119 |   # labels
120 |   labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
121 |   labels = labels.reshape((1, 1, A * height, width))
122 |   rpn_labels = labels
123 | 
124 |   # bbox_targets
125 |   bbox_targets = bbox_targets \
126 |     .reshape((1, height, width, A * 4))
127 | 
128 |   rpn_bbox_targets = bbox_targets
129 |   # bbox_inside_weights
130 |   bbox_inside_weights = bbox_inside_weights \
131 |     .reshape((1, height, width, A * 4))
132 | 
133 |   rpn_bbox_inside_weights = bbox_inside_weights
134 | 
135 |   # bbox_outside_weights
136 |   bbox_outside_weights = bbox_outside_weights \
137 |     .reshape((1, height, width, A * 4))
138 | 
139 |   rpn_bbox_outside_weights = bbox_outside_weights
140 |   return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
141 | 
142 | 
143 | def _unmap(data, count, inds, fill=0):
144 |   """ Unmap a subset of item (data) back to the original set of items (of
145 |   size count) """
146 |   if len(data.shape) == 1:
147 |     ret = np.empty((count,), dtype=np.float32)
148 |     ret.fill(fill)
149 |     ret[inds] = data
150 |   else:
151 |     ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
152 |     ret.fill(fill)
153 |     ret[inds, :] = data
154 |   return ret
155 | 
156 | 
def _compute_targets(ex_rois, gt_rois):
  """Compute bounding-box regression targets for an image.

  ex_rois is an (N, 4) numpy array and gt_rois an (N, 5) numpy array; only
  the first four columns of gt_rois are passed on to bbox_transform. The
  result is converted back to a numpy array.
  """
  num_ex, ex_cols = ex_rois.shape[0], ex_rois.shape[1]
  assert num_ex == gt_rois.shape[0]
  assert ex_cols == 4
  assert gt_rois.shape[1] == 5

  ex_tensor = torch.from_numpy(ex_rois)
  gt_tensor = torch.from_numpy(gt_rois[:, :4])
  deltas = bbox_transform(ex_tensor, gt_tensor)
  return deltas.numpy()
165 | 


--------------------------------------------------------------------------------
/lib/layer_utils/generate_anchors.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Sean Bell
  6 | # --------------------------------------------------------
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import numpy as np
 12 | 
 13 | 
 14 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
 15 | #
 16 | #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
 17 | #    >> anchors
 18 | #
 19 | #    anchors =
 20 | #
 21 | #       -83   -39   100    56
 22 | #      -175   -87   192   104
 23 | #      -359  -183   376   200
 24 | #       -55   -55    72    72
 25 | #      -119  -119   136   136
 26 | #      -247  -247   264   264
 27 | #       -35   -79    52    96
 28 | #       -79  -167    96   184
 29 | #      -167  -343   184   360
 30 | 
 31 | # array([[ -83.,  -39.,  100.,   56.],
 32 | #       [-175.,  -87.,  192.,  104.],
 33 | #       [-359., -183.,  376.,  200.],
 34 | #       [ -55.,  -55.,   72.,   72.],
 35 | #       [-119., -119.,  136.,  136.],
 36 | #       [-247., -247.,  264.,  264.],
 37 | #       [ -35.,  -79.,   52.,   96.],
 38 | #       [ -79., -167.,   96.,  184.],
 39 | #       [-167., -343.,  184.,  360.]])
 40 | 
 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
 42 |                      scales=2 ** np.arange(3, 6)):
 43 |   """
 44 |   Generate anchor (reference) windows by enumerating aspect ratios X
 45 |   scales wrt a reference (0, 0, 15, 15) window.
 46 |   """
 47 | 
 48 |   base_anchor = np.array([1, 1, base_size, base_size]) - 1
 49 |   ratio_anchors = _ratio_enum(base_anchor, ratios)
 50 |   anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
 51 |                        for i in range(ratio_anchors.shape[0])])
 52 |   return anchors
 53 | 
 54 | 
 55 | def _whctrs(anchor):
 56 |   """
 57 |   Return width, height, x center, and y center for an anchor (window).
 58 |   """
 59 | 
 60 |   w = anchor[2] - anchor[0] + 1
 61 |   h = anchor[3] - anchor[1] + 1
 62 |   x_ctr = anchor[0] + 0.5 * (w - 1)
 63 |   y_ctr = anchor[1] + 0.5 * (h - 1)
 64 |   return w, h, x_ctr, y_ctr
 65 | 
 66 | 
 67 | def _mkanchors(ws, hs, x_ctr, y_ctr):
 68 |   """
 69 |   Given a vector of widths (ws) and heights (hs) around a center
 70 |   (x_ctr, y_ctr), output a set of anchors (windows).
 71 |   """
 72 | 
 73 |   ws = ws[:, np.newaxis]
 74 |   hs = hs[:, np.newaxis]
 75 |   anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
 76 |                        y_ctr - 0.5 * (hs - 1),
 77 |                        x_ctr + 0.5 * (ws - 1),
 78 |                        y_ctr + 0.5 * (hs - 1)))
 79 |   return anchors
 80 | 
 81 | 
 82 | def _ratio_enum(anchor, ratios):
 83 |   """
 84 |   Enumerate a set of anchors for each aspect ratio wrt an anchor.
 85 |   """
 86 | 
 87 |   w, h, x_ctr, y_ctr = _whctrs(anchor)
 88 |   size = w * h
 89 |   size_ratios = size / ratios
 90 |   ws = np.round(np.sqrt(size_ratios))
 91 |   hs = np.round(ws * ratios)
 92 |   anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 93 |   return anchors
 94 | 
 95 | 
 96 | def _scale_enum(anchor, scales):
 97 |   """
 98 |   Enumerate a set of anchors for each scale wrt an anchor.
 99 |   """
100 | 
101 |   w, h, x_ctr, y_ctr = _whctrs(anchor)
102 |   ws = w * scales
103 |   hs = h * scales
104 |   anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
105 |   return anchors
106 | 
107 | 
if __name__ == '__main__':
  # Manual smoke test: time one call to generate_anchors() with its default
  # arguments, print the elapsed seconds and the resulting anchors, then open
  # an interactive IPython shell.
  import time

  t = time.time()
  a = generate_anchors()
  print(time.time() - t)
  print(a)
  # IPython is imported only here, after the results have been printed.
  from IPython import embed;

  embed()
118 | 


--------------------------------------------------------------------------------
/lib/layer_utils/proposal_layer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Faster R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Ross Girshick and Xinlei Chen
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | from model.config import cfg
12 | from model.bbox_transform import bbox_transform_inv, clip_boxes
13 | from model.nms_wrapper import nms
14 | 
15 | import torch
16 | 
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """A simplified version compared to fast/er RCNN
     For details please see the technical report

  Decodes the RPN box deltas against the anchors, clips the boxes to the
  image, keeps the highest-scoring ones, applies NMS and returns the
  surviving proposals.

  Args:
    rpn_cls_prob: 4-D tensor; the last-dimension channels from index
      num_anchors onward are used as the per-anchor scores.
    rpn_bbox_pred: tensor of box deltas, viewed as (-1, 4).
    im_info: its first two entries are passed to clip_boxes.
    cfg_key: str or bytes key into cfg (selects the RPN_* settings).
    _feat_stride: unused here.
    anchors: anchors matching the rows of the reshaped rpn_bbox_pred.
    num_anchors: number of anchors per location.

  Returns:
    blob: (K, 5) tensor, column 0 is the batch index (always 0), columns 1-4
      the proposal boxes.
    scores: (K, 1) tensor of the matching scores.
  """
  if isinstance(cfg_key, bytes):
    cfg_key = cfg_key.decode('utf-8')
  pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
  scores = scores.contiguous().view(-1, 1)
  proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
  proposals = clip_boxes(proposals, im_info[:2])

  # Pick the top region proposals
  scores, order = scores.view(-1).sort(descending=True)
  if pre_nms_topN > 0:
    order = order[:pre_nms_topN]
    scores = scores[:pre_nms_topN]
  # Restore the column shape unconditionally: the concatenation below needs
  # a 2-D tensor even when the pre-NMS limit is disabled (pre_nms_topN <= 0).
  scores = scores.view(-1, 1)
  proposals = proposals[order.data, :]

  # Non-maximal suppression
  keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

  # Pick the top region proposals after NMS
  if post_nms_topN > 0:
    keep = keep[:post_nms_topN]
  proposals = proposals[keep, :]
  scores = scores[keep,]

  # Only support single image as input
  batch_inds = proposals.new_zeros(proposals.size(0), 1)
  blob = torch.cat((batch_inds, proposals), 1)

  return blob, scores
55 | 


--------------------------------------------------------------------------------
/lib/layer_utils/proposal_target_layer.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick, Sean Bell and Xinlei Chen
  6 | # --------------------------------------------------------
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import numpy as np
 12 | import numpy.random as npr
 13 | from model.config import cfg
 14 | from model.bbox_transform import bbox_transform
 15 | from utils.bbox import bbox_overlaps
 16 | 
 17 | 
 18 | import torch
 19 | 
 20 | def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
 21 |   """
 22 |   Assign object detection proposals to ground-truth targets. Produces proposal
 23 |   classification labels and bounding-box regression targets.
 24 |   """
 25 | 
 26 |   # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
 27 |   # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
 28 |   all_rois = rpn_rois
 29 |   all_scores = rpn_scores
 30 | 
 31 |   # Include ground-truth boxes in the set of candidate rois
 32 |   if cfg.TRAIN.USE_GT:
 33 |     zeros = rpn_rois.new_zeros(gt_boxes.shape[0], 1)
 34 |     all_rois = torch.cat(
 35 |       (all_rois, torch.cat((zeros, gt_boxes[:, :-1]), 1))
 36 |     , 0)
 37 |     # not sure if it a wise appending, but anyway i am not using it
 38 |     all_scores = torch.cat((all_scores, zeros), 0)
 39 | 
 40 |   num_images = 1
 41 |   rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
 42 |   fg_rois_per_image = int(round(cfg.TRAIN.FG_FRACTION * rois_per_image))
 43 | 
 44 |   # Sample rois with classification labels and bounding box regression
 45 |   # targets
 46 |   labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois(
 47 |     all_rois, all_scores, gt_boxes, fg_rois_per_image,
 48 |     rois_per_image, _num_classes)
 49 |   
 50 |   print('proposal target_layer ')
 51 |   print('labels ', labels.shape)
 52 |   print('rois ', rois.shape)
 53 |   print('roi_scores', roi_scores.shape)
 54 |   print('bbox_targets ', bbox_targets.shape)
 55 |   
 56 |   
 57 |   rois = rois.view(-1, 5)
 58 |   roi_scores = roi_scores.view(-1)
 59 |   labels = labels.view(-1, 1)
 60 |   bbox_targets = bbox_targets.view(-1, _num_classes * 4)
 61 |   bbox_inside_weights = bbox_inside_weights.view(-1, _num_classes * 4)
 62 |   bbox_outside_weights = (bbox_inside_weights > 0).float()
 63 | 
 64 |   return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
 65 | 
 66 | 
 67 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
 68 |   """Bounding-box regression targets (bbox_target_data) are stored in a
 69 |   compact form N x (class, tx, ty, tw, th)
 70 | 
 71 |   This function expands those targets into the 4-of-4*K representation used
 72 |   by the network (i.e. only one class has non-zero targets).
 73 | 
 74 |   Returns:
 75 |       bbox_target (ndarray): N x 4K blob of regression targets
 76 |       bbox_inside_weights (ndarray): N x 4K blob of loss weights
 77 |   """
 78 |   # Inputs are tensor
 79 | 
 80 |   clss = bbox_target_data[:, 0]
 81 |   bbox_targets = clss.new_zeros(clss.numel(), 4 * num_classes)
 82 |   bbox_inside_weights = clss.new_zeros(bbox_targets.shape)
 83 |   inds = (clss > 0).nonzero().view(-1)
 84 |   if inds.numel() > 0:
 85 |     clss = clss[inds].contiguous().view(-1,1)
 86 |     dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4)
 87 |     dim2_inds = torch.cat([4*clss, 4*clss+1, 4*clss+2, 4*clss+3], 1).long()
 88 |     bbox_targets[dim1_inds, dim2_inds] = bbox_target_data[inds][:, 1:]
 89 |     bbox_inside_weights[dim1_inds, dim2_inds] = bbox_targets.new(cfg.TRAIN.BBOX_INSIDE_WEIGHTS).view(-1, 4).expand_as(dim1_inds)
 90 | 
 91 |   return bbox_targets, bbox_inside_weights
 92 | 
 93 | 
 94 | def _compute_targets(ex_rois, gt_rois, labels):
 95 |   """Compute bounding-box regression targets for an image."""
 96 |   # Inputs are tensor
 97 | 
 98 |   assert ex_rois.shape[0] == gt_rois.shape[0]
 99 |   assert ex_rois.shape[1] == 4
100 |   assert gt_rois.shape[1] == 4
101 | 
102 |   targets = bbox_transform(ex_rois, gt_rois)
103 |   if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
104 |     # Optionally normalize targets by a precomputed mean and stdev
105 |     targets = ((targets - targets.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
106 |                / targets.new(cfg.TRAIN.BBOX_NORMALIZE_STDS))
107 |   return torch.cat(
108 |     [labels.unsqueeze(1), targets], 1)
109 | 
110 | 
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.

  Args:
    all_rois: tensor of candidate RoIs; columns 1-4 are the box coordinates.
    all_scores: tensor of scores, indexed in parallel with all_rois.
    gt_boxes: ground-truth tensor; columns 0-3 are the box, column 4 the
      class label.
    fg_rois_per_image: maximum number of foreground RoIs to sample.
    rois_per_image: total number of RoIs to sample (fixed; sampling is done
      with replacement when there are too few candidates).
    num_classes: number of classes for the expanded regression targets.

  Returns:
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights for the
    sampled RoIs, foreground rows first.

  Raises:
    RuntimeError: if no RoI qualifies as either foreground or background.
  """
  def _draw(pool, size, replace):
    # Randomly pick `size` entries of the index tensor `pool`.
    picks = npr.choice(np.arange(0, pool.numel()), size=int(size), replace=replace)
    return pool[torch.from_numpy(picks).long().to(gt_boxes.device)]

  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    all_rois[:, 1:5].data,
    gt_boxes[:, :4].data)
  max_overlaps, gt_assignment = overlaps.max(1)
  labels = gt_boxes[gt_assignment, [4]]

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI).
  # The two masks are combined with a logical AND; summing them and comparing
  # with 2 only works for uint8 masks and never matches for bool masks.
  bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
             (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)).nonzero().view(-1)

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.numel() > 0 and bg_inds.numel() > 0:
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
    fg_inds = _draw(fg_inds, fg_rois_per_image, False)
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.numel() < bg_rois_per_image
    bg_inds = _draw(bg_inds, bg_rois_per_image, to_replace)
  elif fg_inds.numel() > 0:
    to_replace = fg_inds.numel() < rois_per_image
    fg_inds = _draw(fg_inds, rois_per_image, to_replace)
    fg_rois_per_image = rois_per_image
  elif bg_inds.numel() > 0:
    to_replace = bg_inds.numel() < rois_per_image
    bg_inds = _draw(bg_inds, rois_per_image, to_replace)
    fg_rois_per_image = 0
  else:
    # Fail loudly instead of dropping into an interactive debugger, which
    # would hang or abort a non-interactive run.
    raise RuntimeError(
      '_sample_rois: no foreground or background RoIs to sample from')

  # The indices that we're selecting (both fg and bg)
  keep_inds = torch.cat([fg_inds, bg_inds], 0)
  # Select sampled values from various arrays:
  labels = labels[keep_inds].contiguous()
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds].contiguous()
  roi_scores = all_scores[keep_inds].contiguous()

  bbox_target_data = _compute_targets(
    rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
163 | 


--------------------------------------------------------------------------------
/lib/layer_utils/proposal_top_layer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Faster R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Xinlei Chen
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | from model.config import cfg
12 | from model.bbox_transform import bbox_transform_inv, clip_boxes
13 | import numpy.random as npr
14 | 
15 | import torch
16 | 
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
  """Select cfg.TEST.RPN_TOP_N proposals by score alone, without
     non-maximal suppression.

     Returns a tuple (blob, scores): blob has a leading column of zeros
     (batch index) followed by the clipped proposal boxes; scores holds the
     matching score rows.
  """
  top_n = cfg.TEST.RPN_TOP_N

  # Channels from num_anchors onward are taken as the scores to rank.
  fg_prob = rpn_cls_prob[:, :, :, num_anchors:]

  # One row per anchor: four deltas and a single score column.
  deltas = rpn_bbox_pred.view(-1, 4)
  flat_scores = fg_prob.contiguous().view(-1, 1)

  total = flat_scores.size(0)
  if total >= top_n:
    _, order = flat_scores.sort(0, descending=True)
    keep = order[:top_n].view(top_n)
  else:
    # Fewer candidates than requested: draw indices at random with
    # replacement. May lose good proposals, but this case is rare.
    drawn = npr.choice(total, size=top_n, replace=True)
    keep = torch.from_numpy(drawn).long().to(anchors.device)

  # Gather the selected rows from every per-anchor array.
  picked_anchors = anchors[keep, :].contiguous()
  picked_deltas = deltas[keep, :].contiguous()
  picked_scores = flat_scores[keep].contiguous()

  # Turn anchors + deltas into boxes, then clip them to the image.
  boxes = bbox_transform_inv(picked_anchors, picked_deltas)
  boxes = clip_boxes(boxes, im_info[:2])

  # Only a single input image is supported, so every batch index is 0.
  zeros_col = boxes.new_zeros(boxes.size(0), 1)
  return torch.cat([zeros_col, boxes], 1), picked_scores
56 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/__pycache__/crop_and_resize.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/crop_and_resize/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._crop_and_resize import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/_ext/crop_and_resize/_crop_and_resize.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/_ext/crop_and_resize/_crop_and_resize.so


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | #sources = ['src/crop_and_resize.c']
 7 | #headers = ['src/crop_and_resize.h']
 8 | #defines = []
 9 | #with_cuda = False
10 | 
11 | sources = []
12 | headers = []
13 | defines = []
14 | 
15 | extra_objects = []
16 | if torch.cuda.is_available():
17 |     print('Including CUDA code.')
18 |     sources += ['src/crop_and_resize_gpu.c']
19 |     headers += ['src/crop_and_resize_gpu.h']
20 |     defines += [('WITH_CUDA', None)]
21 |     extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o']
22 |     with_cuda = True
23 | 
24 | extra_compile_args = ['-std=c99']
25 | 
26 | this_file = os.path.dirname(os.path.realpath(__file__))
27 | print(this_file)
28 | sources = [os.path.join(this_file, fname) for fname in sources]
29 | headers = [os.path.join(this_file, fname) for fname in headers]
30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
31 | 
32 | ffi = create_extension(
33 |     '_ext.crop_and_resize',
34 |     headers=headers,
35 |     sources=sources,
36 |     define_macros=defines,
37 |     relative_to=__file__,
38 |     with_cuda=with_cuda,
39 |     extra_objects=extra_objects,
40 |     extra_compile_args=extra_compile_args
41 | )
42 | 
43 | if __name__ == '__main__':
44 |     ffi.build()
45 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/crop_and_resize.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import torch
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | from torch.autograd import Function
 6 | 
 7 | from ._ext import crop_and_resize as _backend
 8 | 
 9 | 
class CropAndResizeFunction(Function):
    """Autograd function that dispatches to the compiled crop-and-resize kernels.

    An instance stores the crop size and extrapolation value and is then
    called with ``(image, boxes, box_ind)``.
    """

    def __init__(self, crop_height, crop_width, extrapolation_value=0):
        self.extrapolation_value = extrapolation_value
        self.crop_width = crop_width
        self.crop_height = crop_height

    def forward(self, image, boxes, box_ind):
        """Run the forward kernel and return the crops tensor it filled."""
        # Remembered so backward can size the image gradient.
        self.im_size = image.size()

        # Output buffer handed to the backend (the GPU entry point resizes it;
        # presumably the CPU one does too -- confirm against its source).
        crops = torch.zeros_like(image)

        if image.is_cuda:
            kernel = _backend.crop_and_resize_gpu_forward
        else:
            kernel = _backend.crop_and_resize_forward
        kernel(image, boxes, box_ind,
               self.extrapolation_value, self.crop_height, self.crop_width, crops)

        self.save_for_backward(boxes, box_ind)
        return crops

    def backward(self, grad_outputs):
        """Return the gradient w.r.t. the image; boxes and box_ind get None."""
        boxes, box_ind = self.saved_tensors

        grad_outputs = grad_outputs.contiguous()
        # Buffer with the saved input size, written in place by the backend.
        grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size)

        if grad_outputs.is_cuda:
            kernel = _backend.crop_and_resize_gpu_backward
        else:
            kernel = _backend.crop_and_resize_backward
        kernel(grad_outputs, boxes, box_ind, grad_image)

        return grad_image, None, None
51 | 
52 | 
class CropAndResize(nn.Module):
    """
    Module wrapper around CropAndResizeFunction (crop and resize ported from tensorflow).
    See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize
    """

    def __init__(self, crop_height, crop_width, extrapolation_value=0):
        super(CropAndResize, self).__init__()

        self.extrapolation_value = extrapolation_value
        self.crop_width = crop_width
        self.crop_height = crop_height

    def forward(self, image, boxes, box_ind):
        """Build a fresh function object with the stored settings and apply it."""
        op = CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)
        return op(image, boxes, box_ind)
68 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | 
 4 | from .crop_and_resize import CropAndResizeFunction, CropAndResize
 5 | 
 6 | 
 7 | class RoIAlign(nn.Module):
 8 | 
 9 |     def __init__(self, crop_height, crop_width, extrapolation_value=0, transform_fpcoor=True):
10 |         super(RoIAlign, self).__init__()
11 | 
12 |         self.crop_height = crop_height
13 |         self.crop_width = crop_width
14 |         self.extrapolation_value = extrapolation_value
15 |         self.transform_fpcoor = transform_fpcoor
16 | 
17 |     def forward(self, featuremap, boxes, box_ind):
18 |         """
19 |         RoIAlign based on crop_and_resize.
20 |         See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301
21 |         :param featuremap: NxCxHxW
22 |         :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization**
23 |         :param box_ind: M
24 |         :return: MxCxoHxoW
25 |         """
26 |         x1, y1, x2, y2 = torch.split(boxes, 1, dim=1)
27 |         image_height, image_width = featuremap.size()[2:4]
28 | 
29 |         if self.transform_fpcoor:
30 |             spacing_w = (x2 - x1) / float(self.crop_width)
31 |             spacing_h = (y2 - y1) / float(self.crop_height)
32 | 
33 |             nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1)
34 |             ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1)
35 |             nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1)
36 |             nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1)
37 | 
38 |             boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1)
39 |         else:
40 |             x1 = x1 / float(image_width - 1)
41 |             x2 = x2 / float(image_width - 1)
42 |             y1 = y1 / float(image_height - 1)
43 |             y2 = y2 / float(image_height - 1)
44 |             boxes = torch.cat((y1, x1, y2, x2), 1)
45 | 
46 |         boxes = boxes.detach().contiguous()
47 |         box_ind = box_ind.detach()
48 |         return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind)
49 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/src/crop_and_resize.h:
--------------------------------------------------------------------------------
 1 | void crop_and_resize_forward(
 2 |     THFloatTensor * image,
 3 |     THFloatTensor * boxes,      // [y1, x1, y2, x2]
 4 |     THIntTensor * box_index,    // range in [0, batch_size)
 5 |     const float extrapolation_value,
 6 |     const int crop_height,
 7 |     const int crop_width,
 8 |     THFloatTensor * crops
 9 | );
10 | 
11 | void crop_and_resize_backward(
12 |     THFloatTensor * grads,
13 |     THFloatTensor * boxes,      // [y1, x1, y2, x2]
14 |     THIntTensor * box_index,    // range in [0, batch_size)
15 |     THFloatTensor * grads_image // resize to [bsize, c, hc, wc]
16 | );


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/src/crop_and_resize_gpu.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include "cuda/crop_and_resize_kernel.h"
 3 | 
 4 | extern THCState *state;
 5 | 
 6 | 
 7 | void crop_and_resize_gpu_forward(
 8 |     THCudaTensor * image,
 9 |     THCudaTensor * boxes,           // [y1, x1, y2, x2]
10 |     THCudaIntTensor * box_index,    // range in [0, batch_size)
11 |     const float extrapolation_value,
12 |     const int crop_height,
13 |     const int crop_width,
14 |     THCudaTensor * crops
15 | ) {
16 |     const int batch_size = THCudaTensor_size(state, image, 0);
17 |     const int depth = THCudaTensor_size(state, image, 1);
18 |     const int image_height = THCudaTensor_size(state, image, 2);
19 |     const int image_width = THCudaTensor_size(state, image, 3);
20 | 
21 |     const int num_boxes = THCudaTensor_size(state, boxes, 0);
22 | 
23 |     // init output space
24 |     THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width);
25 |     THCudaTensor_zero(state, crops);
26 | 
27 |     cudaStream_t stream = THCState_getCurrentStream(state);
28 |     CropAndResizeLaucher(
29 |         THCudaTensor_data(state, image),
30 |         THCudaTensor_data(state, boxes),
31 |         THCudaIntTensor_data(state, box_index),
32 |         num_boxes, batch_size, image_height, image_width,
33 |         crop_height, crop_width, depth, extrapolation_value,
34 |         THCudaTensor_data(state, crops),
35 |         stream
36 |     );
37 | }
38 | 
39 | 
40 | void crop_and_resize_gpu_backward(
41 |     THCudaTensor * grads,
42 |     THCudaTensor * boxes,      // [y1, x1, y2, x2]
43 |     THCudaIntTensor * box_index,    // range in [0, batch_size)
44 |     THCudaTensor * grads_image // resize to [bsize, c, hc, wc]
45 | ) {
46 |     // shape
47 |     const int batch_size = THCudaTensor_size(state, grads_image, 0);
48 |     const int depth = THCudaTensor_size(state, grads_image, 1);
49 |     const int image_height = THCudaTensor_size(state, grads_image, 2);
50 |     const int image_width = THCudaTensor_size(state, grads_image, 3);
51 | 
52 |     const int num_boxes = THCudaTensor_size(state, grads, 0);
53 |     const int crop_height = THCudaTensor_size(state, grads, 2);
54 |     const int crop_width = THCudaTensor_size(state, grads, 3);
55 | 
56 |     // init output space
57 |     THCudaTensor_zero(state, grads_image);
58 | 
59 |     cudaStream_t stream = THCState_getCurrentStream(state);
60 |     CropAndResizeBackpropImageLaucher(
61 |         THCudaTensor_data(state, grads),
62 |         THCudaTensor_data(state, boxes),
63 |         THCudaIntTensor_data(state, box_index),
64 |         num_boxes, batch_size, image_height, image_width,
65 |         crop_height, crop_width, depth,
66 |         THCudaTensor_data(state, grads_image),
67 |         stream
68 |     );
69 | }


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/src/crop_and_resize_gpu.h:
--------------------------------------------------------------------------------
 1 | void crop_and_resize_gpu_forward(
 2 |     THCudaTensor * image,
 3 |     THCudaTensor * boxes,           // [y1, x1, y2, x2]
 4 |     THCudaIntTensor * box_index,    // range in [0, batch_size)
 5 |     const float extrapolation_value,
 6 |     const int crop_height,
 7 |     const int crop_width,
 8 |     THCudaTensor * crops
 9 | );
10 | 
11 | void crop_and_resize_gpu_backward(
12 |     THCudaTensor * grads,
13 |     THCudaTensor * boxes,      // [y1, x1, y2, x2]
14 |     THCudaIntTensor * box_index,    // range in [0, batch_size)
15 |     THCudaTensor * grads_image // resize to [bsize, c, hc, wc]
16 | );


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.cu.o


--------------------------------------------------------------------------------
/lib/layer_utils/roi_align/src/cuda/crop_and_resize_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CropAndResize_Kernel
 2 | #define _CropAndResize_Kernel
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | void CropAndResizeLaucher(
 9 |     const float *image_ptr, const float *boxes_ptr,
10 |     const int *box_ind_ptr, int num_boxes, int batch, int image_height,
11 |     int image_width, int crop_height, int crop_width, int depth,
12 |     float extrapolation_value, float *crops_ptr, cudaStream_t stream);
13 | 
14 | void CropAndResizeBackpropImageLaucher(
15 |     const float *grads_ptr, const float *boxes_ptr,
16 |     const int *box_ind_ptr, int num_boxes, int batch, int image_height,
17 |     int image_width, int crop_height, int crop_width, int depth,
18 |     float *grads_image_ptr, cudaStream_t stream);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/_ext/roi_pooling/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/roi_pooling/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/_ext/roi_pooling/_roi_pooling.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/_ext/roi_pooling/_roi_pooling.so


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | 
 7 | sources = ['src/roi_pooling.c']
 8 | headers = ['src/roi_pooling.h']
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/roi_pooling_cuda.c']
15 |     headers += ['src/roi_pooling_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/roi_pooling_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 | 
24 | ffi = create_extension(
25 |     '_ext.roi_pooling',
26 |     headers=headers,
27 |     sources=sources,
28 |     define_macros=defines,
29 |     relative_to=__file__,
30 |     with_cuda=with_cuda,
31 |     extra_objects=extra_objects
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/functions/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/functions/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/functions/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/functions/__pycache__/roi_pool.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/functions/__pycache__/roi_pool.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_pooling
 4 | import pdb
 5 | 
 6 | class RoIPoolFunction(Function):
 7 |     def __init__(ctx, pooled_height, pooled_width, spatial_scale):
 8 |         ctx.pooled_width = pooled_width
 9 |         ctx.pooled_height = pooled_height
10 |         ctx.spatial_scale = spatial_scale
11 |         ctx.feature_size = None
12 | 
13 |     def forward(ctx, features, rois): 
14 |         ctx.feature_size = features.size()           
15 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
16 |         num_rois = rois.size(0)
17 |         output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 |         ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 |         ctx.rois = rois
20 |         if not features.is_cuda:
21 |             _features = features.permute(0, 2, 3, 1)
22 |             roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 |                                             _features, rois, output)
24 |         else:
25 |             roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
26 |                                                  features, rois, output, ctx.argmax)
27 | 
28 |         return output
29 | 
30 |     def backward(ctx, grad_output):
31 |         assert(ctx.feature_size is not None and grad_output.is_cuda)
32 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
33 |         grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
34 | 
35 |         roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
36 |                                               grad_output, ctx.rois, grad_input, ctx.argmax)
37 | 
38 |         return grad_input, None
39 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/modules/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.roi_pool import RoIPoolFunction
 3 | 
 4 | 
 5 | class _RoIPooling(Module):
 6 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 7 |         super(_RoIPooling, self).__init__()
 8 | 
 9 |         self.pooled_width = int(pooled_width)
10 |         self.pooled_height = int(pooled_height)
11 |         self.spatial_scale = float(spatial_scale)
12 | 
13 |     def forward(self, features, rois):
14 |         return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
15 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | 
  4 | /* Clamp `value` into the closed range [lo, hi]. */
  5 | static int clamp_int(int value, int lo, int hi)
  6 | {
  7 |     if (value < lo) return lo;
  8 |     if (value > hi) return hi;
  9 |     return value;
 10 | }
 11 | 
 12 | /*
 13 |  * CPU forward pass of RoI max pooling.
 14 |  *
 15 |  * `features` is indexed as [batch, height, width, channel] and must have
 16 |  * batch size 1. Each row of `rois` is read as [batch_index, x1, y1, x2, y2];
 17 |  * the corners are multiplied by `spatial_scale` and rounded to feature-map
 18 |  * cells. `output` is written as [roi, channel, pooled_height, pooled_width]
 19 |  * through its raw data pointer, so it is assumed to be contiguous.
 20 |  *
 21 |  * A bin that does not overlap the feature map is set to 0; every other bin
 22 |  * gets the maximum of the cells it covers. The maximum is seeded from the
 23 |  * first cell visited, not from a constant: a constant seed of -1 would clip
 24 |  * every bin whose values are all below -1.
 25 |  *
 26 |  * Returns 1 on success. Returns 0 without writing to `output` when the batch
 27 |  * size is not 1, a ROI row has fewer than 5 values, or a ROI names a batch
 28 |  * index outside the batch.
 29 |  */
 30 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
 31 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
 32 | {
 33 |     float * data_flat = THFloatTensor_data(features);
 34 |     float * rois_flat = THFloatTensor_data(rois);
 35 |     float * output_flat = THFloatTensor_data(output);
 36 | 
 37 |     int num_rois = THFloatTensor_size(rois, 0);
 38 |     int size_rois = THFloatTensor_size(rois, 1);
 39 |     int batch_size = THFloatTensor_size(features, 0);
 40 |     int data_height = THFloatTensor_size(features, 1);
 41 |     int data_width = THFloatTensor_size(features, 2);
 42 |     int num_channels = THFloatTensor_size(features, 3);
 43 |     const int output_area = pooled_width * pooled_height;
 44 | 
 45 |     int index_roi = 0;
 46 |     int index_output = 0;
 47 |     int n;
 48 | 
 49 |     if (batch_size != 1 || size_rois < 5)
 50 |     {
 51 |         return 0;
 52 |     }
 53 | 
 54 |     /* Check every batch index before pooling so that a bad ROI can neither
 55 |      * trigger an out-of-bounds read nor leave `output` half written. */
 56 |     for (n = 0; n < num_rois; ++n)
 57 |     {
 58 |         int roi_batch_ind = (int)rois_flat[n * size_rois];
 59 |         if (roi_batch_ind < 0 || roi_batch_ind >= batch_size)
 60 |         {
 61 |             return 0;
 62 |         }
 63 |     }
 64 | 
 65 |     /* For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R */
 66 |     for (n = 0; n < num_rois; ++n)
 67 |     {
 68 |         int roi_batch_ind = (int)rois_flat[index_roi + 0];
 69 |         int roi_start_w = (int)round(rois_flat[index_roi + 1] * spatial_scale);
 70 |         int roi_start_h = (int)round(rois_flat[index_roi + 2] * spatial_scale);
 71 |         int roi_end_w = (int)round(rois_flat[index_roi + 3] * spatial_scale);
 72 |         int roi_end_h = (int)round(rois_flat[index_roi + 4] * spatial_scale);
 73 | 
 74 |         /* Degenerate (inverted) boxes are treated as one cell wide/high. */
 75 |         int roi_height = roi_end_h - roi_start_h + 1;
 76 |         int roi_width = roi_end_w - roi_start_w + 1;
 77 |         float bin_size_h;
 78 |         float bin_size_w;
 79 |         int index_data = roi_batch_ind * data_height * data_width * num_channels;
 80 |         int ph, pw;
 81 | 
 82 |         if (roi_height < 1) roi_height = 1;
 83 |         if (roi_width < 1) roi_width = 1;
 84 |         bin_size_h = (float)(roi_height) / (float)(pooled_height);
 85 |         bin_size_w = (float)(roi_width) / (float)(pooled_width);
 86 | 
 87 |         for (ph = 0; ph < pooled_height; ++ph)
 88 |         {
 89 |             for (pw = 0; pw < pooled_width; ++pw)
 90 |             {
 91 |                 /* Bin extent in feature-map cells, clipped to the map. */
 92 |                 int hstart = clamp_int((int)floor((float)(ph) * bin_size_h) + roi_start_h, 0, data_height);
 93 |                 int hend = clamp_int((int)ceil((float)(ph + 1) * bin_size_h) + roi_start_h, 0, data_height);
 94 |                 int wstart = clamp_int((int)floor((float)(pw) * bin_size_w) + roi_start_w, 0, data_width);
 95 |                 int wend = clamp_int((int)ceil((float)(pw + 1) * bin_size_w) + roi_start_w, 0, data_width);
 96 | 
 97 |                 const int pool_index = index_output + (ph * pooled_width + pw);
 98 |                 int seen = 0; /* becomes 1 once a cell has seeded the maxima */
 99 |                 int h, w, c;
100 | 
101 |                 for (h = hstart; h < hend; ++h)
102 |                 {
103 |                     for (w = wstart; w < wend; ++w)
104 |                     {
105 |                         const float * cell = data_flat + index_data + (h * data_width + w) * num_channels;
106 |                         for (c = 0; c < num_channels; ++c)
107 |                         {
108 |                             float * best = output_flat + pool_index + c * output_area;
109 |                             if (!seen || cell[c] > *best)
110 |                             {
111 |                                 *best = cell[c];
112 |                             }
113 |                         }
114 |                         seen = 1;
115 |                     }
116 |                 }
117 | 
118 |                 if (!seen)
119 |                 {
120 |                     /* Empty bin: report 0 for every channel. */
121 |                     for (c = 0; c < num_channels; ++c)
122 |                     {
123 |                         output_flat[pool_index + c * output_area] = 0;
124 |                     }
125 |                 }
126 |             }
127 |         }
128 | 
129 |         /* Advance to the next ROI row and output slab. */
130 |         index_roi += size_rois;
131 |         index_output += pooled_height * pooled_width * num_channels;
132 |     }
133 |     return 1;
134 | }


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/src/roi_pooling.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_pooling/src/roi_pooling.cu.o


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_pooling_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
 9 | {
10 |     /*
11 |      * GPU entry point for the forward pass: collects raw pointers and sizes
12 |      * from the THC tensors and passes them to ROIPoolForwardLaucher on the
13 |      * current stream.
14 |      *
15 |      * Returns 0 when `rois` does not have exactly 5 columns and 1 otherwise;
16 |      * the launcher's own return value is not inspected.
17 |      */
18 | 
19 |     /* Raw pointers handed to the launcher. */
20 |     float * feature_ptr = THCudaTensor_data(state, features);
21 |     float * roi_ptr = THCudaTensor_data(state, rois);
22 |     float * out_ptr = THCudaTensor_data(state, output);
23 |     int * argmax_ptr = THCudaIntTensor_data(state, argmax);
24 | 
25 |     /* Each ROI row must hold exactly five values. */
26 |     const int roi_count = THCudaTensor_size(state, rois, 0);
27 |     const int roi_row_len = THCudaTensor_size(state, rois, 1);
28 |     if (roi_row_len != 5)
29 |     {
30 |         return 0;
31 |     }
32 | 
33 |     /* Sizes are read as dim 1 = channels, dim 2 = height, dim 3 = width;
34 |      * dim 0 (batch) is not restricted here. */
35 |     const int channels = THCudaTensor_size(state, features, 1);
36 |     const int height = THCudaTensor_size(state, features, 2);
37 |     const int width = THCudaTensor_size(state, features, 3);
38 | 
39 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
40 | 
41 |     ROIPoolForwardLaucher(
42 |         feature_ptr, spatial_scale, roi_count, height,
43 |         width, channels, pooled_height,
44 |         pooled_width, roi_ptr,
45 |         out_ptr, argmax_ptr, cur_stream);
46 |     return 1;
47 | }
48 | 
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 |     /*
53 |      * GPU entry point for the backward pass: passes `top_grad`, the ROIs,
54 |      * the stored argmax and the `bottom_grad` buffer to
55 |      * ROIPoolBackwardLaucher. All sizes are taken from `bottom_grad`.
56 |      *
57 |      * Returns 0 when `rois` does not have exactly 5 columns and 1 otherwise;
58 |      * the launcher's own return value is not inspected.
59 |      */
60 |     float * top_grad_ptr = THCudaTensor_data(state, top_grad);
61 |     float * roi_ptr = THCudaTensor_data(state, rois);
62 |     float * bottom_grad_ptr = THCudaTensor_data(state, bottom_grad);
63 |     int * argmax_ptr = THCudaIntTensor_data(state, argmax);
64 | 
65 |     /* Each ROI row must hold exactly five values. */
66 |     const int roi_count = THCudaTensor_size(state, rois, 0);
67 |     const int roi_row_len = THCudaTensor_size(state, rois, 1);
68 |     if (roi_row_len != 5)
69 |     {
70 |         return 0;
71 |     }
72 | 
73 |     /* bottom_grad sizes are read as dim 0 = batch, dim 1 = channels,
74 |      * dim 2 = height, dim 3 = width. */
75 |     const int batch = THCudaTensor_size(state, bottom_grad, 0);
76 |     const int channels = THCudaTensor_size(state, bottom_grad, 1);
77 |     const int height = THCudaTensor_size(state, bottom_grad, 2);
78 |     const int width = THCudaTensor_size(state, bottom_grad, 3);
79 | 
80 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
81 | 
82 |     ROIPoolBackwardLaucher(
83 |         top_grad_ptr, spatial_scale, batch, roi_count, height,
84 |         width, channels, pooled_height,
85 |         pooled_width, roi_ptr,
86 |         bottom_grad_ptr, argmax_ptr, cur_stream);
87 |     return 1;
88 | }
89 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 | 
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);


--------------------------------------------------------------------------------
/lib/layer_utils/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_POOLING_KERNEL
 2 | #define _ROI_POOLING_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | int ROIPoolForwardLaucher(
 9 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 |     const int width, const int channels, const int pooled_height,
11 |     const int pooled_width, const float* bottom_rois,
12 |     float* top_data, int* argmax_data, cudaStream_t stream);
13 | 
14 | 
15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 |     const int height, const int width, const int channels, const int pooled_height,
17 |     const int pooled_width, const float* bottom_rois,
18 |     float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_ring_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/_roi_ring_pooling.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/_ext/roi_ring_pooling/_roi_ring_pooling.so


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | 
 7 | sources = ['src/roi_ring_pooling.c']
 8 | headers = ['src/roi_ring_pooling.h']
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/roi_ring_pooling_cuda.c']
15 |     headers += ['src/roi_ring_pooling_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/roi_ring_pooling_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 | 
24 | ffi = create_extension(
25 |     '_ext.roi_ring_pooling',
26 |     headers=headers,
27 |     sources=sources,
28 |     define_macros=defines,
29 |     relative_to=__file__,
30 |     with_cuda=with_cuda,
31 |     extra_objects=extra_objects
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/functions/__pycache__/roi_ring_pool.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/functions/__pycache__/roi_ring_pool.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/functions/roi_ring_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_ring_pooling
 4 | import copy
 5 | 
 6 | import pdb
 7 | 
 8 | class RoIRingPoolFunction(Function):
 9 |     def __init__(ctx, pooled_height, pooled_width, spatial_scale, scale_inner, scale_outer):
10 |         ctx.pooled_height = pooled_height
11 |         ctx.pooled_width = pooled_width
12 |         ctx.spatial_scale = spatial_scale
13 |         ctx.scale_inner = scale_inner
14 |         ctx.scale_outer = scale_outer
15 |         ctx.feature_size = None
16 |     
17 |     def forward(ctx, features, rois):
18 |         ctx.feature_size = features.size()
19 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
20 |         num_rois = rois.size(0)
21 |         output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
22 |         ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
23 | 
24 |         ctx.rois = rois
25 |         ctx.processed_rois = features.new(rois.size(0), 9).zero_()
26 |         
27 |         RectangularRing(rois, ctx.processed_rois, ctx.spatial_scale, ctx.scale_inner, ctx.scale_outer)
28 |         #print('rois ', rois[100:101, :])
29 |         #print('preco ', ctx.processed_rois[100:101,:])
30 |         #if not features.is_cuda:
31 |         #    _features = features.permute(0, 2, 3, 1)
32 |         #    roi_ring_pooling.roi_ring_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
33 |         #                                              _features, ctx.processed_rois, output)
34 |         #else:
35 |         #print('3333', rois)
36 |         #print('ctx process roi ', ctx.processed_rois)
37 |         roi_ring_pooling.roi_ring_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
38 |                                                            features, ctx.processed_rois, output, ctx.argmax)
39 |         return output
40 |     def backward(ctx, grad_output):
41 |         assert(ctx.feature_size is not None and grad_output.is_cuda)
42 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
43 |         grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
44 | 
45 |         roi_ring_pooling.roi_ring_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
46 |                                                         grad_output, ctx.processed_rois, grad_input, ctx.argmax)
47 |         return grad_input, None
48 | 
49 | 
50 | 
51 | def RectangularRing(rois, processed_rois,spatial_scale, scale_inner, scale_outer):
52 |     #widths = rois[:, 3] - rois[:, 1] + 1.0
53 |     #heights = rois[:, 4] - rois[:, 2] + 1.0
54 |     #ctr_x = rois[:, 1] + 0.5 * widths
55 |     #ctr_y = rois[:, 2] + 0.5 * heights
56 | 
57 |     ctr_x = (rois[:, 1] + rois[:, 3]) / 2
58 |     ctr_y = (rois[:, 2] + rois[:, 4]) / 2
59 |     w_half = (rois[:, 3] - rois[:, 1]) / 2
60 |     h_half = (rois[:, 4] - rois[:, 2]) / 2
61 | 
62 |     
63 |     #for i in range(rois.size(0)):
64 |     #    processed_rois[i, 0] = 0
65 |     processed_rois[:, 1] = torch.tensor(ctr_x - w_half * scale_outer, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(0.5).floor_()
66 |     processed_rois[:, 2] = torch.tensor(ctr_y - h_half * scale_outer, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(0.5).floor_()
67 |     processed_rois[:, 3] = torch.tensor(ctr_x + w_half * scale_outer, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(-0.5).ceil_()
68 |     processed_rois[:, 4] = torch.tensor(ctr_y + h_half * scale_outer, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(-0.5).ceil_()
69 |     processed_rois[:, 5] = torch.tensor(ctr_x - w_half * scale_inner, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(0.5).floor_()
70 |     processed_rois[:, 6] = torch.tensor(ctr_y - h_half * scale_inner, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(0.5).floor_()
71 |     processed_rois[:, 7] = torch.tensor(ctr_x + w_half * scale_inner, dtype=rois.dtype, device=rois.device)  ##.mul_(spatial_scale).add_(-0.5).ceil_()
72 |     processed_rois[:, 8] = torch.tensor(ctr_y + h_half * scale_inner, dtype=rois.dtype, device=rois.device) 
73 |     
74 |     if scale_inner == 0:
75 |         processed_rois[:, 5:] = 0
76 | 
77 |     return 1


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/modules/roi_ring_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.roi_ring_pool import RoIRingPoolFunction
 3 | 
 4 | 
 5 | class _RoIRingPooling(Module):
 6 |     def __init__(self, pooled_height, pooled_width, spatial_scale,  scale_inner, scale_outer):
 7 |         super(_RoIRingPooling, self).__init__()
 8 | 
 9 |         self.pooled_width = int(pooled_width)
10 |         self.pooled_height = int(pooled_height)
11 |         self.spatial_scale = float(spatial_scale)
12 |         self.scale_inner = scale_inner
13 |         self.scale_outer = scale_outer
14 | 
15 |     def forward(self, features, rois):
16 |         return RoIRingPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.scale_inner, self.scale_outer)(features, rois)


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling.c:
--------------------------------------------------------------------------------
  1 | #include<TH/TH.h>
  2 | #include<math.h>
  3 | 
  4 | 
  5 | /* Clamp `value` into the closed range [lo, hi]. */
  6 | static int clamp_int(int value, int lo, int hi)
  7 | {
  8 |     if (value < lo) return lo;
  9 |     if (value > hi) return hi;
 10 |     return value;
 11 | }
 12 | 
 13 | /*
 14 |  * CPU forward pass of ring-shaped RoI max pooling.
 15 |  *
 16 |  * `features` is indexed as [batch, height, width, channel] and must have
 17 |  * batch size 1. Each row of `rois` is read as nine values:
 18 |  * [batch_index, outer x1, y1, x2, y2, inner x1, y1, x2, y2]; all corners are
 19 |  * multiplied by `spatial_scale` and rounded to feature-map cells. Bins are
 20 |  * laid out over the outer box, and cells strictly inside the inner box are
 21 |  * skipped. `output` is written as [roi, channel, pooled_height, pooled_width]
 22 |  * through its raw data pointer, so it is assumed to be contiguous.
 23 |  *
 24 |  * A bin with no contributing cell (outside the feature map, or entirely
 25 |  * inside the inner box) is set to 0; every other bin gets the maximum of its
 26 |  * contributing cells. The maximum is seeded from the first contributing
 27 |  * cell, not from a constant: a constant seed of -1 would clip bins whose
 28 |  * values are all below -1 and leak into bins with no contributing cell.
 29 |  *
 30 |  * Returns 1 on success. Returns 0 without writing to `output` when the batch
 31 |  * size is not 1, a ROI row has fewer than 9 values, or a ROI names a batch
 32 |  * index outside the batch.
 33 |  */
 34 | int roi_ring_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
 35 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
 36 | {
 37 |     float * data_flat = THFloatTensor_data(features);
 38 |     float * rois_flat = THFloatTensor_data(rois);
 39 |     float * output_flat = THFloatTensor_data(output);
 40 | 
 41 |     int num_rois = THFloatTensor_size(rois, 0);
 42 |     int size_rois = THFloatTensor_size(rois, 1);
 43 |     int batch_size = THFloatTensor_size(features, 0);
 44 |     int data_height = THFloatTensor_size(features, 1);
 45 |     int data_width = THFloatTensor_size(features, 2);
 46 |     int num_channels = THFloatTensor_size(features, 3);
 47 |     const int output_area = pooled_width * pooled_height;
 48 | 
 49 |     int index_roi = 0;
 50 |     int index_output = 0;
 51 |     int n;
 52 | 
 53 |     if (batch_size != 1 || size_rois < 9)
 54 |     {
 55 |         return 0;
 56 |     }
 57 | 
 58 |     /* Check every batch index before pooling so that a bad ROI can neither
 59 |      * trigger an out-of-bounds read nor leave `output` half written. */
 60 |     for (n = 0; n < num_rois; ++n)
 61 |     {
 62 |         int roi_batch_ind = (int)rois_flat[n * size_rois];
 63 |         if (roi_batch_ind < 0 || roi_batch_ind >= batch_size)
 64 |         {
 65 |             return 0;
 66 |         }
 67 |     }
 68 | 
 69 |     /* For each ROI: max pool over the outer box minus the inner box. */
 70 |     for (n = 0; n < num_rois; ++n)
 71 |     {
 72 |         int roi_batch_ind = (int)rois_flat[index_roi + 0];
 73 |         int roi_start_w = (int)round(rois_flat[index_roi + 1] * spatial_scale);
 74 |         int roi_start_h = (int)round(rois_flat[index_roi + 2] * spatial_scale);
 75 |         int roi_end_w = (int)round(rois_flat[index_roi + 3] * spatial_scale);
 76 |         int roi_end_h = (int)round(rois_flat[index_roi + 4] * spatial_scale);
 77 |         int roi_start_w_in = (int)round(rois_flat[index_roi + 5] * spatial_scale);
 78 |         int roi_start_h_in = (int)round(rois_flat[index_roi + 6] * spatial_scale);
 79 |         int roi_end_w_in = (int)round(rois_flat[index_roi + 7] * spatial_scale);
 80 |         int roi_end_h_in = (int)round(rois_flat[index_roi + 8] * spatial_scale);
 81 | 
 82 |         /* Degenerate (inverted) outer boxes are treated as one cell wide/high. */
 83 |         int roi_height = roi_end_h - roi_start_h + 1;
 84 |         int roi_width = roi_end_w - roi_start_w + 1;
 85 |         float bin_size_h;
 86 |         float bin_size_w;
 87 |         int index_data = roi_batch_ind * data_height * data_width * num_channels;
 88 |         int ph, pw;
 89 | 
 90 |         if (roi_height < 1) roi_height = 1;
 91 |         if (roi_width < 1) roi_width = 1;
 92 |         bin_size_h = (float)(roi_height) / (float)(pooled_height);
 93 |         bin_size_w = (float)(roi_width) / (float)(pooled_width);
 94 | 
 95 |         for (ph = 0; ph < pooled_height; ++ph)
 96 |         {
 97 |             for (pw = 0; pw < pooled_width; ++pw)
 98 |             {
 99 |                 /* Bin extent in feature-map cells, clipped to the map. */
100 |                 int hstart = clamp_int((int)floor((float)(ph) * bin_size_h) + roi_start_h, 0, data_height);
101 |                 int hend = clamp_int((int)ceil((float)(ph + 1) * bin_size_h) + roi_start_h, 0, data_height);
102 |                 int wstart = clamp_int((int)floor((float)(pw) * bin_size_w) + roi_start_w, 0, data_width);
103 |                 int wend = clamp_int((int)ceil((float)(pw + 1) * bin_size_w) + roi_start_w, 0, data_width);
104 | 
105 |                 const int pool_index = index_output + (ph * pooled_width + pw);
106 |                 int seen = 0; /* becomes 1 once a cell has seeded the maxima */
107 |                 int h, w, c;
108 | 
109 |                 for (h = hstart; h < hend; ++h)
110 |                 {
111 |                     for (w = wstart; w < wend; ++w)
112 |                     {
113 |                         const float * cell;
114 | 
115 |                         /* Cells strictly inside the inner box do not contribute. */
116 |                         if (w > roi_start_w_in && w < roi_end_w_in && h > roi_start_h_in && h < roi_end_h_in)
117 |                         {
118 |                             continue;
119 |                         }
120 | 
121 |                         cell = data_flat + index_data + (h * data_width + w) * num_channels;
122 |                         for (c = 0; c < num_channels; ++c)
123 |                         {
124 |                             float * best = output_flat + pool_index + c * output_area;
125 |                             if (!seen || cell[c] > *best)
126 |                             {
127 |                                 *best = cell[c];
128 |                             }
129 |                         }
130 |                         seen = 1;
131 |                     }
132 |                 }
133 | 
134 |                 if (!seen)
135 |                 {
136 |                     /* No contributing cell: report 0 for every channel. */
137 |                     for (c = 0; c < num_channels; ++c)
138 |                     {
139 |                         output_flat[pool_index + c * output_area] = 0;
140 |                     }
141 |                 }
142 |             }
143 |         }
144 | 
145 |         /* Advance to the next ROI row and output slab. */
146 |         index_roi += size_rois;
147 |         index_output += pooled_height * pooled_width * num_channels;
148 |     }
149 |     return 1;
150 | }


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_ring_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_ring_pooling_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_ring_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
 9 | {
10 |     /*
11 |      * GPU entry point for the forward pass: collects raw pointers and sizes
12 |      * from the THC tensors and passes them to ROIRingPoolForwardLaucher on
13 |      * the current stream.
14 |      *
15 |      * Returns 0 when `rois` does not have exactly 9 columns and 1 otherwise;
16 |      * the launcher's own return value is not inspected.
17 |      */
18 | 
19 |     /* Raw pointers handed to the launcher. */
20 |     float * feature_ptr = THCudaTensor_data(state, features);
21 |     float * roi_ptr = THCudaTensor_data(state, rois);
22 |     float * out_ptr = THCudaTensor_data(state, output);
23 |     int * argmax_ptr = THCudaIntTensor_data(state, argmax);
24 | 
25 |     /* Each ring ROI row must hold exactly nine values. */
26 |     const int roi_count = THCudaTensor_size(state, rois, 0);
27 |     const int roi_row_len = THCudaTensor_size(state, rois, 1);
28 |     if (roi_row_len != 9)
29 |     {
30 |         return 0;
31 |     }
32 | 
33 |     /* Sizes are read as dim 1 = channels, dim 2 = height, dim 3 = width;
34 |      * dim 0 (batch) is not restricted here. */
35 |     const int channels = THCudaTensor_size(state, features, 1);
36 |     const int height = THCudaTensor_size(state, features, 2);
37 |     const int width = THCudaTensor_size(state, features, 3);
38 | 
39 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
40 | 
41 |     ROIRingPoolForwardLaucher(
42 |         feature_ptr, spatial_scale, roi_count, height,
43 |         width, channels, pooled_height,
44 |         pooled_width, roi_ptr,
45 |         out_ptr, argmax_ptr, cur_stream);
46 |     return 1;
47 | }
48 | 
49 | int roi_ring_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 |     /*
53 |      * GPU entry point for the backward pass: passes `top_grad`, the ring
54 |      * ROIs, the stored argmax and the `bottom_grad` buffer to
55 |      * ROIRingPoolBackwardLaucher. All sizes are taken from `bottom_grad`.
56 |      *
57 |      * Returns 0 when `rois` does not have exactly 9 columns and 1 otherwise;
58 |      * the launcher's own return value is not inspected.
59 |      */
60 |     float * top_grad_ptr = THCudaTensor_data(state, top_grad);
61 |     float * roi_ptr = THCudaTensor_data(state, rois);
62 |     float * bottom_grad_ptr = THCudaTensor_data(state, bottom_grad);
63 |     int * argmax_ptr = THCudaIntTensor_data(state, argmax);
64 | 
65 |     /* Each ring ROI row must hold exactly nine values. */
66 |     const int roi_count = THCudaTensor_size(state, rois, 0);
67 |     const int roi_row_len = THCudaTensor_size(state, rois, 1);
68 |     if (roi_row_len != 9)
69 |     {
70 |         return 0;
71 |     }
72 | 
73 |     /* bottom_grad sizes are read as dim 0 = batch, dim 1 = channels,
74 |      * dim 2 = height, dim 3 = width. */
75 |     const int batch = THCudaTensor_size(state, bottom_grad, 0);
76 |     const int channels = THCudaTensor_size(state, bottom_grad, 1);
77 |     const int height = THCudaTensor_size(state, bottom_grad, 2);
78 |     const int width = THCudaTensor_size(state, bottom_grad, 3);
79 | 
80 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
81 | 
82 |     ROIRingPoolBackwardLaucher(
83 |         top_grad_ptr, spatial_scale, batch, roi_count, height,
84 |         width, channels, pooled_height,
85 |         pooled_width, roi_ptr,
86 |         bottom_grad_ptr, argmax_ptr, cur_stream);
87 |     return 1;
88 | }
89 | 


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_ring_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 | int roi_ring_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
4 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_kernel.cu.o


--------------------------------------------------------------------------------
/lib/layer_utils/roi_ring_pooling/src/roi_ring_pooling_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_RING_POOLING_KERNEL
 2 | #define _ROI_RING_POOLING_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | int ROIRingPoolForwardLaucher(
 9 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 |     const int width, const int channels, const int pooled_height,
11 |     const int pooled_width, const float* bottom_rois,
12 |     float* top_data, int* argmax_data, cudaStream_t stream);
13 | 
14 | 
15 | int ROIRingPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 |     const int height, const int width, const int channels, const int pooled_height,
17 |     const int pooled_width, const float* bottom_rois,
18 |     float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif


--------------------------------------------------------------------------------
/lib/layer_utils/snippets.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Tensorflow Faster R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Xinlei Chen
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | from layer_utils.generate_anchors import generate_anchors
12 | 
def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)):
  """Replicate the base anchors at every cell of a height x width grid.

  The base anchors come from generate_anchors(); each grid cell contributes
  an (x, y, x, y) offset equal to its column/row index times feat_stride.

  Returns a tuple (anchors, length): anchors is a float32 array with one row
  of 4 values per (cell, base anchor) pair, and length is its row count as
  an np.int32.
  """
  base = generate_anchors(ratios=np.array(anchor_ratios), scales=np.array(anchor_scales))
  num_base = base.shape[0]

  # Per-cell offsets; meshgrid makes x vary fastest, so rows are ordered
  # row-by-row over the grid.
  xs = np.arange(0, width) * feat_stride
  ys = np.arange(0, height) * feat_stride
  grid_x, grid_y = np.meshgrid(xs, ys)
  flat_x = grid_x.ravel()
  flat_y = grid_y.ravel()
  offsets = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()
  num_cells = offsets.shape[0]

  # Broadcast (1, A, 4) against (K, 1, 4) to get every cell/anchor combination.
  tiled = base.reshape((1, num_base, 4)) + offsets.reshape((1, num_cells, 4)).transpose((1, 0, 2))
  all_anchors = tiled.reshape((num_cells * num_base, 4)).astype(np.float32, copy=False)

  return all_anchors, np.int32(all_anchors.shape[0])
30 | 


--------------------------------------------------------------------------------
/lib/layers/__pycache__/recurrent_linear.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/layers/__pycache__/recurrent_linear.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
 1 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
 2 |            -gencode arch=compute_35,code=sm_35 \
 3 |            -gencode arch=compute_50,code=sm_50 \
 4 |            -gencode arch=compute_52,code=sm_52 \
 5 |            -gencode arch=compute_60,code=sm_60 \
 6 |            -gencode arch=compute_61,code=sm_61 \
 7 |            -gencode arch=compute_70,code=sm_70 \
 8 |            -gencode arch=compute_70,code=compute_70
 9 |            "
10 | 
11 | # Build RoiPooling module
12 | cd layer_utils/roi_pooling/src
13 | echo "Compiling roi_pooling kernels by nvcc..."
14 | nvcc -c -o roi_pooling_kernel.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 
15 | cd ..
16 | python build.py
17 | cd ../../
18 | 
19 | # Build RoiRingpooling module
20 | cd layer_utils/roi_ring_pooling/src
21 | echo "Compiling roi_ring_pooling kernels by nvcc"
22 | nvcc -c -o roi_ring_pooling_kernel.cu.o roi_ring_pooling_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH 
23 | cd ..
24 | python build.py
25 | 
26 | # Build RoIAlign
27 | cd layer_utils/roi_align/src/cuda
28 | echo 'Compiling crop_and_resize kernels by nvcc...'
29 | nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
30 | cd ../../
31 | python build.py
32 | cd ../../
33 | 
34 | # Build NMS
35 | cd nms/src/cuda
36 | echo "Compiling nms kernels by nvcc..."
37 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH
38 | cd ../../
39 | python build.py
40 | cd ../
41 | 


--------------------------------------------------------------------------------
/lib/model/__init__.py:
--------------------------------------------------------------------------------
1 | from . import config
2 | 


--------------------------------------------------------------------------------
/lib/model/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/apmetric.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/apmetric.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/bbox_transform.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/bbox_transform.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/bbox_transform.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/bbox_transform.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/config.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/config.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/nms_wrapper.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/nms_wrapper.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/nms_wrapper.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/nms_wrapper.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/test.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/test.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/test.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/test.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/train_val.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/train_val.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/model/__pycache__/train_val.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/model/__pycache__/train_val.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/model/apmetric.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Thu Nov 22 16:14:59 2018
 5 | 
 6 | @author: vasgaoweithu
 7 | """
 8 | 
 9 | import torch
10 | import numpy as np
11 | from copy import deepcopy
12 | import math
13 | 
class AveragePrecisionMeter(object):
    """Accumulates per-class scores and labels and reports average precision.

    Scores and targets are appended with add() and kept as 2D tensors with
    one row per example and one column per class. value() returns one
    average-precision number per class column.

    Label convention used by average_precision(): 1 marks a positive; when
    difficult_examples is true, examples labelled 0 are skipped entirely;
    every other example counts as a negative.
    """

    def __init__(self, difficult_examples=False):
        super(AveragePrecisionMeter, self).__init__()
        self.reset()
        self.difficult_examples = difficult_examples

    def reset(self):
        """Drop all accumulated scores and targets."""
        self.scores = torch.FloatTensor(torch.FloatStorage())
        self.targets = torch.LongTensor(torch.LongStorage())

    def add(self, output, target):
        """Append a batch of scores and labels.

        Args:
            output: tensor or numpy array of scores, 1D (single class) or 2D
                with one column per class.
            target: tensor or numpy array of labels with the same layout.
        """
        if not torch.is_tensor(output):
            output = torch.from_numpy(output)
        if not torch.is_tensor(target):
            target = torch.from_numpy(target)

        if output.dim() == 1:
            output = output.view(-1, 1)
        else:
            assert output.dim() == 2, \
                'wrong output size (should be 1D or 2D with one column \
                per class)'
        if target.dim() == 1:
            target = target.view(-1, 1)
        else:
            assert target.dim() == 2, \
                'wrong target size (should be 1D or 2D with one column \
                per class)'
        if self.scores.numel() > 0:
            assert target.size(1) == self.targets.size(1), \
                'dimensions for output should match previously added examples.'

        # make sure storage is of sufficient size (grow by 1.5x plus the batch)
        if self.scores.storage().size() < self.scores.numel() + output.numel():
            new_size = math.ceil(self.scores.storage().size() * 1.5)
            self.scores.storage().resize_(int(new_size + output.numel()))
            self.targets.storage().resize_(int(new_size + output.numel()))

        # store scores and targets after the rows that are already present
        offset = self.scores.size(0) if self.scores.dim() > 0 else 0
        self.scores.resize_(offset + output.size(0), output.size(1))
        self.targets.resize_(offset + target.size(0), target.size(1))
        self.scores.narrow(0, offset, output.size(0)).copy_(output)
        self.targets.narrow(0, offset, target.size(0)).copy_(target)

    def value(self):
        """Return a 1D tensor with the average precision of every class.

        Returns the integer 0 when nothing has been added yet.
        """
        if self.scores.numel() == 0:
            return 0
        ap = torch.zeros(self.scores.size(1))

        # compute average precision for each class column
        for k in range(self.scores.size(1)):
            scores = self.scores[:, k]
            targets = self.targets[:, k]
            ap[k] = AveragePrecisionMeter.average_precision(scores, targets, self.difficult_examples)
        return ap

    @staticmethod
    def average_precision(output, target, difficult_examples=True):
        """Average precision of one class.

        Args:
            output: 1D tensor of scores.
            target: 1D tensor of labels aligned with output.
            difficult_examples: when true, examples labelled 0 are ignored.

        Returns:
            The mean of precision@i over the positions of the positives in
            the score-descending ranking, or 0. when there are no positives
            (the original code raised ZeroDivisionError in that case).
        """
        # rank examples by decreasing score
        _, indices = torch.sort(output, dim=0, descending=True)

        # accumulate precision@i at every positive
        pos_count = 0.
        total_count = 0.
        precision_at_i = 0.
        for i in indices:
            label = target[i]
            if difficult_examples and label == 0:
                continue
            if label == 1:
                pos_count += 1
            total_count += 1
            if label == 1:
                precision_at_i += pos_count / total_count

        # no positives: AP is undefined, report 0 instead of dividing by zero
        if pos_count == 0:
            return 0.
        precision_at_i /= pos_count
        return precision_at_i


--------------------------------------------------------------------------------
/lib/model/bbox_transform.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | import numpy as np
12 | import torch
13 | 
def bbox_transform(ex_rois, gt_rois):
  """Compute regression targets that map ex_rois onto gt_rois.

  Both inputs are indexed as [:, 0..3] = (x1, y1, x2, y2). The result has one
  row per box with columns (dx, dy, dw, dh): centre offsets divided by the
  example box size, and the log of the size ratios.
  """
  def _size_and_centre(rois):
    # Sizes use the inclusive-pixel convention (+1).
    w = rois[:, 2] - rois[:, 0] + 1.0
    h = rois[:, 3] - rois[:, 1] + 1.0
    cx = rois[:, 0] + 0.5 * w
    cy = rois[:, 1] + 0.5 * h
    return w, h, cx, cy

  ex_w, ex_h, ex_cx, ex_cy = _size_and_centre(ex_rois)
  gt_w, gt_h, gt_cx, gt_cy = _size_and_centre(gt_rois)

  dx = (gt_cx - ex_cx) / ex_w
  dy = (gt_cy - ex_cy) / ex_h
  dw = torch.log(gt_w / ex_w)
  dh = torch.log(gt_h / ex_h)

  return torch.stack((dx, dy, dw, dh), 1)
33 | 
34 | 
def bbox_transform_inv(boxes, deltas):
  """Apply regression deltas to boxes and return the predicted boxes.

  boxes is indexed as [:, 0..3] = (x1, y1, x2, y2); deltas holds repeating
  groups of (dx, dy, dw, dh) along its second dimension. Both inputs should
  be of the same kind and on the same device. For an empty boxes input a
  zeroed, detached copy of deltas is returned.
  """
  if len(boxes) == 0:
    return deltas.detach() * 0

  box_w = boxes[:, 2] - boxes[:, 0] + 1.0
  box_h = boxes[:, 3] - boxes[:, 1] + 1.0
  # Column vectors so they broadcast over every delta group of a box.
  w_col = box_w.unsqueeze(1)
  h_col = box_h.unsqueeze(1)
  cx_col = (boxes[:, 0] + 0.5 * box_w).unsqueeze(1)
  cy_col = (boxes[:, 1] + 0.5 * box_h).unsqueeze(1)

  new_cx = deltas[:, 0::4] * w_col + cx_col
  new_cy = deltas[:, 1::4] * h_col + cy_col
  new_w = torch.exp(deltas[:, 2::4]) * w_col
  new_h = torch.exp(deltas[:, 3::4]) * h_col

  half_w = 0.5 * new_w
  half_h = 0.5 * new_h
  # Interleave the four corner coordinates per group, then flatten the groups.
  corners = torch.stack(
    (new_cx - half_w, new_cy - half_h, new_cx + half_w, new_cy + half_h), 2)

  return corners.view(len(boxes), -1)
62 | 
63 | 
def clip_boxes(boxes, im_shape):
  """
  Clip boxes to image boundaries.

  boxes must be a tensor whose second dimension holds repeating groups of
  (x1, y1, x2, y2); im_shape is indexed as (height, width, ...). x values are
  clamped to [0, width - 1] and y values to [0, height - 1]. The result has
  the same shape as the input.

  An empty input is returned unchanged: bbox_transform_inv yields a
  zero-row tensor for empty proposals, and view(0, -1, 4) cannot infer the
  missing dimension for a tensor without elements.
  """
  if boxes.numel() == 0:
    return boxes

  max_x = im_shape[1] - 1
  max_y = im_shape[0] - 1
  num_boxes = boxes.size(0)

  grouped = boxes.view(num_boxes, -1, 4)
  clipped = torch.stack(
    [grouped[:,:,0].clamp(0, max_x),
     grouped[:,:,1].clamp(0, max_y),
     grouped[:,:,2].clamp(0, max_x),
     grouped[:,:,3].clamp(0, max_y)], 2)

  return clipped.view(num_boxes, -1)
81 | 


--------------------------------------------------------------------------------
/lib/model/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | from __future__ import absolute_import
 8 | from __future__ import division
 9 | from __future__ import print_function
10 | 
11 | from nms.pth_nms import pth_nms
12 | 
13 | 
def nms(dets, thresh):
  """Run non-maximum suppression on dets (a tensor) with the given threshold.

  Thin wrapper that forwards both arguments to pth_nms and returns its
  result unchanged."""
  kept = pth_nms(dets, thresh)
  return kept
18 | 


--------------------------------------------------------------------------------
/lib/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__init__.py


--------------------------------------------------------------------------------
/lib/nets/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/mobilenet_v1.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/mobilenet_v1.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/mobilenet_v1.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/network.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/network.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/network.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/network.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/resnet_v1.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/resnet_v1.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/resnet_v1.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/resnet_v1.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/vgg16.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/vgg16.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nets/__pycache__/vgg16.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nets/__pycache__/vgg16.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nets/resnet_v1.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Tensorflow Faster R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Zheqi He and Xinlei Chen
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | from nets.network import Network
 11 | from model.config import cfg
 12 | 
 13 | import utils.timer
 14 | 
 15 | import torch
 16 | import torch.nn as nn
 17 | import torch.nn.functional as F
 18 | from torch.autograd import Variable
 19 | import math
 20 | import torch.utils.model_zoo as model_zoo
 21 | 
 22 | import torchvision
 23 | from torchvision.models.resnet import BasicBlock, Bottleneck
 24 | 
 25 | class ResNet(torchvision.models.resnet.ResNet):
 26 |   def __init__(self, block, layers, num_classes=1000):
 27 |     self.inplanes = 64
 28 |     super(ResNet, self).__init__(block, layers, num_classes)
 29 |     # change to match the caffe resnet
 30 |     for i in range(2, 4):
 31 |       getattr(self, 'layer%d'%i)[0].conv1.stride = (2,2)
 32 |       getattr(self, 'layer%d'%i)[0].conv2.stride = (1,1)
 33 |     # use stride 1 for the last conv4 layer (same as tf-faster-rcnn)
 34 |     self.layer4[0].conv2.stride = (1,1)
 35 |     self.layer4[0].downsample[0].stride = (1,1)
 36 | 
 37 |     del self.avgpool, self.fc
 38 | 
 39 | 
 40 | def resnet18(pretrained=False):
 41 |   """Constructs a ResNet-18 model.
 42 |   Args:
 43 |     pretrained (bool): If True, returns a model pre-trained on ImageNet
 44 |   """
 45 |   model = ResNet(BasicBlock, [2, 2, 2, 2])
 46 |   if pretrained:
 47 |     model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
 48 |   return model
 49 | 
 50 | 
 51 | def resnet34(pretrained=False):
 52 |   """Constructs a ResNet-34 model.
 53 |   Args:
 54 |     pretrained (bool): If True, returns a model pre-trained on ImageNet
 55 |   """
 56 |   model = ResNet(BasicBlock, [3, 4, 6, 3])
 57 |   if pretrained:
 58 |     model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
 59 |   return model
 60 | 
 61 | 
 62 | def resnet50(pretrained=False):
 63 |   """Constructs a ResNet-50 model.
 64 |   Args:
 65 |     pretrained (bool): If True, returns a model pre-trained on ImageNet
 66 |   """
 67 |   model = ResNet(Bottleneck, [3, 4, 6, 3])
 68 |   if pretrained:
 69 |     model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
 70 |   return model
 71 | 
 72 | 
 73 | def resnet101(pretrained=False):
 74 |   """Constructs a ResNet-101 model.
 75 |   Args:
 76 |     pretrained (bool): If True, returns a model pre-trained on ImageNet
 77 |   """
 78 |   model = ResNet(Bottleneck, [3, 4, 23, 3])
 79 |   if pretrained:
 80 |     model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
 81 |   return model
 82 | 
 83 | 
 84 | def resnet152(pretrained=False):
 85 |   """Constructs a ResNet-152 model.
 86 |   Args:
 87 |     pretrained (bool): If True, returns a model pre-trained on ImageNet
 88 |   """
 89 |   model = ResNet(Bottleneck, [3, 8, 36, 3])
 90 |   if pretrained:
 91 |     model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
 92 |   return model
 93 | 
 94 | class resnetv1(Network):
 95 |   def __init__(self, num_layers=50):
 96 |     Network.__init__(self)
 97 |     self._feat_stride = [16, ]
 98 |     self._feat_compress = [1. / float(self._feat_stride[0]), ]
 99 |     self._num_layers = num_layers
100 |     self._net_conv_channels = 1024
101 |     self._fc7_channels = 2048
102 | 
103 |   def _crop_pool_layer(self, bottom, rois):
104 |     return Network._crop_pool_layer(self, bottom, rois, cfg.RESNET.MAX_POOL)
105 | 
106 |   def _image_to_head(self):
107 |     net_conv = self._layers['head'](self._image)
108 |     self._act_summaries['conv'] = net_conv
109 | 
110 |     return net_conv
111 | 
112 |   def _head_to_tail(self, pool5):
113 |     fc7 = self.resnet.layer4(pool5).mean(3).mean(2) # average pooling after layer4
114 |     return fc7
115 | 
116 |   def _init_head_tail(self):
117 |     # choose different blocks for different number of layers
118 |     if self._num_layers == 50:
119 |       self.resnet = resnet50()
120 | 
121 |     elif self._num_layers == 101:
122 |       self.resnet = resnet101()
123 | 
124 |     elif self._num_layers == 152:
125 |       self.resnet = resnet152()
126 | 
127 |     else:
128 |       # other numbers are not supported
129 |       raise NotImplementedError
130 | 
131 |     # Fix blocks 
132 |     for p in self.resnet.bn1.parameters(): p.requires_grad=False
133 |     for p in self.resnet.conv1.parameters(): p.requires_grad=False
134 |     assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
135 |     if cfg.RESNET.FIXED_BLOCKS >= 3:
136 |       for p in self.resnet.layer3.parameters(): p.requires_grad=False
137 |     if cfg.RESNET.FIXED_BLOCKS >= 2:
138 |       for p in self.resnet.layer2.parameters(): p.requires_grad=False
139 |     if cfg.RESNET.FIXED_BLOCKS >= 1:
140 |       for p in self.resnet.layer1.parameters(): p.requires_grad=False
141 | 
142 |     def set_bn_fix(m):
143 |       classname = m.__class__.__name__
144 |       if classname.find('BatchNorm') != -1:
145 |         for p in m.parameters(): p.requires_grad=False
146 | 
147 |     self.resnet.apply(set_bn_fix)
148 | 
149 |     # Build resnet.
150 |     self._layers['head'] = nn.Sequential(self.resnet.conv1, self.resnet.bn1,self.resnet.relu, 
151 |       self.resnet.maxpool,self.resnet.layer1,self.resnet.layer2,self.resnet.layer3)
152 | 
153 |   def train(self, mode=True):
154 |     # Override train so that the training mode is set as we want
155 |     nn.Module.train(self, mode)
156 |     if mode:
157 |       # Set fixed blocks to be in eval mode (not really doing anything)
158 |       self.resnet.eval()
159 |       if cfg.RESNET.FIXED_BLOCKS <= 3:
160 |         self.resnet.layer4.train()
161 |       if cfg.RESNET.FIXED_BLOCKS <= 2:
162 |         self.resnet.layer3.train()
163 |       if cfg.RESNET.FIXED_BLOCKS <= 1:
164 |         self.resnet.layer2.train()
165 |       if cfg.RESNET.FIXED_BLOCKS == 0:
166 |         self.resnet.layer1.train()
167 | 
168 |       # Set batchnorm always in eval mode during training
169 |       def set_bn_eval(m):
170 |         classname = m.__class__.__name__
171 |         if classname.find('BatchNorm') != -1:
172 |           m.eval()
173 | 
174 |       self.resnet.apply(set_bn_eval)
175 | 
176 |   def load_pretrained_cnn(self, state_dict):
177 |     self.resnet.load_state_dict({k: state_dict[k] for k in list(self.resnet.state_dict())})
178 | 


--------------------------------------------------------------------------------
/lib/nets/vgg16.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Tensorflow Faster R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Xinlei Chen
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | from nets.network import Network
11 | from model.config import cfg
12 | 
13 | import torch
14 | import torch.nn as nn
15 | import torch.nn.functional as F
16 | from torch.autograd import Variable
17 | import math
18 | import torchvision.models as models
19 | 
class MELM_vgg16(Network):
  """Network variant whose backbone is torchvision's VGG-16.

  The convolutional features (minus the final max-pool) form the 'head'
  applied to the image; the classifier without its last layer forms the tail
  applied to pooled RoI features.
  """
  def __init__(self):
    Network.__init__(self)
    self._feat_stride = [16, ]
    self._feat_compress = [1. / float(self._feat_stride[0]), ]
    self._net_conv_channels = 512
    self._fc7_channels = 4096

  def _init_head_tail(self):
    """Create the VGG-16 backbone, freeze its early layers and build the head."""
    self.vgg = models.vgg16()
    # Remove fc8
    self.vgg.classifier = nn.Sequential(*list(self.vgg.classifier._modules.values())[:-1])

    # Fix the layers before conv3 (the first ten entries of vgg.features):
    for layer in range(10):
      for p in self.vgg.features[layer].parameters(): p.requires_grad = False

    # not using the last maxpool layer
    self._layers['head'] = nn.Sequential(*list(self.vgg.features._modules.values())[:-1])

  def _image_to_head(self):
    """Run the head on the stored image and record the result for summaries."""
    net_conv = self._layers['head'](self._image)
    self._act_summaries['conv'] = net_conv

    return net_conv

  def _head_to_tail(self, pool5):
    """Flatten each pooled feature, run the classifier and store it as 'fc7'."""
    pool5_flat = pool5.view(pool5.size(0), -1)
    fc7 = self.vgg.classifier(pool5_flat)
    self._predictions['fc7'] = fc7

    return fc7

  def load_pretrained_cnn(self, state_dict):
    """Load the entries of state_dict whose keys the backbone also defines.

    Checkpoint keys that the backbone does not have (for example the removed
    last classifier layer) are ignored.
    """
    # Build the backbone's state dict once; the original rebuilt it for
    # every checkpoint key.
    own_keys = self.vgg.state_dict()
    self.vgg.load_state_dict({k:v for k,v in state_dict.items() if k in own_keys})


--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__init__.py


--------------------------------------------------------------------------------
/lib/nms/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nms/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nms/__pycache__/pth_nms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/pth_nms.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nms/__pycache__/pth_nms.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/__pycache__/pth_nms.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/nms/_ext/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nms/_ext/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/nms/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/nms/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/nms/_ext/nms/_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/_ext/nms/_nms.so


--------------------------------------------------------------------------------
/lib/nms/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = ['src/nms.c']
 7 | headers = ['src/nms.h']
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/nms_cuda.c']
14 |     headers += ['src/nms_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/nms_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.nms',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects
31 | )
32 | 
33 | if __name__ == '__main__':
34 |     ffi.build()
35 | 


--------------------------------------------------------------------------------
/lib/nms/pth_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from ._ext import nms
 3 | import numpy as np
 4 | 
 5 | def pth_nms(dets, thresh):
 6 |   """
 7 |   dets has to be a tensor
 8 |   """
 9 |   if not dets.is_cuda:
10 |     x1 = dets[:, 0]
11 |     y1 = dets[:, 1]
12 |     x2 = dets[:, 2]
13 |     y2 = dets[:, 3]
14 |     scores = dets[:, 4]
15 | 
16 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
17 |     order = scores.sort(0, descending=True)[1]
18 |     # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long()
19 | 
20 |     keep = torch.LongTensor(dets.size(0))
21 |     num_out = torch.LongTensor(1)
22 |     nms.cpu_nms(keep, num_out, dets, order, areas, thresh)
23 | 
24 |     return keep[:num_out[0]]
25 |   else:
26 |     x1 = dets[:, 0]
27 |     y1 = dets[:, 1]
28 |     x2 = dets[:, 2]
29 |     y2 = dets[:, 3]
30 |     scores = dets[:, 4]
31 | 
32 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
33 |     order = scores.sort(0, descending=True)[1]
34 |     # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda()
35 | 
36 |     dets = dets[order].contiguous()
37 | 
38 |     keep = torch.LongTensor(dets.size(0))
39 |     num_out = torch.LongTensor(1)
40 |     # keep = torch.cuda.LongTensor(dets.size(0))
41 |     # num_out = torch.cuda.LongTensor(1)
42 |     nms.gpu_nms(keep, num_out, dets, thresh)
43 | 
44 |     return order[keep[:num_out[0]].cuda()].contiguous()
45 |     # return order[keep[:num_out[0]]].contiguous()
46 | 
47 | 


--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | #include <math.h>
12 | #include <stdio.h>
13 | #include <float.h>
14 | #include "nms_kernel.h"
15 | 
// Intersection-over-union of two boxes stored as (x1, y1, x2, y2, ...).
// Widths and heights use the inclusive "+1" pixel convention; boxes that do
// not overlap contribute an intersection of zero.
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the intersection rectangle.
  const float left = fmaxf(a[0], b[0]);
  const float right = fminf(a[2], b[2]);
  const float top = fmaxf(a[1], b[1]);
  const float bottom = fminf(a[3], b[3]);

  // Clamp at zero so disjoint boxes do not yield a negative extent.
  const float width = fmaxf(right - left + 1, 0.f);
  const float height = fmaxf(bottom - top + 1, 0.f);
  const float interS = width * height;

  const float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}
25 | 
// Builds the pairwise suppression bitmask for NMS.
//
// Boxes are processed in tiles of threadsPerBlock. blockIdx.y picks the tile
// of "row" boxes (one thread per row box) and blockIdx.x picks the tile of
// "column" boxes each row box is compared against. For row box r and column
// tile c, bit k of dev_mask[r * col_blocks + c] is set when the IoU of box r
// and box (c * threadsPerBlock + k) exceeds nms_overlap_thresh.
//
// dev_boxes is read with a fixed stride of 5 floats per box.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_tile = blockIdx.y;
  const int col_tile = blockIdx.x;

  // Number of valid boxes in each tile; the last tile may be partial.
  const int rows_in_tile =
        fminf(n_boxes - row_tile * threadsPerBlock, threadsPerBlock);
  const int cols_in_tile =
        fminf(n_boxes - col_tile * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory, one box per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < cols_in_tile) {
    const float *src =
        dev_boxes + (threadsPerBlock * col_tile + threadIdx.x) * 5;
    float *dst = block_boxes + threadIdx.x * 5;
    for (int k = 0; k < 5; ++k) {
      dst[k] = src[k];
    }
  }
  __syncthreads();

  // Threads beyond the end of a partial row tile have no box to process.
  if (threadIdx.x >= rows_in_tile) {
    return;
  }

  const int cur_box_idx = threadsPerBlock * row_tile + threadIdx.x;
  const float *cur_box = dev_boxes + cur_box_idx * 5;

  // On the diagonal tile a box is only compared with boxes that come after
  // it, which also skips the comparison of a box with itself.
  const int first_col = (row_tile == col_tile) ? threadIdx.x + 1 : 0;

  unsigned long long overlap_bits = 0;
  for (int c = first_col; c < cols_in_tile; ++c) {
    if (devIoU(cur_box, block_boxes + c * 5) > nms_overlap_thresh) {
      overlap_bits |= 1ULL << c;
    }
  }

  const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
  dev_mask[cur_box_idx * col_blocks + col_tile] = overlap_bits;
}
71 | 
72 | 
// Host-side launcher for nms_kernel.
//
//   boxes_num          number of boxes in boxes_dev
//   boxes_dev          device pointer to the boxes, 5 floats per box
//   mask_dev           device pointer receiving
//                      boxes_num * DIVUP(boxes_num, threadsPerBlock) mask words
//   nms_overlap_thresh IoU threshold passed through to the kernel
//
// The launch is asynchronous and no error status is returned to the caller.
void _nms(int boxes_num, float * boxes_dev,
          unsigned long long * mask_dev, float nms_overlap_thresh) {
  // With no boxes the grid would be 0 x 0, which is not a valid launch
  // configuration; there is nothing to compute, so skip the launch instead
  // of leaving a pending CUDA error behind for a later call to report.
  if (boxes_num <= 0) {
    return;
  }

  // One block per (row tile, column tile) pair, one thread per box in a tile.
  const int tiles = DIVUP(boxes_num, threadsPerBlock);
  dim3 blocks(tiles, tiles);
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
}
84 | 
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 | 


--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/nms/src/cuda/nms_kernel.cu.o


--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _NMS_KERNEL
 2 | #define _NMS_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
10 | 
11 | void _nms(int boxes_num, float * boxes_dev,
12 |           unsigned long long * mask_dev, float nms_overlap_thresh);
13 | 
14 | #ifdef __cplusplus
15 | }
16 | #endif
17 | 
18 | #endif
19 | 
20 | 


--------------------------------------------------------------------------------
/lib/nms/src/nms.c:
--------------------------------------------------------------------------------
 1 | #include <TH/TH.h>
 2 | #include <math.h>
 3 | 
 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) {
 5 |     // boxes has to be sorted
 6 |     THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous");
 7 |     THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous");
 8 |     THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous");
 9 |     THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous");
10 |     // Number of ROIs
11 |     long boxes_num = THFloatTensor_size(boxes, 0);
12 |     long boxes_dim = THFloatTensor_size(boxes, 1);
13 | 
14 |     long * keep_out_flat = THLongTensor_data(keep_out);
15 |     float * boxes_flat = THFloatTensor_data(boxes);
16 |     long * order_flat = THLongTensor_data(order);
17 |     float * areas_flat = THFloatTensor_data(areas);
18 | 
19 |     THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num);
20 |     THByteTensor_fill(suppressed, 0);
21 |     unsigned char * suppressed_flat =  THByteTensor_data(suppressed);
22 | 
23 |     // nominal indices
24 |     int i, j;
25 |     // sorted indices
26 |     int _i, _j;
27 |     // temp variables for box i's (the box currently under consideration)
28 |     float ix1, iy1, ix2, iy2, iarea;
29 |     // variables for computing overlap with box j (lower scoring box)
30 |     float xx1, yy1, xx2, yy2;
31 |     float w, h;
32 |     float inter, ovr;
33 | 
34 |     long num_to_keep = 0;
35 |     for (_i=0; _i < boxes_num; ++_i) {
36 |         i = order_flat[_i];
37 |         if (suppressed_flat[i] == 1) {
38 |             continue;
39 |         }
40 |         keep_out_flat[num_to_keep++] = i;
41 |         ix1 = boxes_flat[i * boxes_dim];
42 |         iy1 = boxes_flat[i * boxes_dim + 1];
43 |         ix2 = boxes_flat[i * boxes_dim + 2];
44 |         iy2 = boxes_flat[i * boxes_dim + 3];
45 |         iarea = areas_flat[i];
46 |         for (_j = _i + 1; _j < boxes_num; ++_j) {
47 |             j = order_flat[_j];
48 |             if (suppressed_flat[j] == 1) {
49 |                 continue;
50 |             }
51 |             xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]);
52 |             yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]);
53 |             xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]);
54 |             yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]);
55 |             w = fmaxf(0.0, xx2 - xx1 + 1);
56 |             h = fmaxf(0.0, yy2 - yy1 + 1);
57 |             inter = w * h;
58 |             ovr = inter / (iarea + areas_flat[j] - inter);
59 |             if (ovr >= nms_overlap_thresh) {
60 |                 suppressed_flat[j] = 1;
61 |             }
62 |         }
63 |     }
64 | 
65 |     long *num_out_flat = THLongTensor_data(num_out);
66 |     *num_out_flat = num_to_keep;
67 |     THByteTensor_free(suppressed);
68 |     return 1;
69 | }


--------------------------------------------------------------------------------
/lib/nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);


--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
 7 | #include <THC/THC.h>
 8 | #include <TH/TH.h>
 9 | #include <math.h>
10 | #include <stdio.h>
11 | 
12 | #include "cuda/nms_kernel.h"
13 | 
14 | 
15 | extern THCState *state;
16 | 
17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) {
18 |   // boxes has to be sorted
19 |   THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous");
20 |   THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous");
21 |   // Number of ROIs
22 |   int boxes_num = THCudaTensor_size(state, boxes, 0);
23 |   int boxes_dim = THCudaTensor_size(state, boxes, 1);
24 | 
25 |   float* boxes_flat = THCudaTensor_data(state, boxes);
26 | 
27 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
28 |   THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks);
29 |   unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);
30 | 
31 |   _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh);
32 | 
33 |   THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks);
34 |   THLongTensor_copyCuda(state, mask_cpu, mask);
35 |   THCudaLongTensor_free(state, mask);
36 | 
37 |   unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);
38 | 
39 |   THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks);
40 |   unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);
41 |   THLongTensor_fill(remv_cpu, 0);
42 | 
43 |   long * keep_flat = THLongTensor_data(keep);
44 |   long num_to_keep = 0;
45 | 
46 |   int i, j;
47 |   for (i = 0; i < boxes_num; i++) {
48 |     int nblock = i / threadsPerBlock;
49 |     int inblock = i % threadsPerBlock;
50 | 
51 |     if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) {
52 |       keep_flat[num_to_keep++] = i;
53 |       unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks;
54 |       for (j = nblock; j < col_blocks; j++) {
55 |         remv_cpu_flat[j] |= p[j];
56 |       }
57 |     }
58 |   }
59 | 
60 |   long * num_out_flat = THLongTensor_data(num_out);
61 |   * num_out_flat = num_to_keep;
62 | 
63 |   THLongTensor_free(mask_cpu);
64 |   THLongTensor_free(remv_cpu);
65 | 
66 |   return 1;
67 | }
68 | 


--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/layer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/layer.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/layer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/layer.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/minibatch.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/minibatch.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/minibatch.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/roidb.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/__pycache__/roidb.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/roi_data_layer/__pycache__/roidb.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/roi_data_layer/layer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick and Xinlei Chen
 6 | # --------------------------------------------------------
 7 | 
 8 | """The data layer used during training to train a Fast R-CNN network.
 9 | 
10 | RoIDataLayer implements a Caffe Python layer.
11 | """
12 | from __future__ import absolute_import
13 | from __future__ import division
14 | from __future__ import print_function
15 | 
16 | from model.config import cfg
17 | from roi_data_layer.minibatch import get_minibatch
18 | import numpy as np
19 | import time
20 | 
class RoIDataLayer(object):
  """Serves training minibatches drawn from a roidb in shuffled order."""

  def __init__(self, roidb, num_classes, random=False):
    """Remember the roidb and draw the first permutation.

    Args:
      roidb: sequence of roidb entries (dicts) to sample from.
      num_classes: forwarded unchanged to ``get_minibatch``.
      random: when true, every shuffle is seeded from the wall clock and the
        global NumPy random state is restored afterwards.
    """
    self._roidb = roidb
    self._num_classes = num_classes
    self._random = random
    self._shuffle_roidb_inds()

  def _shuffle_roidb_inds(self):
    """Draw a new permutation of roidb indices and rewind the cursor."""
    clock_seeded = self._random
    if clock_seeded:
      # Seed from the current time in milliseconds, remembering the global
      # state so this shuffle does not disturb other users of np.random.
      saved_state = np.random.get_state()
      np.random.seed(int(round(time.time() * 1000)) % 4294967295)

    if not cfg.TRAIN.ASPECT_GROUPING:
      self._perm = np.random.permutation(np.arange(len(self._roidb)))
    else:
      # Pair up images of the same orientation: shuffle the wide and the tall
      # images separately, cut the result into consecutive pairs, then
      # shuffle the pairs.
      widths = np.array([entry['width'] for entry in self._roidb])
      heights = np.array([entry['height'] for entry in self._roidb])
      is_wide = (widths >= heights)
      wide_inds = np.where(is_wide)[0]
      tall_inds = np.where(np.logical_not(is_wide))[0]
      pairs = np.reshape(
          np.hstack((np.random.permutation(wide_inds),
                     np.random.permutation(tall_inds))),
          (-1, 2))
      pair_order = np.random.permutation(np.arange(pairs.shape[0]))
      self._perm = np.reshape(pairs[pair_order, :], (-1,))

    if clock_seeded:
      np.random.set_state(saved_state)

    self._cur = 0

  def _get_next_minibatch_inds(self):
    """Return the roidb indices of the next minibatch and advance the cursor.

    A reshuffle happens first whenever the cursor plus one batch would reach
    the end of the roidb.
    """
    batch = cfg.TRAIN.IMS_PER_BATCH
    if self._cur + batch >= len(self._roidb):
      self._shuffle_roidb_inds()

    begin = self._cur
    self._cur = begin + batch
    return self._perm[begin:begin + batch]

  def _get_next_minibatch(self):
    """Build the blobs for the next minibatch via ``get_minibatch``."""
    entries = [self._roidb[idx] for idx in self._get_next_minibatch_inds()]
    return get_minibatch(entries, self._num_classes)

  def forward(self):
    """Return the blobs of the next minibatch."""
    return self._get_next_minibatch()
89 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick and Xinlei Chen
 6 | # --------------------------------------------------------
 7 | 
 8 | """Compute minibatch blobs for training a Fast R-CNN network."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | import numpy as np
14 | import numpy.random as npr
15 | import cv2
16 | from model.config import cfg
17 | from utils.blob import prep_im_for_blob, im_list_to_blob
18 | 
def get_minibatch(roidb, num_classes):
  """Given a roidb, construct a minibatch sampled from it.

  Only a single image per minibatch is supported (asserted below).

  Args:
    roidb: list holding exactly one roidb entry. The entry must provide
      'boxes', 'gt_classes', 'gt_overlaps' (sparse, supports ``toarray()``)
      and 'image_level_labels', plus whatever ``_get_image_blob`` reads.
    num_classes: unused here; kept for interface compatibility.

  Returns:
    dict with the keys
      'data'               image blob returned by ``_get_image_blob``
      'gt_boxes'           float32 (G, 5): scaled x1, y1, x2, y2 and class
      'im_info'            float32 [blob.shape[1], blob.shape[2], scale]
      'image_level_labels' taken unchanged from the roidb entry
      'ss_boxes'           float32 (S, 5): a leading 0 followed by the scaled
                           box, for every box whose gt class is -1
  """
  num_images = len(roidb)
  # Sample random scales to use for each image in this batch
  random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                  size=num_images)
  assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
    'num_images ({}) must divide BATCH_SIZE ({})'. \
    format(num_images, cfg.TRAIN.BATCH_SIZE)

  # Get the input image blob
  im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

  blobs = {'data': im_blob}

  assert len(im_scales) == 1, "Single batch only"
  assert len(roidb) == 1, "Single batch only"

  entry = roidb[0]
  scale = im_scales[0]

  # gt boxes: (x1, y1, x2, y2, cls)
  if cfg.TRAIN.USE_ALL_GT:
    # Include all ground truth boxes
    gt_inds = np.where(entry['gt_classes'] != -1)[0]
  else:
    # For the COCO ground truth boxes, exclude the ones that are ''iscrowd''.
    # The two conditions are parenthesised separately because `&` binds
    # tighter than `!=`; written as `a != 0 & b` the crowd test is ignored.
    not_background = entry['gt_classes'] != 0
    not_crowd = np.all(entry['gt_overlaps'].toarray() > -1.0, axis=1)
    gt_inds = np.where(not_background & not_crowd)[0]
  gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
  gt_boxes[:, 0:4] = entry['boxes'][gt_inds, :] * scale
  gt_boxes[:, 4] = entry['gt_classes'][gt_inds]
  blobs['gt_boxes'] = gt_boxes
  blobs['im_info'] = np.array(
    [im_blob.shape[1], im_blob.shape[2], scale],
    dtype=np.float32)
  blobs['image_level_labels'] = entry['image_level_labels']

  # Add the proposal (ss) boxes: every box whose class is -1, i.e. the gt
  # rois are left out. Column 0 is set to 0 for every row.
  # NOTE(review): column 0 is presumably the batch index of the image -- confirm.
  ss_inds = np.where(entry['gt_classes'] == -1)[0]
  ss_boxes = np.empty((len(ss_inds), 5), dtype=np.float32)
  ss_boxes[:, 1:] = entry['boxes'][ss_inds, :] * scale
  ss_boxes[:, 0] = 0
  blobs['ss_boxes'] = ss_boxes

  return blobs
69 | 
def _get_image_blob(roidb, scale_inds):
  """Load, optionally flip, and rescale every image of the roidb.

  Args:
    roidb: list of entries providing 'image' (path) and 'flipped' (bool).
    scale_inds: per-image index into ``cfg.TRAIN.SCALES``.

  Returns:
    (blob, im_scales): the result of ``im_list_to_blob`` on the processed
    images, and the list of scale factors reported by ``prep_im_for_blob``.
  """
  im_scales = []
  processed_ims = []
  for idx, entry in enumerate(roidb):
    image = cv2.imread(entry['image'])
    if entry['flipped']:
      # Mirror horizontally by reversing the second (column) axis.
      image = image[:, ::-1, :]
    resized, factor = prep_im_for_blob(
        image, cfg.PIXEL_MEANS, cfg.TRAIN.SCALES[scale_inds[idx]],
        cfg.TRAIN.MAX_SIZE)
    im_scales.append(factor)
    processed_ims.append(resized)

  # Create a blob to hold the input images
  return im_list_to_blob(processed_ims), im_scales
91 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | import numpy as np
14 | from model.config import cfg
15 | import PIL
16 | 
def prepare_roidb(imdb):
  """Enrich the imdb's roidb by adding some derived quantities that
  are useful for training.

  For every image the roidb entry gains:
    'image'         path returned by ``imdb.image_path_at(i)``
    'width'/'height' image size read from disk (skipped when the imdb name
                    starts with 'coco')
    'max_overlaps'  per-ROI maximum of 'gt_overlaps' over the class columns
    'max_classes'   per-ROI column index at which that maximum occurs

  The roidb entries are modified in place; nothing is returned.

  Raises:
    AssertionError: if an ROI with a maximum overlap of 0 is assigned a
      class other than 0.
  """
  roidb = imdb.roidb
  is_coco = imdb.name.startswith('coco')
  if not is_coco:
    sizes = []
    for i in range(imdb.num_images):
      # Image.open keeps the file open until the image is closed, so close
      # each one as soon as its size has been read; otherwise a large
      # dataset can run out of file descriptors.
      with PIL.Image.open(imdb.image_path_at(i)) as im:
        sizes.append(im.size)
  for i in range(len(imdb.image_index)):
    roidb[i]['image'] = imdb.image_path_at(i)
    if not is_coco:
      roidb[i]['width'] = sizes[i][0]
      roidb[i]['height'] = sizes[i][1]
    # need gt_overlaps as a dense array for argmax
    gt_overlaps = roidb[i]['gt_overlaps'].toarray()
    # max overlap with gt over classes (columns)
    max_overlaps = gt_overlaps.max(axis=1)
    # gt class that had the max overlap
    max_classes = gt_overlaps.argmax(axis=1)
    roidb[i]['max_classes'] = max_classes
    roidb[i]['max_overlaps'] = max_overlaps
    # sanity checks
    # max overlap of 0 => class should be zero (background)
    zero_inds = np.where(max_overlaps == 0)[0]
    assert all(max_classes[zero_inds] == 0)
    # The converse check (max overlap > 0 => class must be non-zero) is
    # intentionally not enforced here.
48 | 


--------------------------------------------------------------------------------
/lib/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.h
4 | *.hpp
5 | 


--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/bbox.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/bbox.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/bbox.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/bbox.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/blob.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/blob.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/blob.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/blob.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/timer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/timer.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/timer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/timer.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/visualization.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/visualization.cpython-36.pyc


--------------------------------------------------------------------------------
/lib/utils/__pycache__/visualization.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/lib/utils/__pycache__/visualization.cpython-37.pyc


--------------------------------------------------------------------------------
/lib/utils/bbox.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | 
 4 | def bbox_overlaps(boxes, query_boxes):
 5 |     """
 6 |     Parameters
 7 |     ----------
 8 |     boxes: (N, 4) ndarray or tensor or variable
 9 |     query_boxes: (K, 4) ndarray or tensor or variable
10 |     Returns
11 |     -------
12 |     overlaps: (N, K) overlap between boxes and query_boxes
13 |     """
14 |     if isinstance(boxes, np.ndarray):
15 |         boxes = torch.from_numpy(boxes)
16 |         query_boxes = torch.from_numpy(query_boxes)
17 |         out_fn = lambda x: x.numpy() # If input is ndarray, turn the overlaps back to ndarray when return
18 |     else:
19 |         out_fn = lambda x: x
20 | 
21 |     box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \
22 |             (boxes[:, 3] - boxes[:, 1] + 1)
23 |     query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
24 |             (query_boxes[:, 3] - query_boxes[:, 1] + 1)
25 | 
26 |     iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) - torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0)
27 |     ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) - torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0)
28 |     ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih
29 |     overlaps = iw * ih / ua
30 |     return out_fn(overlaps)


--------------------------------------------------------------------------------
/lib/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | import numpy as np
14 | import cv2
15 | 
16 | 
def im_list_to_blob(ims):
  """Stack a list of images into one zero-padded float32 array.

  The result has shape (len(ims), max height, max width, 3). Each image is
  copied into the top-left corner of its slice; the remainder stays zero.
  Images are expected to be prepared already (means subtracted, BGR
  order, ...).
  """
  shapes = np.array([im.shape for im in ims])
  largest = shapes.max(axis=0)
  blob = np.zeros((len(ims), largest[0], largest[1], 3), dtype=np.float32)
  for slot, im in enumerate(ims):
    height, width = im.shape[0], im.shape[1]
    blob[slot, 0:height, 0:width, :] = im

  return blob
31 | 
32 | 
def prep_im_for_blob(im, pixel_means, target_size, max_size):
  """Subtract the pixel means from an image and rescale it.

  The scale maps the shorter side to `target_size`, unless that would push
  the longer side past `max_size`, in which case the longer side is mapped
  to `max_size` instead.

  Returns the resized image and the scale factor that was applied.

  Note: `astype(..., copy=False)` may hand back the caller's own array when
  it is already float32, in which case the mean subtraction happens in place
  on the caller's data.
  """
  im = im.astype(np.float32, copy=False)
  im -= pixel_means

  height_width = im.shape[0:2]
  short_side = np.min(height_width)
  long_side = np.max(height_width)

  im_scale = float(target_size) / float(short_side)
  # Prevent the biggest axis from being more than MAX_SIZE
  if np.round(im_scale * long_side) > max_size:
    im_scale = float(max_size) / float(long_side)

  resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                       interpolation=cv2.INTER_LINEAR)
  return resized, im_scale
48 | 


--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | import torch
10 | 
class Timer(object):
    """A set of named stopwatches that track last, total and mean durations."""

    def __init__(self):
        # Every dict below is keyed by the stopwatch name.
        self._start_time = {}
        self._diff = {}
        self._calls = {}
        self._total_time = {}
        self._average_time = {}

    @staticmethod
    def _wait_for_gpu():
        """Block until queued CUDA work is finished, when CUDA is available."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    def tic(self, name='default'):
        """Start (or restart) the stopwatch called `name`."""
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self._wait_for_gpu()
        self._start_time[name] = time.time()

    def toc(self, name='default', average=True):
        """Stop the stopwatch called `name` and update its statistics.

        Returns the running mean over all calls when `average` is true,
        otherwise the duration of this single interval. Raises KeyError if
        `tic` was never called for `name`.
        """
        self._wait_for_gpu()
        elapsed = time.time() - self._start_time[name]
        total = self._total_time.get(name, 0.) + elapsed
        calls = self._calls.get(name, 0) + 1

        self._diff[name] = elapsed
        self._total_time[name] = total
        self._calls[name] = calls
        self._average_time[name] = total / calls

        if not average:
            return elapsed
        return self._average_time[name]

    def average_time(self, name='default'):
        """Mean interval length recorded so far for `name`."""
        return self._average_time[name]

    def total_time(self, name='default'):
        """Sum of all interval lengths recorded so far for `name`."""
        return self._total_time[name]

# Shared module-level instance.
timer = Timer()
46 | 


--------------------------------------------------------------------------------
/lib/utils/visualization.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Tensorflow Faster R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Xinlei Chen
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | from six.moves import range
12 | import PIL.Image as Image
13 | import PIL.ImageColor as ImageColor
14 | import PIL.ImageDraw as ImageDraw
15 | import PIL.ImageFont as ImageFont
16 | 
# Colour names used to tint boxes; draw_bounding_boxes picks one per class
# with `class_index % NUM_COLORS`.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]

# Size of the palette, used as the modulus when mapping a class to a colour.
NUM_COLORS = len(STANDARD_COLORS)

# Label font: 24pt Arial when that font file can be opened, otherwise PIL's
# built-in default font.
try:
  FONT = ImageFont.truetype('arial.ttf', 24)
except IOError:
  FONT = ImageFont.load_default()
49 | 
def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4):
  """Draw one box outline and its text label onto a PIL image.

  The outline is a closed polyline through the four corners. The label is
  drawn as black text on a `color`-filled rectangle whose bottom edge sits on
  the bottom edge of the box, starting at the box's left side.

  Args:
    image: PIL image to draw on (modified in place).
    xmin, ymin, xmax, ymax: box corners in image coordinates.
    display_str: label text.
    font: PIL font used to measure and render the label.
    color: outline and label-background colour.
    thickness: outline width passed to `draw.line`.

  Returns:
    The same `image` object that was passed in.
  """
  draw = ImageDraw.Draw(image)
  (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  draw.line([(left, top), (left, bottom), (right, bottom),
             (right, top), (left, top)], width=thickness, fill=color)
  text_bottom = bottom

  # `font.getsize` was removed in Pillow 10. Prefer `getbbox`, whose
  # right/bottom edges give the text extent measured from the drawing origin,
  # and only fall back to `getsize` on Pillow versions that lack `getbbox`.
  if hasattr(font, 'getbbox'):
    _, _, text_width, text_height = font.getbbox(display_str)
  else:
    text_width, text_height = font.getsize(display_str)

  margin = np.ceil(0.05 * text_height)
  draw.rectangle(
      [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                        text_bottom)],
      fill=color)
  draw.text(
      (left + margin, text_bottom - text_height - margin),
      display_str,
      fill='black',
      font=font)

  return image
70 | 
def draw_bounding_boxes(image, gt_boxes, im_info):
  """Render every row of `gt_boxes` onto the first image of `image`.

  The first four columns of each row are divided by `im_info[2]` and rounded
  before drawing; column 4 is read as the integer class, which selects the
  colour and appears in the 'N<row>-C<class>' label. `gt_boxes` itself is left
  untouched (a copy is rescaled), while `image[0]` is overwritten with the
  rendered result and `image` is returned.
  """
  scaled = gt_boxes.copy()
  scaled[:, :4] = np.round(scaled[:, :4].copy() / im_info[2])
  canvas = Image.fromarray(np.uint8(image[0]))

  for row_idx, row in enumerate(scaled):
    class_id = int(row[4])
    label = 'N%02d-C%02d' % (row_idx, class_id)
    tint = STANDARD_COLORS[class_id % NUM_COLORS]
    canvas = _draw_single_box(canvas, row[0], row[1], row[2], row[3],
                              label, FONT, color=tint)

  image[0, :] = np.array(canvas)
  return image


--------------------------------------------------------------------------------
/output/train_faster_rcnn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | alias time='/usr/bin/time'
 4 | 
 5 | set -x
 6 | set -e
 7 | 
 8 | export PYTHONUNBUFFERED="True"
 9 | 
10 | GPU_ID=$1
11 | DATASET=$2
12 | NET=$3
13 | 
14 | array=( $@ )
15 | len=${#array[@]}
16 | EXTRA_ARGS=${array[@]:3:$len}
17 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}
18 | 
19 | case ${DATASET} in
20 |   pascal_voc)
21 |     TRAIN_IMDB="voc_2007_trainval"
22 |     TEST_IMDB="voc_2007_test"
23 |     STEPSIZE="[50000]"
24 |     ITERS=100000
25 |     ANCHORS="[8,16,32]"
26 |     RATIOS="[0.5,1,2]"
27 |     ;;
28 |   pascal_voc_0712)
29 |     TRAIN_IMDB="voc_2007_trainval+voc_2012_trainval"
30 |     TEST_IMDB="voc_2007_test"
31 |     STEPSIZE="[80000]"
32 |     ITERS=110000
33 |     ANCHORS="[8,16,32]"
34 |     RATIOS="[0.5,1,2]"
35 |     ;;
36 |   coco)
37 |     TRAIN_IMDB="coco_2014_train+coco_2014_valminusminival"
38 |     TEST_IMDB="coco_2014_minival"
39 |     STEPSIZE="[350000]"
40 |     ITERS=490000
41 |     ANCHORS="[4,8,16,32]"
42 |     RATIOS="[0.5,1,2]"
43 |     ;;
44 |   *)
45 |     echo "No dataset given"
46 |     exit
47 |     ;;
48 | esac
49 | 
50 | LOG="experiments/logs/${NET}_${TRAIN_IMDB}_${EXTRA_ARGS_SLUG}_${NET}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
51 | exec &> >(tee -a "$LOG")
52 | echo Logging output to "$LOG"
53 | 
54 | set +x
55 | if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
56 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/${EXTRA_ARGS_SLUG}/${NET}_faster_rcnn_iter_${ITERS}.pth
57 | else
58 |   NET_FINAL=output/${NET}/${TRAIN_IMDB}/default/${NET}_faster_rcnn_iter_${ITERS}.pth
59 | fi
60 | set -x
61 | 
62 | if [ ! -f ${NET_FINAL}.index ]; then
63 |   if [[ ! -z  ${EXTRA_ARGS_SLUG}  ]]; then
64 |     CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
65 |       --weight data/imagenet_weights/${NET}.pth \
66 |       --imdb ${TRAIN_IMDB} \
67 |       --imdbval ${TEST_IMDB} \
68 |       --iters ${ITERS} \
69 |       --cfg experiments/cfgs/${NET}.yml \
70 |       --tag ${EXTRA_ARGS_SLUG} \
71 |       --net ${NET} \
72 |       --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
73 |       TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
74 |   else
75 |     CUDA_VISIBLE_DEVICES=${GPU_ID} python ./tools/trainval_net.py \
76 |       --weight data/imagenet_weights/${NET}.pth \
77 |       --imdb ${TRAIN_IMDB} \
78 |       --imdbval ${TEST_IMDB} \
79 |       --iters ${ITERS} \
80 |       --cfg experiments/cfgs/${NET}.yml \
81 |       --net ${NET} \
82 |       --set ANCHOR_SCALES ${ANCHORS} ANCHOR_RATIOS ${RATIOS} \
83 |       TRAIN.STEPSIZE ${STEPSIZE} ${EXTRA_ARGS}
84 |   fi
85 | fi
86 | 
87 | echo $@
88 | ./experiments/scripts/test_faster_rcnn.sh $@
89 | 


--------------------------------------------------------------------------------
/tensorboard/vgg16/voc_2007_trainval/default_val/events.out.tfevents.1552263409.vasgaoweithu-Precision-Tower-7910:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/tensorboard/vgg16/voc_2007_trainval/default_val/events.out.tfevents.1552263409.vasgaoweithu-Precision-Tower-7910


--------------------------------------------------------------------------------
/tools/__pycache__/_init_paths.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasgaowei/pytorch_MELM/3fba2bceb605cb90c2f8a5450e8f644a0661c8e8/tools/__pycache__/_init_paths.cpython-36.pyc


--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Fri Nov 16 15:17:33 2018
 5 | 
 6 | @author: vasgaoweithu
 7 | """
 8 | 
 9 | import os.path as osp
10 | import sys
def add_path(path):
    """Put `path` at the front of sys.path unless it is already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)


# Directory containing this file; the paths below are resolved relative to it.
this_dir = osp.dirname(__file__)
lib_path = osp.join(this_dir, '..', 'lib')
coco_path = osp.join(this_dir, '..', 'data', 'coco', 'PythonAPI')

# Make the project's lib/ packages importable.
add_path(lib_path)
# Make the COCO PythonAPI directory importable.
add_path(coco_path)
21 | 


--------------------------------------------------------------------------------
/tools/demo.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # --------------------------------------------------------
  4 | # Tensorflow Faster R-CNN
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Xinlei Chen, based on code from Ross Girshick
  7 | # --------------------------------------------------------
  8 | 
  9 | """
 10 | Demo script showing detections in sample images.
 11 | 
 12 | See README.md for installation instructions before running.
 13 | """
 14 | from __future__ import absolute_import
 15 | from __future__ import division
 16 | from __future__ import print_function
 17 | 
 18 | import _init_paths
 19 | from model.config import cfg
 20 | from model.test import im_detect
 21 | from model.nms_wrapper import nms
 22 | from datasets.factory import get_imdb
 23 | 
 24 | 
 25 | from utils.timer import Timer
 26 | import matplotlib.pyplot as plt
 27 | import numpy as np
 28 | import os, cv2
 29 | import argparse
 30 | 
 31 | from nets.vgg16 import vgg16
 32 | from nets.resnet_v1 import resnetv1
 33 | 
 34 | import torch
 35 | 
 36 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 37 | 
 38 | CLASSES = ('__background__',
 39 |            'aeroplane', 'bicycle', 'bird', 'boat',
 40 |            'bottle', 'bus', 'car', 'cat', 'chair',
 41 |            'cow', 'diningtable', 'dog', 'horse',
 42 |            'motorbike', 'person', 'pottedplant',
 43 |            'sheep', 'sofa', 'train', 'tvmonitor')
 44 | CLASSES = (
 45 |            'aeroplane', 'bicycle', 'bird', 'boat',
 46 |            'bottle', 'bus', 'car', 'cat', 'chair',
 47 |            'cow', 'diningtable', 'dog', 'horse',
 48 |            'motorbike', 'person', 'pottedplant',
 49 |            'sheep', 'sofa', 'train', 'tvmonitor')
 50 | 
 51 | NETS = {'vgg16': ('vgg16_faster_rcnn_iter_%d.pth',),'res101': ('res101_faster_rcnn_iter_%d.pth',)}
 52 | DATASETS= {'pascal_voc': ('voc_2007_trainval',),'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)}
 53 | 
 54 | def vis_detections(im, class_name, dets, thresh=0.5):
 55 |     """Draw detected bounding boxes."""
 56 |     inds = np.where(dets[:, -1] >= thresh)[0]
 57 |     if len(inds) == 0:
 58 |         #print('hahaha')
 59 |         return
 60 | 
 61 |     im = im[:, :, (2, 1, 0)]
 62 |     fig, ax = plt.subplots(figsize=(12, 12))
 63 |     ax.imshow(im, aspect='equal')
 64 |     for i in inds:
 65 |         bbox = dets[i, :4]
 66 |         score = dets[i, -1]
 67 | 
 68 |         ax.add_patch(
 69 |             plt.Rectangle((bbox[0], bbox[1]),
 70 |                           bbox[2] - bbox[0],
 71 |                           bbox[3] - bbox[1], fill=False,
 72 |                           edgecolor='red', linewidth=3.5)
 73 |             )
 74 |         ax.text(bbox[0], bbox[1] - 2,
 75 |                 '{:s} {:.3f}'.format(class_name, score),
 76 |                 bbox=dict(facecolor='blue', alpha=0.5),
 77 |                 fontsize=14, color='white')
 78 | 
 79 |     ax.set_title(('{} detections with '
 80 |                   'p({} | box) >= {:.1f}').format(class_name, class_name,
 81 |                                                   thresh),
 82 |                   fontsize=14)
 83 |     plt.axis('off')
 84 |     plt.tight_layout()
 85 |     plt.draw()
 86 | 
 87 | def demo(net, image_name, roidb):
 88 |     """Detect object classes in an image using pre-computed object proposals."""
 89 | 
 90 |     # Load the demo image
 91 |     #im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
 92 |     im = cv2.imread(image_name)
 93 | 
 94 |     # Detect all object classes and regress object bounds
 95 |     timer = Timer()
 96 |     timer.tic()
 97 |     scores, boxes,_,_ = im_detect(net, im, roidb)
 98 |     timer.toc()
 99 |     print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0]))
100 | 
101 |     # Visualize detections for each class
102 |     CONF_THRESH = 0.3
103 |     NMS_THRESH = 0.3
104 |     for cls_ind, cls in enumerate(CLASSES):
105 |         #cls_ind += 1 # because we skipped background
106 |         cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
107 |         cls_scores = scores[:, cls_ind]
108 |         dets = np.hstack((cls_boxes,
109 |                           cls_scores[:, np.newaxis])).astype(np.float32)
110 |         keep = nms(torch.from_numpy(dets), NMS_THRESH)
111 |         dets = dets[keep.numpy(), :]
112 |         vis_detections(im, cls, dets, thresh=CONF_THRESH)
113 | 
def parse_args():
    """Parse the demo's command-line options (--net and --dataset)."""
    parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo')
    option_specs = (
        ('--net', dict(dest='demo_net',
                       help='Network to use [vgg16 res101]',
                       choices=NETS.keys(), default='res101')),
        ('--dataset', dict(dest='dataset',
                           help='Trained dataset [pascal_voc pascal_voc_0712]',
                           choices=DATASETS.keys(), default='pascal_voc_0712')),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
124 | 
if __name__ == '__main__':
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals
    args = parse_args()

    # model path: output/<net>/<train imdb>/default/<net>_faster_rcnn_iter_<N>.pth
    demonet = args.demo_net
    dataset = args.dataset
    saved_model = os.path.join('output', demonet, DATASETS[dataset][0], 'default',
                              NETS[demonet][0] %(100000 if dataset == 'pascal_voc' else 110000))

    if not os.path.isfile(saved_model):
        raise IOError(('{:s} not found.\nDid you download the proper networks from '
                       'our server and place them properly?').format(saved_model))

    # Previously test_name was only assigned for 'pascal_voc', so the default
    # dataset 'pascal_voc_0712' crashed with a NameError. The training script
    # pairs both PASCAL training sets with voc_2007_test, so use it for both.
    test_name = 'voc_2007_test'

    imdb = get_imdb(test_name)
    roidb = imdb.roidb

    # load network
    # NOTE(review): other entry points in this project build `MELM_vgg16`
    # from nets.vgg16; confirm that `vgg16` is still exported there.
    if demonet == 'vgg16':
        net = vgg16()
    elif demonet == 'res101':
        net = resnetv1(num_layers=101)
    else:
        raise NotImplementedError
    net.create_architecture(20,
                          tag='default', anchor_scales=[8, 16, 32])

    # map_location keeps every tensor on the CPU while loading.
    net.load_state_dict(torch.load(saved_model, map_location=lambda storage, loc: storage))

    net.eval()
    if not torch.cuda.is_available():
        net._device = 'cpu'
    net.to(net._device)

    print('Loaded network {:s}'.format(saved_model))

    # Fixed sample of ten images (indices 10..19). A random sample that used
    # to be drawn here was overwritten before use and has been removed.
    index = np.arange(10, 20)

    for i in index:
        im_path = imdb.image_path_at(i)
        demo(net, im_path, roidb[i])

    plt.show()
183 | 


--------------------------------------------------------------------------------
/tools/test_net.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Tensorflow Faster R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Zheqi he, Xinlei Chen, based on code from Ross Girshick
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import _init_paths
 11 | from model.test import test_net
 12 | from model.train_val import get_training_roidb
 13 | from model.config import cfg, cfg_from_file, cfg_from_list
 14 | from datasets.factory import get_imdb
 15 | import argparse
 16 | import pprint
 17 | import time, os, sys
 18 | 
 19 | from nets.vgg16 import MELM_vgg16
 20 | from nets.resnet_v1 import resnetv1
 21 | from nets.mobilenet_v1 import mobilenetv1
 22 | 
 23 | import torch
 24 | 
 25 | def parse_args():
 26 |   """
 27 |   Parse input arguments
 28 |   """
 29 |   parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
 30 |   parser.add_argument('--cfg', dest='cfg_file',
 31 |             help='optional config file', default=None, type=str)
 32 |   parser.add_argument('--model', dest='model',
 33 |             help='model to test',
 34 |             default=None, type=str)
 35 |   parser.add_argument('--imdb', dest='imdb_name',
 36 |             help='dataset to test',
 37 |             default='voc_2007_test', type=str)
 38 |   parser.add_argument('--comp', dest='comp_mode', help='competition mode',
 39 |             action='store_true')
 40 |   parser.add_argument('--num_dets', dest='max_per_image',
 41 |             help='max number of detections per image',
 42 |             default=100, type=int)
 43 |   parser.add_argument('--tag', dest='tag',
 44 |                         help='tag of the model',
 45 |                         default='', type=str)
 46 |   parser.add_argument('--net', dest='net',
 47 |                       help='vgg16, res50, res101, res152, mobile',
 48 |                       default='res50', type=str)
 49 |   parser.add_argument('--set', dest='set_cfgs',
 50 |                         help='set config keys', default=None,
 51 |                         nargs=argparse.REMAINDER)
 52 | 
 53 |   if len(sys.argv) == 1:
 54 |     parser.print_help()
 55 |     sys.exit(1)
 56 | 
 57 |   args = parser.parse_args()
 58 |   return args
 59 | 
 60 | def combined_roidb(imdb_names):
 61 |   """
 62 |   Combine multiple roidbs
 63 |   """
 64 | 
 65 |   def get_roidb(imdb_name):
 66 |     imdb = get_imdb(imdb_name)
 67 |     print('Loaded dataset `{:s}` for training'.format(imdb.name))
 68 |     imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
 69 |     print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
 70 |     roidb = get_training_roidb(imdb)
 71 |     return roidb
 72 | 
 73 |   roidbs = [get_roidb(s) for s in imdb_names.split('+')]
 74 |   roidb = roidbs[0]
 75 |   if len(roidbs) > 1:
 76 |     for r in roidbs[1:]:
 77 |       roidb.extend(r)
 78 |     tmp = get_imdb(imdb_names.split('+')[1])
 79 |     imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
 80 |   else:
 81 |     imdb = get_imdb(imdb_names)
 82 |   return imdb, roidb
 83 | 
 84 | 
 85 | if __name__ == '__main__':
 86 |   args = parse_args()
 87 | 
 88 |   print('Called with args:')
 89 |   print(args)
 90 | 
 91 |   if args.cfg_file is not None:
 92 |     cfg_from_file(args.cfg_file)
 93 |   if args.set_cfgs is not None:
 94 |     cfg_from_list(args.set_cfgs)
 95 | 
 96 |   print('Using config:')
 97 |   pprint.pprint(cfg)
 98 | 
 99 |   # if has model, get the name from it
100 |   # if does not, then just use the initialization weights
101 |   if args.model:
102 |     filename = os.path.splitext(os.path.basename(args.model))[0]
103 |   else:
104 |     filename = os.path.splitext(os.path.basename(args.weight))[0]
105 | 
106 |   tag = args.tag
107 |   tag = tag if tag else 'default'
108 |   filename = tag + '/' + filename
109 | 
110 |   imdb = get_imdb(args.imdb_name)
111 |   roidb = imdb.roidb
112 |   imdb.competition_mode(args.comp_mode)
113 | 
114 |   # load network
115 |   if args.net == 'vgg16':
116 |     net = MELM_vgg16()
117 |   elif args.net == 'res50':
118 |     net = resnetv1(num_layers=50)
119 |   elif args.net == 'res101':
120 |     net = resnetv1(num_layers=101)
121 |   elif args.net == 'res152':
122 |     net = resnetv1(num_layers=152)
123 |   elif args.net == 'mobile':
124 |     net = mobilenetv1()
125 |   else:
126 |     raise NotImplementedError
127 | 
128 |   # load model
129 |   net.create_architecture(imdb.num_classes, tag='default',
130 |                           anchor_scales=cfg.ANCHOR_SCALES,
131 |                           anchor_ratios=cfg.ANCHOR_RATIOS)
132 | 
133 |   net.eval()
134 |   if not torch.cuda.is_available():
135 |     net._device = 'cpu'
136 |   net.to(net._device)
137 | 
138 |   if args.model:
139 |     print(('Loading model check point from {:s}').format(args.model))
140 |     net.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
141 |     print('Loaded.')
142 |   else:
143 |     print(('Loading initial weights from {:s}').format(args.weight))
144 |     print('Loaded.')
145 | 
146 |   test_net(net, imdb, roidb, filename, max_per_image=args.max_per_image)
147 | 


--------------------------------------------------------------------------------
/tools/trainval_net.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Tensorflow Faster R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Zheqi He, Xinlei Chen, based on code from Ross Girshick
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import _init_paths
 11 | from model.train_val import get_training_roidb, train_net
 12 | from model.config import cfg, cfg_from_file, cfg_from_list, get_output_dir, get_output_tb_dir
 13 | from datasets.factory import get_imdb
 14 | import datasets.imdb
 15 | import argparse
 16 | import pprint
 17 | import numpy as np
 18 | import sys
 19 | 
 20 | from nets.vgg16 import MELM_vgg16
 21 | from nets.resnet_v1 import resnetv1
 22 | from nets.mobilenet_v1 import mobilenetv1
 23 | 
 24 | def parse_args():
 25 |   """
 26 |   Parse input arguments
 27 |   """
 28 |   parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
 29 |   parser.add_argument('--cfg', dest='cfg_file',
 30 |                       help='optional config file',
 31 |                       default=None, type=str)
 32 |   parser.add_argument('--weight', dest='weight',
 33 |                       help='initialize with pretrained model weights',
 34 |                       type=str)
 35 |   parser.add_argument('--imdb', dest='imdb_name',
 36 |                       help='dataset to train on',
 37 |                       default='voc_2007_trainval', type=str)
 38 |   parser.add_argument('--imdbval', dest='imdbval_name',
 39 |                       help='dataset to validate on',
 40 |                       default='voc_2007_test', type=str)
 41 |   parser.add_argument('--iters', dest='max_iters',
 42 |                       help='number of iterations to train',
 43 |                       default=40000, type=int)
 44 |   parser.add_argument('--tag', dest='tag',
 45 |                       help='tag of the model',
 46 |                       default=None, type=str)
 47 |   parser.add_argument('--net', dest='net',
 48 |                       help='vgg16, res50, res101, res152, mobile',
 49 |                       default='res50', type=str)
 50 |   parser.add_argument('--set', dest='set_cfgs',
 51 |                       help='set config keys', default=None,
 52 |                       nargs=argparse.REMAINDER)
 53 | 
 54 |   if len(sys.argv) == 1:
 55 |     parser.print_help()
 56 |     sys.exit(1)
 57 | 
 58 |   args = parser.parse_args()
 59 |   return args
 60 | 
 61 | 
 62 | def combined_roidb(imdb_names):
 63 |   """
 64 |   Combine multiple roidbs
 65 |   """
 66 | 
 67 |   def get_roidb(imdb_name):
 68 |     imdb = get_imdb(imdb_name)
 69 |     print('Loaded dataset `{:s}` for training'.format(imdb.name))
 70 |     imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
 71 |     print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
 72 |     roidb = get_training_roidb(imdb)
 73 |     return roidb
 74 | 
 75 |   roidbs = [get_roidb(s) for s in imdb_names.split('+')]
 76 |   roidb = roidbs[0]
 77 |   if len(roidbs) > 1:
 78 |     for r in roidbs[1:]:
 79 |       roidb.extend(r)
 80 |     tmp = get_imdb(imdb_names.split('+')[1])
 81 |     imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
 82 |   else:
 83 |     imdb = get_imdb(imdb_names)
 84 |   return imdb, roidb
 85 | 
 86 | 
 87 | if __name__ == '__main__':
 88 |   args = parse_args()
 89 | 
 90 |   print('Called with args:')
 91 |   print(args)
 92 | 
 93 |   if args.cfg_file is not None:
 94 |     cfg_from_file(args.cfg_file)
 95 |   if args.set_cfgs is not None:
 96 |     cfg_from_list(args.set_cfgs)
 97 | 
 98 |   print('Using config:')
 99 |   pprint.pprint(cfg)
100 | 
101 |   np.random.seed(cfg.RNG_SEED)
102 | 
103 |   # train set
104 |   imdb, roidb = combined_roidb(args.imdb_name)
105 |   print('{:d} roidb entries'.format(len(roidb)))
106 | 
107 |   # output directory where the models are saved
108 |   output_dir = get_output_dir(imdb, args.tag)
109 |   print('Output will be saved to `{:s}`'.format(output_dir))
110 | 
111 |   # tensorboard directory where the summaries are saved during training
112 |   tb_dir = get_output_tb_dir(imdb, args.tag)
113 |   print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir))
114 | 
115 |   # also add the validation set, but with no flipping images
116 |   orgflip = cfg.TRAIN.USE_FLIPPED
117 |   cfg.TRAIN.USE_FLIPPED = False
118 |   _, valroidb = combined_roidb(args.imdbval_name)
119 |   print('{:d} validation roidb entries'.format(len(valroidb)))
120 |   cfg.TRAIN.USE_FLIPPED = orgflip
121 | 
122 |   # load network
123 |   if args.net == 'vgg16':
124 |     net = MELM_vgg16()
125 |   else:
126 |     raise NotImplementedError
127 |     
128 |   train_net(net, imdb, roidb, valroidb, output_dir, tb_dir,
129 |             pretrained_model=args.weight,
130 |             max_iters=args.max_iters)
131 | 


--------------------------------------------------------------------------------