├── .gitignore ├── LICENSE ├── README.md ├── data └── ILSVRC2015 │ └── ImageSets │ ├── DET_train_30classes.txt │ ├── VID_train_15frames.txt │ ├── VID_train_every10frames.txt │ └── VID_val_videos.txt ├── experiments ├── fgfa_rfcn │ ├── cfgs │ │ ├── fgfa_rfcn_vid_demo.yaml │ │ └── resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem.yaml │ ├── fgfa_rfcn_end2end_train_test.py │ └── fgfa_rfcn_test.py └── manet_rfcn │ ├── cfgs │ ├── phase-1.yaml │ ├── phase-2.yaml │ └── phase-3.yaml │ └── manet_rfcn_end2end_train_test.py ├── images ├── table2.png ├── table3.png └── table4.png ├── init.bat ├── init.sh ├── lib ├── Makefile ├── __init__.py ├── bbox │ ├── .gitignore │ ├── __init__.py │ ├── bbox.pyx │ ├── bbox_regression.py │ ├── bbox_transform.py │ ├── setup_linux.py │ └── setup_windows.py ├── dataset │ ├── .ropeproject │ │ ├── config.py │ │ ├── globalnames │ │ ├── history │ │ └── objectdb │ ├── __init__.py │ ├── ds_utils.py │ ├── imagenet_vid.py │ ├── imagenet_vid_eval.py │ ├── imagenet_vid_eval_motion.py │ ├── imagenet_vid_groundtruth_motion_iou.mat │ ├── imdb.py │ └── log_test.py ├── nms │ ├── __init__.py │ ├── cpu_nms.pyx │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── nms.py │ ├── nms_kernel.cu │ ├── seq_nms.py │ ├── setup_linux.py │ ├── setup_windows.py │ └── setup_windows_cuda.py ├── rpn │ ├── __init__.py │ ├── generate_anchor.py │ └── rpn.py └── utils │ ├── PrefetchingIter.py │ ├── __init__.py │ ├── combine_model.py │ ├── create_logger.py │ ├── image.py │ ├── image_processing.py │ ├── load_data.py │ ├── load_model.py │ ├── lr_scheduler.py │ ├── roidb.py │ ├── save_model.py │ ├── show_boxes.py │ ├── symbol.py │ └── tictoc.py ├── manet_rfcn ├── __init__.py ├── _init_paths.py ├── config │ ├── __init__.py │ └── config.py ├── core │ ├── DataParallelExecutorGroup.py │ ├── __init__.py │ ├── callback.py │ ├── loader.py │ ├── metric.py │ ├── module.py │ ├── rcnn.py │ └── tester.py ├── demo.py ├── function │ ├── __init__.py │ ├── test_rcnn.py │ ├── test_rpn.py │ ├── train_rcnn.py │ └── train_rpn.py ├── operator_cxx │ ├── psroi_pooling-inl.h │ ├── psroi_pooling.cc │ └── psroi_pooling.cu ├── operator_py │ ├── __init__.py │ ├── box_annotator_ohem.py │ ├── proposal.py │ ├── proposal_target.py │ ├── rpn_inv_normalize.py │ └── tile_as.py ├── symbols │ ├── __init__.py │ └── resnet_v1_101_manet_rfcn.py ├── test.py └── train_end2end.py └── run.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ project files 2 | .idea 3 | *.iml 4 | out 5 | gen 6 | 7 | ### Vim template 8 | [._]*.s[a-w][a-z] 9 | [._]s[a-w][a-z] 10 | *.un~ 11 | Session.vim 12 | .netrwhist 13 | *~ 14 | 15 | ### IPythonNotebook template 16 | # Temporary data 17 | .ipynb_checkpoints/ 18 | 19 | ### Python template 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | */*/*.pyc 24 | */*.pyc 25 | *.pyc 26 | *$py.class 27 | 28 | # C extensions 29 | *.so 30 | 31 | # Distribution / packaging 32 | .Python 33 | env/ 34 | build/ 35 | develop-eggs/ 36 | dist/ 37 | downloads/ 38 | eggs/ 39 | .eggs/ 40 | #lib/ 41 | #lib64/ 42 | parts/ 43 | sdist/ 44 | var/ 45 | *.egg-info/ 46 | .installed.cfg 47 | *.egg 48 | 49 | # PyInstaller 50 | # Usually these files are written by a python script from a template 51 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
52 | *.manifest 53 | *.spec 54 | 55 | # Installer logs 56 | pip-log.txt 57 | pip-delete-this-directory.txt 58 | 59 | # Unit test / coverage reports 60 | htmlcov/ 61 | .tox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *,cover 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | *.ipynb 83 | *.params 84 | *.json 85 | .vscode/ 86 | 87 | lib/dataset/pycocotools/*.c 88 | lib/dataset/pycocotools/*.cpp 89 | lib/nms/*.c 90 | lib/nms/*.cpp 91 | 92 | external 93 | output 94 | model 95 | data 96 | demo 97 | 98 | .db 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fully Motion-Aware Network for Video Object Detection 2 | 3 | 4 | This implementation is a fork of [FGFA](https://github.com/msracver/Flow-Guided-Feature-Aggregation), extended by [Shiyao Wang](https://github.com/wangshy31) with instance-level aggregation and motion pattern reasoning. 5 | 6 | 7 | 8 | ## Introduction 9 | 10 | **Fully Motion-Aware Network for Video Object Detection (MANet)** was initially described in an [ECCV 2018 paper](https://wangshy31.github.io/papers/2-MANet.pdf). It proposes an end-to-end model, the fully motion-aware network (MANet), which jointly calibrates object features at both the pixel level and the instance level in a unified framework. 12 | The contributions of this paper include: 13 | 14 | * Propose an instance-level feature calibration method by learning instance movements through time. The instance-level calibration is more robust to occlusions and outperforms pixel-level feature calibration. 15 | * Develop a motion pattern reasoning module to dynamically combine pixel-level and instance-level calibration according to the motion. 16 | * Demonstrate the MANet on the large-scale [ImageNet VID dataset](http://image-net.org/challenges/LSVRC/) with state-of-the-art performance. 17 | 18 | 19 | 20 | ## Installation 21 | 22 | 1. Clone the repo; we refer to the directory that you cloned as ${MANet_ROOT}. 23 | ``` 24 | git clone https://github.com/wangshy31/MANet_for_Video_Object_Detection.git 25 | ``` 26 | 2. The following Python packages may be missing: cython, opencv-python >= 3.2.0, easydict. If `pip` is set up on your system, they can be fetched and installed by running 27 | ``` 28 | pip install Cython 29 | pip install opencv-python==3.2.0.6 30 | pip install easydict==1.6 31 | ``` 32 | 3. Run `sh ./init.sh` to automatically build the cython modules and create some folders. 33 | 34 | 4. 
Install MXNet as in [FGFA](https://github.com/msracver/Flow-Guided-Feature-Aggregation): 35 | 36 | 4.1 Clone MXNet and check out [MXNet@(v0.10.0)](https://github.com/apache/incubator-mxnet/tree/v0.10.0) by 37 | 38 | ``` 39 | git clone --recursive https://github.com/apache/incubator-mxnet.git 40 | cd incubator-mxnet 41 | git checkout v0.10.0 42 | git submodule update 43 | ``` 44 | 45 | 4.2 Copy the operators in `$(MANet_ROOT)/manet_rfcn/operator_cxx` to `$(YOUR_MXNET_FOLDER)/src/operator/contrib` by 46 | 47 | ```cp -r $(MANet_ROOT)/manet_rfcn/operator_cxx/* $(MXNET_ROOT)/src/operator/contrib/``` 48 | 49 | 4.3 Compile MXNet 50 | 51 | ``` 52 | cd ${MXNET_ROOT} 53 | make -j4 54 | ``` 55 | 4.4 Install the MXNet Python binding by 56 | ``` 57 | cd python 58 | sudo python setup.py install 59 | ``` 60 | 61 | 62 | 63 | ## Preparation for Training & Testing 64 | 65 | **For data processing**: 66 | 67 | 1. Please download the ILSVRC2015 DET and ILSVRC2015 VID datasets, and make sure the directory structure looks like this: 68 | 69 | ``` 70 | ./data/ILSVRC2015/ 71 | ./data/ILSVRC2015/Annotations/DET 72 | ./data/ILSVRC2015/Annotations/VID 73 | ./data/ILSVRC2015/Data/DET 74 | ./data/ILSVRC2015/Data/VID 75 | ./data/ILSVRC2015/ImageSets 76 | ``` 77 | 78 | 2. Please download the ImageNet pre-trained ResNet-v1-101 model and the Flying-Chairs pre-trained FlowNet model manually from [OneDrive](https://1drv.ms/u/s!Am-5JzdW2XHzhqMOBdCBiNaKbcjPrA), and put them under the folder `./model`. Make sure it looks like this: 79 | 80 | ``` 81 | ./model/pretrained_model/resnet_v1_101-0000.params 82 | ./model/pretrained_model/flownet-0000.params 83 | ``` 84 | 85 | **For training & testing**: 86 | 87 | 1. Three-phase training is performed on the mixture of ImageNet DET+VID, which is helpful for the final performance. 88 | 89 | **Phase 1**: Fix the weights of ResNet and combine the pixel-level and instance-level aggregated features by averaging. See `experiments/manet_rfcn/cfgs/phase-1.yaml`; 90 | 91 | **Phase 2**: Similar to phase 1, but ResNet is trained jointly. See `experiments/manet_rfcn/cfgs/phase-2.yaml`; 92 | 93 | **Phase 3**: Fix the weights of ResNet, replace the averaging with learnable weights and sample more VID data. See `experiments/manet_rfcn/cfgs/phase-3.yaml`; 94 | 95 | We use 4 GPUs to train models on ImageNet VID. Any NVIDIA GPU with at least 8GB of memory should be OK. 96 | 97 | 2. To perform experiments, run the python script with the corresponding config file as input. For example, to train and test MANet with R-FCN, use the following command: 98 | 99 | ``` 100 | ./run.sh 101 | ``` 102 | 103 | A cache folder will be created automatically to save the model and the log under 104 | 105 | `imagenet_vid/`. 106 | 107 | 3. Please find more details in the config files and in our code. 108 | 109 | ## Main Results 110 | 111 | 1. We conduct an ablation study to validate the effectiveness of the proposed network. 112 | 113 | ![ablation study](images/table2.png) 114 | 115 | **Table 1**. Accuracy of different methods on the ImageNet VID validation set, using ResNet-101 feature extraction networks. Detection accuracy is reported for slow (motion IoU > 0.9), medium (0.7 ≤ motion IoU ≤ 0.9), and fast (motion IoU < 0.7) moving object instances. 116 | 117 | 2. We take a deeper look at the detection results and show that the two calibrated features have complementary strengths. 118 | 119 | ![visualization](images/table3.png) 120 | 121 | **Figure 1**. Visualization of two typical examples: occluded and non-rigid objects. They show the respective strengths of the two calibration methods.
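The slow / medium / fast breakdown used in Tables 1 and 2 groups ground-truth instances by their motion IoU, i.e. (roughly) the overlap of an object's box with its boxes in nearby frames. The precomputed scores ship with the repo as `lib/dataset/imagenet_vid_groundtruth_motion_iou.mat` (see the `motion_iou_path` entry in the configs) and are consumed by `lib/dataset/imagenet_vid_eval_motion.py`. Below is a minimal sketch of that binning, using the thresholds from the Table 1 caption; it is illustrative only and not the repository's actual evaluation code.

```
import numpy as np

def motion_speed_masks(motion_ious):
    """Split instances into the motion categories used in Table 1.

    motion_ious: array-like of per-instance motion IoU scores in [0, 1].
    Returns boolean masks for the slow / medium / fast groups.
    """
    iou = np.asarray(motion_ious, dtype=np.float64)
    return {
        'slow': iou > 0.9,                        # object barely moves
        'medium': (iou >= 0.7) & (iou <= 0.9),
        'fast': iou < 0.7,                        # large displacement between frames
    }

# Example: three instances with decreasing motion IoU (increasing speed).
masks = motion_speed_masks([0.95, 0.80, 0.30])
print(masks['slow'])    # [ True False False]
print(masks['fast'])    # [False False  True]
```

Per-category accuracy can then be obtained by restricting the evaluation to the instances each mask selects.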
122 | 123 | ![statisticalanalysis](images/table4.png) 124 | 125 | **Table 2**. Statistical analysis on different validation sets. The instance-level calibration is better when objects are occluded or move more regularly while the pixel-level calibration performs well on non-rigid motion. Combination of these two module can achieve best performance. 126 | 127 | 128 | ## Download Trained Models 129 | You can download the trained MANet from [drive](https://drive.google.com/file/d/1tKFfOKaFUeZanKTCCwVw-xaKu0wAw71t/view?usp=sharing). It can achieve 78.03% mAP without sequence-level post-processing (e.g., SeqNMS). 130 | 131 | 132 | 133 | ## Citing MANet 134 | 135 | If you find Fully Motion-Aware Network for Video Object Detection useful in your research, please consider citing: 136 | ``` 137 | @inproceedings{wang2018fully, 138 | Author = {Wang, Shiyao and Zhou, Yucong and Yan, Junjie and Deng, Zhidong}, 139 | Title = {Fully Motion-Aware Network for Video Object Detection}, 140 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 141 | pages={542--557}, 142 | Year = {2018} 143 | } 144 | 145 | ``` 146 | 147 | 148 | -------------------------------------------------------------------------------- /experiments/fgfa_rfcn/cfgs/fgfa_rfcn_vid_demo.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "" 3 | output_path: "./output/fgfa_rfcn/imagenet_vid" 4 | gpus: '0' 5 | CLASS_AGNOSTIC: true 6 | SCALES: 7 | - 600 8 | - 1000 9 | default: 10 | frequent: 100 11 | kvstore: device 12 | network: 13 | PIXEL_MEANS: 14 | - 103.06 15 | - 115.90 16 | - 123.15 17 | IMAGE_STRIDE: 0 18 | RCNN_FEAT_STRIDE: 16 19 | RPN_FEAT_STRIDE: 16 20 | FIXED_PARAMS: 21 | - conv1 22 | - res2 23 | - bn 24 | ANCHOR_RATIOS: 25 | - 0.5 26 | - 1 27 | - 2 28 | ANCHOR_SCALES: 29 | - 8 30 | - 16 31 | - 32 32 | ANCHOR_MEANS: 33 | - 0.0 34 | - 0.0 35 | - 0.0 36 | - 0.0 37 | ANCHOR_STDS: 38 | - 0.1 39 | - 0.1 40 | - 0.4 41 | - 0.4 42 | NORMALIZE_RPN: TRUE 43 | NUM_ANCHORS: 9 44 | dataset: 45 | NUM_CLASSES: 31 46 | dataset: ImageNetVID 47 | dataset_path: "./data/ILSVRC2015" 48 | image_set: DET_train_30classes+VID_train_15frames 49 | root_path: "./data" 50 | test_image_set: VID_val_videos 51 | proposal: rpn 52 | TRAIN: 53 | lr: 0.00025 54 | lr_step: '1.333' 55 | warmup: false 56 | begin_epoch: 0 57 | end_epoch: 2 58 | model_prefix: 'fgfa_rfcn_vid' 59 | # whether resume training 60 | RESUME: false 61 | # whether flip image 62 | FLIP: true 63 | # whether shuffle image 64 | SHUFFLE: true 65 | # whether use OHEM 66 | ENABLE_OHEM: true 67 | # size of images for each device, 1 for e2e 68 | BATCH_IMAGES: 1 69 | # e2e changes behavior of anchor loader and metric 70 | END2END: true 71 | # group images with similar aspect ratio 72 | ASPECT_GROUPING: true 73 | # R-CNN 74 | # rcnn rois batch size 75 | BATCH_ROIS: -1 76 | BATCH_ROIS_OHEM: 128 77 | # rcnn rois sampling params 78 | FG_FRACTION: 0.25 79 | FG_THRESH: 0.5 80 | BG_THRESH_HI: 0.5 81 | BG_THRESH_LO: 0.0 82 | # rcnn bounding box regression params 83 | BBOX_REGRESSION_THRESH: 0.5 84 | BBOX_WEIGHTS: 85 | - 1.0 86 | - 1.0 87 | - 1.0 88 | - 1.0 89 | 90 | # RPN anchor loader 91 | # rpn anchors batch size 92 | RPN_BATCH_SIZE: 256 93 | # rpn anchors sampling params 94 | RPN_FG_FRACTION: 0.5 95 | RPN_POSITIVE_OVERLAP: 0.7 96 | RPN_NEGATIVE_OVERLAP: 0.3 97 | RPN_CLOBBER_POSITIVES: false 98 | # rpn bounding box regression params 99 | RPN_BBOX_WEIGHTS: 100 | - 1.0 101 | - 1.0 102 | - 1.0 103 | - 1.0 104 | RPN_POSITIVE_WEIGHT: 
-1.0 105 | # used for end2end training 106 | # RPN proposal 107 | CXX_PROPOSAL: true 108 | RPN_NMS_THRESH: 0.7 109 | RPN_PRE_NMS_TOP_N: 6000 110 | RPN_POST_NMS_TOP_N: 300 111 | RPN_MIN_SIZE: 0 112 | # approximate bounding box regression 113 | BBOX_NORMALIZATION_PRECOMPUTED: true 114 | BBOX_MEANS: 115 | - 0.0 116 | - 0.0 117 | - 0.0 118 | - 0.0 119 | BBOX_STDS: 120 | - 0.1 121 | - 0.1 122 | - 0.2 123 | - 0.2 124 | TEST: 125 | # use rpn to generate proposal 126 | HAS_RPN: true 127 | # size of images for each device 128 | BATCH_IMAGES: 1 129 | SEQ_NMS: false 130 | 131 | # RPN proposal 132 | CXX_PROPOSAL: true 133 | RPN_NMS_THRESH: 0.7 134 | RPN_PRE_NMS_TOP_N: 6000 135 | RPN_POST_NMS_TOP_N: 300 136 | RPN_MIN_SIZE: 0 137 | # RCNN nms 138 | NMS: 0.3 139 | test_epoch: 2 140 | -------------------------------------------------------------------------------- /experiments/fgfa_rfcn/cfgs/resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "" 3 | output_path: "./output/fgfa_rfcn/imagenet_vid" 4 | symbol: resnet_v1_101_flownet_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_flow: "./model/pretrained_model/flownet" 16 | pretrained_epoch: 0 17 | PIXEL_MEANS: 18 | - 103.06 19 | - 115.90 20 | - 123.15 21 | IMAGE_STRIDE: 0 22 | RCNN_FEAT_STRIDE: 16 23 | RPN_FEAT_STRIDE: 16 24 | FIXED_PARAMS: 25 | - conv1 26 | - res2 27 | - bn 28 | ANCHOR_RATIOS: 29 | - 0.5 30 | - 1 31 | - 2 32 | ANCHOR_SCALES: 33 | - 8 34 | - 16 35 | - 32 36 | ANCHOR_MEANS: 37 | - 0.0 38 | - 0.0 39 | - 0.0 40 | - 0.0 41 | ANCHOR_STDS: 42 | - 0.1 43 | - 0.1 44 | - 0.4 45 | - 0.4 46 | NORMALIZE_RPN: TRUE 47 | NUM_ANCHORS: 9 48 | dataset: 49 | NUM_CLASSES: 31 50 | dataset: ImageNetVID 51 | dataset_path: "./data/ILSVRC2015" 52 | image_set: DET_train_30classes+VID_train_15frames 53 | root_path: "./data" 54 | test_image_set: VID_val_videos 55 | proposal: rpn 56 | motion_iou_path: './lib/dataset/imagenet_vid_groundtruth_motion_iou.mat' 57 | enable_detailed_eval: true 58 | TRAIN: 59 | lr: 0.00025 60 | lr_step: '1.333' 61 | warmup: false 62 | begin_epoch: 1 63 | end_epoch: 3 64 | model_prefix: 'fgfa_rfcn_vid' 65 | # whether resume training 66 | RESUME: true 67 | # whether flip image 68 | FLIP: true 69 | # whether shuffle image 70 | SHUFFLE: true 71 | # whether use OHEM 72 | ENABLE_OHEM: true 73 | # size of images for each device, 1 for e2e 74 | BATCH_IMAGES: 1 75 | # e2e changes behavior of anchor loader and metric 76 | END2END: true 77 | # group images with similar aspect ratio 78 | ASPECT_GROUPING: true 79 | # R-CNN 80 | # rcnn rois batch size 81 | BATCH_ROIS: -1 82 | BATCH_ROIS_OHEM: 128 83 | # rcnn rois sampling params 84 | FG_FRACTION: 0.25 85 | FG_THRESH: 0.5 86 | BG_THRESH_HI: 0.5 87 | BG_THRESH_LO: 0.0 88 | # rcnn bounding box regression params 89 | BBOX_REGRESSION_THRESH: 0.5 90 | BBOX_WEIGHTS: 91 | - 1.0 92 | - 1.0 93 | - 1.0 94 | - 1.0 95 | 96 | # RPN anchor loader 97 | # rpn anchors batch size 98 | RPN_BATCH_SIZE: 256 99 | # rpn anchors sampling params 100 | RPN_FG_FRACTION: 0.5 101 | RPN_POSITIVE_OVERLAP: 0.7 102 | RPN_NEGATIVE_OVERLAP: 0.3 103 | RPN_CLOBBER_POSITIVES: false 104 | # rpn bounding box regression params 105 | RPN_BBOX_WEIGHTS: 106 | - 1.0 107 | - 1.0 108 | - 1.0 109 | - 1.0 110 | RPN_POSITIVE_WEIGHT: -1.0 111 | # used for end2end 
training 112 | # RPN proposal 113 | CXX_PROPOSAL: true 114 | RPN_NMS_THRESH: 0.7 115 | RPN_PRE_NMS_TOP_N: 6000 116 | RPN_POST_NMS_TOP_N: 300 117 | RPN_MIN_SIZE: 0 118 | # approximate bounding box regression 119 | BBOX_NORMALIZATION_PRECOMPUTED: true 120 | BBOX_MEANS: 121 | - 0.0 122 | - 0.0 123 | - 0.0 124 | - 0.0 125 | BBOX_STDS: 126 | - 0.1 127 | - 0.1 128 | - 0.2 129 | - 0.2 130 | TEST: 131 | # use rpn to generate proposal 132 | HAS_RPN: true 133 | # size of images for each device 134 | BATCH_IMAGES: 1 135 | SEQ_NMS: false 136 | 137 | # RPN proposal 138 | CXX_PROPOSAL: true 139 | RPN_NMS_THRESH: 0.7 140 | RPN_PRE_NMS_TOP_N: 6000 141 | RPN_POST_NMS_TOP_N: 300 142 | RPN_MIN_SIZE: 0 143 | # RCNN nms 144 | NMS: 0.3 145 | test_epoch: 3 146 | -------------------------------------------------------------------------------- /experiments/fgfa_rfcn/fgfa_rfcn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | import os 8 | import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | this_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'fgfa_rfcn')) 14 | 15 | import train_end2end 16 | import test 17 | 18 | if __name__ == "__main__": 19 | #train_end2end.main() 20 | test.main() 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /experiments/fgfa_rfcn/fgfa_rfcn_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | this_dir = os.path.dirname(__file__) 14 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'fgfa_rfcn')) 15 | 16 | import test 17 | 18 | if __name__ == "__main__": 19 | test.main() 20 | -------------------------------------------------------------------------------- /experiments/manet_rfcn/cfgs/phase-1.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "" 3 | output_path: "./imagenet_vid" 4 | symbol: resnet_v1_101_manet_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_flow: "./model/pretrained_model/flownet" 16 | pretrained_epoch: 0 17 | PIXEL_MEANS: 18 | - 103.06 19 | - 115.90 20 | - 123.15 21 | IMAGE_STRIDE: 0 22 | RCNN_FEAT_STRIDE: 16 23 | RPN_FEAT_STRIDE: 16 24 | FIXED_PARAMS: 25 | - conv1 26 | - res2 27 | - bn 28 | - flow 29 | - conv 30 | - res 31 | - Convolution 32 | - deconv 33 | 34 | ANCHOR_RATIOS: 35 | - 0.5 36 | - 1 37 | - 2 38 | ANCHOR_SCALES: 39 | - 8 40 | - 16 41 | - 32 42 | ANCHOR_MEANS: 43 | 
- 0.0 44 | - 0.0 45 | - 0.0 46 | - 0.0 47 | ANCHOR_STDS: 48 | - 0.1 49 | - 0.1 50 | - 0.4 51 | - 0.4 52 | NORMALIZE_RPN: TRUE 53 | NUM_ANCHORS: 9 54 | dataset: 55 | NUM_CLASSES: 31 56 | dataset: ImageNetVID 57 | dataset_path: "./data/ILSVRC2015" 58 | image_set: DET_train_30classes+VID_train_15frames 59 | #image_set: DET_train_30classes+VID_pretrain_data 60 | root_path: "./data" 61 | test_image_set: VID_val_videos 62 | proposal: rpn 63 | motion_iou_path: '../lib/dataset/imagenet_vid_groundtruth_motion_iou.mat' 64 | enable_detailed_eval: true 65 | TRAIN: 66 | lr: 0.00025 67 | lr_step: '2.333' 68 | warmup: false 69 | begin_epoch: 0 70 | end_epoch: 3 71 | model_prefix: 'manet_rfcn_vid' 72 | # whether predict occlusion 73 | USE_OCCLUSION: False 74 | # whether resume training 75 | RESUME: false 76 | # whether flip image 77 | FLIP: true 78 | # whether shuffle image 79 | SHUFFLE: true 80 | # whether use OHEM 81 | ENABLE_OHEM: true 82 | # size of images for each device, 1 for e2e 83 | BATCH_IMAGES: 1 84 | # e2e changes behavior of anchor loader and metric 85 | END2END: true 86 | # group images with similar aspect ratio 87 | ASPECT_GROUPING: true 88 | # R-CNN 89 | # rcnn rois batch size 90 | BATCH_ROIS: -1 91 | BATCH_ROIS_OHEM: 128 92 | # rcnn rois sampling params 93 | FG_FRACTION: 0.25 94 | FG_THRESH: 0.5 95 | BG_THRESH_HI: 0.5 96 | BG_THRESH_LO: 0.0 97 | # rcnn bounding box regression params 98 | BBOX_REGRESSION_THRESH: 0.5 99 | BBOX_WEIGHTS: 100 | - 1.0 101 | - 1.0 102 | - 1.0 103 | - 1.0 104 | 105 | # RPN anchor loader 106 | # rpn anchors batch size 107 | RPN_BATCH_SIZE: 256 108 | # rpn anchors sampling params 109 | RPN_FG_FRACTION: 0.5 110 | RPN_POSITIVE_OVERLAP: 0.7 111 | RPN_NEGATIVE_OVERLAP: 0.3 112 | RPN_CLOBBER_POSITIVES: false 113 | # rpn bounding box regression params 114 | RPN_BBOX_WEIGHTS: 115 | - 1.0 116 | - 1.0 117 | - 1.0 118 | - 1.0 119 | RPN_POSITIVE_WEIGHT: -1.0 120 | # used for end2end training 121 | # RPN proposal 122 | CXX_PROPOSAL: true 123 | RPN_NMS_THRESH: 0.7 124 | RPN_PRE_NMS_TOP_N: 6000 125 | RPN_POST_NMS_TOP_N: 300 126 | RPN_MIN_SIZE: 0 127 | # approximate bounding box regression 128 | BBOX_NORMALIZATION_PRECOMPUTED: true 129 | BBOX_MEANS: 130 | - 0.0 131 | - 0.0 132 | - 0.0 133 | - 0.0 134 | BBOX_STDS: 135 | - 0.1 136 | - 0.1 137 | - 0.2 138 | - 0.2 139 | TEST: 140 | # use rpn to generate proposal 141 | HAS_RPN: true 142 | # size of images for each device 143 | BATCH_IMAGES: 1 144 | SEQ_NMS: false 145 | 146 | # RPN proposal 147 | CXX_PROPOSAL: true 148 | RPN_NMS_THRESH: 0.7 149 | RPN_PRE_NMS_TOP_N: 6000 150 | RPN_POST_NMS_TOP_N: 300 151 | RPN_MIN_SIZE: 0 152 | #KEY_FRAME_INTERVAL: 6 153 | # RCNN nms 154 | NMS: 0.44 155 | test_epoch: 3 156 | -------------------------------------------------------------------------------- /experiments/manet_rfcn/cfgs/phase-2.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "" 3 | output_path: "./imagenet_vid" 4 | symbol: resnet_v1_101_manet_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./model/pretrained_model/resnet_v1_101" 15 | pretrained_flow: "./model/pretrained_model/flownet" 16 | pretrained_epoch: 0 17 | PIXEL_MEANS: 18 | - 103.06 19 | - 115.90 20 | - 123.15 21 | IMAGE_STRIDE: 0 22 | RCNN_FEAT_STRIDE: 16 23 | RPN_FEAT_STRIDE: 16 24 | FIXED_PARAMS: 25 | - conv1 26 | - res2 27 | - bn 28 | #- flow 29 | #- conv 30 | #- res 31 | #- Convolution 32 | #- 
deconv 33 | 34 | ANCHOR_RATIOS: 35 | - 0.5 36 | - 1 37 | - 2 38 | ANCHOR_SCALES: 39 | - 8 40 | - 16 41 | - 32 42 | ANCHOR_MEANS: 43 | - 0.0 44 | - 0.0 45 | - 0.0 46 | - 0.0 47 | ANCHOR_STDS: 48 | - 0.1 49 | - 0.1 50 | - 0.4 51 | - 0.4 52 | NORMALIZE_RPN: TRUE 53 | NUM_ANCHORS: 9 54 | dataset: 55 | NUM_CLASSES: 31 56 | dataset: ImageNetVID 57 | dataset_path: "./data/ILSVRC2015" 58 | image_set: DET_train_30classes+VID_train_15frames 59 | #image_set: DET_train_30classes+VID_pretrain_data 60 | root_path: "./data" 61 | test_image_set: VID_val_videos 62 | proposal: rpn 63 | motion_iou_path: '../lib/dataset/imagenet_vid_groundtruth_motion_iou.mat' 64 | enable_detailed_eval: true 65 | TRAIN: 66 | lr: 0.00025 67 | lr_step: '2.333' 68 | warmup: false 69 | begin_epoch: 1 70 | end_epoch: 3 71 | model_prefix: 'manet_rfcn_vid' 72 | # whether predict occlusion 73 | USE_OCCLUSION: False 74 | # whether resume training 75 | RESUME: true 76 | # whether flip image 77 | FLIP: true 78 | # whether shuffle image 79 | SHUFFLE: true 80 | # whether use OHEM 81 | ENABLE_OHEM: true 82 | # size of images for each device, 1 for e2e 83 | BATCH_IMAGES: 1 84 | # e2e changes behavior of anchor loader and metric 85 | END2END: true 86 | # group images with similar aspect ratio 87 | ASPECT_GROUPING: true 88 | # R-CNN 89 | # rcnn rois batch size 90 | BATCH_ROIS: -1 91 | BATCH_ROIS_OHEM: 128 92 | # rcnn rois sampling params 93 | FG_FRACTION: 0.25 94 | FG_THRESH: 0.5 95 | BG_THRESH_HI: 0.5 96 | BG_THRESH_LO: 0.0 97 | # rcnn bounding box regression params 98 | BBOX_REGRESSION_THRESH: 0.5 99 | BBOX_WEIGHTS: 100 | - 1.0 101 | - 1.0 102 | - 1.0 103 | - 1.0 104 | 105 | # RPN anchor loader 106 | # rpn anchors batch size 107 | RPN_BATCH_SIZE: 256 108 | # rpn anchors sampling params 109 | RPN_FG_FRACTION: 0.5 110 | RPN_POSITIVE_OVERLAP: 0.7 111 | RPN_NEGATIVE_OVERLAP: 0.3 112 | RPN_CLOBBER_POSITIVES: false 113 | # rpn bounding box regression params 114 | RPN_BBOX_WEIGHTS: 115 | - 1.0 116 | - 1.0 117 | - 1.0 118 | - 1.0 119 | RPN_POSITIVE_WEIGHT: -1.0 120 | # used for end2end training 121 | # RPN proposal 122 | CXX_PROPOSAL: true 123 | RPN_NMS_THRESH: 0.7 124 | RPN_PRE_NMS_TOP_N: 6000 125 | RPN_POST_NMS_TOP_N: 300 126 | RPN_MIN_SIZE: 0 127 | # approximate bounding box regression 128 | BBOX_NORMALIZATION_PRECOMPUTED: true 129 | BBOX_MEANS: 130 | - 0.0 131 | - 0.0 132 | - 0.0 133 | - 0.0 134 | BBOX_STDS: 135 | - 0.1 136 | - 0.1 137 | - 0.2 138 | - 0.2 139 | TEST: 140 | # use rpn to generate proposal 141 | HAS_RPN: true 142 | # size of images for each device 143 | BATCH_IMAGES: 1 144 | SEQ_NMS: false 145 | 146 | # RPN proposal 147 | CXX_PROPOSAL: true 148 | RPN_NMS_THRESH: 0.7 149 | RPN_PRE_NMS_TOP_N: 6000 150 | RPN_POST_NMS_TOP_N: 300 151 | RPN_MIN_SIZE: 0 152 | #KEY_FRAME_INTERVAL: 6 153 | # RCNN nms 154 | NMS: 0.44 155 | test_epoch: 3 156 | -------------------------------------------------------------------------------- /experiments/manet_rfcn/cfgs/phase-3.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "" 3 | output_path: "./imagenet_vid" 4 | symbol: resnet_v1_101_manet_rfcn 5 | gpus: '0,1,2,3' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 600 9 | - 1000 10 | default: 11 | frequent: 100 12 | kvstore: device 13 | network: 14 | pretrained: "./imagenet_vid/phase-2/DET_train_30classes_VID_train_15frames/manet_rfcn_vid" 15 | #pretrained_flow: "./model/pretrained_model/flownet" 16 | pretrained_epoch: 3 17 | PIXEL_MEANS: 18 | - 103.06 19 | - 115.90 20 | - 123.15 21 | IMAGE_STRIDE: 0 
22 | RCNN_FEAT_STRIDE: 16 23 | RPN_FEAT_STRIDE: 16 24 | FIXED_PARAMS: 25 | - conv1 26 | - res2 27 | - bn 28 | - flow 29 | - conv 30 | - res 31 | - Convolution 32 | - deconv 33 | 34 | ANCHOR_RATIOS: 35 | - 0.5 36 | - 1 37 | - 2 38 | ANCHOR_SCALES: 39 | - 8 40 | - 16 41 | - 32 42 | ANCHOR_MEANS: 43 | - 0.0 44 | - 0.0 45 | - 0.0 46 | - 0.0 47 | ANCHOR_STDS: 48 | - 0.1 49 | - 0.1 50 | - 0.4 51 | - 0.4 52 | NORMALIZE_RPN: TRUE 53 | NUM_ANCHORS: 9 54 | dataset: 55 | NUM_CLASSES: 31 56 | dataset: ImageNetVID 57 | dataset_path: "./data/ILSVRC2015" 58 | #image_set: DET_train_30classes+VID_train_15frames 59 | image_set: DET_train_30classes+VID_train_every10frames 60 | root_path: "./data" 61 | test_image_set: VID_val_videos 62 | proposal: rpn 63 | motion_iou_path: '../lib/dataset/imagenet_vid_groundtruth_motion_iou.mat' 64 | enable_detailed_eval: true 65 | TRAIN: 66 | lr: 0.00025 67 | lr_step: '0.666' 68 | warmup: false 69 | begin_epoch: 0 70 | end_epoch: 1 71 | model_prefix: 'manet_rfcn_vid' 72 | # whether predict occlusion 73 | USE_OCCLUSION: True 74 | # whether resume training 75 | RESUME: false 76 | # whether flip image 77 | FLIP: true 78 | # whether shuffle image 79 | SHUFFLE: true 80 | # whether use OHEM 81 | ENABLE_OHEM: true 82 | # size of images for each device, 1 for e2e 83 | BATCH_IMAGES: 1 84 | # e2e changes behavior of anchor loader and metric 85 | END2END: true 86 | # group images with similar aspect ratio 87 | ASPECT_GROUPING: true 88 | # R-CNN 89 | # rcnn rois batch size 90 | BATCH_ROIS: -1 91 | BATCH_ROIS_OHEM: 128 92 | # rcnn rois sampling params 93 | FG_FRACTION: 0.25 94 | FG_THRESH: 0.5 95 | BG_THRESH_HI: 0.5 96 | BG_THRESH_LO: 0.0 97 | # rcnn bounding box regression params 98 | BBOX_REGRESSION_THRESH: 0.5 99 | BBOX_WEIGHTS: 100 | - 1.0 101 | - 1.0 102 | - 1.0 103 | - 1.0 104 | 105 | # RPN anchor loader 106 | # rpn anchors batch size 107 | RPN_BATCH_SIZE: 256 108 | # rpn anchors sampling params 109 | RPN_FG_FRACTION: 0.5 110 | RPN_POSITIVE_OVERLAP: 0.7 111 | RPN_NEGATIVE_OVERLAP: 0.3 112 | RPN_CLOBBER_POSITIVES: false 113 | # rpn bounding box regression params 114 | RPN_BBOX_WEIGHTS: 115 | - 1.0 116 | - 1.0 117 | - 1.0 118 | - 1.0 119 | RPN_POSITIVE_WEIGHT: -1.0 120 | # used for end2end training 121 | # RPN proposal 122 | CXX_PROPOSAL: true 123 | RPN_NMS_THRESH: 0.7 124 | RPN_PRE_NMS_TOP_N: 6000 125 | RPN_POST_NMS_TOP_N: 300 126 | RPN_MIN_SIZE: 0 127 | # approximate bounding box regression 128 | BBOX_NORMALIZATION_PRECOMPUTED: true 129 | BBOX_MEANS: 130 | - 0.0 131 | - 0.0 132 | - 0.0 133 | - 0.0 134 | BBOX_STDS: 135 | - 0.1 136 | - 0.1 137 | - 0.2 138 | - 0.2 139 | TEST: 140 | # use rpn to generate proposal 141 | HAS_RPN: true 142 | # size of images for each device 143 | BATCH_IMAGES: 1 144 | SEQ_NMS: false 145 | 146 | # RPN proposal 147 | CXX_PROPOSAL: true 148 | RPN_NMS_THRESH: 0.7 149 | RPN_PRE_NMS_TOP_N: 6000 150 | RPN_POST_NMS_TOP_N: 300 151 | RPN_MIN_SIZE: 0 152 | #KEY_FRAME_INTERVAL: 6 153 | # RCNN nms 154 | NMS: 0.44 155 | test_epoch: 1 156 | -------------------------------------------------------------------------------- /experiments/manet_rfcn/manet_rfcn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fully Motion-Aware Network for Video Object Detection 3 | # Extend FGFA by adding instance-level aggregation and motion pattern reasoning 4 | # Modified by Shiyao Wang 5 | # -------------------------------------------------------- 6 | 7 | import os 8 
| import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | this_dir = os.path.dirname(__file__) 13 | sys.path.insert(0, os.path.join(this_dir, '..', '..', 'manet_rfcn')) 14 | 15 | import train_end2end 16 | import test 17 | 18 | if __name__ == "__main__": 19 | train_end2end.main() 20 | #test.main() 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /images/table2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/images/table2.png -------------------------------------------------------------------------------- /images/table3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/images/table3.png -------------------------------------------------------------------------------- /images/table4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/images/table4.png -------------------------------------------------------------------------------- /init.bat: -------------------------------------------------------------------------------- 1 | cd /d %~dp0 2 | mkdir .\output 3 | mkdir .\external\mxnet 4 | mkdir .\model\pretrained_model 5 | pause 6 | cd lib\bbox 7 | python setup_windows.py build_ext --inplace 8 | cd ..\nms 9 | python setup_windows.py build_ext --inplace 10 | python setup_windows_cuda.py build_ext --inplace 11 | cd ..\.. 12 | pause 13 | -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p ./output 4 | mkdir -p ./external/mxnet 5 | mkdir -p ./model/pretrained_model 6 | 7 | cd lib/bbox 8 | python setup_linux.py build_ext --inplace 9 | cd ../nms 10 | python setup_linux.py build_ext --inplace 11 | cd ../.. 
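After `init.sh` (or `init.bat` on Windows) finishes, a quick way to confirm that the Cython extensions were built is to import them directly. A minimal sanity check, assuming it is run from `${MANet_ROOT}/lib/bbox` so that the freshly built `bbox` module is importable:

```
# Quick check that the bbox Cython extension built by init.sh is importable.
# Assumes the current working directory is ${MANet_ROOT}/lib/bbox.
import numpy as np
from bbox import bbox_overlaps_cython  # compiled from bbox.pyx

boxes = np.array([[0.0, 0.0, 9.0, 9.0]])      # one 10x10 box
query = np.array([[5.0, 5.0, 14.0, 14.0]])    # a shifted, overlapping 10x10 box
print(bbox_overlaps_cython(boxes, query))     # ~0.1429 (25 px intersection / 175 px union)
```

The `cpu_nms`/`gpu_nms` extensions under `lib/nms` can be checked in the same way, provided they compiled successfully on your setup.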
12 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | cd nms/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | cd bbox/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 4 | cd dataset/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 5 | clean: 6 | cd nms/; rm *.so *.c *.cpp; cd ../../ 7 | cd bbox/; rm *.so *.c *.cpp; cd ../../ 8 | cd dataset/pycocotools/; rm *.so; cd ../../ 9 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/__init__.py -------------------------------------------------------------------------------- /lib/bbox/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp -------------------------------------------------------------------------------- /lib/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/bbox/__init__.py -------------------------------------------------------------------------------- /lib/bbox/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Sergey Karayev 7 | # Modified by Yuwen Xiong, from from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | 10 | cimport cython 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | DTYPE = np.float 15 | ctypedef np.float_t DTYPE_t 16 | 17 | def bbox_overlaps_cython( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | -------------------------------------------------------------------------------- /lib/bbox/bbox_regression.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | 10 | """ 11 | This file has functions about generating bounding box regression targets 12 | """ 13 | 14 | import numpy as np 15 | 16 | from bbox_transform import bbox_overlaps, bbox_transform 17 | 18 | 19 | def compute_bbox_regression_targets(rois, overlaps, labels, cfg): 20 | """ 21 | given rois, overlaps, gt labels, compute bounding box regression targets 22 | :param rois: roidb[i]['boxes'] k * 4 23 | :param overlaps: roidb[i]['max_overlaps'] k * 1 24 | :param labels: roidb[i]['max_classes'] k * 1 25 | :return: targets[i][class, dx, dy, dw, dh] k * 5 26 | """ 27 | # Ensure ROIs are floats 28 | rois = rois.astype(np.float, copy=False) 29 | 30 | # Sanity check 31 | if len(rois) != len(overlaps): 32 | print 'bbox regression: this should not happen' 33 | 34 | # Indices of ground-truth ROIs 35 | gt_inds = np.where(overlaps == 1)[0] 36 | if len(gt_inds) == 0: 37 | print 'something wrong : zero ground truth rois' 38 | # Indices of examples for which we try to make predictions 39 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0] 40 | 41 | # Get IoU overlap between each ex ROI and gt ROI 42 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 43 | 44 | # Find which gt ROI each ex ROI has max overlap with: 45 | # this will be the ex ROI's gt target 46 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 47 | gt_rois = rois[gt_inds[gt_assignment], :] 48 | ex_rois = rois[ex_inds, :] 49 | 50 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 51 | targets[ex_inds, 0] = labels[ex_inds] 52 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 53 | return targets 54 | 55 | 56 | def add_bbox_regression_targets(roidb, cfg): 57 | """ 58 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 59 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb 60 | :return: means, std variances of targets 61 | """ 62 | print 'add bounding box regression targets' 63 | assert len(roidb) > 0 64 | assert 'max_classes' in roidb[0] 65 | 66 | num_images = len(roidb) 67 | num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1] 68 | 69 | for im_i in range(num_images): 70 | rois = roidb[im_i]['boxes'] 71 | max_overlaps = roidb[im_i]['max_overlaps'] 72 | max_classes = roidb[im_i]['max_classes'] 73 | roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg) 74 | 75 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 76 | # use fixed / precomputed means and stds instead of empirical values 77 | means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1)) 78 | stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1)) 79 | else: 80 | # compute mean, std values 81 | class_counts = np.zeros((num_classes, 1)) + 1e-14 82 | sums = np.zeros((num_classes, 4)) 83 | squared_sums = np.zeros((num_classes, 4)) 84 | for im_i in range(num_images): 85 | targets = roidb[im_i]['bbox_targets'] 86 | for cls in range(1, num_classes): 87 | cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 88 | if cls_indexes.size > 0: 89 | class_counts[cls] += cls_indexes.size 90 | sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) 91 | squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) 92 | 93 | means = sums / class_counts 94 | # var(x) = E(x^2) - E(x)^2 95 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 96 | 97 | print 'bbox target means:' 98 | print means 99 | print means[1:, :].mean(axis=0) # ignore bg class 100 | print 'bbox target stdevs:' 101 | print stds 102 | print stds[1:, :].mean(axis=0) # ignore bg class 103 | 104 | 105 | # normalized targets 106 | for im_i in range(num_images): 107 | targets = roidb[im_i]['bbox_targets'] 108 | for cls in range(1, num_classes): 109 | cls_indexes = np.where(targets[:, 0] > 0) if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0] 110 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] 111 | roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] 112 | 113 | return means.ravel(), stds.ravel() 114 | 115 | 116 | def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg): 117 | """ 118 | expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets 119 | :param bbox_targets_data: [k * 5] 120 | :param num_classes: number of classes 121 | :return: bbox target processed [k * 4 num_classes] 122 | bbox_weights ! only foreground boxes have bbox regression computation! 
123 | """ 124 | classes = bbox_targets_data[:, 0] 125 | if cfg.CLASS_AGNOSTIC: 126 | num_classes = 2 127 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 128 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 129 | delta_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 130 | indexes = np.where(classes > 0)[0] 131 | for index in indexes: 132 | cls = classes[index] 133 | start = int(4 * 1 if cls > 0 else 0) if cfg.CLASS_AGNOSTIC else int(4 * cls) 134 | end = start + 4 135 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 136 | bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 137 | delta_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 138 | return bbox_targets, bbox_weights, delta_weights 139 | 140 | -------------------------------------------------------------------------------- /lib/bbox/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from bbox import bbox_overlaps_cython 3 | 4 | 5 | def bbox_overlaps(boxes, query_boxes): 6 | return bbox_overlaps_cython(boxes, query_boxes) 7 | 8 | 9 | def bbox_overlaps_py(boxes, query_boxes): 10 | """ 11 | determine overlaps between boxes and query_boxes 12 | :param boxes: n * 4 bounding boxes 13 | :param query_boxes: k * 4 bounding boxes 14 | :return: overlaps: n * k overlaps 15 | """ 16 | n_ = boxes.shape[0] 17 | k_ = query_boxes.shape[0] 18 | overlaps = np.zeros((n_, k_), dtype=np.float) 19 | for k in range(k_): 20 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 21 | for n in range(n_): 22 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 23 | if iw > 0: 24 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 25 | if ih > 0: 26 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 27 | all_area = float(box_area + query_box_area - iw * ih) 28 | overlaps[n, k] = iw * ih / all_area 29 | return overlaps 30 | 31 | 32 | def clip_boxes(boxes, im_shape): 33 | """ 34 | Clip boxes to image boundaries. 35 | :param boxes: [N, 4* num_classes] 36 | :param im_shape: tuple of 2 37 | :return: [N, 4* num_classes] 38 | """ 39 | # x1 >= 0 40 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 41 | # y1 >= 0 42 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 43 | # x2 < im_shape[1] 44 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 45 | # y2 < im_shape[0] 46 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 47 | return boxes 48 | 49 | def filter_boxes(boxes, min_size): 50 | """ 51 | filter small boxes. 
52 | :param boxes: [N, 4* num_classes] 53 | :param min_size: 54 | :return: keep: 55 | """ 56 | ws = boxes[:, 2] - boxes[:, 0] + 1 57 | hs = boxes[:, 3] - boxes[:, 1] + 1 58 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 59 | return keep 60 | 61 | def nonlinear_transform(ex_rois, gt_rois): 62 | """ 63 | compute bounding box regression targets from ex_rois to gt_rois 64 | :param ex_rois: [N, 4] 65 | :param gt_rois: [N, 4] 66 | :return: [N, 4] 67 | """ 68 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 69 | 70 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 71 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 72 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 73 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 74 | 75 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 76 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 77 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 78 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 79 | 80 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 81 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 82 | targets_dw = np.log(gt_widths / ex_widths) 83 | targets_dh = np.log(gt_heights / ex_heights) 84 | 85 | targets = np.vstack( 86 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 87 | return targets 88 | 89 | 90 | def nonlinear_pred(boxes, box_deltas): 91 | """ 92 | Transform the set of class-agnostic boxes into class-specific boxes 93 | by applying the predicted offsets (box_deltas) 94 | :param boxes: !important [N 4] 95 | :param box_deltas: [N, 4 * num_classes] 96 | :return: [N 4 * num_classes] 97 | """ 98 | if boxes.shape[0] == 0: 99 | return np.zeros((0, box_deltas.shape[1])) 100 | 101 | boxes = boxes.astype(np.float, copy=False) 102 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 103 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 104 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 105 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 106 | 107 | dx = box_deltas[:, 0::4] 108 | dy = box_deltas[:, 1::4] 109 | dw = box_deltas[:, 2::4] 110 | dh = box_deltas[:, 3::4] 111 | 112 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 113 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 114 | pred_w = np.exp(dw) * widths[:, np.newaxis] 115 | pred_h = np.exp(dh) * heights[:, np.newaxis] 116 | 117 | pred_boxes = np.zeros(box_deltas.shape) 118 | # x1 119 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 120 | # y1 121 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 122 | # x2 123 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 124 | # y2 125 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 126 | 127 | return pred_boxes 128 | 129 | 130 | def iou_transform(ex_rois, gt_rois): 131 | """ return bbox targets, IoU loss uses gt_rois as gt """ 132 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 133 | return gt_rois 134 | 135 | 136 | def iou_pred(boxes, box_deltas): 137 | """ 138 | Transform the set of class-agnostic boxes into class-specific boxes 139 | by applying the predicted offsets (box_deltas) 140 | :param boxes: !important [N 4] 141 | :param box_deltas: [N, 4 * num_classes] 142 | :return: [N 4 * num_classes] 143 | """ 144 | if boxes.shape[0] == 0: 145 | return np.zeros((0, box_deltas.shape[1])) 146 | 147 | boxes = boxes.astype(np.float, copy=False) 148 | x1 = boxes[:, 0] 149 | y1 = boxes[:, 1] 150 | x2 = boxes[:, 2] 151 | y2 = boxes[:, 3] 152 | 153 | dx1 = box_deltas[:, 0::4] 154 | dy1 = box_deltas[:, 1::4] 
155 | dx2 = box_deltas[:, 2::4] 156 | dy2 = box_deltas[:, 3::4] 157 | 158 | pred_boxes = np.zeros(box_deltas.shape) 159 | # x1 160 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 161 | # y1 162 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 163 | # x2 164 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 165 | # y2 166 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 167 | 168 | return pred_boxes 169 | 170 | 171 | # define bbox_transform and bbox_pred 172 | bbox_transform = nonlinear_transform 173 | bbox_pred = nonlinear_pred 174 | -------------------------------------------------------------------------------- /lib/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | 10 | import os 11 | from os.path import join as pjoin 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import numpy as np 16 | 17 | # Obtain the numpy include directory. This logic works across numpy versions. 18 | try: 19 | numpy_include = np.get_include() 20 | except AttributeError: 21 | numpy_include = np.get_numpy_include() 22 | 23 | 24 | def customize_compiler_for_nvcc(self): 25 | """inject deep into distutils to customize how the dispatch 26 | to gcc/nvcc works. 27 | If you subclass UnixCCompiler, it's not trivial to get your subclass 28 | injected in, and still have the right customizations (i.e. 29 | distutils.sysconfig.customize_compiler) run on it. So instead of going 30 | the OO route, I have this. Note, it's kindof like a wierd functional 31 | subclassing going on.""" 32 | 33 | # tell the compiler it can processes .cu 34 | self.src_extensions.append('.cu') 35 | 36 | # save references to the default compiler_so and _comple methods 37 | default_compiler_so = self.compiler_so 38 | super = self._compile 39 | 40 | # now redefine the _compile method. This gets executed for each 41 | # object but distutils doesn't have the ability to change compilers 42 | # based on source extension: we add it. 
43 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 44 | if os.path.splitext(src)[1] == '.cu': 45 | # use the cuda for .cu files 46 | self.set_executable('compiler_so', CUDA['nvcc']) 47 | # use only a subset of the extra_postargs, which are 1-1 translated 48 | # from the extra_compile_args in the Extension class 49 | postargs = extra_postargs['nvcc'] 50 | else: 51 | postargs = extra_postargs['gcc'] 52 | 53 | super(obj, src, ext, cc_args, postargs, pp_opts) 54 | # reset the default compiler_so, which we might have changed for cuda 55 | self.compiler_so = default_compiler_so 56 | 57 | # inject our redefined _compile method into the class 58 | self._compile = _compile 59 | 60 | 61 | # run the customize_compiler 62 | class custom_build_ext(build_ext): 63 | def build_extensions(self): 64 | customize_compiler_for_nvcc(self.compiler) 65 | build_ext.build_extensions(self) 66 | 67 | 68 | ext_modules = [ 69 | Extension( 70 | "bbox", 71 | ["bbox.pyx"], 72 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 73 | include_dirs=[numpy_include] 74 | ), 75 | ] 76 | 77 | setup( 78 | name='bbox_cython', 79 | ext_modules=ext_modules, 80 | # inject our custom trigger 81 | cmdclass={'build_ext': custom_build_ext}, 82 | ) 83 | -------------------------------------------------------------------------------- /lib/bbox/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import os 11 | from os.path import join as pjoin 12 | #from distutils.core import setup 13 | from setuptools import setup 14 | from distutils.extension import Extension 15 | from Cython.Distutils import build_ext 16 | import subprocess 17 | 18 | #change for windows, by MrX 19 | nvcc_bin = 'nvcc.exe' 20 | lib_dir = 'lib/x64' 21 | 22 | import distutils.msvc9compiler 23 | distutils.msvc9compiler.VERSION = 14.0 24 | 25 | # Obtain the numpy include directory. This logic works across numpy versions. 26 | try: 27 | numpy_include = np.get_include() 28 | except AttributeError: 29 | numpy_include = np.get_numpy_include() 30 | 31 | ext_modules = [ 32 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 33 | Extension( 34 | "bbox", 35 | sources=["bbox.pyx"], 36 | extra_compile_args={}, 37 | include_dirs = [numpy_include] 38 | ), 39 | ] 40 | 41 | setup( 42 | name='fast_rcnn', 43 | ext_modules=ext_modules, 44 | # inject our custom trigger 45 | cmdclass={'build_ext': build_ext}, 46 | ) 47 | -------------------------------------------------------------------------------- /lib/dataset/.ropeproject/config.py: -------------------------------------------------------------------------------- 1 | # The default ``config.py`` 2 | 3 | 4 | def set_prefs(prefs): 5 | """This function is called before opening the project""" 6 | 7 | # Specify which files and folders to ignore in the project. 8 | # Changes to ignored resources are not added to the history and 9 | # VCSs. Also they are not returned in `Project.get_files()`. 10 | # Note that ``?`` and ``*`` match all characters but slashes. 
11 | # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc' 12 | # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc' 13 | # '.svn': matches 'pkg/.svn' and all of its children 14 | # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o' 15 | # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o' 16 | prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject', 17 | '.hg', '.svn', '_svn', '.git'] 18 | 19 | # Specifies which files should be considered python files. It is 20 | # useful when you have scripts inside your project. Only files 21 | # ending with ``.py`` are considered to be python files by 22 | # default. 23 | #prefs['python_files'] = ['*.py'] 24 | 25 | # Custom source folders: By default rope searches the project 26 | # for finding source folders (folders that should be searched 27 | # for finding modules). You can add paths to that list. Note 28 | # that rope guesses project source folders correctly most of the 29 | # time; use this if you have any problems. 30 | # The folders should be relative to project root and use '/' for 31 | # separating folders regardless of the platform rope is running on. 32 | # 'src/my_source_folder' for instance. 33 | #prefs.add('source_folders', 'src') 34 | 35 | # You can extend python path for looking up modules 36 | #prefs.add('python_path', '~/python/') 37 | 38 | # Should rope save object information or not. 39 | prefs['save_objectdb'] = True 40 | prefs['compress_objectdb'] = False 41 | 42 | # If `True`, rope analyzes each module when it is being saved. 43 | prefs['automatic_soa'] = True 44 | # The depth of calls to follow in static object analysis 45 | prefs['soa_followed_calls'] = 0 46 | 47 | # If `False` when running modules or unit tests "dynamic object 48 | # analysis" is turned off. This makes them much faster. 49 | prefs['perform_doa'] = True 50 | 51 | # Rope can check the validity of its object DB when running. 52 | prefs['validate_objectdb'] = True 53 | 54 | # How many undos to hold? 55 | prefs['max_history_items'] = 32 56 | 57 | # Shows whether to save history across sessions. 58 | prefs['save_history'] = True 59 | prefs['compress_history'] = False 60 | 61 | # Set the number spaces used for indenting. According to 62 | # :PEP:`8`, it is best to use 4 spaces. Since most of rope's 63 | # unit-tests use 4 spaces it is more reliable, too. 64 | prefs['indent_size'] = 4 65 | 66 | # Builtin and c-extension modules that are allowed to be imported 67 | # and inspected by rope. 68 | prefs['extension_modules'] = [] 69 | 70 | # Add all standard c-extensions to extension_modules list. 71 | prefs['import_dynload_stdmods'] = True 72 | 73 | # If `True` modules with syntax errors are considered to be empty. 74 | # The default value is `False`; When `False` syntax errors raise 75 | # `rope.base.exceptions.ModuleSyntaxError` exception. 76 | prefs['ignore_syntax_errors'] = False 77 | 78 | # If `True`, rope ignores unresolvable imports. Otherwise, they 79 | # appear in the importing namespace. 80 | prefs['ignore_bad_imports'] = False 81 | 82 | 83 | def project_opened(project): 84 | """This function is called after opening the project""" 85 | # Do whatever you like here! 
86 | -------------------------------------------------------------------------------- /lib/dataset/.ropeproject/globalnames: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/dataset/.ropeproject/globalnames -------------------------------------------------------------------------------- /lib/dataset/.ropeproject/history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/dataset/.ropeproject/history -------------------------------------------------------------------------------- /lib/dataset/.ropeproject/objectdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/dataset/.ropeproject/objectdb -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from imdb import IMDB 2 | from imagenet_vid import ImageNetVID 3 | -------------------------------------------------------------------------------- /lib/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v) 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h > min_size))[0] 16 | return keep -------------------------------------------------------------------------------- /lib/dataset/imagenet_vid_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | """ 9 | given a imagenet vid imdb, compute mAP 10 | """ 11 | 12 | import numpy as np 13 | import os 14 | import cPickle 15 | 16 | 17 | def parse_vid_rec(filename, classhash, img_ids, defaultIOUthr=0.5, pixelTolerance=10): 18 | """ 19 | parse imagenet vid record into a dictionary 20 | :param filename: xml file path 21 | :return: list of dict 22 | """ 23 | import xml.etree.ElementTree as ET 24 | tree = ET.parse(filename) 25 | objects = [] 26 | for obj in tree.findall('object'): 27 | obj_dict = dict() 28 | obj_dict['label'] = classhash[obj.find('name').text] 29 | bbox = obj.find('bndbox') 30 | obj_dict['bbox'] = [float(bbox.find('xmin').text), 31 | float(bbox.find('ymin').text), 32 | float(bbox.find('xmax').text), 33 | float(bbox.find('ymax').text)] 34 | gt_w = obj_dict['bbox'][2] - obj_dict['bbox'][0] + 1 35 | gt_h = obj_dict['bbox'][3] - obj_dict['bbox'][1] + 1 36 | thr = (gt_w*gt_h)/((gt_w+pixelTolerance)*(gt_h+pixelTolerance)) 37 | obj_dict['thr'] = np.min([thr, defaultIOUthr]) 38 | objects.append(obj_dict) 39 | return {'bbox' : np.array([x['bbox'] for x 
in objects]), 40 | 'label': np.array([x['label'] for x in objects]), 41 | 'thr' : np.array([x['thr'] for x in objects]), 42 | 'img_ids': img_ids} 43 | 44 | 45 | def vid_ap(rec, prec): 46 | """ 47 | average precision calculations 48 | [precision integrated to recall] 49 | :param rec: recall 50 | :param prec: precision 51 | :return: average precision 52 | """ 53 | 54 | # append sentinel values at both ends 55 | mrec = np.concatenate(([0.], rec, [1.])) 56 | mpre = np.concatenate(([0.], prec, [0.])) 57 | 58 | # compute precision integration ladder 59 | for i in range(mpre.size - 1, 0, -1): 60 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 61 | 62 | # look for recall value changes 63 | i = np.where(mrec[1:] != mrec[:-1])[0] 64 | 65 | # sum (\delta recall) * prec 66 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 67 | return ap 68 | 69 | 70 | def vid_eval(multifiles, detpath, annopath, imageset_file, classname_map, annocache, ovthresh=0.5): 71 | """ 72 | imagenet vid evaluation 73 | :param detpath: detection results detpath.format(classname) 74 | :param annopath: annotations annopath.format(classname) 75 | :param imageset_file: text file containing list of images 76 | :param annocache: caching annotations 77 | :param ovthresh: overlap threshold 78 | :return: rec, prec, ap 79 | """ 80 | with open(imageset_file, 'r') as f: 81 | lines = [x.strip().split(' ') for x in f.readlines()] 82 | img_basenames = [x[0] for x in lines] 83 | gt_img_ids = [int(x[1]) for x in lines] 84 | classhash = dict(zip(classname_map, range(0,len(classname_map)))) 85 | 86 | # load annotations from cache 87 | if not os.path.isfile(annocache): 88 | recs = [] 89 | for ind, image_filename in enumerate(img_basenames): 90 | recs.append(parse_vid_rec(annopath.format('VID/' + image_filename), classhash, gt_img_ids[ind])) 91 | if ind % 100 == 0: 92 | print 'reading annotations for {:d}/{:d}'.format(ind + 1, len(img_basenames)) 93 | print 'saving annotations cache to {:s}'.format(annocache) 94 | with open(annocache, 'wb') as f: 95 | cPickle.dump(recs, f, protocol=cPickle.HIGHEST_PROTOCOL) 96 | else: 97 | with open(annocache, 'rb') as f: 98 | recs = cPickle.load(f) 99 | 100 | # extract objects in :param classname: 101 | npos = np.zeros(len(classname_map)) 102 | for rec in recs: 103 | rec_labels = rec['label'] 104 | for x in rec_labels: 105 | npos[x] += 1 106 | 107 | # read detections 108 | splitlines = [] 109 | if (multifiles == False): 110 | with open(detpath, 'r') as f: 111 | lines = f.readlines() 112 | splitlines = [x.strip().split(' ') for x in lines] 113 | else: 114 | for det in detpath: 115 | with open(det, 'r') as f: 116 | lines = f.readlines() 117 | splitlines += [x.strip().split(' ') for x in lines] 118 | 119 | img_ids = np.array([int(x[0]) for x in splitlines]) 120 | obj_labels = np.array([int(x[1]) for x in splitlines]) 121 | obj_confs = np.array([float(x[2]) for x in splitlines]) 122 | obj_bboxes = np.array([[float(z) for z in x[3:]] for x in splitlines]) 123 | 124 | # sort by confidence 125 | if obj_bboxes.shape[0] > 0: 126 | sorted_inds = np.argsort(img_ids) 127 | img_ids = img_ids[sorted_inds] 128 | obj_labels = obj_labels[sorted_inds] 129 | obj_confs = obj_confs[sorted_inds] 130 | obj_bboxes = obj_bboxes[sorted_inds, :] 131 | 132 | num_imgs = max(max(gt_img_ids),max(img_ids)) + 1 133 | obj_labels_cell = [None] * num_imgs 134 | obj_confs_cell = [None] * num_imgs 135 | obj_bboxes_cell = [None] * num_imgs 136 | start_i = 0 137 | id = img_ids[0] 138 | for i in range(0, len(img_ids)): 139 | if i == len(img_ids)-1 or 
img_ids[i+1] != id: 140 | conf = obj_confs[start_i:i+1] 141 | label = obj_labels[start_i:i+1] 142 | bbox = obj_bboxes[start_i:i+1, :] 143 | sorted_inds = np.argsort(-conf) 144 | 145 | obj_labels_cell[id] = label[sorted_inds] 146 | obj_confs_cell[id] = conf[sorted_inds] 147 | obj_bboxes_cell[id] = bbox[sorted_inds, :] 148 | if i < len(img_ids)-1: 149 | id = img_ids[i+1] 150 | start_i = i+1 151 | 152 | 153 | # go down detections and mark true positives and false positives 154 | tp_cell = [None] * num_imgs 155 | fp_cell = [None] * num_imgs 156 | 157 | for rec in recs: 158 | id = rec['img_ids'] 159 | gt_labels = rec['label'] 160 | gt_bboxes = rec['bbox'] 161 | gt_thr = rec['thr'] 162 | num_gt_obj = len(gt_labels) 163 | gt_detected = np.zeros(num_gt_obj) 164 | 165 | labels = obj_labels_cell[id] 166 | bboxes = obj_bboxes_cell[id] 167 | 168 | num_obj = 0 if labels is None else len(labels) 169 | tp = np.zeros(num_obj) 170 | fp = np.zeros(num_obj) 171 | 172 | for j in range(0,num_obj): 173 | bb = bboxes[j, :] 174 | ovmax = -1 175 | kmax = -1 176 | for k in range(0,num_gt_obj): 177 | if labels[j] != gt_labels[k]: 178 | continue 179 | if gt_detected[k] > 0: 180 | continue 181 | bbgt = gt_bboxes[k, :] 182 | bi=[np.max((bb[0],bbgt[0])), np.max((bb[1],bbgt[1])), np.min((bb[2],bbgt[2])), np.min((bb[3],bbgt[3]))] 183 | iw=bi[2]-bi[0]+1 184 | ih=bi[3]-bi[1]+1 185 | if iw>0 and ih>0: 186 | # compute overlap as area of intersection / area of union 187 | ua = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 188 | (bbgt[2] - bbgt[0] + 1.) * \ 189 | (bbgt[3] - bbgt[1] + 1.) - iw*ih 190 | ov=iw*ih/ua 191 | # makes sure that this object is detected according 192 | # to its individual threshold 193 | if ov >= gt_thr[k] and ov > ovmax: 194 | ovmax=ov 195 | kmax=k 196 | if kmax >= 0: 197 | tp[j] = 1 198 | gt_detected[kmax] = 1 199 | else: 200 | fp[j] = 1 201 | 202 | tp_cell[id] = tp 203 | fp_cell[id] = fp 204 | 205 | tp_all = np.concatenate([x for x in np.array(tp_cell)[gt_img_ids] if x is not None]) 206 | fp_all = np.concatenate([x for x in np.array(fp_cell)[gt_img_ids] if x is not None]) 207 | obj_labels = np.concatenate([x for x in np.array(obj_labels_cell)[gt_img_ids] if x is not None]) 208 | confs = np.concatenate([x for x in np.array(obj_confs_cell)[gt_img_ids] if x is not None]) 209 | 210 | sorted_inds = np.argsort(-confs) 211 | tp_all = tp_all[sorted_inds] 212 | fp_all = fp_all[sorted_inds] 213 | obj_labels = obj_labels[sorted_inds] 214 | 215 | ap = np.zeros(len(classname_map)) 216 | for c in range(1, len(classname_map)): 217 | # compute precision recall 218 | fp = np.cumsum(fp_all[obj_labels == c]) 219 | tp = np.cumsum(tp_all[obj_labels == c]) 220 | rec = tp / float(npos[c]) 221 | # avoid division by zero in case first detection matches a difficult ground ruth 222 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 223 | ap[c] = vid_ap(rec, prec) 224 | ap = ap[1:] 225 | return ap 226 | -------------------------------------------------------------------------------- /lib/dataset/imagenet_vid_groundtruth_motion_iou.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/dataset/imagenet_vid_groundtruth_motion_iou.mat -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert 
sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int32_t, ndim=1] \ 26 | order = scores.argsort()[::-1].astype(np.int32) 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cpu_nms import cpu_nms 4 | from gpu_nms import gpu_nms 5 | 6 | def py_nms_wrapper(thresh): 7 | def _nms(dets): 8 | return nms(dets, thresh) 9 | return _nms 10 | 11 | 12 | def cpu_nms_wrapper(thresh): 13 | def _nms(dets): 14 | return cpu_nms(dets, thresh) 15 | return _nms 16 | 17 | 18 | def gpu_nms_wrapper(thresh, device_id): 19 | def _nms(dets): 20 | return gpu_nms(dets, thresh, device_id) 21 | return _nms 22 | 23 | 24 | def nms(dets, thresh): 25 | """ 26 | greedily select boxes with high confidence and overlap with current maximum <= thresh 27 | rule out overlap >= thresh 28 | :param dets: [[x1, y1, x2, y2 score]] 29 | :param thresh: retain overlap < thresh 30 | :return: indexes to keep 31 | """ 32 | if dets.shape[0] == 0: 33 | return [] 34 | 35 | x1 = dets[:, 0] 36 | y1 = dets[:, 1] 37 | x2 = dets[:, 2] 38 | y2 = dets[:, 3] 39 | scores = dets[:, 4] 40 | 41 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 42 | order = scores.argsort()[::-1] 43 | 44 | keep = [] 45 | while order.size > 0: 46 | i = order[0] 47 | keep.append(i) 48 | xx1 = np.maximum(x1[i], x1[order[1:]]) 49 | yy1 = np.maximum(y1[i], y1[order[1:]]) 50 | xx2 = np.minimum(x2[i], x2[order[1:]]) 51 | yy2 = np.minimum(y2[i], y2[order[1:]]) 52 | 53 | w = np.maximum(0.0, xx2 - xx1 + 1) 54 | h = np.maximum(0.0, yy2 - yy1 + 1) 55 | inter = w * h 56 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 57 | 58 | inds = np.where(ovr <= thresh)[0] 59 | order = order[inds + 1] 60 | 61 | return keep 62 | -------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Deformable Convolutional Networks 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License 5 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include 10 | #include 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline 
float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
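  // --- annotation (not part of the original source) -------------------------
  // The early return above means cudaSetDevice is only issued when the caller
  // asks for a GPU other than the one already current, so repeated NMS calls
  // on the same device avoid redundant device switches.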
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /lib/nms/seq_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuqing Zhu, Xizhou Zhu 7 | # -------------------------------------------------------- 8 | 9 | 10 | import numpy as np 11 | 12 | import profile 13 | import cv2 14 | import time 15 | import copy 16 | import cPickle as pickle 17 | import os 18 | 19 | CLASSES = ('__background__', 20 | 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 21 | 'car', 'cattle', 'dog', 'domestic cat', 'elephant', 'fox', 22 | 'giant panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey', 23 | 'motorcycle', 'rabbit', 'red panda', 'sheep', 'snake', 'squirrel', 24 | 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra') 25 | 26 | 27 | NMS_THRESH = 0.3 28 | IOU_THRESH = 0.5 29 | MAX_THRESH=1e-2 30 | 31 | 32 | def createLinks(dets_all): 33 | links_all = [] 34 | 35 | frame_num = len(dets_all[0]) 36 | cls_num = len(CLASSES) - 1 37 | for cls_ind in range(cls_num): 38 | links_cls = [] 39 | for frame_ind in range(frame_num - 1): 40 | dets1 = dets_all[cls_ind][frame_ind] 41 | dets2 = dets_all[cls_ind][frame_ind + 1] 42 | box1_num = len(dets1) 43 | box2_num = len(dets2) 44 | 45 | if frame_ind == 0: 46 | areas1 = np.empty(box1_num) 47 | for box1_ind, box1 in enumerate(dets1): 48 | areas1[box1_ind] = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1) 49 | else: 50 | areas1 = areas2 51 | 52 | areas2 = np.empty(box2_num) 53 | 
for box2_ind, box2 in enumerate(dets2): 54 | areas2[box2_ind] = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1) 55 | 56 | links_frame = [] 57 | for box1_ind, box1 in enumerate(dets1): 58 | area1 = areas1[box1_ind] 59 | x1 = np.maximum(box1[0], dets2[:, 0]) 60 | y1 = np.maximum(box1[1], dets2[:, 1]) 61 | x2 = np.minimum(box1[2], dets2[:, 2]) 62 | y2 = np.minimum(box1[3], dets2[:, 3]) 63 | w = np.maximum(0.0, x2 - x1 + 1) 64 | h = np.maximum(0.0, y2 - y1 + 1) 65 | inter = w * h 66 | ovrs = inter / (area1 + areas2 - inter) 67 | links_box = [ovr_ind for ovr_ind, ovr in enumerate(ovrs) if 68 | ovr >= IOU_THRESH] 69 | links_frame.append(links_box) 70 | links_cls.append(links_frame) 71 | links_all.append(links_cls) 72 | return links_all 73 | 74 | 75 | def maxPath(dets_all, links_all): 76 | 77 | for cls_ind, links_cls in enumerate(links_all): 78 | 79 | max_begin = time.time() 80 | delete_sets=[[]for i in range(0,len(dets_all[0]))] 81 | delete_single_box=[] 82 | dets_cls = dets_all[cls_ind] 83 | 84 | num_path=0 85 | # compute the number of links 86 | sum_links=0 87 | for frame_ind, frame in enumerate(links_cls): 88 | for box_ind,box in enumerate(frame): 89 | sum_links+=len(box) 90 | 91 | while True: 92 | 93 | num_path+=1 94 | 95 | rootindex, maxpath, maxsum = findMaxPath(links_cls, dets_cls,delete_single_box) 96 | 97 | if (maxsuma[i,next_box_id]): 151 | a[i,next_box_id]=weight_new 152 | b[i,next_box_id]=box_id 153 | 154 | i,j=np.unravel_index(a.argmax(),a.shape) 155 | 156 | maxpath=[j] 157 | maxscore=a[i,j] 158 | while(b[i,j]!=-1): 159 | 160 | maxpath.append(b[i,j]) 161 | j=b[i,j] 162 | i=i-1 163 | 164 | 165 | rootindex=i 166 | maxpath.reverse() 167 | return rootindex, maxpath, maxscore 168 | 169 | 170 | def rescore(dets, rootindex, maxpath, maxsum): 171 | newscore = maxsum / len(maxpath) 172 | 173 | for i, box_ind in enumerate(maxpath): 174 | dets[rootindex + i][box_ind][4] = newscore 175 | 176 | 177 | def deleteLink(dets, links, rootindex, maxpath, thesh): 178 | 179 | delete_set=[] 180 | num_delete_links=0 181 | 182 | for i, box_ind in enumerate(maxpath): 183 | areas = [(box[2] - box[0] + 1) * (box[3] - box[1] + 1) for box in dets[rootindex + i]] 184 | area1 = areas[box_ind] 185 | box1 = dets[rootindex + i][box_ind] 186 | x1 = np.maximum(box1[0], dets[rootindex + i][:, 0]) 187 | y1 = np.maximum(box1[1], dets[rootindex + i][:, 1]) 188 | x2 = np.minimum(box1[2], dets[rootindex + i][:, 2]) 189 | y2 = np.minimum(box1[3], dets[rootindex + i][:, 3]) 190 | w = np.maximum(0.0, x2 - x1 + 1) 191 | h = np.maximum(0.0, y2 - y1 + 1) 192 | inter = w * h 193 | 194 | ovrs = inter / (area1 + areas - inter) 195 | #saving the box need to delete 196 | deletes = [ovr_ind for ovr_ind, ovr in enumerate(ovrs) if ovr >= 0.3] 197 | delete_set.append(deletes) 198 | 199 | #delete the links except for the last frame 200 | if rootindex + i < len(links): 201 | for delete_ind in deletes: 202 | num_delete_links+=len(links[rootindex+i][delete_ind]) 203 | links[rootindex + i][delete_ind] = [] 204 | 205 | if i > 0 or rootindex > 0: 206 | 207 | #delete the links which point to box_ind 208 | for priorbox in links[rootindex + i - 1]: 209 | for delete_ind in deletes: 210 | if delete_ind in priorbox: 211 | priorbox.remove(delete_ind) 212 | num_delete_links+=1 213 | 214 | return delete_set,num_delete_links 215 | 216 | def seq_nms(dets): 217 | links = createLinks(dets) 218 | dets=maxPath(dets, links) 219 | return dets 220 | 221 | -------------------------------------------------------------------------------- /lib/nms/setup_linux.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted fom 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | and values giving the absolute path to each directory. 31 | Starts by looking for the CUDAHOME env variable. If not found, everything 32 | is based on finding 'nvcc' in the PATH. 33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.iteritems(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
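    # --- annotation (not part of the original source) ------------------------
    # The replacement _compile below dispatches per source file: '.cu' sources
    # are handed to nvcc together with the 'nvcc' entry of extra_compile_args,
    # while every other source keeps the default compiler and the 'gcc' entry.
    # This is why the Extension definitions further down pass extra_compile_args
    # as a dict with 'gcc'/'nvcc' keys rather than the usual flat list.
    # These setup scripts appear to be driven by lib/Makefile via init.sh; a
    # manual build would look roughly like `python setup_linux.py build_ext
    # --inplace` (command shown as an assumption, not taken from this repo).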
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/nms/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import os 10 | from os.path import join as pjoin 11 | #from distutils.core import setup 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import subprocess 16 | 17 | #change for windows, by MrX 18 | nvcc_bin = 'nvcc.exe' 19 | lib_dir = 'lib/x64' 20 | 21 | import distutils.msvc9compiler 22 | distutils.msvc9compiler.VERSION = 14.0 23 | 24 | 25 | def find_in_path(name, path): 26 | "Find a file in a search path" 27 | # Adapted fom 28 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 29 | for dir in path.split(os.pathsep): 30 | binpath = pjoin(dir, name) 31 | if os.path.exists(binpath): 32 | return os.path.abspath(binpath) 33 | return None 34 | 35 | 36 | def locate_cuda(): 37 | """Locate the CUDA environment on the system 38 | 39 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 40 | and values giving the absolute path to each directory. 41 | 42 | Starts by looking for the CUDAHOME env variable. 
If not found, everything 43 | is based on finding 'nvcc' in the PATH. 44 | """ 45 | 46 | # first check if the CUDAHOME env variable is in use 47 | if 'CUDA_PATH' in os.environ: 48 | home = os.environ['CUDA_PATH'] 49 | print("home = %s\n" % home) 50 | nvcc = pjoin(home, 'bin', nvcc_bin) 51 | else: 52 | # otherwise, search the PATH for NVCC 53 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 54 | nvcc = find_in_path(nvcc_bin, os.environ['PATH'] + os.pathsep + default_path) 55 | if nvcc is None: 56 | raise EnvironmentError('The nvcc binary could not be ' 57 | 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') 58 | home = os.path.dirname(os.path.dirname(nvcc)) 59 | print("home = %s, nvcc = %s\n" % (home, nvcc)) 60 | 61 | 62 | cudaconfig = {'home':home, 'nvcc':nvcc, 63 | 'include': pjoin(home, 'include'), 64 | 'lib64': pjoin(home, lib_dir)} 65 | for k, v in cudaconfig.iteritems(): 66 | if not os.path.exists(v): 67 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 68 | 69 | return cudaconfig 70 | CUDA = locate_cuda() 71 | 72 | 73 | # Obtain the numpy include directory. This logic works across numpy versions. 74 | try: 75 | numpy_include = np.get_include() 76 | except AttributeError: 77 | numpy_include = np.get_numpy_include() 78 | 79 | 80 | def customize_compiler_for_nvcc(self): 81 | """inject deep into distutils to customize how the dispatch 82 | to gcc/nvcc works. 83 | 84 | If you subclass UnixCCompiler, it's not trivial to get your subclass 85 | injected in, and still have the right customizations (i.e. 86 | distutils.sysconfig.customize_compiler) run on it. So instead of going 87 | the OO route, I have this. Note, it's kindof like a wierd functional 88 | subclassing going on.""" 89 | 90 | # tell the compiler it can processes .cu 91 | #self.src_extensions.append('.cu') 92 | 93 | 94 | # save references to the default compiler_so and _comple methods 95 | #default_compiler_so = self.spawn 96 | #default_compiler_so = self.rc 97 | super = self.compile 98 | 99 | # now redefine the _compile method. This gets executed for each 100 | # object but distutils doesn't have the ability to change compilers 101 | # based on source extension: we add it. 
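    # --- annotation (not part of the original source) ------------------------
    # Unlike the Linux variant, this Windows script wraps the compiler's
    # compile() method rather than the per-file _compile hook, and the nvcc
    # dispatch is effectively disabled (the set_executable call is commented
    # out). Only the CPU extension (cpu_nms) is declared in ext_modules below;
    # the GPU extension is built separately by setup_windows_cuda.py.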
102 | def compile(sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): 103 | postfix=os.path.splitext(sources[0])[1] 104 | 105 | if postfix == '.cu': 106 | # use the cuda for .cu files 107 | #self.set_executable('compiler_so', CUDA['nvcc']) 108 | # use only a subset of the extra_postargs, which are 1-1 translated 109 | # from the extra_compile_args in the Extension class 110 | postargs = extra_postargs['nvcc'] 111 | else: 112 | postargs = extra_postargs['gcc'] 113 | 114 | 115 | return super(sources, output_dir, macros, include_dirs, debug, extra_preargs, postargs, depends) 116 | # reset the default compiler_so, which we might have changed for cuda 117 | #self.rc = default_compiler_so 118 | 119 | # inject our redefined _compile method into the class 120 | self.compile = compile 121 | 122 | 123 | # run the customize_compiler 124 | class custom_build_ext(build_ext): 125 | def build_extensions(self): 126 | customize_compiler_for_nvcc(self.compiler) 127 | build_ext.build_extensions(self) 128 | 129 | 130 | ext_modules = [ 131 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 132 | Extension( 133 | "cpu_nms", 134 | sources=["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': []}, 136 | include_dirs = [numpy_include], 137 | ), 138 | ] 139 | 140 | setup( 141 | name='fast_rcnn', 142 | ext_modules=ext_modules, 143 | # inject our custom trigger 144 | cmdclass={'build_ext': custom_build_ext}, 145 | ) 146 | -------------------------------------------------------------------------------- /lib/nms/setup_windows_cuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import os 5 | # on Windows, we need the original PATH without Anaconda's compiler in it: 6 | PATH = os.environ.get('PATH') + ';C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin' 7 | from distutils.spawn import spawn, find_executable 8 | from setuptools import setup, find_packages, Extension 9 | from setuptools.command.build_ext import build_ext 10 | import sys 11 | 12 | # CUDA specific config 13 | # nvcc is assumed to be in user's PATH 14 | nvcc_compile_args = ['-O', '--ptxas-options=-v', '-arch=compute_35', '-code=sm_35,sm_52,sm_61', '-c', '--compiler-options=-fPIC'] 15 | nvcc_compile_args = os.environ.get('NVCCFLAGS', '').split() + nvcc_compile_args 16 | cuda_libs = ['cublas'] 17 | nvcc_bin = 'nvcc.exe' 18 | lib_dir = 'lib/x64' 19 | 20 | 21 | import distutils.msvc9compiler 22 | distutils.msvc9compiler.VERSION = 14.0 23 | 24 | # Obtain the numpy include directory. This logic works across numpy versions. 25 | try: 26 | numpy_include = np.get_include() 27 | except AttributeError: 28 | numpy_include = np.get_numpy_include() 29 | 30 | 31 | cudamat_ext = Extension('gpu_nms', 32 | sources=[ 33 | 'gpu_nms.cu' 34 | ], 35 | language='c++', 36 | libraries=cuda_libs, 37 | extra_compile_args=nvcc_compile_args, 38 | include_dirs = [numpy_include, 'C:\\Programming\\CUDA\\v8.0\\include']) 39 | 40 | 41 | class CUDA_build_ext(build_ext): 42 | """ 43 | Custom build_ext command that compiles CUDA files. 44 | Note that all extension source files will be processed with this compiler. 
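    (Annotation, not part of the original file: build_extensions below teaches
    the MSVC compiler to accept '.cu' sources and swaps cl.exe/link.exe for
    nvcc, while spawn() rewrites MSVC-style flags such as /Fo, /Tc and
    /LIBPATH: into their nvcc equivalents before launching the command.)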
45 | """ 46 | def build_extensions(self): 47 | self.compiler.src_extensions.append('.cu') 48 | self.compiler.set_executable('compiler_so', 'nvcc') 49 | self.compiler.set_executable('linker_so', 'nvcc --shared') 50 | if hasattr(self.compiler, '_c_extensions'): 51 | self.compiler._c_extensions.append('.cu') # needed for Windows 52 | self.compiler.spawn = self.spawn 53 | build_ext.build_extensions(self) 54 | 55 | def spawn(self, cmd, search_path=1, verbose=0, dry_run=0): 56 | """ 57 | Perform any CUDA specific customizations before actually launching 58 | compile/link etc. commands. 59 | """ 60 | if (sys.platform == 'darwin' and len(cmd) >= 2 and cmd[0] == 'nvcc' and 61 | cmd[1] == '--shared' and cmd.count('-arch') > 0): 62 | # Versions of distutils on OSX earlier than 2.7.9 inject 63 | # '-arch x86_64' which we need to strip while using nvcc for 64 | # linking 65 | while True: 66 | try: 67 | index = cmd.index('-arch') 68 | del cmd[index:index+2] 69 | except ValueError: 70 | break 71 | elif self.compiler.compiler_type == 'msvc': 72 | # There are several things we need to do to change the commands 73 | # issued by MSVCCompiler into one that works with nvcc. In the end, 74 | # it might have been easier to write our own CCompiler class for 75 | # nvcc, as we're only interested in creating a shared library to 76 | # load with ctypes, not in creating an importable Python extension. 77 | # - First, we replace the cl.exe or link.exe call with an nvcc 78 | # call. In case we're running Anaconda, we search cl.exe in the 79 | # original search path we captured further above -- Anaconda 80 | # inserts a MSVC version into PATH that is too old for nvcc. 81 | cmd[:1] = ['nvcc', '--compiler-bindir', 82 | os.path.dirname(find_executable("cl.exe", PATH)) 83 | or cmd[0]] 84 | # - Secondly, we fix a bunch of command line arguments. 85 | for idx, c in enumerate(cmd): 86 | # create .dll instead of .pyd files 87 | #if '.pyd' in c: cmd[idx] = c = c.replace('.pyd', '.dll') #20160601, by MrX 88 | # replace /c by -c 89 | if c == '/c': cmd[idx] = '-c' 90 | # replace /DLL by --shared 91 | elif c == '/DLL': cmd[idx] = '--shared' 92 | # remove --compiler-options=-fPIC 93 | elif '-fPIC' in c: del cmd[idx] 94 | # replace /Tc... by ... 95 | elif c.startswith('/Tc'): cmd[idx] = c[3:] 96 | # replace /Fo... by -o ... 97 | elif c.startswith('/Fo'): cmd[idx:idx+1] = ['-o', c[3:]] 98 | # replace /LIBPATH:... by -L... 99 | elif c.startswith('/LIBPATH:'): cmd[idx] = '-L' + c[9:] 100 | # replace /OUT:... by -o ... 
101 | elif c.startswith('/OUT:'): cmd[idx:idx+1] = ['-o', c[5:]] 102 | # remove /EXPORT:initlibcudamat or /EXPORT:initlibcudalearn 103 | elif c.startswith('/EXPORT:'): del cmd[idx] 104 | # replace cublas.lib by -lcublas 105 | elif c == 'cublas.lib': cmd[idx] = '-lcublas' 106 | # - Finally, we pass on all arguments starting with a '/' to the 107 | # compiler or linker, and have nvcc handle all other arguments 108 | if '--shared' in cmd: 109 | pass_on = '--linker-options=' 110 | # we only need MSVCRT for a .dll, remove CMT if it sneaks in: 111 | cmd.append('/NODEFAULTLIB:libcmt.lib') 112 | else: 113 | pass_on = '--compiler-options=' 114 | cmd = ([c for c in cmd if c[0] != '/'] + 115 | [pass_on + ','.join(c for c in cmd if c[0] == '/')]) 116 | # For the future: Apart from the wrongly set PATH by Anaconda, it 117 | # would suffice to run the following for compilation on Windows: 118 | # nvcc -c -O -o .obj .cu 119 | # And the following for linking: 120 | # nvcc --shared -o .dll .obj .obj -lcublas 121 | # This could be done by a NVCCCompiler class for all platforms. 122 | spawn(cmd, search_path, verbose, dry_run) 123 | 124 | setup(name="py_fast_rcnn_gpu", 125 | description="Performs linear algebra computation on the GPU via CUDA", 126 | ext_modules=[cudamat_ext], 127 | cmdclass={'build_ext': CUDA_build_ext}, 128 | ) 129 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/rpn/__init__.py -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 9 | scales=2 ** np.arange(3, 6)): 10 | """ 11 | Generate anchor (reference) windows by enumerating aspect ratios X 12 | scales wrt a reference (0, 0, 15, 15) window. 13 | """ 14 | 15 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 16 | ratio_anchors = _ratio_enum(base_anchor, ratios) 17 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 18 | for i in xrange(ratio_anchors.shape[0])]) 19 | return anchors 20 | 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | 34 | def _mkanchors(ws, hs, x_ctr, y_ctr): 35 | """ 36 | Given a vector of widths (ws) and heights (hs) around a center 37 | (x_ctr, y_ctr), output a set of anchors (windows). 38 | """ 39 | 40 | ws = ws[:, np.newaxis] 41 | hs = hs[:, np.newaxis] 42 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 43 | y_ctr - 0.5 * (hs - 1), 44 | x_ctr + 0.5 * (ws - 1), 45 | y_ctr + 0.5 * (hs - 1))) 46 | return anchors 47 | 48 | 49 | def _ratio_enum(anchor, ratios): 50 | """ 51 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
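    (Annotation, not part of the original file: with the default 16x16 base
    anchor and ratios [0.5, 1, 2] used by generate_anchors above, this step
    yields windows of roughly 23x12, 16x16 and 11x22 pixels, all sharing the
    base anchor's centre; the three scales 8, 16, 32 are applied afterwards by
    _scale_enum. Numbers are illustrative, computed from the code above.)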
52 | """ 53 | 54 | w, h, x_ctr, y_ctr = _whctrs(anchor) 55 | size = w * h 56 | size_ratios = size / ratios 57 | ws = np.round(np.sqrt(size_ratios)) 58 | hs = np.round(ws * ratios) 59 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 60 | return anchors 61 | 62 | 63 | def _scale_enum(anchor, scales): 64 | """ 65 | Enumerate a set of anchors for each scale wrt an anchor. 66 | """ 67 | 68 | w, h, x_ctr, y_ctr = _whctrs(anchor) 69 | ws = w * scales 70 | hs = h * scales 71 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 72 | return anchors 73 | -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | 10 | import mxnet as mx 11 | from mxnet.io import DataDesc, DataBatch 12 | import threading 13 | 14 | 15 | class PrefetchingIter(mx.io.DataIter): 16 | """Base class for prefetching iterators. Takes one or more DataIters ( 17 | or any class with "reset" and "next" methods) and combine them with 18 | prefetching. For example: 19 | 20 | Parameters 21 | ---------- 22 | iters : DataIter or list of DataIter 23 | one or more DataIters (or any class with "reset" and "next" methods) 24 | rename_data : None or list of dict 25 | i-th element is a renaming map for i-th iter, in the form of 26 | {'original_name' : 'new_name'}. Should have one entry for each entry 27 | in iter[i].provide_data 28 | rename_label : None or list of dict 29 | Similar to rename_data 30 | 31 | Examples 32 | -------- 33 | iter = PrefetchingIter([NDArrayIter({'data': X1}), NDArrayIter({'data': X2})], 34 | rename_data=[{'data': 'data1'}, {'data': 'data2'}]) 35 | """ 36 | def __init__(self, iters, rename_data=None, rename_label=None): 37 | super(PrefetchingIter, self).__init__() 38 | if not isinstance(iters, list): 39 | iters = [iters] 40 | self.n_iter = len(iters) 41 | assert self.n_iter ==1, "Our prefetching iter only support 1 DataIter" 42 | self.iters = iters 43 | self.rename_data = rename_data 44 | self.rename_label = rename_label 45 | self.batch_size = len(self.provide_data) * self.provide_data[0][0][1][0] 46 | self.data_ready = [threading.Event() for i in range(self.n_iter)] 47 | self.data_taken = [threading.Event() for i in range(self.n_iter)] 48 | for e in self.data_taken: 49 | e.set() 50 | self.started = True 51 | self.current_batch = [None for _ in range(self.n_iter)] 52 | self.next_batch = [None for _ in range(self.n_iter)] 53 | def prefetch_func(self, i): 54 | """Thread entry""" 55 | while True: 56 | self.data_taken[i].wait() 57 | if not self.started: 58 | break 59 | try: 60 | self.next_batch[i] = self.iters[i].next() 61 | except StopIteration: 62 | self.next_batch[i] = None 63 | self.data_taken[i].clear() 64 | self.data_ready[i].set() 65 | self.prefetch_threads = [threading.Thread(target=prefetch_func, args=[self, i]) \ 66 | for i in range(self.n_iter)] 67 | for thread in self.prefetch_threads: 68 | thread.setDaemon(True) 69 | thread.start() 70 | 71 | def __del__(self): 72 | self.started = False 73 | for e in self.data_taken: 74 | e.set() 75 | for thread in self.prefetch_threads: 76 | thread.join() 77 | 78 | @property 79 | def provide_data(self): 80 | """The 
name and shape of data provided by this iterator""" 81 | if self.rename_data is None: 82 | return sum([i.provide_data for i in self.iters], []) 83 | else: 84 | return sum([[ 85 | DataDesc(r[x.name], x.shape, x.dtype) 86 | if isinstance(x, DataDesc) else DataDesc(*x) 87 | for x in i.provide_data 88 | ] for r, i in zip(self.rename_data, self.iters)], []) 89 | 90 | @property 91 | def provide_label(self): 92 | """The name and shape of label provided by this iterator""" 93 | if self.rename_label is None: 94 | return sum([i.provide_label for i in self.iters], []) 95 | else: 96 | return sum([[ 97 | DataDesc(r[x.name], x.shape, x.dtype) 98 | if isinstance(x, DataDesc) else DataDesc(*x) 99 | for x in i.provide_label 100 | ] for r, i in zip(self.rename_label, self.iters)], []) 101 | 102 | def reset(self): 103 | for e in self.data_ready: 104 | e.wait() 105 | for i in self.iters: 106 | i.reset() 107 | for e in self.data_ready: 108 | e.clear() 109 | for e in self.data_taken: 110 | e.set() 111 | 112 | def iter_next(self): 113 | for e in self.data_ready: 114 | e.wait() 115 | if self.next_batch[0] is None: 116 | return False 117 | else: 118 | self.current_batch = self.next_batch[0] 119 | for e in self.data_ready: 120 | e.clear() 121 | for e in self.data_taken: 122 | e.set() 123 | return True 124 | 125 | def next(self): 126 | if self.iter_next(): 127 | return self.current_batch 128 | else: 129 | raise StopIteration 130 | 131 | def getdata(self): 132 | return self.current_batch.data 133 | 134 | def getlabel(self): 135 | return self.current_batch.label 136 | 137 | def getindex(self): 138 | return self.current_batch.index 139 | 140 | def getpad(self): 141 | return self.current_batch.pad 142 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | from load_model import load_checkpoint 2 | from save_model import save_checkpoint 3 | 4 | 5 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 6 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 7 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 8 | arg_names = args1.keys() + args2.keys() 9 | aux_names = auxs1.keys() + auxs2.keys() 10 | args = dict() 11 | for arg in arg_names: 12 | if arg in args1: 13 | args[arg] = args1[arg] 14 | if arg in args2: 15 | args[arg] = args2[arg] 16 | auxs = dict() 17 | for aux in aux_names: 18 | if aux in auxs1: 19 | auxs[aux] = auxs1[aux] 20 | if aux in auxs2: 21 | auxs[aux] = auxs2[aux] 22 | save_checkpoint(prefix_out, epoch_out, args, auxs) 23 | -------------------------------------------------------------------------------- /lib/utils/create_logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # low-Guided-Feature-Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Bin Xiao 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import logging 10 | import time 11 | 12 | def create_logger(root_output_path, cfg, image_set): 13 | # set up 
logger 14 | if not os.path.exists(root_output_path): 15 | os.makedirs(root_output_path) 16 | assert os.path.exists(root_output_path), '{} does not exist'.format(root_output_path) 17 | 18 | cfg_name = os.path.basename(cfg).split('.')[0] 19 | config_output_path = os.path.join(root_output_path, '{}'.format(cfg_name)) 20 | if not os.path.exists(config_output_path): 21 | os.makedirs(config_output_path) 22 | 23 | image_sets = [iset for iset in image_set.split('+')] 24 | final_output_path = os.path.join(config_output_path, '{}'.format('_'.join(image_sets))) 25 | if not os.path.exists(final_output_path): 26 | os.makedirs(final_output_path) 27 | 28 | log_file = '{}_{}.log'.format(cfg_name, time.strftime('%Y-%m-%d-%H-%M')) 29 | head = '%(asctime)-15s %(message)s' 30 | logging.basicConfig(filename=os.path.join(final_output_path, log_file), format=head) 31 | logger = logging.getLogger() 32 | logger.setLevel(logging.INFO) 33 | 34 | return logger, final_output_path 35 | 36 | -------------------------------------------------------------------------------- /lib/utils/image_processing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def resize(im, target_size, max_size): 6 | """ 7 | only resize input image to target size and return scale 8 | :param im: BGR image input by opencv 9 | :param target_size: one dimensional size (the short side) 10 | :param max_size: one dimensional max size (the long side) 11 | :return: 12 | """ 13 | im_shape = im.shape 14 | im_size_min = np.min(im_shape[0:2]) 15 | im_size_max = np.max(im_shape[0:2]) 16 | im_scale = float(target_size) / float(im_size_min) 17 | # prevent bigger axis from being more than max_size: 18 | if np.round(im_scale * im_size_max) > max_size: 19 | im_scale = float(max_size) / float(im_size_max) 20 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 21 | return im, im_scale 22 | 23 | 24 | def transform(im, pixel_means, need_mean=False): 25 | """ 26 | transform into mxnet tensor 27 | subtract pixel size and transform to correct format 28 | :param im: [height, width, channel] in BGR 29 | :param pixel_means: [[[R, G, B pixel means]]] 30 | :return: [batch, channel, height, width] 31 | """ 32 | assert False, "shouldn't reach here." 
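    # --- annotation (not part of the original source) ------------------------
    # The assert above makes the remainder of this function unreachable, so this
    # legacy transform() is effectively disabled; image loading and mean
    # subtraction appear to be handled by lib/utils/image.py instead (stated as
    # an observation from the repository layout, not verified per call site).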
33 | im = im.copy() 34 | im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)] 35 | im = im.astype(float) 36 | if need_mean: 37 | im -= pixel_means 38 | im_tensor = im[np.newaxis, :] 39 | # put channel first 40 | channel_swap = (0, 3, 1, 2) 41 | im_tensor = im_tensor.transpose(channel_swap) 42 | return im_tensor 43 | 44 | 45 | def transform_inverse(im_tensor, pixel_means): 46 | """ 47 | transform from mxnet im_tensor to ordinary RGB image 48 | im_tensor is limited to one image 49 | :param im_tensor: [batch, channel, height, width] 50 | :param pixel_means: [[[R, G, B pixel means]]] 51 | :return: im [height, width, channel(RGB)] 52 | """ 53 | assert im_tensor.shape[0] == 1 54 | im_tensor = im_tensor.copy() 55 | # put channel back 56 | channel_swap = (0, 2, 3, 1) 57 | im_tensor = im_tensor.transpose(channel_swap) 58 | im = im_tensor[0] 59 | assert im.shape[2] == 3 60 | im += pixel_means 61 | im = im.astype(np.uint8) 62 | return im 63 | 64 | 65 | def tensor_vstack(tensor_list, pad=0): 66 | """ 67 | vertically stack tensors 68 | :param tensor_list: list of tensor to be stacked vertically 69 | :param pad: label to pad with 70 | :return: tensor with max shape 71 | """ 72 | ndim = len(tensor_list[0].shape) 73 | if ndim == 1: 74 | return np.hstack(tensor_list) 75 | dimensions = [0] 76 | for dim in range(1, ndim): 77 | dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) 78 | for ind, tensor in enumerate(tensor_list): 79 | pad_shape = [(0, 0)] 80 | for dim in range(1, ndim): 81 | pad_shape.append((0, dimensions[dim] - tensor.shape[dim])) 82 | tensor_list[ind] = np.lib.pad(tensor, pad_shape, 'constant', constant_values=pad) 83 | all_tensor = np.vstack(tensor_list) 84 | return all_tensor 85 | -------------------------------------------------------------------------------- /lib/utils/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dataset import * 3 | 4 | 5 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 6 | flip=False): 7 | """ load ground truth roidb """ 8 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 9 | roidb = imdb.gt_roidb() 10 | if flip: 11 | roidb = imdb.append_flipped_images(roidb) 12 | return roidb 13 | 14 | 15 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 16 | proposal='rpn', append_gt=True, flip=False): 17 | """ load proposal roidb (append_gt when training) """ 18 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 19 | 20 | gt_roidb = imdb.gt_roidb() 21 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb, append_gt) 22 | if flip: 23 | roidb = imdb.append_flipped_images(roidb) 24 | return roidb 25 | 26 | 27 | def merge_roidb(roidbs): 28 | """ roidb are list, concat them together """ 29 | roidb = roidbs[0] 30 | for r in roidbs[1:]: 31 | roidb.extend(r) 32 | return roidb 33 | 34 | 35 | def filter_roidb(roidb, config): 36 | """ remove roidb entries without usable rois """ 37 | 38 | def is_valid(entry): 39 | """ valid images have at least 1 fg or bg roi """ 40 | overlaps = entry['max_overlaps'] 41 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 42 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 43 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 44 | return valid 45 | 46 | num = len(roidb) 47 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 48 | num_after = len(filtered_roidb) 49 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 50 | 51 | return filtered_roidb 52 | 53 | 54 | def load_gt_segdb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 55 | flip=False): 56 | """ load ground truth segdb """ 57 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 58 | segdb = imdb.gt_segdb() 59 | if flip: 60 | segdb = imdb.append_flipped_images_for_segmentation(segdb) 61 | return segdb 62 | 63 | 64 | def merge_segdb(segdbs): 65 | """ segdb are list, concat them together """ 66 | segdb = segdbs[0] 67 | for r in segdbs[1:]: 68 | segdb.extend(r) 69 | return segdb 70 | -------------------------------------------------------------------------------- /lib/utils/load_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def load_checkpoint(prefix, epoch): 5 | """ 6 | Load model checkpoint from file. 7 | :param prefix: Prefix of model name. 8 | :param epoch: Epoch number of model we would like to load. 9 | :return: (arg_params, aux_params) 10 | arg_params : dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's weights. 12 | aux_params : dict of str to NDArray 13 | Model parameter, dict of name to NDArray of net's auxiliary states. 14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | print 'load %s-%04d.params!!!' % (prefix, epoch) 17 | arg_params = {} 18 | aux_params = {} 19 | for k, v in save_dict.items(): 20 | tp, name = k.split(':', 1) 21 | if tp == 'arg': 22 | arg_params[name] = v 23 | if tp == 'aux': 24 | aux_params[name] = v 25 | return arg_params, aux_params 26 | 27 | 28 | def convert_context(params, ctx): 29 | """ 30 | :param params: dict of str to NDArray 31 | :param ctx: the context to convert to 32 | :return: dict of str of NDArray with context ctx 33 | """ 34 | new_params = dict() 35 | for k, v in params.items(): 36 | new_params[k] = v.as_in_context(ctx) 37 | return new_params 38 | 39 | 40 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 41 | """ 42 | wrapper for load checkpoint 43 | :param prefix: Prefix of model name. 44 | :param epoch: Epoch number of model we would like to load. 45 | :param convert: reference model should be converted to GPU NDArray first 46 | :param ctx: if convert then ctx must be designated. 
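The load_data.py helpers above are normally chained: one roidb is loaded per image set, the lists are concatenated, and entries without usable RoIs are dropped. A minimal sketch, using the dataset and image-set defaults from manet_rfcn/config/config.py; the `config.config` import path is an assumption, while the `utils.load_data` import mirrors the one used by the training scripts later in this repo.

```
from utils.load_data import load_gt_roidb, merge_roidb, filter_roidb
from config.config import config as cfg    # assumed import; any object exposing cfg.TRAIN.FG_THRESH etc. works

image_sets = 'DET_train_30classes+VID_train_15frames'.split('+')
roidbs = [load_gt_roidb('ImageNetVID', iset, './data', './data/ILSVRC2015', flip=True)
          for iset in image_sets]
roidb = merge_roidb(roidbs)        # concatenate the per-set lists
roidb = filter_roidb(roidb, cfg)   # keep only entries with at least one fg or bg RoI
```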
47 | :param process: model should drop any test 48 | :return: (arg_params, aux_params) 49 | """ 50 | arg_params, aux_params = load_checkpoint(prefix, epoch) 51 | if convert: 52 | if ctx is None: 53 | ctx = mx.cpu() 54 | arg_params = convert_context(arg_params, ctx) 55 | aux_params = convert_context(aux_params, ctx) 56 | if process: 57 | tests = [k for k in arg_params.keys() if '_test' in k] 58 | for test in tests: 59 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 60 | return arg_params, aux_params 61 | -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deep Feature Flow 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | 9 | import logging 10 | from mxnet.lr_scheduler import LRScheduler 11 | 12 | class WarmupMultiFactorScheduler(LRScheduler): 13 | """Reduce learning rate in factor at steps specified in a list 14 | 15 | Assume the weight has been updated by n times, then the learning rate will 16 | be 17 | 18 | base_lr * factor^(sum((step/n)<=1)) # step is an array 19 | 20 | Parameters 21 | ---------- 22 | step: list of int 23 | schedule learning rate after n updates 24 | factor: float 25 | the factor for reducing the learning rate 26 | """ 27 | def __init__(self, step, factor=1, warmup=False, warmup_lr=0, warmup_step=0): 28 | super(WarmupMultiFactorScheduler, self).__init__() 29 | assert isinstance(step, list) and len(step) >= 1 30 | for i, _step in enumerate(step): 31 | if i != 0 and step[i] <= step[i-1]: 32 | raise ValueError("Schedule step must be an increasing integer list") 33 | if _step < 1: 34 | raise ValueError("Schedule step must be greater or equal than 1 round") 35 | if factor > 1.0: 36 | raise ValueError("Factor must be no more than 1 to make lr reduce") 37 | self.step = step 38 | self.cur_step_ind = 0 39 | self.factor = factor 40 | self.count = 0 41 | self.warmup = warmup 42 | self.warmup_lr = warmup_lr 43 | self.warmup_step = warmup_step 44 | 45 | def __call__(self, num_update): 46 | """ 47 | Call to schedule current learning rate 48 | 49 | Parameters 50 | ---------- 51 | num_update: int 52 | the maximal number of updates applied to a weight. 
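To illustrate `load_param` above: it reads `<prefix>-<epoch:04d>.params`, optionally copies the arrays to a given context, and with `process=True` renames the `*_test` parameters written by the checkpoint callback back to their plain names. The prefix and epoch below are placeholders.

```
import mxnet as mx
from utils.load_model import load_param    # assumes lib/ is on sys.path

# Loads './output/manet_rfcn-0002.params' (hypothetical prefix/epoch).
arg_params, aux_params = load_param('./output/manet_rfcn', 2,
                                    convert=True, ctx=mx.gpu(0), process=True)
print(len(arg_params), len(aux_params))
```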
53 | """ 54 | 55 | # NOTE: use while rather than if (for continuing training via load_epoch) 56 | if self.warmup and num_update < self.warmup_step: 57 | return self.warmup_lr 58 | while self.cur_step_ind <= len(self.step)-1: 59 | if num_update > self.step[self.cur_step_ind]: 60 | self.count = self.step[self.cur_step_ind] 61 | self.cur_step_ind += 1 62 | self.base_lr *= self.factor 63 | logging.info("Update[%d]: Change learning rate to %0.5e", 64 | num_update, self.base_lr) 65 | else: 66 | return self.base_lr 67 | return self.base_lr 68 | -------------------------------------------------------------------------------- /lib/utils/roidb.py: -------------------------------------------------------------------------------- 1 | """ 2 | roidb 3 | basic format [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] 4 | extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets'] 5 | """ 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | from bbox.bbox_regression import compute_bbox_regression_targets 11 | 12 | 13 | def prepare_roidb(imdb, roidb, cfg): 14 | """ 15 | add image path, max_classes, max_overlaps to roidb 16 | :param imdb: image database, provide path 17 | :param roidb: roidb 18 | :return: None 19 | """ 20 | print 'prepare roidb' 21 | for i in range(len(roidb)): # image_index 22 | roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i]) 23 | if cfg.TRAIN.ASPECT_GROUPING: 24 | size = cv2.imread(roidb[i]['image']).shape 25 | roidb[i]['height'] = size[0] 26 | roidb[i]['width'] = size[1] 27 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 28 | max_overlaps = gt_overlaps.max(axis=1) 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | roidb[i]['max_overlaps'] = max_overlaps 31 | roidb[i]['max_classes'] = max_classes 32 | 33 | # background roi => background class 34 | zero_indexes = np.where(max_overlaps == 0)[0] 35 | assert all(max_classes[zero_indexes] == 0) 36 | # foreground roi => foreground class 37 | nonzero_indexes = np.where(max_overlaps > 0)[0] 38 | assert all(max_classes[nonzero_indexes] != 0) 39 | -------------------------------------------------------------------------------- /lib/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters. 
14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /lib/utils/show_boxes.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yi Li, Haocheng Zhang, Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import matplotlib.pyplot as plt 9 | import cv2 10 | import random 11 | 12 | def show_boxes(im, dets, classes, scale = 1.0): 13 | plt.cla() 14 | plt.axis("off") 15 | plt.imshow(im) 16 | for cls_idx, cls_name in enumerate(classes): 17 | cls_dets = dets[cls_idx] 18 | for det in cls_dets: 19 | bbox = det[:4] * scale 20 | color = (random.random(), random.random(), random.random()) 21 | rect = plt.Rectangle((bbox[0], bbox[1]), 22 | bbox[2] - bbox[0], 23 | bbox[3] - bbox[1], fill=False, 24 | edgecolor=color, linewidth=2.5) 25 | plt.gca().add_patch(rect) 26 | 27 | if cls_dets.shape[1] == 5: 28 | score = det[-1] 29 | plt.gca().text(bbox[0], bbox[1], 30 | '{:s} {:.3f}'.format(cls_name, score), 31 | bbox=dict(facecolor=color, alpha=0.5), fontsize=9, color='white') 32 | plt.show() 33 | return im 34 | 35 | 36 | def draw_boxes(im, dets, classes, scale = 1.0): 37 | color_white = (255, 255, 255) 38 | for cls_idx, cls_name in enumerate(classes): 39 | cls_dets = dets[cls_idx] 40 | for det in cls_dets: 41 | bbox = det[:4] * scale 42 | bbox = map(int, bbox) 43 | color = (random.randint(0, 256), random.randint(0, 256), random.randint(0, 256)) 44 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=color, thickness=3) 45 | 46 | if cls_dets.shape[1] == 5: 47 | score = det[-1] 48 | cv2.putText(im, '%s %.3f' % (cls_name, score), (bbox[0], bbox[1]+10), 49 | color=color_white, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, thickness=2) 50 | return im 51 | -------------------------------------------------------------------------------- /lib/utils/symbol.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | class Symbol: 10 | def __init__(self): 11 | self.arg_shape_dict = None 12 | self.out_shape_dict = None 13 | self.aux_shape_dict = None 14 | self.sym = None 15 | 16 | @property 17 | def symbol(self): 18 | return self.sym 19 | 20 | def get_symbol(self, cfg, is_train=True): 21 | """ 22 | return a generated symbol, it also need to be assigned to self.sym 23 | """ 24 | raise NotImplementedError() 25 | 26 | def init_weights(self, cfg, arg_params, aux_params): 27 | raise NotImplementedError() 28 | 29 | def get_msra_std(self, shape): 30 | fan_in = float(shape[1]) 31 | if len(shape) > 2: 32 | fan_in *= np.prod(shape[2:]) 33 | return np.sqrt(2 / fan_in) 34 | 35 | def infer_shape(self, data_shape_dict): 36 | # infer shape 37 | arg_shape, out_shape, aux_shape = self.sym.infer_shape(**data_shape_dict) 38 | 
self.arg_shape_dict = dict(zip(self.sym.list_arguments(), arg_shape)) 39 | self.out_shape_dict = dict(zip(self.sym.list_outputs(), out_shape)) 40 | self.aux_shape_dict = dict(zip(self.sym.list_auxiliary_states(), aux_shape)) 41 | 42 | def check_parameter_shapes(self, arg_params, aux_params, data_shape_dict, is_train=True): 43 | for k in self.sym.list_arguments(): 44 | if k in data_shape_dict or (False if is_train else 'label' in k): 45 | continue 46 | assert k in arg_params, k + ' not initialized' 47 | assert arg_params[k].shape == self.arg_shape_dict[k], \ 48 | 'shape inconsistent for ' + k + ' inferred ' + str(self.arg_shape_dict[k]) + ' provided ' + str( 49 | arg_params[k].shape) 50 | for k in self.sym.list_auxiliary_states(): 51 | assert k in aux_params, k + ' not initialized' 52 | assert aux_params[k].shape == self.aux_shape_dict[k], \ 53 | 'shape inconsistent for ' + k + ' inferred ' + str(self.aux_shape_dict[k]) + ' provided ' + str( 54 | aux_params[k].shape) 55 | -------------------------------------------------------------------------------- /lib/utils/tictoc.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def tic(): 4 | import time 5 | global startTime_for_tictoc 6 | startTime_for_tictoc = time.time() 7 | return startTime_for_tictoc 8 | 9 | def toc(): 10 | if 'startTime_for_tictoc' in globals(): 11 | endTime = time.time() 12 | return endTime - startTime_for_tictoc 13 | else: 14 | return None 15 | -------------------------------------------------------------------------------- /manet_rfcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/manet_rfcn/__init__.py -------------------------------------------------------------------------------- /manet_rfcn/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | lib_path = osp.join(this_dir, '..', 'lib') 11 | add_path(lib_path) 12 | -------------------------------------------------------------------------------- /manet_rfcn/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/manet_rfcn/config/__init__.py -------------------------------------------------------------------------------- /manet_rfcn/config/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuqing Zhu, Shuhao Fu, Xizhou Zhu, Yuwen Xiong, Bin Xiao 7 | # -------------------------------------------------------- 8 | 9 | import yaml 10 | import numpy as np 11 | from easydict import EasyDict as edict 12 | 13 | config = edict() 14 | 15 | config.MXNET_VERSION = '' 16 | config.output_path = '' 17 | config.symbol = '' 18 | config.gpus = '' 19 | config.CLASS_AGNOSTIC = True 20 | config.SCALES = [(600, 1000)] # first is scale (the shorter side); second is max size 21 | 22 | # default 
training 23 | config.default = edict() 24 | config.default.frequent = 20 25 | config.default.kvstore = 'device' 26 | 27 | # network related params 28 | config.network = edict() 29 | config.network.pretrained = '' 30 | config.network.pretrained_flow = '' 31 | config.network.pretrained_epoch = 0 32 | config.network.PIXEL_MEANS = np.array([0, 0, 0]) 33 | config.network.IMAGE_STRIDE = 0 34 | config.network.RPN_FEAT_STRIDE = 16 35 | config.network.RCNN_FEAT_STRIDE = 16 36 | config.network.FIXED_PARAMS = ['gamma', 'beta'] 37 | config.network.ANCHOR_SCALES = (8, 16, 32) 38 | config.network.ANCHOR_RATIOS = (0.5, 1, 2) 39 | config.network.NORMALIZE_RPN = True 40 | config.network.ANCHOR_MEANS = (0.0, 0.0, 0.0, 0.0) 41 | config.network.ANCHOR_STDS = (0.1, 0.1, 0.4, 0.4) 42 | config.network.NUM_ANCHORS = len(config.network.ANCHOR_SCALES) * len(config.network.ANCHOR_RATIOS) 43 | config.network.FGFA_FEAT_DIM = 1024 #+ 2048 # 1024 for feature network, 2048 for embedding network 44 | 45 | # dataset related params 46 | config.dataset = edict() 47 | config.dataset.dataset = 'ImageNetVID' 48 | config.dataset.image_set = 'DET_train_30classes+VID_train_15frames' 49 | config.dataset.test_image_set = 'VID_val_videos' 50 | config.dataset.root_path = './data' 51 | config.dataset.dataset_path = './data/ILSVRC2015' 52 | config.dataset.motion_iou_path = './lib/dataset/imagenet_vid_groundtruth_motion_iou.mat' 53 | config.dataset.enable_detailed_eval = True 54 | config.dataset.NUM_CLASSES = 31 55 | 56 | 57 | config.TRAIN = edict() 58 | 59 | config.TRAIN.lr = 0 60 | config.TRAIN.lr_step = '' 61 | config.TRAIN.lr_factor = 0.1 62 | config.TRAIN.warmup = False 63 | config.TRAIN.warmup_lr = 0 64 | config.TRAIN.warmup_step = 0 65 | config.TRAIN.momentum = 0.9 66 | config.TRAIN.wd = 0.0005 67 | config.TRAIN.begin_epoch = 0 68 | config.TRAIN.end_epoch = 0 69 | config.TRAIN.model_prefix = '' 70 | 71 | # whether predict occlusion 72 | config.TRAIN.USE_OCCLUSION = False 73 | # whether resume training 74 | config.TRAIN.RESUME = False 75 | # whether flip image 76 | config.TRAIN.FLIP = True 77 | # whether shuffle image 78 | config.TRAIN.SHUFFLE = True 79 | # whether use OHEM 80 | config.TRAIN.ENABLE_OHEM = False 81 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 82 | config.TRAIN.BATCH_IMAGES = 2 83 | # e2e changes behavior of anchor loader and metric 84 | config.TRAIN.END2END = False 85 | # group images with similar aspect ratio 86 | config.TRAIN.ASPECT_GROUPING = True 87 | 88 | # R-CNN 89 | # rcnn rois batch size 90 | config.TRAIN.BATCH_ROIS = 128 91 | config.TRAIN.BATCH_ROIS_OHEM = 128 92 | # rcnn rois sampling params 93 | config.TRAIN.FG_FRACTION = 0.25 94 | config.TRAIN.FG_THRESH = 0.5 95 | config.TRAIN.BG_THRESH_HI = 0.5 96 | config.TRAIN.BG_THRESH_LO = 0.0 97 | # rcnn bounding box regression params 98 | config.TRAIN.BBOX_REGRESSION_THRESH = 0.5 99 | config.TRAIN.BBOX_WEIGHTS = np.array([1.0, 1.0, 1.0, 1.0]) 100 | 101 | # RPN anchor loader 102 | # rpn anchors batch size 103 | config.TRAIN.RPN_BATCH_SIZE = 256 104 | # rpn anchors sampling params 105 | config.TRAIN.RPN_FG_FRACTION = 0.5 106 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 107 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 108 | config.TRAIN.RPN_CLOBBER_POSITIVES = False 109 | # rpn bounding box regression params 110 | config.TRAIN.RPN_BBOX_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 111 | config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 112 | 113 | # used for end2end training 114 | # RPN proposal 115 | config.TRAIN.CXX_PROPOSAL = True 116 | config.TRAIN.RPN_NMS_THRESH = 0.7 117 
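Note that these defaults are merged with an experiment YAML by `update_config`, defined at the end of this file: unknown top-level keys raise a ValueError, and `PIXEL_MEANS`, `BBOX_WEIGHTS` and `SCALES` are coerced back to their expected types. A minimal usage sketch; the import line is an assumption and the YAML path is one of the files under experiments/manet_rfcn/cfgs/.

```
from config.config import config, update_config   # assumed import, mirroring the train/test scripts

# Merge an experiment YAML into the defaults above; every key must already exist in config.
update_config('./experiments/manet_rfcn/cfgs/phase-1.yaml')
print(config.TRAIN.lr, config.TRAIN.lr_step, config.gpus)
```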
| config.TRAIN.RPN_PRE_NMS_TOP_N = 12000 118 | config.TRAIN.RPN_POST_NMS_TOP_N = 2000 119 | config.TRAIN.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 120 | # approximate bounding box regression 121 | config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True 122 | config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0) 123 | config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2) 124 | 125 | # FGFA, trained image sampled from [min_offset, max_offset] 126 | config.TRAIN.MIN_OFFSET = -9 127 | config.TRAIN.MAX_OFFSET = 9 128 | 129 | config.TEST = edict() 130 | 131 | # R-CNN testing 132 | # use rpn to generate proposal 133 | config.TEST.HAS_RPN = False 134 | # size of images for each device 135 | config.TEST.BATCH_IMAGES = 1 136 | 137 | # RPN proposal 138 | config.TEST.CXX_PROPOSAL = True 139 | config.TEST.RPN_NMS_THRESH = 0.7 140 | config.TEST.RPN_PRE_NMS_TOP_N = 6000 141 | config.TEST.RPN_POST_NMS_TOP_N = 300 142 | config.TEST.RPN_MIN_SIZE = config.network.RPN_FEAT_STRIDE 143 | 144 | # RCNN nms 145 | config.TEST.NMS = 0.3 146 | config.TEST.max_per_image = 300 147 | 148 | # 149 | config.TEST.KEY_FRAME_INTERVAL = 9 150 | config.TEST.SEQ_NMS = False 151 | 152 | 153 | # Test Model Epoch 154 | config.TEST.test_epoch = 0 155 | 156 | 157 | def update_config(config_file): 158 | exp_config = None 159 | with open(config_file) as f: 160 | exp_config = edict(yaml.load(f)) 161 | for k, v in exp_config.items(): 162 | if k in config: 163 | if isinstance(v, dict): 164 | if k == 'TRAIN': 165 | if 'BBOX_WEIGHTS' in v: 166 | v['BBOX_WEIGHTS'] = np.array(v['BBOX_WEIGHTS']) 167 | elif k == 'network': 168 | if 'PIXEL_MEANS' in v: 169 | v['PIXEL_MEANS'] = np.array(v['PIXEL_MEANS']) 170 | for vk, vv in v.items(): 171 | config[k][vk] = vv 172 | else: 173 | if k == 'SCALES': 174 | config[k][0] = (tuple(v)) 175 | else: 176 | config[k] = v 177 | else: 178 | raise ValueError("key must exist in config.py") 179 | -------------------------------------------------------------------------------- /manet_rfcn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/manet_rfcn/core/__init__.py -------------------------------------------------------------------------------- /manet_rfcn/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import time 10 | import logging 11 | import mxnet as mx 12 | 13 | 14 | class Speedometer(object): 15 | def __init__(self, batch_size, frequent=50): 16 | self.batch_size = batch_size 17 | self.frequent = frequent 18 | self.init = False 19 | self.tic = 0 20 | self.last_count = 0 21 | 22 | def __call__(self, param): 23 | """Callback to Show speed.""" 24 | count = param.nbatch 25 | if self.last_count > count: 26 | self.init = False 27 | self.last_count = count 28 | 29 | if self.init: 30 | if count % self.frequent == 0: 31 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 32 | s = '' 33 | if param.eval_metric is not None: 34 | name, value = param.eval_metric.get() 35 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 36 | 
for n, v in zip(name, value): 37 | s += "%s=%f,\t" % (n, v) 38 | else: 39 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 40 | 41 | logging.info(s) 42 | print(s) 43 | self.tic = time.time() 44 | else: 45 | self.init = True 46 | self.tic = time.time() 47 | 48 | 49 | def do_checkpoint(prefix, means, stds): 50 | def _callback(iter_no, sym, arg, aux): 51 | weight = arg['rfcn_bbox_weight'] 52 | bias = arg['rfcn_bbox_bias'] 53 | repeat = bias.shape[0] / means.shape[0] 54 | 55 | arg['rfcn_bbox_weight_test'] = weight * mx.nd.repeat(mx.nd.array(stds), repeats=repeat).reshape((bias.shape[0], 1, 1, 1)) 56 | arg['rfcn_bbox_bias_test'] = arg['rfcn_bbox_bias'] * mx.nd.repeat(mx.nd.array(stds), repeats=repeat) + mx.nd.repeat(mx.nd.array(means), repeats=repeat) 57 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 58 | arg.pop('rfcn_bbox_weight_test') 59 | arg.pop('rfcn_bbox_bias_test') 60 | return _callback -------------------------------------------------------------------------------- /manet_rfcn/core/metric.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fully Motion-Aware Network for Video Object Detection 3 | # Licensed under The Apache-2.0 License [see LICENSE for details] 4 | # Extend FGFA by adding instance-level aggregation and motion pattern reasoning 5 | # Modified by Shiyao Wang 6 | # -------------------------------------------------------- 7 | 8 | 9 | import mxnet as mx 10 | import numpy as np 11 | 12 | 13 | def get_rpn_names(): 14 | pred = ['rpn_cls_prob', 'rpn_bbox_loss'] 15 | label = ['rpn_label', 'rpn_bbox_target', 'rpn_bbox_weight'] 16 | return pred, label 17 | 18 | 19 | def get_rcnn_names(cfg): 20 | if cfg.TRAIN.USE_OCCLUSION: 21 | pred = ['rcnn_cls_prob', 'rcnn_bbox_loss', 'delta_loss', 'cls_occluded_prob'] 22 | else: 23 | pred = ['rcnn_cls_prob', 'rcnn_bbox_loss', 'delta_loss'] 24 | label = ['rcnn_label', 'rcnn_bbox_target', 'rcnn_bbox_weight'] 25 | if cfg.TRAIN.ENABLE_OHEM or cfg.TRAIN.END2END: 26 | pred.append('rcnn_label') 27 | pred.append('delta_label') 28 | if cfg.TRAIN.USE_OCCLUSION: 29 | pred.append('occluded_label') 30 | if cfg.TRAIN.END2END: 31 | rpn_pred, rpn_label = get_rpn_names() 32 | pred = rpn_pred + pred 33 | label = rpn_label 34 | return pred, label 35 | 36 | 37 | class RPNAccMetric(mx.metric.EvalMetric): 38 | def __init__(self): 39 | super(RPNAccMetric, self).__init__('RPNAcc') 40 | self.pred, self.label = get_rpn_names() 41 | 42 | def update(self, labels, preds): 43 | pred = preds[self.pred.index('rpn_cls_prob')] 44 | label = labels[self.label.index('rpn_label')] 45 | 46 | # pred (b, c, p) or (b, c, h, w) 47 | pred_label = mx.ndarray.argmax_channel(pred).asnumpy().astype('int32') 48 | pred_label = pred_label.reshape((pred_label.shape[0], -1)) 49 | # label (b, p) 50 | label = label.asnumpy().astype('int32') 51 | 52 | # filter with keep_inds 53 | keep_inds = np.where(label != -1) 54 | pred_label = pred_label[keep_inds] 55 | label = label[keep_inds] 56 | 57 | self.sum_metric += np.sum(pred_label.flat == label.flat) 58 | self.num_inst += len(pred_label.flat) 59 | 60 | 61 | class RCNNAccMetric(mx.metric.EvalMetric): 62 | def __init__(self, cfg): 63 | super(RCNNAccMetric, self).__init__('RCNNAcc') 64 | self.e2e = cfg.TRAIN.END2END 65 | self.ohem = cfg.TRAIN.ENABLE_OHEM 66 | self.pred, self.label = get_rcnn_names(cfg) 67 | 68 | def update(self, labels, preds): 69 | pred = preds[self.pred.index('rcnn_cls_prob')] 70 | if 
self.ohem or self.e2e: 71 | label = preds[self.pred.index('rcnn_label')] 72 | else: 73 | label = labels[self.label.index('rcnn_label')] 74 | 75 | last_dim = pred.shape[-1] 76 | pred_label = pred.asnumpy().reshape(-1, last_dim).argmax(axis=1).astype('int32') 77 | label = label.asnumpy().reshape(-1,).astype('int32') 78 | 79 | # filter with keep_inds 80 | keep_inds = np.where(label != -1) 81 | pred_label = pred_label[keep_inds] 82 | label = label[keep_inds] 83 | 84 | self.sum_metric += np.sum(pred_label.flat == label.flat) 85 | self.num_inst += len(pred_label.flat) 86 | 87 | 88 | class RPNLogLossMetric(mx.metric.EvalMetric): 89 | def __init__(self): 90 | super(RPNLogLossMetric, self).__init__('RPNLogLoss') 91 | self.pred, self.label = get_rpn_names() 92 | 93 | def update(self, labels, preds): 94 | pred = preds[self.pred.index('rpn_cls_prob')] 95 | label = labels[self.label.index('rpn_label')] 96 | 97 | # label (b, p) 98 | label = label.asnumpy().astype('int32').reshape((-1)) 99 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 100 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 101 | pred = pred.reshape((label.shape[0], -1)) 102 | 103 | # filter with keep_inds 104 | keep_inds = np.where(label != -1)[0] 105 | label = label[keep_inds] 106 | cls = pred[keep_inds, label] 107 | 108 | cls += 1e-14 109 | cls_loss = -1 * np.log(cls) 110 | cls_loss = np.sum(cls_loss) 111 | self.sum_metric += cls_loss 112 | self.num_inst += label.shape[0] 113 | 114 | 115 | class RCNNLogLossMetric(mx.metric.EvalMetric): 116 | def __init__(self, cfg): 117 | super(RCNNLogLossMetric, self).__init__('RCNNLogLoss') 118 | self.e2e = cfg.TRAIN.END2END 119 | self.ohem = cfg.TRAIN.ENABLE_OHEM 120 | self.pred, self.label = get_rcnn_names(cfg) 121 | 122 | def update(self, labels, preds): 123 | pred = preds[self.pred.index('rcnn_cls_prob')] 124 | if self.ohem or self.e2e: 125 | label = preds[self.pred.index('rcnn_label')] 126 | else: 127 | label = labels[self.label.index('rcnn_label')] 128 | 129 | last_dim = pred.shape[-1] 130 | pred = pred.asnumpy().reshape(-1, last_dim) 131 | label = label.asnumpy().reshape(-1,).astype('int32') 132 | 133 | # filter with keep_inds 134 | keep_inds = np.where(label != -1)[0] 135 | label = label[keep_inds] 136 | cls = pred[keep_inds, label] 137 | 138 | cls += 1e-14 139 | cls_loss = -1 * np.log(cls) 140 | cls_loss = np.sum(cls_loss) 141 | self.sum_metric += cls_loss 142 | self.num_inst += label.shape[0] 143 | 144 | class RCNNOccludedLossMetric(mx.metric.EvalMetric): 145 | def __init__(self, cfg): 146 | super(RCNNOccludedLossMetric, self).__init__('RCNNOccludedLoss') 147 | self.e2e = cfg.TRAIN.END2END 148 | self.ohem = cfg.TRAIN.ENABLE_OHEM 149 | self.pred, self.label = get_rcnn_names(cfg) 150 | 151 | def update(self, labels, preds): 152 | pred = preds[self.pred.index('cls_occluded_prob')] 153 | label = preds[self.pred.index('occluded_label')] 154 | 155 | last_dim = pred.shape[-1] 156 | pred = pred.asnumpy().reshape(-1, last_dim) 157 | label = label.asnumpy().reshape(-1,).astype('int32') 158 | 159 | # filter with keep_inds 160 | keep_inds = np.where(label != -1)[0] 161 | label = label[keep_inds] 162 | cls = pred[keep_inds, label] 163 | 164 | cls += 1e-14 165 | cls_loss = -1 * np.log(cls) 166 | cls_loss = np.sum(cls_loss) 167 | self.sum_metric += cls_loss 168 | self.num_inst += label.shape[0] 169 | 170 | class RCNNOccludedAccMetric(mx.metric.EvalMetric): 171 | def __init__(self, cfg): 172 | super(RCNNOccludedAccMetric, 
self).__init__('RCNNOccludedAcc') 173 | self.e2e = cfg.TRAIN.END2END 174 | self.ohem = cfg.TRAIN.ENABLE_OHEM 175 | self.pred, self.label = get_rcnn_names(cfg) 176 | 177 | def update(self, labels, preds): 178 | pred = preds[self.pred.index('cls_occluded_prob')] 179 | label = preds[self.pred.index('occluded_label')] 180 | 181 | last_dim = pred.shape[-1] 182 | pred_label = pred.asnumpy().reshape(-1, last_dim).argmax(axis=1).astype('int32') 183 | label = label.asnumpy().reshape(-1,).astype('int32') 184 | 185 | # filter with keep_inds 186 | keep_inds = np.where(label != -1) 187 | pred_label = pred_label[keep_inds] 188 | label = label[keep_inds] 189 | 190 | self.sum_metric += np.sum(pred_label.flat == label.flat) 191 | self.num_inst += len(pred_label.flat) 192 | 193 | class RPNL1LossMetric(mx.metric.EvalMetric): 194 | def __init__(self): 195 | super(RPNL1LossMetric, self).__init__('RPNL1Loss') 196 | self.pred, self.label = get_rpn_names() 197 | 198 | def update(self, labels, preds): 199 | bbox_loss = preds[self.pred.index('rpn_bbox_loss')].asnumpy() 200 | 201 | # calculate num_inst (average on those kept anchors) 202 | label = labels[self.label.index('rpn_label')].asnumpy() 203 | num_inst = np.sum(label != -1) 204 | 205 | self.sum_metric += np.sum(bbox_loss) 206 | self.num_inst += num_inst 207 | 208 | 209 | class RCNNL1LossMetric(mx.metric.EvalMetric): 210 | def __init__(self, cfg): 211 | super(RCNNL1LossMetric, self).__init__('RCNNL1Loss') 212 | self.e2e = cfg.TRAIN.END2END 213 | self.ohem = cfg.TRAIN.ENABLE_OHEM 214 | self.pred, self.label = get_rcnn_names(cfg) 215 | 216 | def update(self, labels, preds): 217 | bbox_loss = preds[self.pred.index('rcnn_bbox_loss')].asnumpy() 218 | if self.ohem: 219 | label = preds[self.pred.index('rcnn_label')].asnumpy() 220 | else: 221 | if self.e2e: 222 | label = preds[self.pred.index('rcnn_label')].asnumpy() 223 | else: 224 | label = labels[self.label.index('rcnn_label')].asnumpy() 225 | 226 | # calculate num_inst (average on those kept anchors) 227 | num_inst = np.sum(label != -1) 228 | 229 | self.sum_metric += np.sum(bbox_loss) 230 | self.num_inst += num_inst 231 | 232 | class DELTAL1LossMetric(mx.metric.EvalMetric): 233 | def __init__(self, cfg): 234 | super(DELTAL1LossMetric, self).__init__('DELTAL1Loss') 235 | self.e2e = cfg.TRAIN.END2END 236 | self.ohem = cfg.TRAIN.ENABLE_OHEM 237 | self.pred, self.label = get_rcnn_names(cfg) 238 | 239 | def update(self, labels, preds): 240 | delta_loss = preds[self.pred.index('delta_loss')].asnumpy() 241 | label = preds[self.pred.index('delta_label')].asnumpy() 242 | 243 | # calculate num_inst (average on those kept anchors) 244 | num_inst = np.sum(label != -1) 245 | 246 | self.sum_metric += np.sum(delta_loss) 247 | self.num_inst += num_inst 248 | -------------------------------------------------------------------------------- /manet_rfcn/core/rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fully Motion-Aware Network for Video Object Detection 3 | # Licensed under The Apache-2.0 License [see LICENSE for details] 4 | # Extend FGFA by adding instance-level aggregation and motion pattern reasoning 5 | # Modified by Shiyao Wang 6 | # -------------------------------------------------------- 7 | """ 8 | Fast R-CNN: 9 | data = 10 | {'data': [num_images, c, h, w], 11 | 'rois': [num_rois, 5]} 12 | label = 13 | {'label': [num_rois], 14 | 'bbox_target': [num_rois, 4 * num_classes], 15 | 'bbox_weight': [num_rois, 4 * 
num_classes]} 16 | roidb extended format [image_index] 17 | ['image', 'height', 'width', 'flipped', 18 | 'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps', 'bbox_targets'] 19 | """ 20 | 21 | import numpy as np 22 | import numpy.random as npr 23 | 24 | from utils.image import get_image, tensor_vstack 25 | from bbox.bbox_transform import bbox_overlaps, bbox_transform 26 | from bbox.bbox_regression import expand_bbox_regression_targets 27 | 28 | 29 | def get_rcnn_testbatch(roidb, cfg): 30 | """ 31 | return a dict of testbatch 32 | :param roidb: ['image', 'flipped'] + ['boxes'] 33 | :return: data, label, im_info 34 | """ 35 | # assert len(roidb) == 1, 'Single batch only' 36 | imgs, roidb = get_image(roidb, cfg) 37 | im_array = imgs 38 | im_info = [np.array([roidb[i]['im_info']], dtype=np.float32) for i in range(len(roidb))] 39 | 40 | im_rois = [roidb[i]['boxes'] for i in range(len(roidb))] 41 | rois = im_rois 42 | rois_array = [np.hstack((0 * np.ones((rois[i].shape[0], 1)), rois[i])) for i in range(len(rois))] 43 | 44 | data = [{'data': im_array[i], 45 | 'rois': rois_array[i]} for i in range(len(roidb))] 46 | label = {} 47 | 48 | return data, label, im_info 49 | 50 | 51 | def get_rcnn_batch(roidb, cfg): 52 | """ 53 | return a dict of multiple images 54 | :param roidb: a list of dict, whose length controls batch size 55 | ['images', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlap'] => ['bbox_targets'] 56 | :return: data, label 57 | """ 58 | num_images = len(roidb) 59 | imgs, roidb = get_image(roidb, cfg) 60 | im_array = tensor_vstack(imgs) 61 | 62 | assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \ 63 | 'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS) 64 | 65 | if cfg.TRAIN.BATCH_ROIS == -1: 66 | rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb]) 67 | fg_rois_per_image = rois_per_image 68 | else: 69 | rois_per_image = cfg.TRAIN.BATCH_ROIS / cfg.TRAIN.BATCH_IMAGES 70 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int) 71 | 72 | rois_array = list() 73 | labels_array = list() 74 | bbox_targets_array = list() 75 | bbox_weights_array = list() 76 | 77 | for im_i in range(num_images): 78 | roi_rec = roidb[im_i] 79 | 80 | # infer num_classes from gt_overlaps 81 | num_classes = roi_rec['gt_overlaps'].shape[1] 82 | 83 | # label = class RoI has max overlap with 84 | rois = roi_rec['boxes'] 85 | labels = roi_rec['max_classes'] 86 | overlaps = roi_rec['max_overlaps'] 87 | bbox_targets = roi_rec['bbox_targets'] 88 | 89 | im_rois, labels, bbox_targets, bbox_weights = \ 90 | sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg, 91 | labels, overlaps, bbox_targets) 92 | 93 | # project im_rois 94 | # do not round roi 95 | rois = im_rois 96 | batch_index = im_i * np.ones((rois.shape[0], 1)) 97 | rois_array_this_image = np.hstack((batch_index, rois)) 98 | rois_array.append(rois_array_this_image) 99 | 100 | # add labels 101 | labels_array.append(labels) 102 | bbox_targets_array.append(bbox_targets) 103 | bbox_weights_array.append(bbox_weights) 104 | 105 | rois_array = np.array(rois_array) 106 | labels_array = np.array(labels_array) 107 | bbox_targets_array = np.array(bbox_targets_array) 108 | bbox_weights_array = np.array(bbox_weights_array) 109 | 110 | data = {'data': im_array, 111 | 'rois': rois_array} 112 | label = {'label': labels_array, 113 | 'bbox_target': bbox_targets_array, 114 | 'bbox_weight': bbox_weights_array} 115 | 116 | return 
data, label 117 | 118 | 119 | def sample_rois(rois, delta_list, fg_rois_per_image, rois_per_image, num_classes, cfg, 120 | labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, occluded=None): 121 | """ 122 | generate random sample of ROIs comprising foreground and background examples 123 | :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index 124 | :param fg_rois_per_image: foreground roi number 125 | :param rois_per_image: total roi number 126 | :param num_classes: number of classes 127 | :param labels: maybe precomputed 128 | :param overlaps: maybe precomputed (max_overlaps) 129 | :param bbox_targets: maybe precomputed 130 | :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls) 131 | :return: (labels, rois, bbox_targets, bbox_weights) 132 | """ 133 | #print 'rois shape is : ', rois.shape 134 | #print 'delta_list shape is : ', delta_list.shape 135 | #print 'gt_boxes shape is : ', gt_boxes.shape 136 | if labels is None: 137 | overlaps = bbox_overlaps(rois[:, 1:].astype(np.float), gt_boxes[:, :4].astype(np.float)) 138 | gt_assignment = overlaps.argmax(axis=1) 139 | overlaps = overlaps.max(axis=1) 140 | labels = gt_boxes[gt_assignment, 4] 141 | occluded_label = occluded[gt_assignment] 142 | delta_list_shape = delta_list.shape 143 | delta_bef = delta_list[0:delta_list_shape[0]/2] 144 | delta_aft = delta_list[delta_list_shape[0]/2: delta_list_shape[0]] 145 | bef_label = delta_bef[gt_assignment,:] 146 | aft_label = delta_aft[gt_assignment,:] 147 | 148 | # foreground RoI with FG_THRESH overlap 149 | fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 150 | # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs 151 | fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size) 152 | # Sample foreground regions without replacement 153 | if len(fg_indexes) > fg_rois_per_this_image: 154 | fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False) 155 | 156 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 157 | bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 158 | # Compute number of background RoIs to take from this image (guarding against there being fewer than desired) 159 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 160 | bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size) 161 | # Sample foreground regions without replacement 162 | if len(bg_indexes) > bg_rois_per_this_image: 163 | bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False) 164 | 165 | # indexes selected 166 | keep_indexes = np.append(fg_indexes, bg_indexes) 167 | 168 | # pad more to ensure a fixed minibatch size 169 | while keep_indexes.shape[0] < rois_per_image: 170 | gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0]) 171 | gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False) 172 | keep_indexes = np.append(keep_indexes, gap_indexes) 173 | 174 | # select labels 175 | labels = labels[keep_indexes] 176 | occluded_label = occluded_label[keep_indexes] 177 | bef_label = bef_label[keep_indexes] 178 | aft_label = aft_label[keep_indexes] 179 | #print 'bef_label: ', bef_label[:3] 180 | #print 'aft_label: ', aft_label[:3] 181 | # set labels of bg_rois to be 0 182 | labels[fg_rois_per_this_image:] = 0 183 | occluded_label[fg_rois_per_this_image:] = -1 184 | rois = rois[keep_indexes] 185 | 186 | delta_label = np.append(bef_label, aft_label, axis=0) 187 | 188 | # load or 
compute bbox_target 189 | if bbox_targets is not None: 190 | bbox_target_data = bbox_targets[keep_indexes, :] 191 | else: 192 | targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4]) 193 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 194 | targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) 195 | / np.array(cfg.TRAIN.BBOX_STDS)) 196 | bbox_target_data = np.hstack((labels[:, np.newaxis], targets)) 197 | 198 | bbox_targets, bbox_weights, delta_weights = \ 199 | expand_bbox_regression_targets(bbox_target_data, num_classes, cfg) 200 | 201 | delta_weights = np.tile(delta_weights, reps=(2,1)) 202 | count = 0 203 | for item in delta_label: 204 | if (item==0).all(): 205 | delta_weights[count,:] = 0 206 | count+=1 207 | 208 | return rois, labels, bbox_targets, bbox_weights, delta_label, delta_weights, occluded_label 209 | 210 | -------------------------------------------------------------------------------- /manet_rfcn/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/manet_rfcn/function/__init__.py -------------------------------------------------------------------------------- /manet_rfcn/function/test_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuqing Zhu, Shuhao Fu, Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import argparse 10 | import pprint 11 | import logging 12 | import time 13 | import os 14 | import numpy as np 15 | import mxnet as mx 16 | 17 | from symbols import * 18 | from dataset import * 19 | from core.loader import TestLoader 20 | from core.tester import Predictor, pred_eval, pred_eval_multiprocess 21 | from utils.load_model import load_param 22 | 23 | def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data, ctx): 24 | # infer shape 25 | data_shape_dict = dict(test_data.provide_data_single) 26 | sym_instance.infer_shape(data_shape_dict) 27 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 28 | 29 | # decide maximum shape 30 | data_names = [k[0] for k in test_data.provide_data_single] 31 | label_names = None 32 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))), 33 | ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))), 34 | ]] 35 | 36 | # create predictor 37 | predictor = Predictor(sym, data_names, label_names, 38 | context=ctx, max_data_shapes=max_data_shape, 39 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 40 | arg_params=arg_params, aux_params=aux_params) 41 | return predictor 42 | 43 | def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, motion_iou_path, 44 | ctx, prefix, epoch, 45 | vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None, enable_detailed_eval=True): 46 | if not logger: 47 | assert False, 'require a logger' 48 | 49 | # print cfg 50 | pprint.pprint(cfg) 51 | logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg))) 52 | 53 | # load symbol and testing data 54 | 55 | feat_sym_instance = 
eval(cfg.symbol + '.' + cfg.symbol)() 56 | aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 57 | 58 | feat_sym = feat_sym_instance.get_feat_symbol(cfg) 59 | aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg) 60 | 61 | imdb = eval(dataset)(image_set, root_path, dataset_path, motion_iou_path, result_path=output_path, enable_detailed_eval=enable_detailed_eval) 62 | roidb = imdb.gt_roidb() 63 | 64 | # get test data iter 65 | # split roidbs 66 | gpu_num = len(ctx) 67 | roidbs = [[] for x in range(gpu_num)] 68 | roidbs_seg_lens = np.zeros(gpu_num, dtype=np.int) 69 | for x in roidb: 70 | gpu_id = np.argmin(roidbs_seg_lens) 71 | roidbs[gpu_id].append(x) 72 | roidbs_seg_lens[gpu_id] += x['frame_seg_len'] 73 | 74 | # get test data iter 75 | test_datas = [TestLoader(x, cfg, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) for x in roidbs] 76 | 77 | # load model 78 | arg_params, aux_params = load_param(prefix, epoch, process=True) 79 | 80 | # create predictor 81 | feat_predictors = [get_predictor(feat_sym, feat_sym_instance, cfg, arg_params, aux_params, test_datas[i], [ctx[i]]) for i in range(gpu_num)] 82 | aggr_predictors = [get_predictor(aggr_sym, aggr_sym_instance, cfg, arg_params, aux_params, test_datas[i], [ctx[i]]) for i in range(gpu_num)] 83 | 84 | # start detection 85 | pred_eval_multiprocess(gpu_num, feat_predictors, aggr_predictors, test_datas, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger) 86 | -------------------------------------------------------------------------------- /manet_rfcn/function/test_rpn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import argparse 10 | import pprint 11 | import logging 12 | import mxnet as mx 13 | 14 | from symbols import * 15 | from dataset import * 16 | from core.loader import TestLoader 17 | from core.tester import Predictor, generate_proposals 18 | from utils.load_model import load_param 19 | 20 | 21 | def test_rpn(cfg, dataset, image_set, root_path, dataset_path, 22 | ctx, prefix, epoch, 23 | vis, shuffle, thresh, logger=None, output_path=None): 24 | # set up logger 25 | if not logger: 26 | logging.basicConfig() 27 | logger = logging.getLogger() 28 | logger.setLevel(logging.INFO) 29 | 30 | # rpn generate proposal cfg 31 | cfg.TEST.HAS_RPN = True 32 | 33 | # print cfg 34 | pprint.pprint(cfg) 35 | logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg))) 36 | 37 | # load symbol 38 | sym_instance = eval(cfg.symbol + '.' 
+ cfg.symbol)() 39 | sym = sym_instance.get_symbol_rpn(cfg, is_train=False) 40 | 41 | # load dataset and prepare imdb for training 42 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 43 | roidb = imdb.gt_roidb() 44 | test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True) 45 | 46 | # load model 47 | arg_params, aux_params = load_param(prefix, epoch) 48 | 49 | # infer shape 50 | data_shape_dict = dict(test_data.provide_data_single) 51 | sym_instance.infer_shape(data_shape_dict) 52 | 53 | # check parameters 54 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 55 | 56 | # decide maximum shape 57 | data_names = [k[0] for k in test_data.provide_data[0]] 58 | label_names = None if test_data.provide_label[0] is None else [k[0] for k in test_data.provide_label[0]] 59 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] 60 | 61 | # create predictor 62 | predictor = Predictor(sym, data_names, label_names, 63 | context=ctx, max_data_shapes=max_data_shape, 64 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 65 | arg_params=arg_params, aux_params=aux_params) 66 | 67 | # start testing 68 | imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh) 69 | 70 | all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 71 | logger.info(all_log_info) 72 | -------------------------------------------------------------------------------- /manet_rfcn/function/train_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import argparse 10 | import logging 11 | import pprint 12 | import os 13 | import mxnet as mx 14 | 15 | from symbols import * 16 | from core import callback, metric 17 | from core.loader import ROIIter 18 | from core.module import MutableModule 19 | from bbox.bbox_regression import add_bbox_regression_targets 20 | from utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 21 | from utils.load_model import load_param 22 | from utils.PrefetchingIter import PrefetchingIter 23 | from utils.lr_scheduler import WarmupMultiFactorScheduler 24 | 25 | 26 | def train_rcnn(cfg, dataset, image_set, root_path, dataset_path, 27 | frequent, kvstore, flip, shuffle, resume, 28 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 29 | train_shared, lr, lr_step, proposal, logger=None, output_path=None): 30 | # set up logger 31 | if not logger: 32 | logging.basicConfig() 33 | logger = logging.getLogger() 34 | logger.setLevel(logging.INFO) 35 | 36 | # load symbol 37 | sym_instance = eval(cfg.symbol + '.' 
+ cfg.symbol)() 38 | sym = sym_instance.get_symbol_rfcn(cfg, is_train=True) 39 | 40 | # setup multi-gpu 41 | batch_size = len(ctx) 42 | input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size 43 | 44 | # print cfg 45 | pprint.pprint(cfg) 46 | logger.info('training rcnn cfg:{}\n'.format(pprint.pformat(cfg))) 47 | 48 | # load dataset and prepare imdb for training 49 | image_sets = [iset for iset in image_set.split('+')] 50 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 51 | proposal=proposal, append_gt=True, flip=flip, result_path=output_path) 52 | for image_set in image_sets] 53 | roidb = merge_roidb(roidbs) 54 | roidb = filter_roidb(roidb, cfg) 55 | means, stds = add_bbox_regression_targets(roidb, cfg) 56 | 57 | # load training data 58 | train_data = ROIIter(roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, 59 | ctx=ctx, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) 60 | 61 | # infer max shape 62 | max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))] 63 | 64 | # infer shape 65 | data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 66 | sym_instance.infer_shape(data_shape_dict) 67 | 68 | # load and initialize params 69 | if resume: 70 | print('continue training from ', begin_epoch) 71 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 72 | else: 73 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 74 | sym_instance.init_weight_rfcn(cfg, arg_params, aux_params) 75 | 76 | # check parameter shapes 77 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 78 | 79 | # prepare training 80 | # create solver 81 | data_names = [k[0] for k in train_data.provide_data_single] 82 | label_names = [k[0] for k in train_data.provide_label_single] 83 | if train_shared: 84 | fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED 85 | else: 86 | fixed_param_prefix = cfg.network.FIXED_PARAMS 87 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 88 | logger=logger, context=ctx, 89 | max_data_shapes=[max_data_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) 90 | 91 | if cfg.TRAIN.RESUME: 92 | mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) 93 | 94 | 95 | # decide training params 96 | # metric 97 | eval_metric = metric.RCNNAccMetric(cfg) 98 | cls_metric = metric.RCNNLogLossMetric(cfg) 99 | bbox_metric = metric.RCNNL1LossMetric(cfg) 100 | eval_metrics = mx.metric.CompositeEvalMetric() 101 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 102 | eval_metrics.add(child_metric) 103 | # callback 104 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) 105 | epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), 106 | callback.do_checkpoint(prefix, means, stds)] 107 | # decide learning rate 108 | base_lr = lr 109 | lr_factor = cfg.TRAIN.lr_factor 110 | lr_epoch = [float(epoch) for epoch in lr_step.split(',')] 111 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 112 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 113 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 114 | print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) 115 | lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step) 116 | # 
optimizer 117 | optimizer_params = {'momentum': cfg.TRAIN.momentum, 118 | 'wd': cfg.TRAIN.wd, 119 | 'learning_rate': lr, 120 | 'lr_scheduler': lr_scheduler, 121 | 'rescale_grad': 1.0, 122 | 'clip_gradient': None} 123 | 124 | # train 125 | 126 | if not isinstance(train_data, PrefetchingIter): 127 | train_data = PrefetchingIter(train_data) 128 | 129 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 130 | batch_end_callback=batch_end_callback, kvstore=kvstore, 131 | optimizer='sgd', optimizer_params=optimizer_params, 132 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 133 | 134 | -------------------------------------------------------------------------------- /manet_rfcn/function/train_rpn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import argparse 10 | import logging 11 | import pprint 12 | import mxnet as mx 13 | 14 | from symbols import * 15 | from core import callback, metric 16 | from core.loader import AnchorLoader 17 | from core.module import MutableModule 18 | from utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 19 | from utils.load_model import load_param 20 | from utils.PrefetchingIter import PrefetchingIter 21 | from utils.lr_scheduler import WarmupMultiFactorScheduler 22 | 23 | 24 | def train_rpn(cfg, dataset, image_set, root_path, dataset_path, 25 | frequent, kvstore, flip, shuffle, resume, 26 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 27 | train_shared, lr, lr_step, logger=None, output_path=None): 28 | # set up logger 29 | if not logger: 30 | logging.basicConfig() 31 | logger = logging.getLogger() 32 | logger.setLevel(logging.INFO) 33 | 34 | # set up config 35 | cfg.TRAIN.BATCH_IMAGES = cfg.TRAIN.ALTERNATE.RPN_BATCH_IMAGES 36 | 37 | # load symbol 38 | sym_instance = eval(cfg.symbol + '.' 
+ cfg.symbol)() 39 | sym = sym_instance.get_symbol_rpn(cfg, is_train=True) 40 | feat_sym = sym.get_internals()['rpn_cls_score_output'] 41 | 42 | # setup multi-gpu 43 | batch_size = len(ctx) 44 | input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size 45 | 46 | # print cfg 47 | pprint.pprint(cfg) 48 | logger.info('training rpn cfg:{}\n'.format(pprint.pformat(cfg))) 49 | 50 | # load dataset and prepare imdb for training 51 | image_sets = [iset for iset in image_set.split('+')] 52 | roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, result_path=output_path, 53 | flip=flip) 54 | for image_set in image_sets] 55 | roidb = merge_roidb(roidbs) 56 | roidb = filter_roidb(roidb, cfg) 57 | 58 | # load training data 59 | train_data = AnchorLoader(feat_sym, roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, 60 | ctx=ctx, feat_stride=cfg.network.RPN_FEAT_STRIDE, anchor_scales=cfg.network.ANCHOR_SCALES, 61 | anchor_ratios=cfg.network.ANCHOR_RATIOS, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) 62 | 63 | # infer max shape 64 | max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))] 65 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 66 | print('providing maximum shape', max_data_shape, max_label_shape) 67 | 68 | # infer shape 69 | data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 70 | sym_instance.infer_shape(data_shape_dict) 71 | 72 | # load and initialize params 73 | if resume: 74 | print('continue training from ', begin_epoch) 75 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 76 | else: 77 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 78 | sym_instance.init_weight_rpn(cfg, arg_params, aux_params) 79 | 80 | # check parameter shapes 81 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 82 | 83 | # create solver 84 | data_names = [k[0] for k in train_data.provide_data_single] 85 | label_names = [k[0] for k in train_data.provide_label_single] 86 | if train_shared: 87 | fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED 88 | else: 89 | fixed_param_prefix = cfg.network.FIXED_PARAMS 90 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 91 | logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in xrange(batch_size)], 92 | max_label_shapes=[max_label_shape for _ in xrange(batch_size)], fixed_param_prefix=fixed_param_prefix) 93 | 94 | # decide training params 95 | # metric 96 | eval_metric = metric.RPNAccMetric() 97 | cls_metric = metric.RPNLogLossMetric() 98 | bbox_metric = metric.RPNL1LossMetric() 99 | eval_metrics = mx.metric.CompositeEvalMetric() 100 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 101 | eval_metrics.add(child_metric) 102 | # callback 103 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) 104 | # epoch_end_callback = mx.callback.do_checkpoint(prefix) 105 | epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True) 106 | # decide learning rate 107 | base_lr = lr 108 | lr_factor = cfg.TRAIN.lr_factor 109 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 110 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 111 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 112 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 113 | print('lr', lr, 'lr_epoch_diff', 
lr_epoch_diff, 'lr_iters', lr_iters) 114 | lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step) 115 | # optimizer 116 | optimizer_params = {'momentum': cfg.TRAIN.momentum, 117 | 'wd': cfg.TRAIN.wd, 118 | 'learning_rate': lr, 119 | 'lr_scheduler': lr_scheduler, 120 | 'rescale_grad': 1.0, 121 | 'clip_gradient': None} 122 | 123 | if not isinstance(train_data, PrefetchingIter): 124 | train_data = PrefetchingIter(train_data) 125 | 126 | # train 127 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 128 | batch_end_callback=batch_end_callback, kvstore=kvstore, 129 | optimizer='sgd', optimizer_params=optimizer_params, 130 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 131 | 132 | -------------------------------------------------------------------------------- /manet_rfcn/operator_cxx/psroi_pooling-inl.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2017 by Contributors 3 | * Copyright (c) 2017 Microsoft 4 | * Licensed under The Apache-2.0 License [see LICENSE for details] 5 | * \file psroi_pooling-inl.h 6 | * \brief psroi pooling operator and symbol 7 | * \author Yi Li, Tairui Chen, Guodong Zhang, Jifeng Dai 8 | */ 9 | #ifndef MXNET_OPERATOR_PSROI_POOLING_INL_H_ 10 | #define MXNET_OPERATOR_PSROI_POOLING_INL_H_ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "../mshadow_op.h" 20 | #include "../operator_common.h" 21 | 22 | 23 | namespace mxnet { 24 | namespace op { 25 | 26 | // Declare enumeration of input order to make code more intuitive. 27 | // These enums are only visible within this header 28 | namespace psroipool { 29 | enum PSROIPoolingOpInputs {kData, kBox}; 30 | enum PSROIPoolingOpOutputs {kOut, kMappingChannel}; 31 | } // psroipool 32 | 33 | struct PSROIPoolingParam : public dmlc::Parameter { 34 | // TShape pooled_size; 35 | float spatial_scale; 36 | int output_dim; 37 | int pooled_size; 38 | int group_size; 39 | DMLC_DECLARE_PARAMETER(PSROIPoolingParam) { 40 | DMLC_DECLARE_FIELD(spatial_scale).set_range(0.0, 1.0) 41 | .describe("Ratio of input feature map height (or w) to raw image height (or w). 
" 42 | "Equals the reciprocal of total stride in convolutional layers"); 43 | DMLC_DECLARE_FIELD(output_dim).describe("fix output dim"); 44 | DMLC_DECLARE_FIELD(pooled_size).describe("fix pooled size"); 45 | DMLC_DECLARE_FIELD(group_size).set_default(0).describe("fix group size"); 46 | } 47 | }; 48 | 49 | template 50 | class PSROIPoolingOp : public Operator { 51 | public: 52 | explicit PSROIPoolingOp(PSROIPoolingParam p) { 53 | this->param_ = p; 54 | } 55 | 56 | virtual void Forward(const OpContext &ctx, 57 | const std::vector &in_data, 58 | const std::vector &req, 59 | const std::vector &out_data, 60 | const std::vector &aux_args) { 61 | using namespace mshadow; 62 | size_t expected = 2; 63 | CHECK_EQ(in_data.size(), expected); 64 | CHECK_EQ(out_data.size(), expected); 65 | CHECK_EQ(out_data[psroipool::kOut].shape_[0], in_data[psroipool::kBox].shape_[0]); 66 | CHECK_EQ(out_data[psroipool::kMappingChannel].shape_[0], in_data[psroipool::kBox].shape_[0]); 67 | Stream *s = ctx.get_stream(); 68 | 69 | Tensor data = in_data[psroipool::kData].get(s); 70 | Tensor bbox = in_data[psroipool::kBox].get(s); 71 | Tensor out = out_data[psroipool::kOut].get(s); 72 | Tensor mapping_channel = out_data[psroipool::kMappingChannel].get(s); 73 | CHECK_EQ(data.CheckContiguous(), true); 74 | CHECK_EQ(bbox.CheckContiguous(), true); 75 | CHECK_EQ(out.CheckContiguous(), true); 76 | CHECK_EQ(mapping_channel.CheckContiguous(), true); 77 | out = -FLT_MAX; 78 | mapping_channel = -1.0f; 79 | PSROIPoolForward(out, data, bbox, mapping_channel, param_.spatial_scale, param_.output_dim, param_.group_size); 80 | } 81 | 82 | virtual void Backward(const OpContext &ctx, 83 | const std::vector &out_grad, 84 | const std::vector &in_data, 85 | const std::vector &out_data, 86 | const std::vector &req, 87 | const std::vector &in_grad, 88 | const std::vector &aux_args) { 89 | using namespace mshadow; 90 | size_t expected = 2; 91 | CHECK_EQ(in_data.size(), expected); 92 | CHECK_EQ(out_data.size(), expected); 93 | CHECK_EQ(out_grad[psroipool::kOut].shape_[0], in_data[psroipool::kBox].shape_[0]); 94 | CHECK_EQ(out_data[psroipool::kMappingChannel].shape_[0], in_data[psroipool::kBox].shape_[0]); 95 | CHECK_NE(req[psroipool::kData], kWriteInplace) << 96 | "ROIPooling: Backward doesn't support kWriteInplace."; 97 | CHECK_NE(req[psroipool::kBox], kWriteInplace) << 98 | "ROIPooling: Backward doesn't support kWriteInplace."; 99 | Stream *s = ctx.get_stream(); 100 | 101 | Tensor grad_out = out_grad[psroipool::kOut].get(s); 102 | Tensor bbox = in_data[psroipool::kBox].get(s); 103 | Tensor mapping_channel = out_data[psroipool::kMappingChannel].get(s); 104 | Tensor grad_in = in_grad[psroipool::kData].get(s); 105 | Tensor grad_roi = in_grad[psroipool::kBox].get(s); 106 | 107 | CHECK_EQ(grad_out.CheckContiguous(), true); 108 | CHECK_EQ(bbox.CheckContiguous(), true); 109 | CHECK_EQ(mapping_channel.CheckContiguous(), true); 110 | CHECK_EQ(grad_in.CheckContiguous(), true); 111 | 112 | if (kAddTo == req[psroipool::kData] || kWriteTo == req[psroipool::kData]) { 113 | if (kWriteTo == req[psroipool::kData]) { 114 | grad_in = 0.0f; 115 | } 116 | PSROIPoolBackwardAcc(grad_in, grad_out, bbox, mapping_channel, param_.spatial_scale, param_.output_dim); 117 | } 118 | if (kWriteTo == req[psroipool::kBox]) { 119 | grad_roi = 0.0f; 120 | } 121 | 122 | } 123 | 124 | private: 125 | PSROIPoolingParam param_; 126 | }; // class PSROIPoolingOp 127 | 128 | // Decalre Factory function, used for dispatch specialization 129 | template 130 | Operator* 
CreateOp(PSROIPoolingParam param, int dtype); 131 | 132 | #if DMLC_USE_CXX11 133 | class PSROIPoolingProp : public OperatorProperty { 134 | public: 135 | std::vector ListArguments() const override { 136 | return {"data", "rois"}; 137 | } 138 | 139 | std::vector ListOutputs() const override { 140 | return {"output", "maxidx"}; 141 | } 142 | 143 | int NumOutputs() const override { 144 | return 2; 145 | } 146 | 147 | int NumVisibleOutputs() const override { 148 | return 1; 149 | } 150 | 151 | void Init(const std::vector >& kwargs) override { 152 | param_.Init(kwargs); 153 | if (param_.group_size == 0) { 154 | param_.group_size = param_.pooled_size; 155 | } 156 | } 157 | 158 | std::map GetParams() const override { 159 | return param_.__DICT__(); 160 | } 161 | 162 | bool InferShape(std::vector *in_shape, 163 | std::vector *out_shape, 164 | std::vector *aux_shape) const override { 165 | using namespace mshadow; 166 | CHECK_EQ(in_shape->size(), 2) << "Input:[data, rois]"; 167 | 168 | // data: [batch_size, c, h, w] 169 | TShape dshape = in_shape->at(psroipool::kData); 170 | CHECK_EQ(dshape.ndim(), 4) << "data should be a 4D tensor"; 171 | 172 | // bbox: [num_rois, 5] 173 | TShape bshape = in_shape->at(psroipool::kBox); 174 | CHECK_EQ(bshape.ndim(), 2) << "bbox should be a 2D tensor of shape [batch, 5]"; 175 | CHECK_EQ(bshape[1], 5) << "bbox should be a 2D tensor of shape [batch, 5]"; 176 | 177 | // out: [num_rois, c, pooled_h, pooled_w] 178 | // mapping_channel: [num_rois, c, pooled_h, pooled_w] 179 | out_shape->clear(); 180 | out_shape->push_back( 181 | Shape4(bshape[0], param_.output_dim, param_.pooled_size, param_.pooled_size)); 182 | out_shape->push_back( 183 | Shape4(bshape[0], param_.output_dim, param_.pooled_size, param_.pooled_size)); 184 | return true; 185 | } 186 | 187 | bool InferType(std::vector *in_type, 188 | std::vector *out_type, 189 | std::vector *aux_type) const override { 190 | CHECK_EQ(in_type->size(), 2); 191 | int dtype = (*in_type)[0]; 192 | CHECK_EQ(dtype, (*in_type)[1]); 193 | CHECK_NE(dtype, -1) << "Input must have specified type"; 194 | 195 | out_type->clear(); 196 | out_type->push_back(dtype); 197 | out_type->push_back(dtype); 198 | return true; 199 | } 200 | 201 | OperatorProperty* Copy() const override { 202 | PSROIPoolingProp* psroi_pooling_sym = new PSROIPoolingProp(); 203 | psroi_pooling_sym->param_ = this->param_; 204 | return psroi_pooling_sym; 205 | } 206 | 207 | std::string TypeString() const override { 208 | return "_contrib_PSROIPooling"; 209 | } 210 | 211 | // decalre dependency and inplace optimization options 212 | std::vector DeclareBackwardDependency( 213 | const std::vector &out_grad, 214 | const std::vector &in_data, 215 | const std::vector &out_data) const override { 216 | return {out_grad[psroipool::kOut], in_data[psroipool::kBox], out_data[psroipool::kMappingChannel]}; 217 | } 218 | 219 | 220 | Operator* CreateOperator(Context ctx) const override { 221 | LOG(FATAL) << "Not Implemented."; 222 | return NULL; 223 | } 224 | 225 | Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, 226 | std::vector *in_type) const override; 227 | 228 | 229 | private: 230 | PSROIPoolingParam param_; 231 | }; // class PSROIPoolingProp 232 | #endif 233 | } // namespace op 234 | } // namespace mxnet 235 | #endif // MXNET_OPERATOR_PSROI_POOLING_INL_H_ -------------------------------------------------------------------------------- /manet_rfcn/operator_cxx/psroi_pooling.cc: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 by Contributors 3 | * Copyright (c) 2017 Microsoft 4 | * Licensed under The Apache-2.0 License [see LICENSE for details] 5 | * \file psroi_pooling.cc 6 | * \brief psroi pooling operator 7 | * \author Yi Li, Tairui Chen, Guodong Zhang, Jifeng Dai 8 | */ 9 | #include "./psroi_pooling-inl.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using std::max; 17 | using std::min; 18 | using std::floor; 19 | using std::ceil; 20 | 21 | namespace mshadow { 22 | template 23 | inline void PSROIPoolForward(const Tensor &out, 24 | const Tensor &data, 25 | const Tensor &bbox, 26 | const Tensor &mapping_channel, 27 | const float spatial_scale_, 28 | const int output_dim_, 29 | const int group_size_) { 30 | // NOT_IMPLEMENTED; 31 | return; 32 | } 33 | 34 | template 35 | inline void PSROIPoolBackwardAcc(const Tensor &in_grad, 36 | const Tensor &out_grad, 37 | const Tensor &bbox, 38 | const Tensor &mapping_channel, 39 | const float spatial_scale_, 40 | const int output_dim_) { 41 | // NOT_IMPLEMENTED; 42 | return; 43 | } 44 | } // namespace mshadow 45 | 46 | namespace mxnet { 47 | namespace op { 48 | 49 | template<> 50 | Operator *CreateOp(PSROIPoolingParam param, int dtype) { 51 | Operator* op = NULL; 52 | MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { 53 | op = new PSROIPoolingOp(param); 54 | }); 55 | return op; 56 | } 57 | 58 | Operator *PSROIPoolingProp::CreateOperatorEx(Context ctx, std::vector *in_shape, 59 | std::vector *in_type) const { 60 | std::vector out_shape, aux_shape; 61 | std::vector out_type, aux_type; 62 | CHECK(InferType(in_type, &out_type, &aux_type)); 63 | CHECK(InferShape(in_shape, &out_shape, &aux_shape)); 64 | DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); 65 | } 66 | 67 | DMLC_REGISTER_PARAMETER(PSROIPoolingParam); 68 | 69 | MXNET_REGISTER_OP_PROPERTY(_contrib_PSROIPooling, PSROIPoolingProp) 70 | .describe("Performs region-of-interest pooling on inputs. Resize bounding box coordinates by " 71 | "spatial_scale and crop input feature maps accordingly. The cropped feature maps are pooled " 72 | "by max pooling to a fixed size output indicated by pooled_size. batch_size will change to " 73 | "the number of region bounding boxes after PSROIPooling") 74 | .add_argument("data", "Symbol", "Input data to the pooling operator, a 4D Feature maps") 75 | .add_argument("rois", "Symbol", "Bounding box coordinates, a 2D array of " 76 | "[[batch_index, x1, y1, x2, y2]]. (x1, y1) and (x2, y2) are top left and down right corners " 77 | "of designated region of interest. 
batch_index indicates the index of corresponding image " 78 | "in the input data") 79 | .add_arguments(PSROIPoolingParam::__FIELDS__()); 80 | } // namespace op 81 | } // namespace mxnet -------------------------------------------------------------------------------- /manet_rfcn/operator_py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshy31/MANet_for_Video_Object_Detection/b65c2a452be6b2331e17f99117b83f803cecedf0/manet_rfcn/operator_py/__init__.py -------------------------------------------------------------------------------- /manet_rfcn/operator_py/box_annotator_ohem.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Yuwen Xiong 6 | # -------------------------------------------------------- 7 | 8 | """ 9 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. 10 | """ 11 | 12 | import mxnet as mx 13 | import numpy as np 14 | from distutils.util import strtobool 15 | 16 | 17 | 18 | 19 | class BoxAnnotatorOHEMOperator(mx.operator.CustomOp): 20 | def __init__(self, num_classes, num_reg_classes, roi_per_img): 21 | super(BoxAnnotatorOHEMOperator, self).__init__() 22 | self._num_classes = num_classes 23 | self._num_reg_classes = num_reg_classes 24 | self._roi_per_img = roi_per_img 25 | 26 | def forward(self, is_train, req, in_data, out_data, aux): 27 | 28 | cls_score = in_data[0] 29 | bbox_pred = in_data[1] 30 | labels = in_data[2].asnumpy() 31 | bbox_targets = in_data[3] 32 | bbox_weights = in_data[4] 33 | 34 | per_roi_loss_cls = mx.nd.SoftmaxActivation(cls_score) + 1e-14 35 | per_roi_loss_cls = per_roi_loss_cls.asnumpy() 36 | per_roi_loss_cls = per_roi_loss_cls[np.arange(per_roi_loss_cls.shape[0], dtype='int'), labels.astype('int')] 37 | per_roi_loss_cls = -1 * np.log(per_roi_loss_cls) 38 | per_roi_loss_cls = np.reshape(per_roi_loss_cls, newshape=(-1,)) 39 | 40 | per_roi_loss_bbox = bbox_weights * mx.nd.smooth_l1((bbox_pred - bbox_targets), scalar=1.0) 41 | per_roi_loss_bbox = mx.nd.sum(per_roi_loss_bbox, axis=1).asnumpy() 42 | 43 | top_k_per_roi_loss = np.argsort(per_roi_loss_cls + per_roi_loss_bbox) 44 | labels_ohem = labels 45 | labels_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = -1 46 | bbox_weights_ohem = bbox_weights.asnumpy() 47 | bbox_weights_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = 0 48 | 49 | labels_ohem = mx.nd.array(labels_ohem) 50 | bbox_weights_ohem = mx.nd.array(bbox_weights_ohem) 51 | 52 | for ind, val in enumerate([labels_ohem, bbox_weights_ohem]): 53 | self.assign(out_data[ind], req[ind], val) 54 | 55 | 56 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 57 | for i in range(len(in_grad)): 58 | self.assign(in_grad[i], req[i], 0) 59 | 60 | 61 | @mx.operator.register('BoxAnnotatorOHEM') 62 | class BoxAnnotatorOHEMProp(mx.operator.CustomOpProp): 63 | def __init__(self, num_classes, num_reg_classes, roi_per_img): 64 | super(BoxAnnotatorOHEMProp, self).__init__(need_top_grad=False) 65 | self._num_classes = int(num_classes) 66 | self._num_reg_classes = int(num_reg_classes) 67 | self._roi_per_img = int(roi_per_img) 68 | 69 | def list_arguments(self): 70 | return ['cls_score', 'bbox_pred', 'labels', 'bbox_targets', 'bbox_weights'] 71 | 72 | def 
list_outputs(self): 73 | return ['labels_ohem', 'bbox_weights_ohem'] 74 | 75 | def infer_shape(self, in_shape): 76 | labels_shape = in_shape[2] 77 | bbox_weights_shape = in_shape[4] 78 | 79 | return in_shape, \ 80 | [labels_shape, bbox_weights_shape] 81 | 82 | def create_operator(self, ctx, shapes, dtypes): 83 | return BoxAnnotatorOHEMOperator(self._num_classes, self._num_reg_classes, self._roi_per_img) 84 | 85 | def declare_backward_dependency(self, out_grad, in_data, out_data): 86 | return [] 87 | -------------------------------------------------------------------------------- /manet_rfcn/operator_py/proposal_target.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fully Motion-Aware Network for Video Object Detection 3 | # Licensed under The Apache-2.0 License [see LICENSE for details] 4 | # Extend FGFA by adding instance-level aggregation and motion pattern reasoning 5 | # Modified by Shiyao Wang 6 | # -------------------------------------------------------- 7 | 8 | 9 | 10 | """ 11 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. 12 | """ 13 | 14 | import mxnet as mx 15 | import numpy as np 16 | from distutils.util import strtobool 17 | from easydict import EasyDict as edict 18 | import cPickle 19 | 20 | 21 | from core.rcnn import sample_rois 22 | 23 | DEBUG = False 24 | 25 | 26 | class ProposalTargetOperator(mx.operator.CustomOp): 27 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction): 28 | super(ProposalTargetOperator, self).__init__() 29 | self._num_classes = num_classes 30 | self._batch_images = batch_images 31 | self._batch_rois = batch_rois 32 | self._cfg = cfg 33 | self._fg_fraction = fg_fraction 34 | 35 | if DEBUG: 36 | self._count = 0 37 | self._fg_num = 0 38 | self._bg_num = 0 39 | 40 | def forward(self, is_train, req, in_data, out_data, aux): 41 | assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \ 42 | 'batchimages {} must devide batch_rois {}'.format(self._batch_images, self._batch_rois) 43 | all_rois = in_data[0].asnumpy() 44 | gt_boxes = in_data[1].asnumpy() 45 | delta_list = in_data[2].asnumpy() 46 | occluded = in_data[3].asnumpy() 47 | 48 | if self._batch_rois == -1: 49 | rois_per_image = all_rois.shape[0] + gt_boxes.shape[0] 50 | fg_rois_per_image = rois_per_image 51 | else: 52 | rois_per_image = self._batch_rois / self._batch_images 53 | fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int) 54 | 55 | 56 | # Include ground-truth boxes in the set of candidate rois 57 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 58 | all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1])))) 59 | # Sanity check: single batch only 60 | assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported' 61 | 62 | rois, labels, bbox_targets, bbox_weights, delta_label, delta_weights, occluded_label = \ 63 | sample_rois(all_rois, delta_list, fg_rois_per_image, rois_per_image, self._num_classes, self._cfg, gt_boxes=gt_boxes, occluded=occluded) 64 | 65 | if DEBUG: 66 | print "labels=", labels 67 | print 'num fg: {}'.format((labels > 0).sum()) 68 | print 'num bg: {}'.format((labels == 0).sum()) 69 | self._count += 1 70 | self._fg_num += (labels > 0).sum() 71 | self._bg_num += (labels == 0).sum() 72 | print "self._count=", self._count 73 | print 'num fg avg: {}'.format(self._fg_num / self._count) 74 | print 'num bg avg: 
{}'.format(self._bg_num / self._count) 75 | print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 76 | 77 | for ind, val in enumerate([rois, labels, bbox_targets, bbox_weights, delta_label, delta_weights, occluded_label]): 78 | self.assign(out_data[ind], req[ind], val) 79 | 80 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 81 | self.assign(in_grad[0], req[0], 0) 82 | self.assign(in_grad[1], req[1], 0) 83 | self.assign(in_grad[2], req[2], 0) 84 | self.assign(in_grad[3], req[3], 0) 85 | 86 | 87 | @mx.operator.register('proposal_target') 88 | class ProposalTargetProp(mx.operator.CustomOpProp): 89 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction='0.25'): 90 | super(ProposalTargetProp, self).__init__(need_top_grad=False) 91 | self._num_classes = int(num_classes) 92 | self._batch_images = int(batch_images) 93 | self._batch_rois = int(batch_rois) 94 | self._cfg = cPickle.loads(cfg) 95 | self._fg_fraction = float(fg_fraction) 96 | 97 | def list_arguments(self): 98 | return ['rois', 'gt_boxes', 'delta_list', 'occluded'] 99 | 100 | def list_outputs(self): 101 | return ['rois_output', 'label', 'bbox_target', 'bbox_weight', 'delta_label', 'delta_weight', 'occluded_label'] 102 | 103 | def infer_shape(self, in_shape): 104 | rpn_rois_shape = in_shape[0] 105 | gt_boxes_shape = in_shape[1] 106 | delta_list_shape = in_shape[2] 107 | occluded_shape = in_shape[3] 108 | 109 | rois = rpn_rois_shape[0] + gt_boxes_shape[0] if self._batch_rois == -1 else self._batch_rois 110 | 111 | output_rois_shape = (rois, 5) 112 | label_shape = (rois, ) 113 | occluded_label_shape = (rois, ) 114 | bbox_target_shape = (rois, self._num_classes * 4) 115 | bbox_weight_shape = (rois, self._num_classes * 4) 116 | delta_label_shape = (rois*2, 4) 117 | delta_weight_shape = (rois*2, 8) 118 | 119 | return [rpn_rois_shape, gt_boxes_shape, delta_list_shape, occluded_shape], \ 120 | [output_rois_shape, label_shape, bbox_target_shape, bbox_weight_shape, delta_label_shape, delta_weight_shape, occluded_label_shape] 121 | 122 | def create_operator(self, ctx, shapes, dtypes): 123 | return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._cfg, self._fg_fraction) 124 | 125 | def declare_backward_dependency(self, out_grad, in_data, out_data): 126 | return [] 127 | -------------------------------------------------------------------------------- /manet_rfcn/operator_py/rpn_inv_normalize.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import mxnet as mx 9 | import numpy as np 10 | from distutils.util import strtobool 11 | 12 | class RPNInvNormalizeOperator(mx.operator.CustomOp): 13 | def __init__(self, num_anchors, bbox_mean, bbox_std): 14 | super(RPNInvNormalizeOperator, self).__init__() 15 | self._num_anchors = num_anchors 16 | self._bbox_mean = mx.ndarray.Reshape(mx.nd.array(bbox_mean), shape=(1,4,1,1)) 17 | self._bbox_std = mx.ndarray.Reshape(mx.nd.array(bbox_std), shape=(1,4,1,1)) 18 | 19 | def forward(self, is_train, req, in_data, out_data, aux): 20 | bbox_pred = in_data[0] 21 | tile_shape = (bbox_pred.shape[0], self._num_anchors, bbox_pred.shape[2], bbox_pred.shape[3]) 22 | bbox_mean = 
mx.ndarray.tile(self._bbox_mean.as_in_context(bbox_pred.context), reps=tile_shape) 23 | bbox_std = mx.ndarray.tile(self._bbox_std.as_in_context(bbox_pred.context), reps=tile_shape) 24 | bbox_pred = bbox_pred * bbox_std + bbox_mean 25 | 26 | self.assign(out_data[0], req[0], bbox_pred) 27 | 28 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 29 | self.assign(in_grad[0], req[0], 0) 30 | 31 | @mx.operator.register('rpn_inv_normalize') 32 | class RPNInvNormalizeProp(mx.operator.CustomOpProp): 33 | def __init__(self, num_anchors, bbox_mean='(0.0, 0.0, 0.0, 0.0)', bbox_std='0.1, 0.1, 0.2, 0.2'): 34 | super(RPNInvNormalizeProp, self).__init__(need_top_grad=False) 35 | self._num_anchors = int(num_anchors) 36 | self._bbox_mean = np.fromstring(bbox_mean[1:-1], dtype=float, sep=',') 37 | self._bbox_std = np.fromstring(bbox_std[1:-1], dtype=float, sep=',') 38 | 39 | def list_arguments(self): 40 | return ['bbox_pred'] 41 | 42 | def list_outputs(self): 43 | return ['out_bbox_pred'] 44 | 45 | def infer_shape(self, in_shape): 46 | 47 | return [in_shape[0]], \ 48 | [in_shape[0]] 49 | 50 | def create_operator(self, ctx, shapes, dtypes): 51 | return RPNInvNormalizeOperator(self._num_anchors, self._bbox_mean, self._bbox_std) 52 | 53 | def declare_backward_dependency(self, out_grad, in_data, out_data): 54 | return [] 55 | -------------------------------------------------------------------------------- /manet_rfcn/operator_py/tile_as.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | #Flow-Guided-Feature-Aggregation 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Written by Xizhou Zhu 6 | # -------------------------------------------------------- 7 | 8 | import mxnet as mx 9 | import numpy as np 10 | from distutils.util import strtobool 11 | 12 | class TileAsOperator(mx.operator.CustomOp): 13 | def __init__(self): 14 | super(TileAsOperator, self).__init__() 15 | 16 | def forward(self, is_train, req, in_data, out_data, aux): 17 | data_content = in_data[0] 18 | data_tiled = mx.ndarray.tile(data_content, reps=(in_data[1].shape[0], 1, 1, 1)) 19 | self.assign(out_data[0], req[0], data_tiled) 20 | 21 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 22 | self.assign(in_grad[0], req[0], 0) 23 | self.assign(in_grad[1], req[1], 0) 24 | 25 | 26 | @mx.operator.register('tile_as') 27 | class TileAsProp(mx.operator.CustomOpProp): 28 | def __init__(self): 29 | super(TileAsProp, self).__init__(need_top_grad=False) 30 | 31 | def list_arguments(self): 32 | return ['data_content', 'data_shape'] 33 | 34 | def list_outputs(self): 35 | return ['data_tiled'] 36 | 37 | def infer_shape(self, in_shape): 38 | data_content_shape = in_shape[0] 39 | data_shape_shape = in_shape[1] 40 | 41 | tiled_data_shape = (data_shape_shape[0], data_content_shape[1], data_content_shape[2], data_content_shape[3]) 42 | 43 | return [data_content_shape, data_shape_shape], \ 44 | [tiled_data_shape] 45 | 46 | def create_operator(self, ctx, shapes, dtypes): 47 | return TileAsOperator() 48 | 49 | def declare_backward_dependency(self, out_grad, in_data, out_data): 50 | return out_grad 51 | -------------------------------------------------------------------------------- /manet_rfcn/symbols/__init__.py: -------------------------------------------------------------------------------- 1 | import resnet_v1_101_manet_rfcn 2 | 
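3 | # Note: network symbols are resolved by name. train_end2end.py does
4 | # `from symbols import *` and then `eval(config.symbol + '.' + config.symbol)()`,
5 | # so any new symbol definition file must be imported in this package to be found.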
-------------------------------------------------------------------------------- /manet_rfcn/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Flow-Guided Feature Aggregation 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import _init_paths 10 | 11 | import cv2 12 | import argparse 13 | import os 14 | import sys 15 | import time 16 | import logging 17 | from config.config import config, update_config 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='Test a R-FCN network') 21 | # general 22 | parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str) 23 | 24 | args, rest = parser.parse_known_args() 25 | update_config(args.cfg) 26 | 27 | # rcnn 28 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 29 | parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true') 30 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 31 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 32 | args = parser.parse_args() 33 | return args 34 | 35 | args = parse_args() 36 | curr_path = os.path.abspath(os.path.dirname(__file__)) 37 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 38 | 39 | import mxnet as mx 40 | from function.test_rcnn import test_rcnn 41 | from utils.create_logger import create_logger 42 | 43 | 44 | def main(): 45 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 46 | print args 47 | 48 | logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set) 49 | 50 | test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path, config.dataset.motion_iou_path, 51 | ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch, 52 | args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path, 53 | enable_detailed_eval=config.dataset.enable_detailed_eval) 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /manet_rfcn/train_end2end.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fully Motion-Aware Network for Video Object Detection 3 | # Licensed under The Apache-2.0 License [see LICENSE for details] 4 | # Extend FGFA by adding instance-level aggregation and motion pattern reasoning 5 | # Modified by Shiyao Wang 6 | # -------------------------------------------------------- 7 | 8 | import _init_paths 9 | 10 | import cv2 11 | import time 12 | import argparse 13 | import logging 14 | import pprint 15 | import os 16 | import sys 17 | from config.config import config, update_config 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='Train R-FCN network') 21 | # general 22 | parser.add_argument('--cfg', help='experiment configure file name', required=True, 
type=str) 23 | 24 | args, rest = parser.parse_known_args() 25 | # update config 26 | update_config(args.cfg) 27 | 28 | # training 29 | parser.add_argument('--frequent', help='frequency of logging', default=config.default.frequent, type=int) 30 | args = parser.parse_args() 31 | return args 32 | 33 | args = parse_args() 34 | curr_path = os.path.abspath(os.path.dirname(__file__)) 35 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION)) 36 | 37 | import shutil 38 | import numpy as np 39 | import mxnet as mx 40 | 41 | from symbols import * 42 | from core import callback, metric 43 | from core.loader import AnchorLoader 44 | from core.module import MutableModule 45 | from utils.create_logger import create_logger 46 | from utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 47 | from utils.load_model import load_param 48 | from utils.PrefetchingIter import PrefetchingIter 49 | from utils.lr_scheduler import WarmupMultiFactorScheduler 50 | 51 | def train_net(args, ctx, pretrained, pretrained_flow, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): 52 | logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) 53 | prefix = os.path.join(final_output_path, prefix) 54 | 55 | # load symbol 56 | shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) 57 | sym_instance = eval(config.symbol + '.' + config.symbol)() 58 | sym = sym_instance.get_train_symbol(config) 59 | feat_sym = sym.get_internals()['rpn_cls_score_output'] 60 | 61 | # setup multi-gpu 62 | batch_size = len(ctx) 63 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 64 | 65 | # print config 66 | pprint.pprint(config) 67 | logger.info('training config:{}\n'.format(pprint.pformat(config))) 68 | 69 | # load dataset and prepare imdb for training 70 | image_sets = [iset for iset in config.dataset.image_set.split('+')] 71 | roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, 72 | flip=config.TRAIN.FLIP) 73 | for image_set in image_sets] 74 | roidb = merge_roidb(roidbs) 75 | roidb = filter_roidb(roidb, config) 76 | # load training data 77 | train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, 78 | feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, 79 | anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, 80 | normalize_target=config.network.NORMALIZE_RPN, bbox_mean=config.network.ANCHOR_MEANS, 81 | bbox_std=config.network.ANCHOR_STDS) 82 | 83 | # infer max shape 84 | max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), 85 | ('data_bef', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), 86 | ('data_aft', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 87 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 88 | max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) 89 | max_data_shape.append(('delta_bef_gt', (config.TRAIN.BATCH_IMAGES, 100, 4))) 90 | max_data_shape.append(('delta_aft_gt', (config.TRAIN.BATCH_IMAGES, 100, 4))) 91 | max_data_shape.append(('occluded', (config.TRAIN.BATCH_IMAGES, 100, 1))) 92 | print 'providing maximum shape', max_data_shape, max_label_shape 93 | 94 | 
data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 95 | pprint.pprint(data_shape_dict) 96 | sym_instance.infer_shape(data_shape_dict) 97 | 98 | # load and initialize params 99 | if config.TRAIN.RESUME: 100 | print('continue training from ', begin_epoch) 101 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 102 | else: 103 | print config.TRAIN.USE_OCCLUSION 104 | if config.TRAIN.USE_OCCLUSION: 105 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 106 | sym_instance.init_occluded_weight(config, arg_params, aux_params) 107 | else: 108 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 109 | arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True) 110 | arg_params.update(arg_params_flow) 111 | aux_params.update(aux_params_flow) 112 | sym_instance.init_weight(config, arg_params, aux_params) 113 | 114 | # check parameter shapes 115 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 116 | 117 | # create solver 118 | fixed_param_prefix = config.network.FIXED_PARAMS 119 | data_names = [k[0] for k in train_data.provide_data_single] 120 | label_names = [k[0] for k in train_data.provide_label_single] 121 | 122 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 123 | logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], 124 | max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) 125 | 126 | if config.TRAIN.RESUME: 127 | mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) 128 | 129 | # decide training params 130 | # metric 131 | rpn_eval_metric = metric.RPNAccMetric() 132 | rpn_cls_metric = metric.RPNLogLossMetric() 133 | rpn_bbox_metric = metric.RPNL1LossMetric() 134 | delta_metric = metric.DELTAL1LossMetric(config) 135 | eval_metric = metric.RCNNAccMetric(config) 136 | cls_metric = metric.RCNNLogLossMetric(config) 137 | bbox_metric = metric.RCNNL1LossMetric(config) 138 | if config.TRAIN.USE_OCCLUSION: 139 | occluded_metric = metric.RCNNOccludedLossMetric(config) 140 | occluded_eval_metric = metric.RCNNOccludedAccMetric(config) 141 | eval_metrics = mx.metric.CompositeEvalMetric() 142 | # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric 143 | #for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, delta_metric, eval_metric, cls_metric, bbox_metric]: 144 | if config.TRAIN.USE_OCCLUSION: 145 | for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, delta_metric, eval_metric, cls_metric, bbox_metric, occluded_metric, occluded_eval_metric]: 146 | eval_metrics.add(child_metric) 147 | else: 148 | for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, delta_metric, eval_metric, cls_metric, bbox_metric]: 149 | eval_metrics.add(child_metric) 150 | # callback 151 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) 152 | means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) 153 | stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) 154 | epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)] 155 | # decide learning rate 156 | base_lr = lr 157 | lr_factor = config.TRAIN.lr_factor 158 | lr_epoch = [float(epoch) for 
epoch in lr_step.split(',')] 159 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 160 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 161 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 162 | print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) 163 | lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) 164 | # optimizer 165 | optimizer_params = {'momentum': config.TRAIN.momentum, 166 | 'wd': config.TRAIN.wd, 167 | 'learning_rate': lr, 168 | 'lr_scheduler': lr_scheduler, 169 | 'rescale_grad': 1.0, 170 | 'clip_gradient': None} 171 | 172 | if not isinstance(train_data, PrefetchingIter): 173 | train_data = PrefetchingIter(train_data) 174 | 175 | # train 176 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 177 | batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, 178 | optimizer='sgd', optimizer_params=optimizer_params, 179 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 180 | 181 | 182 | def main(): 183 | print('Called with argument:', args) 184 | ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')] 185 | train_net(args, ctx, config.network.pretrained, config.network.pretrained_flow, config.network.pretrained_epoch, config.TRAIN.model_prefix, 186 | config.TRAIN.begin_epoch, config.TRAIN.end_epoch, config.TRAIN.lr, config.TRAIN.lr_step) 187 | 188 | if __name__ == '__main__': 189 | main() 190 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | python -u experiments/manet_rfcn/manet_rfcn_end2end_train_test.py --cfg experiments/manet_rfcn/cfgs/phase-1.yaml 2 | --------------------------------------------------------------------------------
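`run.sh` above only launches phase-1. The config directory also ships `phase-2.yaml` and `phase-3.yaml`, so a plausible way to run the full three-phase training is to invoke the same entry script once per config. This is a sketch under the assumption that each `phase-N.yaml` already points at the checkpoints produced by the previous phase; verify those paths before use:

```bash
#!/usr/bin/env bash
# Sketch: run the three MANet training phases back to back.
# Assumes each phase-N.yaml encodes its own pretrained-model / begin-epoch settings.
set -e
for phase in phase-1 phase-2 phase-3; do
    python -u experiments/manet_rfcn/manet_rfcn_end2end_train_test.py \
        --cfg experiments/manet_rfcn/cfgs/${phase}.yaml
done
```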